├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── include └── telemetry.hrl ├── rebar.config ├── rebar.lock ├── rebar3 ├── src ├── gen_opentsdb.erl ├── telemetry.app.src ├── telemetry.erl ├── telemetry_api.erl ├── telemetry_app.erl ├── telemetry_config.erl ├── telemetry_forwarder.erl ├── telemetry_histo.erl ├── telemetry_receiver.erl ├── telemetry_store.erl └── telemetry_sup.erl └── test └── .gitkeep /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | working_directory: ~/telemetry-net 5 | docker: 6 | - image: erlang:20.1 7 | steps: 8 | - checkout 9 | - restore_cache: 10 | key: rebar3 11 | - run: 12 | name: Updating rebar3 13 | command: ./rebar3 update 14 | - restore_cache: 15 | key: rebar3-deps-{{ checksum "rebar.lock" }} 16 | - run: 17 | name: Fetching dependencies 18 | command: ./rebar3 get-deps 19 | - save_cache: 20 | key: rebar3-deps-{{ checksum "rebar.lock" }} 21 | paths: 22 | - _build/default/lib 23 | - run: 24 | name: Building 25 | command: ./rebar3 compile 26 | - run: 27 | name: Checking eunit tests 28 | command: ./rebar3 eunit -v 29 | - run: 30 | name: Checking common tests 31 | command: ./rebar3 ct -v 32 | - run: 33 | name: Running cross reference analysis 34 | command: ./rebar3 xref 35 | - run: 36 | name: Running static analyzer 37 | command: ./rebar3 dialyzer 38 | - run: 39 | name: Checking code style 40 | command: ./rebar3 as lint lint 41 | - run: 42 | name: Performing coverage analysis 43 | command: ./rebar3 as test cover 44 | - run: 45 | name: Generating cover report 46 | command: ./rebar3 as test covertool generate 47 | - save_cache: 48 | key: rebar3 49 | paths: 50 | - ~/.cache/rebar3 51 | - run: 52 | name: Installing python 53 | command: | 54 | apt-get update 55 | apt-get install -y --no-install-recommends python3-pip 56 | - run: 57 | name: Installing codecov 58 | command: pip3 install codecov 59 | - run: 60 | name: 
Sending cover report 61 | command: | 62 | codecov -X gcov -f _build/test/covertool/telemetry.covertool.xml 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | .eunit 4 | deps 5 | *.o 6 | *.beam 7 | *.plt 8 | erl_crash.dump 9 | ebin 10 | rel/example_project 11 | .concrete/DEV_MODE 12 | .rebar 13 | _build 14 | TEST-*.xml 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2013 Mesosphere 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | all: compile 4 | 5 | ## 6 | ## Compilation targets 7 | ## 8 | 9 | compile: 10 | ./rebar3 compile 11 | 12 | clean: 13 | ./rebar3 clean 14 | 15 | ## 16 | ## Test targets 17 | ## 18 | 19 | check: test xref dialyzer 20 | 21 | test: ct eunit 22 | 23 | eunit: 24 | ./rebar3 eunit -v 25 | 26 | ct: 27 | ./rebar3 ct -v 28 | 29 | dialyzer: 30 | ./rebar3 dialyzer 31 | 32 | xref: 33 | ./rebar3 xref 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI][circleci badge]][circleci] 2 | [![Coverage][coverage badge]][covercov] 3 | [![Jira][jira badge]][jira] 4 | [![License][license badge]][license] 5 | [![Erlang Versions][erlang version badge]][erlang] 6 | 7 | # Telemetry-net 8 | 9 | Telemetry-net is a library for aggregation of metrics across many systems. It supports counters and histograms. 
You can also register a function to be called periodically to populate a value. You can also specify aggregation tags on the client to specify that certain axes of tags on metrics should be collapsed. 10 | 11 | ## Configuration 12 | 13 | ``` 14 | {telemetry, 15 | [ 16 | % Aggregators retain metrics over time, otherwise all metrics are dropped after trying to forward them. 17 | {is_aggregator, false}, 18 | % Periodically send metrics to a destination 19 | {forward_metrics, true}, 20 | % Don't allow receiving metrics 21 | {receive_metrics, false}, 22 | % Send metrics to all hosts that are in this DNS A record 23 | {forwarder_destinations, ["master.mesos"]}, 24 | % Send metrics every 60 seconds 25 | {interval_seconds, 60}, 26 | % Add 20 seconds of jitter to the interval to avoid thundering herd 27 | {splay_seconds, 20} 28 | ] 29 | } 30 | ``` 31 | ## Usage 32 | 33 | ``` 34 | Tags = #{host => "host-324242", destination => "10.1.2.3:5"}, 35 | % Aggregate pass-through (include all tags), collapse on hostname to get 36 | % global metrics across all hostnames for various destinations, and also 37 | % aggregate on both hostname and destination for global metrics across 38 | % all hosts and backends. 
39 | AggTags = [[], [hostname], [hostname, destination]], 40 | telemetry:counter(connect_successes, Tags, AggTags, 1), 41 | telemetry:histogram(connect_latency, Tags, AggTags, TimeDelta), 42 | ``` 43 | 44 | 45 | [circleci badge]: https://img.shields.io/circleci/project/github/dcos/telemetry-net/master.svg?style=flat-square 46 | [coverage badge]: https://img.shields.io/codecov/c/github/dcos/telemetry-net/master.svg?style=flat-square 47 | [jira badge]: https://img.shields.io/badge/issues-jira-yellow.svg?style=flat-square 48 | [license badge]: https://img.shields.io/github/license/dcos/telemetry-net.svg?style=flat-square 49 | [erlang version badge]: https://img.shields.io/badge/erlang-20.1-blue.svg?style=flat-square 50 | 51 | 52 | [circleci]: https://circleci.com/gh/dcos/telemetry-net 53 | [covercov]: https://codecov.io/gh/dcos/telemetry-net 54 | [jira]: https://jira.dcos.io/issues/?jql=component+%3D+networking+AND+project+%3D+DCOS_OSS 55 | [license]: ./LICENSE 56 | [erlang]: http://erlang.org/ 57 | -------------------------------------------------------------------------------- /include/telemetry.hrl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | 10 | -type time_to_histos() :: orddict:orddict({integer(), string()}, term()). 11 | -type time_to_counters() :: orddict:orddict({integer(), string()}, integer()). 12 | -type histo_summary() :: maps:map(string(), maps:map(integer(), maps:map(atom(), term()))). 13 | -type counter_summary() :: maps:map(string(), maps:map(integer(), integer())). 14 | 15 | -type metric_name() :: atom() | binary() | string(). 16 | 17 | -record(name_tags, { 18 | name :: metric_name(), 19 | tags :: maps:map() 20 | }). 
21 | 22 | 23 | -record(metrics, { 24 | time_to_histos = orddict:new(), 25 | time_to_counters = orddict:new(), 26 | dirty_histos = sets:new(), 27 | dirty_counters = sets:new() 28 | }). 29 | 30 | -type metrics() :: #metrics{}. 31 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [ 2 | debug_info, 3 | warnings_as_errors 4 | ]}. 5 | 6 | {eunit_opts, [ 7 | {cover_enabled, true}, 8 | verbose, 9 | {report, {eunit_surefire, [{dir, "."}]}} 10 | ]}. 11 | 12 | {cover_enabled, true}. 13 | {cover_print_enabled, true}. 14 | {cover_export_enabled, true}. 15 | 16 | {xref_checks, []}. 17 | {xref_queries, [{"(XC - UC) || (XU - X - B - \"(dtrace)\" : Mod)", []}]}. 18 | 19 | {profiles, [ 20 | {test, [ 21 | {plugins, [ 22 | {covertool, "2.0.0"} 23 | ]}, 24 | {deps, [ 25 | meck, 26 | proper 27 | ]} 28 | ]}, 29 | {lint, [ 30 | {plugins, [ 31 | {rebar3_lint, "0.1.10"} 32 | ]} 33 | ]} 34 | ]}. 35 | 36 | {elvis, [ 37 | #{ 38 | dirs => [ 39 | "src", 40 | "test" 41 | ], 42 | filter => "telemetry*.erl", 43 | rules => [ 44 | {elvis_style, max_function_length, #{max_length => 30}}, 45 | {elvis_style, no_spec_with_records}, 46 | {elvis_style, dont_repeat_yourself, #{min_complexity => 20}}, 47 | {elvis_style, no_behavior_info}, 48 | {elvis_style, used_ignored_variable}, 49 | {elvis_style, nesting_level, #{level => 4}}, 50 | {elvis_style, god_modules, #{limit => 25}}, 51 | {elvis_style, no_if_expression}, 52 | {elvis_style, line_length, #{limit => 120, count_comments => false}}, 53 | {elvis_style, no_tabs}, 54 | {elvis_style, no_trailing_whitespace}, 55 | {elvis_style, macro_names}, 56 | {elvis_style, macro_module_names}, 57 | {elvis_style, operator_spaces, #{rules => [{right, ","}, {right, "++"}, {left, "++"}]}} 58 | ] 59 | } 60 | ]}. 
61 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /rebar3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesosphere-backup/telemetry-net/1b38ce3a44d5e7f65f1c21d2890c4ed2e397ee27/rebar3 -------------------------------------------------------------------------------- /src/gen_opentsdb.erl: -------------------------------------------------------------------------------- 1 | -module(gen_opentsdb). 2 | -behaviour(gen_server). 3 | 4 | %% API 5 | -export([start_link/0, put_metric_batch/1, put_metric/2, put_metric/3, put_metric_/2, put_metric_/3, q/1]). 6 | 7 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). 8 | 9 | -define(TCP_DEFAULT, [binary, {packet, 0}]). 10 | 11 | -record(otsdb, { 12 | host = telemetry_config:opentsdb_endpoint(), 13 | port = 4242 14 | }). 15 | 16 | %% API 17 | start_link() -> 18 | gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 19 | 20 | put_metric_batch(Metrics) -> 21 | gen_server:call(?MODULE, {put_batch, Metrics}). 22 | 23 | put_metric(Name, Amount) -> 24 | put_metric(Name, Amount, []). 25 | 26 | put_metric(Name, Amount, Tags) -> 27 | gen_server:call(?MODULE, {put, Name, round(Amount), Tags}). 28 | 29 | put_metric_(Name, Amount) -> 30 | put_metric(Name, Amount, []). 31 | 32 | put_metric_(Name, Amount, Tags) -> 33 | gen_server:cast(?MODULE, {put, Name, Amount, Tags}). 34 | 35 | %% TODO add query HTTP API here, return decoded json. 36 | q(Cmd) -> 37 | {ok, Cmd}. 38 | 39 | %% gen_server-y goodness 40 | init([]) -> 41 | {ok, #otsdb{}}. 

%% Synchronous requests. `put' and `put_batch' are forwarded to OpenTSDB and the
%% caller receives whatever execute/2 returned; any other request gets `ok'.
handle_call({put, Metric, Amount, Tags}, _From, State) ->
    Reply = execute(State, {put, Metric, Amount, Tags}),
    {reply, Reply, State};
handle_call({put_batch, Metrics}, _From, State) ->
    Reply = execute(State, {put_batch, Metrics}),
    {reply, Reply, State};
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%% Asynchronous requests. The result of the `put' is discarded because a cast
%% has nobody to report it to; unknown messages are ignored.
handle_cast({put, Metric, Amount, Tags}, State) ->
    execute(State, {put, Metric, Amount, Tags}),
    {noreply, State};
handle_cast(_Msg, State) ->
    {noreply, State}.

%% Stray messages are dropped; the state is left untouched.
handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Internal functions

%% Current POSIX time in whole seconds.
%% The unit is requested explicitly: os:system_time/0 reports the platform's
%% native time unit, which is not guaranteed to be nanoseconds, so dividing it
%% by a fixed 10^9 can produce a wrong timestamp.
unix_timestamp() ->
    os:system_time(seconds).

%% Performs one action against the configured OpenTSDB endpoint.
%%
%% Returns:
%%   {error, no_opentsdb_endpoint_configured}  when the host in the state is `false';
%%   {error, invalid_amount}                   when a single `put' carries an amount
%%                                             that convert_amount/1 rejects;
%%   otherwise the result of send/3.
%%
%% In a `put_batch', entries whose amount is rejected contribute an empty list
%% to the payload, i.e. they are skipped while the remaining entries are sent.
execute(#otsdb{host = false}, _Action) ->
    {error, no_opentsdb_endpoint_configured};
execute(#otsdb{host = Host, port = Port}, {put, Metric, Amount, Tags}) ->
    case convert_amount(Amount) of
        {ok, SafeAmount} ->
            Time = integer_to_binary(unix_timestamp()),
            send(Host, Port, opentsdb_fmt(Metric, Time, SafeAmount, Tags));
        {error, _} ->
            {error, invalid_amount}
    end;
execute(#otsdb{host = Host, port = Port}, {put_batch, Metrics}) ->
    Payload = lists:map(fun batch_line/1, Metrics),
    send(Host, Port, Payload).

%% Renders one `{Name, Time, Amount, Tags}' batch entry, or `[]' when the
%% amount cannot be converted (an empty list adds nothing to the iolist sent).
batch_line({Name, Time, Amount, Tags}) ->
    case convert_amount(Amount) of
        {ok, SafeAmount} ->
            opentsdb_fmt(Name, integer_to_binary(Time), SafeAmount, Tags);
        {error, _} ->
            []
    end.

%% Opens a fresh TCP connection, writes Msg (an iolist), and closes the socket.
%% Returns the result of gen_tcp:send/2, or `{error, Reason}' when the
%% connection cannot be established. A connect failure is reported to the
%% caller instead of crashing the server with a badmatch.
send(Host, Port, Msg) ->
    case gen_tcp:connect(Host, Port, ?TCP_DEFAULT) of
        {ok, Sock} ->
            Reply = gen_tcp:send(Sock, Msg),
            ok = gen_tcp:close(Sock),
            Reply;
        {error, _Reason} = Error ->
            Error
    end.
99 | 100 | opentsdb_fmt(Metric, Time, Amount, Tags) -> 101 | SafeMetric = sanitize_to_binary(Metric), 102 | SafeTags = format_tags(Tags), 103 | <<$p,$u,$t,$\s, SafeMetric/binary, $\s, Time/binary, $\s, Amount/binary, $\s, SafeTags/binary, $\n>>. 104 | 105 | convert_amount(Amount) -> 106 | NewAmount = case Amount of 107 | A when is_integer(A) -> {ok, list_to_binary(integer_to_list(A))}; 108 | A when is_float(A) -> {ok, list_to_binary(float_to_list(A))}; 109 | A when is_list(A) -> {ok, list_to_binary(A)}; 110 | A when is_binary(A) -> {ok, A}; 111 | _ -> {error, unknown_type} 112 | end, 113 | NewAmount. 114 | 115 | sanitize_to_binary(V) -> 116 | FmtV = io_lib:format("~p", [V]), 117 | SanitizedV = re:replace(FmtV, "[^A-Za-z0-9./\\-_]", "", [global, {return, list}]), 118 | list_to_binary(SanitizedV). 119 | 120 | format_tags(Tags) -> 121 | TagList = maps:to_list(Tags), 122 | BinaryTagList = lists:map(fun({T, V}) -> 123 | {sanitize_to_binary(T), sanitize_to_binary(V)} 124 | end, TagList), 125 | lists:foldl(fun(E, A) -> 126 | <> 127 | end, <<>>, [<> || {K, V} <- BinaryTagList]). 128 | -------------------------------------------------------------------------------- /src/telemetry.app.src: -------------------------------------------------------------------------------- 1 | {application, telemetry, 2 | [ 3 | {description, ""}, 4 | {vsn, "1"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib, 9 | inets 10 | ]}, 11 | {mod, { telemetry_app, []}}, 12 | {env, []} 13 | ]}. 14 | -------------------------------------------------------------------------------- /src/telemetry.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | -module(telemetry). 
-author("Tyler Neely").

%% API
-export([
    start/0,
    stop/0,
    counter/2,
    counter/3,
    counter/4,
    histogram/2,
    histogram/3,
    histogram/4,
    add_gauge_func/2,
    remove_gauge_func/1,
    metrics_to_summary/1
]).

-include("telemetry.hrl").

%% Starts the telemetry application together with the applications it depends on.
start() ->
    application:ensure_all_started(telemetry).

%% Stops the telemetry application.
stop() ->
    application:stop(telemetry).

%% Tags attached to every metric: the local hostname under the `hostname' key.
default_tags() ->
    {ok, Hostname} = inet:gethostname(),
    #{hostname => Hostname}.

%% Submits Value for the counter Name, tagged only with the default tags and
%% stamped with the current time in seconds.
-spec(counter(Name :: metric_name(), Value :: integer()) -> ok).
counter(Name, Value) ->
    Timestamp = os:system_time(seconds),
    Key = #name_tags{name = Name, tags = default_tags()},
    telemetry_store:submit(Key, Timestamp, counter, Value).

%% @doc Adds Value to the counter Name. The caller's Tags are merged over the
%% default tags, so a caller-supplied key wins over a default one.
-spec(counter(Name :: metric_name(),
              Tags :: maps:map(string() | atom(), string() | atom()),
              Value :: integer()) -> ok).
counter(Name, Tags, Value) ->
    Timestamp = os:system_time(seconds),
    Key = #name_tags{name = Name, tags = maps:merge(default_tags(), Tags)},
    telemetry_store:submit(Key, Timestamp, counter, Value),
    ok.

%% @doc Adds Value to the counter Name once per entry of AggregateTags. In each
%% submission the tags named by that entry are replaced with the atom `aggregate'.
-spec(counter(Name :: metric_name(),
              Tags :: maps:map(string() | atom(), string() | atom()),
              AggregateTags :: list(list(string() | atom())),
              Value :: integer()) -> ok).
counter(Name, Tags, AggregateTags, Value) ->
    Timestamp = os:system_time(seconds),
    BaseTags = maps:merge(default_tags(), Tags),
    %% One submission per aggregation entry; an empty entry collapses nothing
    %% and therefore submits the metric with all of its tags intact.
    lists:foreach(
        fun(AggTags) ->
            Collapsed = maps:from_list([{Tag, aggregate} || Tag <- AggTags]),
            Key = #name_tags{name = Name, tags = maps:merge(BaseTags, Collapsed)},
            telemetry_store:submit(Key, Timestamp, counter, Value)
        end,
        AggregateTags),
    ok.


%% Records Value in the histogram Name, tagged only with the default tags and
%% stamped with the current time in seconds.
-spec(histogram(Name :: metric_name(), Value :: number()) -> ok).
histogram(Name, Value) ->
    Timestamp = os:system_time(seconds),
    Key = #name_tags{name = Name, tags = default_tags()},
    telemetry_store:submit(Key, Timestamp, histogram, Value).

%% Records Value in the histogram Name. The caller's Tags are merged over the
%% default tags, so a caller-supplied key wins over a default one.
-spec(histogram(Name :: metric_name(),
                Tags :: maps:map(string() | atom(), string() | atom()),
                Value :: number()) -> ok).
histogram(Name, Tags, Value) ->
    Timestamp = os:system_time(seconds),
    Key = #name_tags{name = Name, tags = maps:merge(default_tags(), Tags)},
    telemetry_store:submit(Key, Timestamp, histogram, Value).

%% @doc This is called by an external program to add Value to the histogram specified by Name.
-spec(histogram(Name :: metric_name(),
                Tags :: maps:map(string() | atom(), string() | atom()),
                AggregateTags :: list(list(string() | atom())),
                Value :: number()) -> ok).
histogram(Name, Tags, AggregateTags, Value) ->
    Timestamp = os:system_time(seconds),
    BaseTags = maps:merge(default_tags(), Tags),
    %% One submission per aggregation entry; the tags named by the entry are
    %% overwritten with the atom `aggregate' before the value is stored.
    lists:foreach(
        fun(AggTags) ->
            Collapsed = maps:from_list([{Tag, aggregate} || Tag <- AggTags]),
            Key = #name_tags{name = Name, tags = maps:merge(BaseTags, Collapsed)},
            telemetry_store:submit(Key, Timestamp, histogram, Value)
        end,
        AggregateTags),
    ok.


%% Registers Fun under Name with the telemetry store.
-spec(add_gauge_func(Name :: string() | atom(),
                     Fun :: fun()) -> ok).
add_gauge_func(Name, Fun) ->
    telemetry_store:add_gauge_func(Name, Fun).

%% Removes the gauge function registered under Name from the telemetry store.
-spec(remove_gauge_func(Name :: string() | atom()) -> ok).
remove_gauge_func(Name) ->
    telemetry_store:remove_gauge_func(Name).

%% Re-keys the {Time, Metric} -> Value orddicts of a #metrics{} record into
%% Metric -> Time -> Value maps. Histogram values are passed through
%% telemetry_histo:map_summary/1; counter values are kept as they are.
-spec(metrics_to_summary(metrics()) -> maps:map(atom(), histo_summary() | counter_summary())).
metrics_to_summary(#metrics{time_to_histos = HistosByTime,
                            time_to_counters = CountersByTime}) ->
    Histograms = invert_time_name_to_value_orddict(HistosByTime, fun telemetry_histo:map_summary/1),
    Counters = invert_time_name_to_value_orddict(CountersByTime, fun(Count) -> Count end),
    #{histograms => Histograms, counters => Counters}.


-spec(invert_time_name_to_value_orddict(time_to_histos() | time_to_counters(), function()) ->
    histo_summary() | counter_summary()).
144 | invert_time_name_to_value_orddict(TimeNameToValueOrddict, ExtractFun) -> 145 | Orddict = orddict:fold(fun({Time, Name}, ValueIn, AccIn) -> 146 | ValueOut = ExtractFun(ValueIn), 147 | orddict:append(Name, {Time, ValueOut}, AccIn) 148 | end, orddict:new(), TimeNameToValueOrddict), 149 | 150 | DictList = orddict:to_list(Orddict), 151 | 152 | lists:foldl(fun({Name, TimeSummary}, AccIn) -> 153 | TimeSummaryMap = maps:from_list(TimeSummary), 154 | maps:put(Name, TimeSummaryMap, AccIn) 155 | end, #{}, DictList). 156 | -------------------------------------------------------------------------------- /src/telemetry_api.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | -module(telemetry_api). 10 | -author("Tyler Neely"). 11 | 12 | -------------------------------------------------------------------------------- /src/telemetry_app.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | -module(telemetry_app). 10 | 11 | -behaviour(application). 12 | 13 | %% Application callbacks 14 | -export([start/2, stop/1]). 15 | 16 | %% =================================================================== 17 | %% Application callbacks 18 | %% =================================================================== 19 | 20 | start(_StartType, _StartArgs) -> 21 | % telemetry_metrics:setup(), 22 | telemetry_sup:start_link(). 
23 | 24 | stop(_State) -> 25 | ok. 26 | -------------------------------------------------------------------------------- /src/telemetry_config.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | -module(telemetry_config). 10 | -author("Tyler Neely"). 11 | 12 | %% API 13 | -export([interval_seconds/0, 14 | max_intervals/0, 15 | splay_seconds/0, 16 | forwarder_destinations/0, 17 | forwarder_destinations/1, 18 | forward_to_all_resolved_hosts/0, 19 | is_aggregator/0, 20 | enable_metric_database/0, 21 | forward_metrics/0, 22 | opentsdb_endpoint/0, 23 | rendered_metric_receiver_modules/0 24 | ]). 25 | 26 | 27 | interval_seconds() -> 28 | application:get_env(telemetry, interval_seconds, 60). 29 | 30 | 31 | max_intervals() -> 32 | application:get_env(telemetry, max_intervals, 60). 33 | 34 | 35 | splay_seconds() -> 36 | application:get_env(telemetry, splay_seconds, 10). 37 | 38 | 39 | forwarder_destinations() -> 40 | application:get_env(telemetry, forwarder_destinations, []). 41 | 42 | forwarder_destinations(Servers) -> 43 | application:set_env(telemetry, forwarder_destinations, Servers). 44 | 45 | %%-------------------------------------------------------------------- 46 | %% @doc 47 | %% When resolving each destination in forwarder_destinations, send to 48 | %% ALL resolved hosts or just a single one. This is useful eg. for 49 | %% sending metrics to all mesos masters. 50 | %% @end 51 | %%-------------------------------------------------------------------- 52 | forward_to_all_resolved_hosts() -> 53 | application:get_env(telemetry, forward_to_all_resolved_hosts, true). 
54 | 55 | 56 | %%-------------------------------------------------------------------- 57 | %% @doc 58 | %% Determines whether we should retain aggregate metrics after passing 59 | %% them along, or only incremental measurements. This should be set to 60 | %% false for anything that is not the final stage of an aggregation 61 | %% pipeline, otherwise deplicate metrics will be submitted. 62 | %% @end 63 | %%-------------------------------------------------------------------- 64 | is_aggregator() -> 65 | application:get_env(telemetry, is_aggregator, false). 66 | 67 | forward_metrics() -> 68 | application:get_env(telemetry, forward_metrics, false). 69 | 70 | enable_metric_database() -> 71 | application:get_env(telemetry, enable_metric_database, false). 72 | 73 | 74 | opentsdb_endpoint() -> 75 | application:get_env(telemetry, opentsdb_endpoint, false). 76 | 77 | 78 | rendered_metric_receiver_modules() -> 79 | application:get_env(telemetry, rendered_metric_receiver_modules, []). 80 | 81 | -------------------------------------------------------------------------------- /src/telemetry_forwarder.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | 10 | -module(telemetry_forwarder). 11 | -behaviour(gen_server). 12 | 13 | %% API 14 | -export([start_link/0]). 15 | 16 | %% gen_server callbacks 17 | -export([init/1, 18 | handle_call/3, 19 | handle_cast/2, 20 | handle_info/2, 21 | terminate/2, 22 | code_change/3]). 23 | 24 | -include_lib("kernel/include/logger.hrl"). 25 | 26 | -ifdef(TEST). 27 | -include_lib("eunit/include/eunit.hrl"). 28 | -endif. 29 | 30 | -define(SERVER, ?MODULE). 31 | 32 | -record(state, {}). 
33 | 34 | %%%=================================================================== 35 | %%% API 36 | %%%=================================================================== 37 | 38 | %%-------------------------------------------------------------------- 39 | %% @doc 40 | %% Starts the server 41 | %% 42 | %% @end 43 | %%-------------------------------------------------------------------- 44 | -spec(start_link() -> 45 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 46 | start_link() -> 47 | gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). 48 | 49 | %%%=================================================================== 50 | %%% gen_server callbacks 51 | %%%=================================================================== 52 | 53 | init([]) -> 54 | self() ! attempt_push, 55 | {ok, #state{}}. 56 | 57 | handle_call(_Request, _From, State) -> 58 | {reply, ok, State}. 59 | 60 | handle_cast(_Req, State) -> 61 | {noreply, State}. 62 | 63 | handle_info(attempt_push, State) -> 64 | push(telemetry_config:forward_metrics()), 65 | erlang:send_after(splay_ms(), self(), attempt_push), 66 | {noreply, State}; 67 | handle_info(_Info, State) -> 68 | {noreply, State}. 69 | 70 | try_submit(Metrics, Servers) -> 71 | Process = telemetry_receiver, 72 | Message = {push_metrics, Metrics}, 73 | case gen_server:multi_call(Servers, Process, Message) of 74 | {[], _BadReps} -> 75 | {error, no_successful_responses}; 76 | {_GoodReps, _BadReps} -> 77 | ok 78 | end. 79 | 80 | terminate(_Reason, _State = #state{}) -> 81 | ok. 82 | 83 | code_change(_OldVsn, State, _Extra) -> 84 | {ok, State}. 85 | 86 | 87 | push(false) -> 88 | ok; 89 | push(true) -> 90 | Metrics = telemetry_store:reap(), 91 | 92 | Destinations = telemetry_config:forwarder_destinations(), 93 | 94 | %% TODO(tyler) persist submissions for failed pushes, and retry them before sending 95 | %% new ones at each interval. 96 | %% Try to submit to the new endpoint first, then fall back to older one. 
97 | case try_submit(Metrics, Destinations) of 98 | {error, no_successful_responses} -> 99 | ?LOG_WARNING("failed to submit metrics to any of ~p", [Destinations]); 100 | ok -> ok 101 | end. 102 | 103 | %%-------------------------------------------------------------------- 104 | %% @private 105 | %% @doc 106 | %% Returns the number of milliseconds until the next minute, plus some 107 | %% randomness. The randomness helps prevent thundering herd submission 108 | %% once per minute while submitting metrics. 109 | %% @end 110 | %%-------------------------------------------------------------------- 111 | -spec(splay_ms() -> integer()). 112 | splay_ms() -> 113 | MsPerMinute = telemetry_config:interval_seconds() * 1000, 114 | NextMinute = MsPerMinute - erlang:system_time(millisecond) rem MsPerMinute, 115 | 116 | SplayMS = telemetry_config:splay_seconds() * 1000, 117 | FlooredSplayMS = max(1, SplayMS), 118 | Splay = rand:uniform(FlooredSplayMS), 119 | 120 | NextMinute + Splay. 121 | -------------------------------------------------------------------------------- /src/telemetry_histo.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 14. March 2016 17:43 PM 8 | %%%------------------------------------------------------------------- 9 | -module(telemetry_histo). 10 | -author("Tyler Neely"). 11 | 12 | %% API 13 | -export([new/0, 14 | percentile/2, 15 | record/2, 16 | merge/2, 17 | map_summary/1]). 18 | 19 | -record(histo, { 20 | total = 0 :: integer(), 21 | values = orddict:new() :: orddict:orddict() 22 | }). 23 | 24 | new() -> 25 | #histo{}. 

%% Returns the (decompressed) value of the first bucket, in ascending key
%% order, at which the cumulative sample count reaches Total * Pct.
%% An empty histogram yields {error, empty_histo}. Only defined for
%% 0 =< Pct =< 1.0; any other Pct fails with function_clause.
percentile(#histo{total = 0}, _Pct) ->
    {error, empty_histo};
percentile(#histo{total = Total, values = Buckets}, Pct) when Pct >= 0 andalso Pct =< 1.0 ->
    Threshold = Total * Pct,
    Step = fun
               (Key, Count, {notfound, Seen}) when Count + Seen >= Threshold ->
                   decompress(Key);
               (_Key, Count, {notfound, Seen}) ->
                   {notfound, Count + Seen};
               %% Once a value has been found it is carried through untouched.
               (_Key, _Count, Found) ->
                   Found
           end,
    orddict:fold(Step, {notfound, 0}, Buckets).

%% Adds one sample V to the histogram: bumps the counter of V's compressed
%% bucket and the total sample count.
record(#histo{total = Total, values = Buckets}, V) ->
    Bucket = compress(V),
    #histo{total = Total + 1,
           values = orddict:update_counter(Bucket, 1, Buckets)}.

%% Combines two histograms by summing totals and per-bucket counts.
merge(#histo{total = TotalA, values = BucketsA}, #histo{total = TotalB, values = BucketsB}) ->
    SumCounts = fun(_Bucket, CountA, CountB) -> CountA + CountB end,
    #histo{total = TotalA + TotalB,
           values = orddict:merge(SumCounts, BucketsA, BucketsB)}.

%% Maps a value to an integer bucket key on a logarithmic scale,
%% round(100 * ln(1 + |V|) + 0.5), keeping the sign of V.
-spec(compress(V :: float()) -> integer()).
compress(V) when V >= 0 ->
    Scaled = 100 * math:log(1.0 + abs(V)),
    round(Scaled + 0.5);
compress(V) ->
    -compress(-V).

%% Maps a bucket key back to a representative value, exp(|Key| / 100) - 1,
%% keeping the sign of the key.
-spec(decompress(V :: integer()) -> float()).
decompress(V) when V >= 0 ->
    math:exp(abs(V) / 100) - 1.0;
decompress(V) ->
    -decompress(-V).

%% Summarizes a histogram as a map of fixed percentiles plus the total
%% sample count. For an empty histogram every percentile entry is
%% {error, empty_histo}.
map_summary(Histo = #histo{total = Total}) ->
    Pct = fun(P) -> percentile(Histo, P) end,
    #{
        total_count => Total,
        min => Pct(0),
        median => Pct(0.50),
        p75 => Pct(0.75),
        p90 => Pct(0.9),
        p95 => Pct(0.95),
        p99 => Pct(0.99),
        p999 => Pct(0.999),
        p9999 => Pct(0.9999),
        p99999 => Pct(0.99999),
        max => Pct(1.0)
    }.
78 | 79 | -------------------------------------------------------------------------------- /src/telemetry_receiver.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | 10 | -module(telemetry_receiver). 11 | -behaviour(gen_server). 12 | 13 | %% API 14 | -export([start_link/0]). 15 | 16 | %% gen_server callbacks 17 | -export([init/1, 18 | handle_call/3, 19 | handle_cast/2, 20 | handle_info/2, 21 | terminate/2, 22 | code_change/3]). 23 | 24 | 25 | -define(SERVER, ?MODULE). 26 | 27 | -record(state, {}). 28 | 29 | 30 | %%%=================================================================== 31 | %%% API 32 | %%%=================================================================== 33 | 34 | %%-------------------------------------------------------------------- 35 | %% @doc 36 | %% Starts the server 37 | %% 38 | %% @end 39 | %%-------------------------------------------------------------------- 40 | -spec(start_link() -> 41 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 42 | start_link() -> 43 | gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). 44 | 45 | %%%=================================================================== 46 | %%% gen_server callbacks 47 | %%%=================================================================== 48 | 49 | init([]) -> 50 | {ok, #state{}}. 51 | 52 | handle_call({push_metrics, Metrics}, _From, State) -> 53 | Res = telemetry_store:merge(Metrics), 54 | {reply, Res, State}. 55 | 56 | handle_cast(_Req, State) -> 57 | {noreply, State}. 58 | 59 | handle_info(_Info, State) -> 60 | {noreply, State}. 61 | 62 | terminate(_Reason, _State = #state{}) -> 63 | ok. 
64 | 65 | code_change(_OldVsn, State, _Extra) -> 66 | {ok, State}. 67 | -------------------------------------------------------------------------------- /src/telemetry_store.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Tyler Neely 3 | %%% @copyright (C) 2016, Mesosphere 4 | %%% @doc 5 | %%% 6 | %%% @end 7 | %%% Created : 2. Feb 2016 11:44 PM 8 | %%%------------------------------------------------------------------- 9 | 10 | -module(telemetry_store). 11 | -behaviour(gen_server). 12 | 13 | %% API 14 | -export([start_link/0, 15 | submit/4, 16 | snapshot/0, 17 | reap/0, 18 | merge/1, 19 | add_gauge_func/2, 20 | remove_gauge_func/1 21 | ]). 22 | 23 | %% gen_server callbacks 24 | -export([init/1, 25 | handle_call/3, 26 | handle_cast/2, 27 | handle_info/2, 28 | terminate/2, 29 | code_change/3]). 30 | 31 | -include_lib("kernel/include/logger.hrl"). 32 | -include("telemetry.hrl"). 33 | 34 | -define(SERVER, ?MODULE). 35 | 36 | -record(store, { 37 | metrics = #metrics{}, 38 | metric_funs = maps:new() 39 | }). 40 | -type state() :: #store{}. 41 | 42 | 43 | %%%=================================================================== 44 | %%% API 45 | %%%=================================================================== 46 | 47 | %%-------------------------------------------------------------------- 48 | %% @doc 49 | %% Submit a metric to the store for aggregation. 50 | %% @end 51 | %%-------------------------------------------------------------------- 52 | -spec(submit(Name :: metric_name(), Time :: integer(), Type :: term(), Value :: term()) -> ok | {error, atom()}). 53 | submit(Name, Time, Type, Value) -> 54 | gen_server:cast(?SERVER, {submit, Name, Time, Type, Value}). 55 | 56 | %%-------------------------------------------------------------------- 57 | %% @doc 58 | %% Get a snapshot of current metrics. 
%% Served from the last_snap entry of the snapcache ETS table when one
%% exists; otherwise the store process is asked to generate one.
%% @end
%%--------------------------------------------------------------------
-spec(snapshot() -> metrics()).
snapshot() ->
    %% snapcache is a set table, so the lookup yields either nothing or the
    %% single cached entry.
    case ets:lookup(snapcache, last_snap) of
        [] ->
            ?LOG_DEBUG("returning generated snapshot"),
            gen_server:call(?SERVER, snapshot);
        [{last_snap, Cached}] ->
            ?LOG_DEBUG("returning cached snapshot"),
            Cached
    end.

%%--------------------------------------------------------------------
%% @doc
%% For all times which have had metrics submitted in the last interval,
%% collect the counters and histogram exports. Synchronous call into the
%% store process.
%% @end
%%--------------------------------------------------------------------
-spec(reap() -> metrics()).
reap() ->
    gen_server:call(?SERVER, reap).

%%--------------------------------------------------------------------
%% @doc
%% Take counters and histograms and merge them with our state. The merge
%% is sent as a cast, so it is performed asynchronously by the store.
%% @end
%%--------------------------------------------------------------------
-spec(merge(Metrics :: metrics()) -> ok | {error, atom()}).
merge(Metrics) ->
    gen_server:cast(?SERVER, {merge, Metrics}).

%%--------------------------------------------------------------------
%% @doc
%% Register a fun of zero arity that returns a numerical value to be
%% called upon the creation of any metrics snapshot. Name may be a
%% string or an atom, matching telemetry:add_gauge_func/2.
%% @end
%%--------------------------------------------------------------------
-spec(add_gauge_func(string() | atom(), fun()) -> ok | {error, atom()}).
add_gauge_func(Name, Fun) ->
    gen_server:call(?SERVER, {add_gauge_func, Name, Fun}).

%%--------------------------------------------------------------------
%% @doc
%% Remove a metrics function previously registered using add_gauge_func.
%% @end
%%--------------------------------------------------------------------
-spec(remove_gauge_func(string() | atom()) -> ok).
remove_gauge_func(Name) ->
    Request = {remove_gauge_func, Name},
    gen_server:call(?SERVER, Request).

%%--------------------------------------------------------------------
%% @doc
%% Starts the store as a locally registered gen_server.
%%
%% @end
%%--------------------------------------------------------------------
-spec(start_link() ->
    {ok, Pid :: pid()} | ignore | {error, Reason :: term()}).
start_link() ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% Creates the named snapcache table read by snapshot/0 and starts with an
%% empty store.
init([]) ->
    TableOpts = [named_table, set, {read_concurrency, true}],
    snapcache = ets:new(snapcache, TableOpts),
    {ok, #store{}}.

%% reap: export the metrics and replace the state as decided by handle_reap/1.
handle_call(reap, _From, State) ->
    {Reaped, State1} = handle_reap(State),
    {reply, Reaped, State1};

%% snapshot: export the current metrics; the state is left untouched.
handle_call(snapshot, _From, #store{metrics = Metrics} = State) ->
    {reply, export_metrics(Metrics), State};

%% add_gauge_func: register (or replace) the fun stored under Name.
handle_call({add_gauge_func, Name, Fun}, _From, #store{metric_funs = Funs} = State) ->
    {reply, ok, State#store{metric_funs = Funs#{Name => Fun}}};

%% remove_gauge_func: forget the fun stored under Name, if any.
handle_call({remove_gauge_func, Name}, _From, #store{metric_funs = Funs} = State) ->
    {reply, ok, State#store{metric_funs = maps:remove(Name, Funs)}};

%% Anything else is logged and answered with ok.
handle_call(Request, _From, State) ->
    ?LOG_WARNING("got unknown request in telemetry_store handle_call: ~p", [Request]),
    {reply, ok, State}.

%% Asynchronous requests: metric submissions (histogram or counter samples)
%% and merges of metrics pushed from elsewhere. No other cast is handled.
handle_cast({submit, Name, Time, histogram, Value}, State) ->
    {noreply, handle_submit_histogram(Name, Time, Value, State)};
handle_cast({submit, Name, Time, counter, Value}, State) ->
    {noreply, handle_submit_counter(Name, Time, Value, State)};
handle_cast({merge, Metrics}, State) ->
    {noreply, handle_merge(Metrics, State)}.


%% Stray messages are ignored.
handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, #store{}) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Merges two orddicts keyed by {Time, Name} whose values are
%% telemetry_histo histograms. Where a key is present in both, the two
%% histograms are combined with telemetry_histo:merge/2.
%% @end
%%--------------------------------------------------------------------
merge_histos(TimeToHistos1, TimeToHistos2) ->
    Combine = fun (_Key, HistoA, HistoB) ->
                  telemetry_histo:merge(HistoA, HistoB)
              end,
    orddict:merge(Combine, TimeToHistos1, TimeToHistos2).


%% Merges two orddicts keyed by {Time, Name} whose values are counters;
%% values under a key present in both are added together.
merge_counters(TimeToCounters1, TimeToCounters2) ->
    Add = fun (_Key, CountA, CountB) -> CountA + CountB end,
    orddict:merge(Add, TimeToCounters1, TimeToCounters2).


%% Calls every registered gauge fun and stores its result as the counter
%% value under {CurrentBucket, Name}, overwriting any value already there,
%% and marks that key as dirty. CurrentBucket is the current wall-clock
%% second rounded down to a multiple of the configured interval.
record_gauge_funcs(Metrics = #metrics{time_to_counters = TimeToCounters,
                                      dirty_counters = DirtyCounters},
                   MetricFuns) ->
    Now = os:system_time(seconds),
    Bucket = Now - (Now rem telemetry_config:interval_seconds()),

    Sample = fun (Name, GaugeFun, {Counters, Dirty}) ->
                 Key = {Bucket, Name},
                 Reading = GaugeFun(),
                 {orddict:store(Key, Reading, Counters),
                  sets:add_element(Key, Dirty)}
             end,
    {Counters1, Dirty1} = maps:fold(Sample, {TimeToCounters, DirtyCounters}, MetricFuns),

    Metrics#metrics{time_to_counters = Counters1,
                    dirty_counters = Dirty1}.


%% Builds the exported copy of the metrics (histograms, counters and both
%% dirty sets), caches it as the last_snap entry of the snapcache table and
%% returns it.
-spec(export_metrics(metrics()) -> metrics()).
export_metrics(#metrics{time_to_histos = Histos,
                        time_to_counters = Counters,
                        dirty_histos = DirtyHistos,
                        dirty_counters = DirtyCounters}) ->
    Exported = #metrics{time_to_histos = Histos,
                        time_to_counters = Counters,
                        dirty_histos = DirtyHistos,
                        dirty_counters = DirtyCounters},
    ?LOG_DEBUG("populating the snapcache with metrics"),
    %% Assert the insert so a missing table fails loudly.
    true = ets:insert(snapcache, {last_snap, Exported}),
    Exported.



%% Summarizes the given counters and histograms with
%% telemetry:metrics_to_summary/1 and sends both summaries to OpenTSDB.
%% Always returns ok.
submit_to_opentsdb(#metrics{time_to_histos = TimeToHistos,
                            time_to_counters = TimeToCounters}) ->
    %% TODO(tyler) rip out this filthy hack
    Interval = telemetry_config:interval_seconds(),
    Now = os:system_time(seconds),
    Gate = (Now - (Now rem Interval)) - Interval + 1,

    %% NOTE(review): the keys of both orddicts are {Time, Name} tuples, and in
    %% Erlang term order every tuple compares greater than every number, so
    %% this predicate is always true and nothing is filtered out. Before
    %% changing it to compare the Time element, check which buckets a merge
    %% is expected to deliver - comparing Time against this Gate would keep
    %% only the current interval's bucket.
    PassesGate = fun (Key, _Value) -> Key > Gate end,
    Counters = orddict:filter(PassesGate, TimeToCounters),
    Histos = orddict:filter(PassesGate, TimeToHistos),

    #{counters := CounterSummary, histograms := HistoSummary} =
        telemetry:metrics_to_summary(#metrics{time_to_histos = Histos,
                                              time_to_counters = Counters}),
    submit_counters_to_opentsdb(CounterSummary),
    submit_histos_to_opentsdb(HistoSummary),

    ok.


%% Flattens a counter summary (name/tags -> time -> value) into a list of
%% {Name, Time, Value, Tags} tuples and hands it to gen_opentsdb as one batch.
submit_counters_to_opentsdb(Summary) ->
    PerMetric = fun (#name_tags{name = Name, tags = Tags}, TimeToValue, Batch) ->
                    PerTime = fun (Time, Value, Acc) ->
                                  [{Name, Time, Value, Tags} | Acc]
                              end,
                    maps:fold(PerTime, Batch, TimeToValue)
                end,
    gen_opentsdb:put_metric_batch(maps:fold(PerMetric, [], Summary)).


%% Flattens a histogram summary (name/tags -> time -> summary map) into a
%% list of {Name, Time, Value, Tags} tuples, one per summary entry, with the
%% entry's name added to the tags under the key histo, and hands the list
%% to gen_opentsdb as one batch.
submit_histos_to_opentsdb(Summary) ->
    PerMetric = fun (#name_tags{name = Name, tags = Tags}, TimeToSummary, Batch) ->
                    PerTime = fun (Time, HistoSummary, TimeAcc) ->
                                  PerStat = fun (StatName, Value, StatAcc) ->
                                                Tagged = maps:put(histo, StatName, Tags),
                                                [{Name, Time, Value, Tagged} | StatAcc]
                                            end,
                                  maps:fold(PerStat, TimeAcc, HistoSummary)
                              end,
                    maps:fold(PerTime, Batch, TimeToSummary)
                end,
    gen_opentsdb:put_metric_batch(maps:fold(PerMetric, [], Summary)).

-spec(handle_reap(State :: state()) -> {Reply :: metrics(), NewState :: state()}).
%% Samples the gauge funs, exports (and caches) the resulting metrics as the
%% reply, then decides what the store keeps: an aggregator retains the
%% histograms and counters that are no older than
%% interval_seconds * max_intervals, with fresh dirty sets; any other node
%% starts over with empty metrics.
handle_reap(State = #store{metrics = Metrics, metric_funs = MetricFuns}) ->
    %% record function gauges
    Sampled = record_gauge_funcs(Metrics, MetricFuns),
    #metrics{time_to_histos = Histos,
             time_to_counters = Counters} = Sampled,

    %% Create a snapshot of current metrics.
    Reaped = export_metrics(Sampled),

    %% Prune metrics that we should shed.
    MaxAge = telemetry_config:interval_seconds() * telemetry_config:max_intervals(),
    Cutoff = os:system_time(seconds) - MaxAge,
    IsFresh = fun ({Time, _Name}, _Value) -> Time >= Cutoff end,
    FreshHistos = orddict:filter(IsFresh, Histos),
    FreshCounters = orddict:filter(IsFresh, Counters),

    %% Only nodes in aggregator mode should retain non-partial metrics.
    Retained =
        case telemetry_config:is_aggregator() of
            false ->
                #metrics{};
            true ->
                #metrics{time_to_histos = FreshHistos,
                         time_to_counters = FreshCounters}
        end,
    {Reaped, State#store{metrics = Retained, metric_funs = MetricFuns}}.


-spec(handle_submit_histogram(Name :: term(), Time :: term(), Value :: term(), State :: state()) ->
    NewState :: state()).
%% Records Value in the histogram stored under {Bucket, Name}, creating the
%% histogram first when the key is new, and marks the key dirty. Bucket is
%% Time rounded down to a multiple of the configured interval.
handle_submit_histogram(Name, Time, Value, State = #store{metrics = Metrics}) ->
    #metrics{time_to_histos = TimeToHistos,
             dirty_histos = DirtyHistos} = Metrics,
    Key = {Time - (round(Time) rem telemetry_config:interval_seconds()), Name},

    %% orddict:update/4 stores the initial value as-is when the key is absent,
    %% so the initial value already contains the first sample.
    FirstSample = telemetry_histo:record(telemetry_histo:new(), Value),
    AddSample = fun(Histo) -> telemetry_histo:record(Histo, Value) end,
    TimeToHistos1 = orddict:update(Key, AddSample, FirstSample, TimeToHistos),

    State#store{metrics = Metrics#metrics{time_to_histos = TimeToHistos1,
                                          dirty_histos = sets:add_element(Key, DirtyHistos)}}.

%% Adds Value to the counter stored under {Bucket, Name}, starting from Value
%% when the key is new, and marks the key dirty. Bucket is Time rounded down
%% to a multiple of the configured interval.
-spec(handle_submit_counter(Name :: term(), Time :: term(), Value :: term(), State :: state()) ->
    NewState :: state()).
handle_submit_counter(Name, Time, Value, State = #store{metrics = Metrics}) ->
    #metrics{time_to_counters = TimeToCounters,
             dirty_counters = DirtyCounters} = Metrics,
    Key = {Time - (round(Time) rem telemetry_config:interval_seconds()), Name},

    State#store{metrics = Metrics#metrics{
        time_to_counters = orddict:update_counter(Key, Value, TimeToCounters),
        dirty_counters = sets:add_element(Key, DirtyCounters)}}.


-spec(handle_merge(Metrics :: metrics(), State :: state()) -> NewState :: state()).
346 | handle_merge(#metrics{time_to_histos = TimeToHistosIn, 347 | time_to_counters = TimeToCountersIn, 348 | dirty_histos = DirtyHistosIn, 349 | dirty_counters = DirtyCountersIn}, 350 | _State = #store{metrics = Metrics, metric_funs = MetricFuns}) -> 351 | 352 | #metrics{time_to_histos = TimeToHistos, 353 | time_to_counters = TimeToCounters, 354 | dirty_histos = DirtyHistos, 355 | dirty_counters = DirtyCounters} = Metrics, 356 | MergedDirtyHistos = sets:union(DirtyHistosIn, DirtyHistos), 357 | MergedDirtyCounters = sets:union(DirtyCountersIn, DirtyCounters), 358 | MergedCounters = merge_counters(TimeToCountersIn, TimeToCounters), 359 | MergedHistos = merge_histos(TimeToHistosIn, TimeToHistos), 360 | MergedMetrics = #metrics{time_to_histos = MergedHistos, 361 | time_to_counters = MergedCounters, 362 | dirty_histos = MergedDirtyHistos, 363 | dirty_counters = MergedDirtyCounters}, 364 | maybe_push_to_opentsdb(MergedMetrics, DirtyHistosIn, DirtyCountersIn), 365 | #store{metrics = MergedMetrics, metric_funs = MetricFuns}. 366 | 367 | maybe_push_to_opentsdb(MergedMetrics, DirtyHistosIn, DirtyCountersIn) -> 368 | case telemetry_config:opentsdb_endpoint() of 369 | false -> ok; 370 | _ -> 371 | submit_to_opentsdb(MergedMetrics#metrics{dirty_histos = DirtyHistosIn, 372 | dirty_counters = DirtyCountersIn}), 373 | ok 374 | end. 375 | -------------------------------------------------------------------------------- /src/telemetry_sup.erl: -------------------------------------------------------------------------------- 1 | -module(telemetry_sup). 2 | -behaviour(supervisor). 3 | -export([start_link/0, init/1]). 4 | 5 | -define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}). 6 | 7 | start_link() -> 8 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 
9 | 10 | init([]) -> 11 | {ok, {{one_for_one, 5, 10}, [ 12 | ?CHILD(telemetry_store, worker), 13 | ?CHILD(telemetry_receiver, worker), 14 | ?CHILD(telemetry_forwarder, worker), 15 | ?CHILD(gen_opentsdb, worker) 16 | ]}}. 17 | -------------------------------------------------------------------------------- /test/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesosphere-backup/telemetry-net/1b38ce3a44d5e7f65f1c21d2890c4ed2e397ee27/test/.gitkeep --------------------------------------------------------------------------------