├── test ├── test_helper.exs └── telemetry_metrics_cloudwatch_test.exs ├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .formatter.exs ├── .gitignore ├── lib ├── telemetry_metrics_cloudwatch │ ├── cloudwatch.ex │ └── cache.ex └── telemetry_metrics_cloudwatch.ex ├── LICENSE ├── mix.exs ├── CHANGELOG.md ├── mix.lock ├── README.md └── .credo.exs /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | Logger.configure(level: :info) 2 | ExUnit.start() 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | liberapay: bmuller 3 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 
23 | telemetry_metrics_cloudwatch-*.tar 24 | -------------------------------------------------------------------------------- /lib/telemetry_metrics_cloudwatch/cloudwatch.ex: -------------------------------------------------------------------------------- 1 | defmodule TelemetryMetricsCloudwatch.Cloudwatch do 2 | @moduledoc """ 3 | Functions for interacting with [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/). 4 | """ 5 | 6 | require Logger 7 | 8 | def send_metrics(metric_data, namespace) do 9 | # gzip, since we've got a max 40 KB payload 10 | metric_data 11 | |> ExAws.Cloudwatch.put_metric_data(namespace) 12 | |> Map.put(:content_encoding, "gzip") 13 | |> ExAws.request() 14 | |> log_result(metric_data, namespace) 15 | end 16 | 17 | defp log_result({:ok, _resp}, metric_data, namespace) do 18 | msg = 19 | "#{__MODULE__} pushed #{length(metric_data)} metrics to cloudwatch in namespace #{namespace}" 20 | 21 | Logger.debug(msg) 22 | end 23 | 24 | defp log_result({:error, resp}, metric_data, namespace) do 25 | msg = 26 | "#{__MODULE__} failed to push metrics #{inspect(metric_data)} to cloudwatch in namespace #{namespace}: #{inspect(resp)}" 27 | 28 | Logger.error(msg) 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Brian Muller 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or 
substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: push 3 | 4 | env: 5 | LATEST_ELIXIR_VERSION: 1.18.x 6 | LATEST_OTP_VERSION: 28.x 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | include: 14 | - pair: 15 | otp: 28.x 16 | elixir: 1.18.x 17 | - pair: 18 | otp: 27.x 19 | elixir: 1.18.x 20 | - pair: 21 | otp: 27.x 22 | elixir: 1.17.x 23 | - pair: 24 | otp: 26.x 25 | elixir: 1.17.x 26 | - pair: 27 | otp: 26.x 28 | elixir: 1.16.x 29 | - pair: 30 | otp: 25.x 31 | elixir: 1.15.x 32 | - pair: 33 | otp: 24.x 34 | elixir: 1.15.x 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | - uses: erlef/setup-beam@v1 39 | with: 40 | elixir-version: ${{ matrix.pair.elixir }} 41 | otp-version: ${{ matrix.pair.otp }} 42 | - run: mix deps.get 43 | - run: mix format --check-formatted 44 | - run: mix credo 45 | - run: mix test 46 | 47 | dialyzer: 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v4 51 | - uses: erlef/setup-beam@v1 52 | with: 53 | elixir-version: ${{ env.LATEST_ELIXIR_VERSION }} 54 | otp-version: ${{ env.LATEST_OTP_VERSION }} 55 | - uses: actions/cache@v4 56 | id: mix-cache # id to use in retrieve action 57 | with: 58 | path: | 59 | _build 60 | deps 61 | priv/plts 62 | key: 
dialyzer-cache-v0-${{ env.LATEST_ELIXIR_VERSION }}-${{ env.LATEST_OTP_VERSION }}-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} 63 | - if: steps.mix-cache.outputs.cache-hit != 'true' 64 | run: mix do deps.get, deps.compile 65 | - run: mix dialyzer 66 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule TelemetryMetricsCloudwatch.MixProject do 2 | use Mix.Project 3 | 4 | @source_url "https://github.com/bmuller/telemetry_metrics_cloudwatch" 5 | @version "1.1.1" 6 | 7 | def project do 8 | [ 9 | app: :telemetry_metrics_cloudwatch, 10 | aliases: aliases(), 11 | version: @version, 12 | elixir: "~> 1.15", 13 | start_permanent: Mix.env() == :prod, 14 | deps: deps(), 15 | description: "Provides an AWS CloudWatch reporter for Telemetry Metrics definitions.", 16 | package: package(), 17 | source_url: @source_url, 18 | docs: docs() 19 | ] 20 | end 21 | 22 | def cli do 23 | [preferred_envs: [test: :test, "ci.test": :test]] 24 | end 25 | 26 | defp docs do 27 | [ 28 | source_ref: "v#{@version}", 29 | source_url: @source_url, 30 | main: "TelemetryMetricsCloudwatch", 31 | formatters: ~w(html) 32 | ] 33 | end 34 | 35 | defp aliases do 36 | [ 37 | "ci.test": [ 38 | "format --check-formatted", 39 | "test", 40 | "credo" 41 | ] 42 | ] 43 | end 44 | 45 | def package do 46 | [ 47 | files: ["lib", "mix.exs", "README*", "LICENSE*"], 48 | maintainers: ["Brian Muller"], 49 | licenses: ["MIT"], 50 | links: %{ 51 | "GitHub" => @source_url, 52 | "Changelog" => "#{@source_url}/blob/master/CHANGELOG.md" 53 | } 54 | ] 55 | end 56 | 57 | # Run "mix help compile.app" to learn about applications. 58 | def application do 59 | [ 60 | extra_applications: [:logger] 61 | ] 62 | end 63 | 64 | # Run "mix help deps" to learn about dependencies.
65 | defp deps do 66 | [ 67 | {:ex_aws_cloudwatch, "~> 2.0"}, 68 | {:ex_doc, "~> 0.28", only: :dev}, 69 | {:credo, "~> 1.6", only: [:dev, :test], runtime: false}, 70 | {:dialyxir, "~> 1.4", only: :dev, runtime: false}, 71 | {:telemetry_metrics, "~> 1.0"} 72 | ] 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog for v1.x 2 | 3 | ## v1.1.1 (2025-08-13) 4 | 5 | ### Enhancements 6 | 7 | * Support event filtering based on measurements (#13 h/t @jaminthorns) 8 | 9 | ## v1.1.0 (2025-06-27) 10 | 11 | ### Deprecations 12 | 13 | * Removed support for Elixir before version 1.15 14 | 15 | ## v1.0.0 (2024-07-09) 16 | 17 | API is considered stable. 18 | 19 | ### Enhancements 20 | 21 | * Metrics are now sent on terminate (#9 h/t @devstopfix) 22 | * Core dependency telemetry_metrics updated to stable version 1.0.0 (#11 h/t @aloukissas) 23 | 24 | ## v0.3.4 (2023-06-30) 25 | 26 | ### Enhancements 27 | 28 | * Updates to remove warnings for Elixir 1.15 29 | 30 | ## v0.3.3 (2022-09-22) 31 | 32 | ### Enhancements 33 | 34 | * Support for storage resolution argument to handle high resolution (#6) 35 | * Support for the `:sample_rate` option (#7) 36 | 37 | ## v0.3.2 (2022-05-11) 38 | 39 | ### Enhancements 40 | 41 | * Made call to attached telemetry event function more performant (#5) 42 | 43 | ## v0.3.1 (2020-10-15) 44 | 45 | ### Bug Fixes 46 | 47 | * Fixed `System.stacktrace()` deprecation warning 48 | 49 | ## v0.3.0 (2020-10-04) 50 | 51 | ### Enhancements 52 | 53 | * Support was added for the new `:keep` and `:drop` options in [Telemetry.Metrics 0.5.0](https://github.com/beam-telemetry/telemetry_metrics/blob/master/CHANGELOG.md#050) 54 | 55 | # Changelog for v0.2.x 56 | 57 | ## v0.2.4 (2020-10-04) 58 | 59 | ### Enhancements 60 | 61 | * Support the `Sum` metric type 62 | 63 | ## v0.2.3 (2020-09-22) 64 | 65 | ### Bug Fixes 66 |
67 | * Fixed issue where the `:push_interval` option was ignored when less than 60k milliseconds 68 | 69 | ## v0.2.2 (2020-05-08) 70 | 71 | ### Deprecations 72 | 73 | * When metric values are nil, a `Logger` debug message is now used instead of a warning message. 74 | This is due to an increase in libraries sometimes sending `nil`s as values (Ecto, for instance) 75 | 76 | ## v0.2.1 (2020-01-03) 77 | 78 | ### Bug Fixes 79 | 80 | * Fixed typos in debug message 81 | * Fixed typos in docs 82 | * Fixed syntax of example in README 83 | 84 | ## v0.2.0 (2019-09-17) 85 | 86 | ### Enhancements 87 | 88 | * Support multiple metric types for the same metric name 89 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 3 | "credo": {:hex, :credo, "1.7.12", "9e3c20463de4b5f3f23721527fcaf16722ec815e70ff6c60b86412c695d426c1", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8493d45c656c5427d9c729235b99d498bd133421f3e0a683e5c1b561471291e5"}, 4 | "dialyxir": {:hex, :dialyxir, "1.4.5", "ca1571ac18e0f88d4ab245f0b60fa31ff1b12cbae2b11bd25d207f865e8ae78a", [:mix], [{:erlex, ">= 0.2.7", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b0fb08bb8107c750db5c0b324fa2df5ceaa0f9307690ee3c1f6ba5b9eb5d35c3"}, 5 | "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, 6 | "erlex": {:hex, :erlex, "0.2.7", 
"810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, 7 | "ex_aws": {:hex, :ex_aws, "2.5.10", "d3f8ca8959dad6533a2a934dfdf380df1b1bef425feeb215a47a5176dee8736c", [:mix], [{:configparser_ex, "~> 5.0", [hex: :configparser_ex, repo: "hexpm", optional: true]}, {:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:jsx, "~> 2.8 or ~> 3.0", [hex: :jsx, repo: "hexpm", optional: true]}, {:mime, "~> 1.2 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:req, "~> 0.5.10 or ~> 0.6 or ~> 1.0", [hex: :req, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "88fcd9cc1b2e0fcea65106bdaa8340ac56c6e29bf72f46cf7ef174027532d3da"}, 8 | "ex_aws_cloudwatch": {:hex, :ex_aws_cloudwatch, "2.0.4", "f23ac70de91402e14b1393a349e1aba30478cf8af864687eeae05dc968f87acf", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}], "hexpm", "faa5e5ba809c083b0460cc843b5082b200bb37d2abb25becb62c314b2796ae58"}, 9 | "ex_doc": {:hex, :ex_doc, "0.38.2", "504d25eef296b4dec3b8e33e810bc8b5344d565998cd83914ffe1b8503737c02", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "732f2d972e42c116a70802f9898c51b54916e542cc50968ac6980512ec90f42b"}, 10 | "file_system": {:hex, :file_system, "1.1.0", "08d232062284546c6c34426997dd7ef6ec9f8bbd090eb91780283c9016840e8f", 
[:mix], [], "hexpm", "bfcf81244f416871f2a2e15c1b515287faa5db9c6bcf290222206d120b3d43f6"}, 11 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 12 | "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, 13 | "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, 14 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"}, 15 | "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, 16 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, 17 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 18 | "telemetry_metrics": {:hex, :telemetry_metrics, "1.1.0", 
"5bd5f3b5637e0abea0426b947e3ce5dd304f8b3bc6617039e2b5a008adc02f8f", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e7b79e8ddfde70adb6db8a6623d1778ec66401f366e9a8f5dd0955c56bc8ce67"}, 19 | } 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TelemetryMetricsCloudwatch 2 | [![Build Status](https://github.com/bmuller/telemetry_metrics_cloudwatch/actions/workflows/ci.yml/badge.svg)](https://github.com/bmuller/telemetry_metrics_cloudwatch/actions/workflows/ci.yml) 3 | [![Hex pm](http://img.shields.io/hexpm/v/telemetry_metrics_cloudwatch.svg?style=flat)](https://hex.pm/packages/telemetry_metrics_cloudwatch) 4 | [![API Docs](https://img.shields.io/badge/api-docs-lightgreen.svg?style=flat)](https://hexdocs.pm/telemetry_metrics_cloudwatch/) 5 | 6 | This is a [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/) Reporter for [`Telemetry.Metrics`](https://github.com/beam-telemetry/telemetry_metrics) definitions. 7 | 8 | ## Installation 9 | 10 | To install `telemetry_metrics_cloudwatch`, just add an entry to your `mix.exs`: 11 | 12 | ```elixir 13 | def deps do 14 | [ 15 | {:telemetry_metrics_cloudwatch, "~> 1.1"} 16 | ] 17 | end 18 | ``` 19 | 20 | (Check [Hex](https://hex.pm/packages/telemetry_metrics_cloudwatch) to make sure you're using an up-to-date version number.) 21 | 22 | ## Usage 23 | 24 | Provide a list of metric definitions to the `init/2` function. It's recommended to 25 | run TelemetryMetricsCloudwatch under a supervision tree, usually under Application. 26 | 27 | ```elixir 28 | def start(_type, _args) do 29 | # List all child processes to be supervised 30 | children = [ 31 | {TelemetryMetricsCloudwatch, [metrics: metrics()]} 32 | ... 
33 | ] 34 | 35 | opts = [strategy: :one_for_one, name: ExampleApp.Supervisor] 36 | Supervisor.start_link(children, opts) 37 | end 38 | 39 | defp metrics do 40 | [ 41 | counter("http.request.count"), 42 | last_value("vm.memory.total", unit: :byte), 43 | last_value("vm.total_run_queue_lengths.total") 44 | ] 45 | end 46 | ``` 47 | 48 | You can also provide options for the namespace used in CloudWatch (by default, "Telemetry") 49 | and the minimum frequency (in milliseconds) with which data will be posted (see section 50 | below for posting rules). For instance: 51 | 52 | ```elixir 53 | ... 54 | children = [ 55 | {TelemetryMetricsCloudwatch, metrics: metrics(), namespace: "Backend", push_interval: 30_000} 56 | ] 57 | ... 58 | ``` 59 | 60 | ### Telemetry.Metrics Types Supported 61 | 62 | `TelemetryMetricsCloudwatch` supports 4 of the [Metrics](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#module-metrics): 63 | 64 | * [Counter](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#counter/2): 65 | Counter metric keeps track of the total number of specific events emitted. 66 | * [LastValue](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#last_value/2): 67 | Last value keeps track of the selected measurement found in the most recent event. 68 | * [Summary](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#summary/2): Summary 69 | aggregates measurement's values into statistics, e.g. minimum and maximum, mean, or percentiles. 70 | This sends every measurement to CloudWatch. 71 | * [Sum](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#sum/2): Sum metric keeps track 72 | of the sum of selected measurement's values carried by specific events. If you are using Summary 73 | for a metric already, then CloudWatch can calculate a Sum using that Summary metric. If you 74 | only need a Sum (and no other summary metrics) then use this Sum metric instead. 
75 | 76 | These metrics are sent to CloudWatch based on the rules described below. 77 | 78 | To write [high-resolution metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics=), supply the `:storage_resolution` option (which can be the default of `:standard` or `:high`): 79 | 80 | ```elixir 81 | counter( 82 | "http.request.count", 83 | reporter_options: [storage_resolution: :high] 84 | ) 85 | ``` 86 | 87 | ### When Data is Sent 88 | 89 | CloudWatch has [certain constraints](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html) 90 | on the number of metrics that can be sent up at any given time. `TelemetryMetricsCloudwatch` 91 | will send accumulated metric data at least every minute (configurable by the `:push_interval` 92 | option) or when the data cache has reached the maximum size that CloudWatch will accept. 93 | 94 | ### Event Sampling 95 | You can optionally send only a portion of reported events to CloudWatch via the `:sample_rate` option. This parameter should contain a value between 0.0 and 1.0 (inclusive), and represents the proportion of events that will be reported. For instance, a sample rate value of 0.0 will ensure no events are sent, whereas a sample rate value of 1.0 will ensure all events are sent. A sample rate of 0.25 will result in roughly a quarter of all events being sent. 96 | 97 | *Note:* Using a sampling rate of less than 1.0 will result in incorrect values for the "Sum" and "Counter" metric types (and some of the values like min/max for the "Summary" metric type) due to the under-reporting of events. You should only use a sampling rate of less than 1.0 if you have a guaranteed high rate of events and only care about summary statistics like averages and percentiles.
98 | 99 | ### Units 100 | 101 | In order for a metric's unit to be reported in the CloudWatch UI, it must be one of the following values: 102 | 103 | * Time units: `:second`, `:microsecond`, `:millisecond` 104 | * Byte sizes: `:byte`, `:kilobyte`, `:megabyte`, `:gigabyte`, `:terabyte` 105 | * Bit sizes: `:bit`, `:kilobit`, `:megabit`, `:gigabit`, `:terabit` 106 | 107 | For `Telemetry.Metrics.Counter`s, the unit will always be `:count`. Any other unit will be treated as `nil`. 108 | 109 | ### ExAws Setup 110 | 111 | [`ExAws`](https://hexdocs.pm/ex_aws/ExAws.html) is the library used to send metrics to CloudWatch. Make sure your 112 | [keys are configured](https://hexdocs.pm/ex_aws/ExAws.html#module-aws-key-configuration) and that they have the 113 | [correct permissions](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/permissions-reference-cw.html) of `cloudwatch:PutMetricData`. 114 | 115 | Up to 10 tags are sent up to AWS as dimensions for a given metric. 116 | 117 | ## Running Tests 118 | 119 | To run tests: 120 | 121 | ```shell 122 | $ mix test 123 | ``` 124 | 125 | ## Reporting Issues 126 | 127 | Please report all issues [on GitHub](https://github.com/bmuller/telemetry_metrics_cloudwatch/issues). 128 | -------------------------------------------------------------------------------- /.credo.exs: -------------------------------------------------------------------------------- 1 | # This file contains the configuration for Credo and you are probably reading 2 | # this after creating it with `mix credo.gen.config`. 3 | # 4 | # If you find anything wrong or unclear in this file, please report an 5 | # issue on GitHub: https://github.com/rrrene/credo/issues 6 | # 7 | %{ 8 | # 9 | # You can have as many configs as you like in the `configs:` field. 10 | configs: [ 11 | %{ 12 | # 13 | # Run any config using `mix credo -C `. If no config name is given 14 | # "default" is used.
15 | # 16 | name: "default", 17 | # 18 | # These are the files included in the analysis: 19 | files: %{ 20 | # 21 | # You can give explicit globs or simply directories. 22 | # In the latter case `**/*.{ex,exs}` will be used. 23 | # 24 | included: [ 25 | "lib/", 26 | "src/", 27 | "test/", 28 | "web/", 29 | "apps/*/lib/", 30 | "apps/*/src/", 31 | "apps/*/test/", 32 | "apps/*/web/" 33 | ], 34 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] 35 | }, 36 | # 37 | # Load and configure plugins here: 38 | # 39 | plugins: [], 40 | # 41 | # If you create your own checks, you must specify the source files for 42 | # them here, so they can be loaded by Credo before running the analysis. 43 | # 44 | requires: [], 45 | # 46 | # If you want to enforce a style guide and need a more traditional linting 47 | # experience, you can change `strict` to `true` below: 48 | # 49 | strict: true, 50 | # 51 | # To modify the timeout for parsing files, change this value: 52 | # 53 | parse_timeout: 5000, 54 | # 55 | # If you want to use uncolored output by default, you can change `color` 56 | # to `false` below: 57 | # 58 | color: true, 59 | # 60 | # You can customize the parameters of any check by adding a second element 61 | # to the tuple. 
62 | # 63 | # To disable a check put `false` as second element: 64 | # 65 | # {Credo.Check.Design.DuplicatedCode, false} 66 | # 67 | checks: [ 68 | # 69 | ## Consistency Checks 70 | # 71 | {Credo.Check.Consistency.ExceptionNames, []}, 72 | {Credo.Check.Consistency.LineEndings, []}, 73 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 74 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 75 | {Credo.Check.Consistency.SpaceInParentheses, []}, 76 | {Credo.Check.Consistency.TabsOrSpaces, []}, 77 | 78 | # 79 | ## Design Checks 80 | # 81 | # You can customize the priority of any check 82 | # Priority values are: `low, normal, high, higher` 83 | # 84 | {Credo.Check.Design.AliasUsage, 85 | [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, 86 | # You can also customize the exit_status of each check. 87 | # If you don't want TODO comments to cause `mix credo` to fail, just 88 | # set this value to 0 (zero). 89 | # 90 | {Credo.Check.Design.TagTODO, [exit_status: 2]}, 91 | {Credo.Check.Design.TagFIXME, []}, 92 | 93 | # 94 | ## Readability Checks 95 | # 96 | {Credo.Check.Readability.AliasOrder, []}, 97 | {Credo.Check.Readability.FunctionNames, []}, 98 | {Credo.Check.Readability.LargeNumbers, []}, 99 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 100 | {Credo.Check.Readability.ModuleAttributeNames, []}, 101 | {Credo.Check.Readability.ModuleDoc, false}, 102 | {Credo.Check.Readability.ModuleNames, []}, 103 | {Credo.Check.Readability.ParenthesesInCondition, []}, 104 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, 105 | {Credo.Check.Readability.PredicateFunctionNames, []}, 106 | {Credo.Check.Readability.PreferImplicitTry, []}, 107 | {Credo.Check.Readability.RedundantBlankLines, []}, 108 | {Credo.Check.Readability.Semicolons, []}, 109 | {Credo.Check.Readability.SpaceAfterCommas, []}, 110 | {Credo.Check.Readability.StringSigils, []}, 111 | {Credo.Check.Readability.TrailingBlankLine, []}, 112 | 
{Credo.Check.Readability.TrailingWhiteSpace, []}, 113 | {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, 114 | {Credo.Check.Readability.VariableNames, []}, 115 | 116 | # 117 | ## Refactoring Opportunities 118 | # 119 | {Credo.Check.Refactor.CondStatements, []}, 120 | {Credo.Check.Refactor.CyclomaticComplexity, [max_complexity: 11]}, 121 | {Credo.Check.Refactor.FunctionArity, []}, 122 | {Credo.Check.Refactor.LongQuoteBlocks, [max_line_count: 170]}, 123 | {Credo.Check.Refactor.MapInto, false}, 124 | {Credo.Check.Refactor.MatchInCondition, []}, 125 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 126 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 127 | {Credo.Check.Refactor.Nesting, []}, 128 | {Credo.Check.Refactor.UnlessWithElse, []}, 129 | {Credo.Check.Refactor.WithClauses, []}, 130 | 131 | # 132 | ## Warnings 133 | # 134 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 135 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 136 | {Credo.Check.Warning.IExPry, []}, 137 | {Credo.Check.Warning.IoInspect, []}, 138 | {Credo.Check.Warning.LazyLogging, false}, 139 | {Credo.Check.Warning.MixEnv, []}, 140 | {Credo.Check.Warning.OperationOnSameValues, []}, 141 | {Credo.Check.Warning.OperationWithConstantResult, []}, 142 | {Credo.Check.Warning.RaiseInsideRescue, []}, 143 | {Credo.Check.Warning.UnusedEnumOperation, []}, 144 | {Credo.Check.Warning.UnusedFileOperation, []}, 145 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 146 | {Credo.Check.Warning.UnusedListOperation, []}, 147 | {Credo.Check.Warning.UnusedPathOperation, []}, 148 | {Credo.Check.Warning.UnusedRegexOperation, []}, 149 | {Credo.Check.Warning.UnusedStringOperation, []}, 150 | {Credo.Check.Warning.UnusedTupleOperation, []}, 151 | {Credo.Check.Warning.UnsafeExec, []}, 152 | 153 | # 154 | # Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`) 155 | 156 | # 157 | # Controversial and experimental checks (opt-in, just replace `false` with `[]`) 158 | 
# 159 | {Credo.Check.Readability.StrictModuleLayout, false}, 160 | {Credo.Check.Consistency.MultiAliasImportRequireUse, false}, 161 | {Credo.Check.Consistency.UnusedVariableNames, false}, 162 | {Credo.Check.Design.DuplicatedCode, false}, 163 | {Credo.Check.Readability.AliasAs, false}, 164 | {Credo.Check.Readability.MultiAlias, false}, 165 | {Credo.Check.Readability.Specs, false}, 166 | {Credo.Check.Readability.SinglePipe, []}, 167 | {Credo.Check.Readability.WithCustomTaggedTuple, false}, 168 | {Credo.Check.Refactor.ABCSize, false}, 169 | {Credo.Check.Refactor.AppendSingleItem, false}, 170 | {Credo.Check.Refactor.DoubleBooleanNegation, []}, 171 | {Credo.Check.Refactor.ModuleDependencies, false}, 172 | {Credo.Check.Refactor.NegatedIsNil, []}, 173 | {Credo.Check.Refactor.PipeChainStart, []}, 174 | {Credo.Check.Refactor.VariableRebinding, []}, 175 | {Credo.Check.Warning.MapGetUnsafePass, false}, 176 | {Credo.Check.Warning.UnsafeToAtom, []}, 177 | {Credo.Check.Warning.LeakyEnvironment, []} 178 | 179 | # 180 | # Custom checks can be created using `mix credo.gen.check`. 181 | # 182 | ] 183 | } 184 | ] 185 | } 186 | -------------------------------------------------------------------------------- /lib/telemetry_metrics_cloudwatch/cache.ex: -------------------------------------------------------------------------------- 1 | defmodule TelemetryMetricsCloudwatch.Cache do 2 | @moduledoc """ 3 | State for `GenServer`. Nothing here should be called directly outside of the 4 | `TelemetryMetricsCloudwatch` module. 
5 | """ 6 | 7 | defstruct [ 8 | :metric_names, 9 | :namespace, 10 | :last_run, 11 | :push_interval, 12 | :sample_rate, 13 | counters: %{}, 14 | sums: %{}, 15 | last_values: %{}, 16 | summaries: %{} 17 | ] 18 | 19 | require Logger 20 | 21 | alias Telemetry.Metrics.{Counter, LastValue, Sum, Summary} 22 | alias __MODULE__ 23 | 24 | # the only valid units are: Seconds, Microseconds, Milliseconds, Bytes, Kilobytes, 25 | # Megabytes, Gigabytes, Terabytes, Bits, Kilobits, Megabits, Gigabits, Terabits 26 | @valid_units ~w(second microsecond millisecond byte kilobyte megabyte gigabyte 27 | terabyte bit kilobit megabit gigabit terabit)a 28 | 29 | @metric_names ~w(summaries counters last_values sums)a 30 | 31 | def push_measurement(cache, measurements, metadata, metric) do 32 | measurement = extract_measurement(metric, measurements, metadata) 33 | tags = extract_tags(metric, metadata) 34 | 35 | cond do 36 | is_nil(measurement) -> 37 | Logger.debug("Ignoring nil value for #{inspect(metric)}") 38 | cache 39 | 40 | not keep?(metric, metadata, measurements) -> 41 | Logger.debug("Dropping value for #{inspect(metric)}") 42 | cache 43 | 44 | is_number(measurement) -> 45 | sname = extract_string_name(metric) 46 | 47 | msg = 48 | "#{sname}[#{metric.__struct__}] received with value #{measurement} and tags #{inspect(tags)}" 49 | 50 | Logger.debug(msg) 51 | coalesce(cache, metric, measurement, tags) 52 | 53 | true -> 54 | Logger.warning( 55 | "Ignoring non-numeric value for #{inspect(metric)}: #{inspect(measurement)}" 56 | ) 57 | 58 | cache 59 | end 60 | rescue 61 | e -> 62 | Logger.error([ 63 | "Could not process metric #{inspect(metric)}", 64 | Exception.format(:error, e, __STACKTRACE__) 65 | ]) 66 | 67 | cache 68 | end 69 | 70 | # if the measurement is nil 71 | defp coalesce(cache, _metric, nil, _tags), do: cache 72 | 73 | defp coalesce(%Cache{counters: counters} = cache, %Counter{} = metric, _measurement, tags) do 74 | counters = Map.update(counters, {metric, tags}, 1, &(&1 + 1)) 75 |
Map.put(cache, :counters, counters) 76 | end 77 | 78 | defp coalesce(%Cache{sums: sums} = cache, %Sum{} = metric, measurement, tags) do 79 | sums = Map.update(sums, {metric, tags}, measurement, &(&1 + measurement)) 80 | Map.put(cache, :sums, sums) 81 | end 82 | 83 | defp coalesce( 84 | %Cache{last_values: last_values} = cache, 85 | %LastValue{} = metric, 86 | measurement, 87 | tags 88 | ) do 89 | lvs = Map.put(last_values, {metric, tags}, measurement) 90 | Map.put(cache, :last_values, lvs) 91 | end 92 | 93 | defp coalesce(%Cache{summaries: summaries} = cache, %Summary{} = metric, measurement, tags) do 94 | summaries = Map.update(summaries, {metric, tags}, [measurement], &(&1 ++ [measurement])) 95 | Map.put(cache, :summaries, summaries) 96 | end 97 | 98 | # no idea how to handle this metric 99 | defp coalesce(cache, _metric, _measurement, _tags), do: cache 100 | 101 | def metric_count(%Cache{} = cache) do 102 | cache 103 | |> Map.take(@metric_names) 104 | |> Map.values() 105 | |> Enum.map(&map_size/1) 106 | |> Enum.sum() 107 | end 108 | 109 | # If summaries are empty, then the max values for last value or count metrics would 110 | # just be 1 if there are any keys with values otherwise 0 111 | def max_values_per_metric(%Cache{summaries: summaries} = cache) when map_size(summaries) == 0, 112 | do: min(metric_count(cache), 1) 113 | 114 | def max_values_per_metric(%Cache{summaries: summaries}) do 115 | # Summaries are the only ones that could have more than one 116 | Enum.reduce(Map.values(summaries), 0, fn measurements, bigsofar -> 117 | max(bigsofar, length(measurements)) 118 | end) 119 | end 120 | 121 | defp extract_string_name(%{name: name}), do: Enum.map_join(name, ".", &to_string/1) 122 | 123 | defp extract_measurement(metric, measurements, metadata) do 124 | case metric.measurement do 125 | fun when is_function(fun, 1) -> fun.(measurements) 126 | fun when is_function(fun, 2) -> fun.(measurements, metadata) 127 | key -> measurements[key] 128 | end 129 | end 130 | 131 | defp keep?(%{keep: func}, metadata, _measurements) when
# Applies the metric's optional `keep` predicate. It may take the metadata
# alone or the metadata and the measurements; without one, every event is kept.
defp keep?(%{keep: func}, metadata, _measurements) when is_function(func, 1),
  do: func.(metadata)

defp keep?(%{keep: func}, metadata, measurements) when is_function(func, 2),
  do: func.(metadata, measurements)

defp keep?(_metric, _metadata, _measurements), do: true

# extract up to 10 tags, and don't include any empty values
# because cloudwatch won't handle any empty dimensions
defp extract_tags(metric, metadata) do
  metadata
  |> metric.tag_values.()
  |> Map.take(metric.tags)
  |> Enum.map(fn {key, value} -> {key, to_string(value)} end)
  # nil stringifies to "", so nil-valued tags are dropped here as well
  |> Enum.reject(fn {_key, value} -> value == "" end)
  |> Enum.take(10)
end

@doc """
Logs a warning for every metric whose type this reporter cannot handle.

Only counters, summaries, last values and sums are supported. Always returns
`nil`; unsupported metrics are reported, not rejected.
"""
def validate_metrics(metrics) when is_list(metrics) do
  Enum.each(metrics, fn metric ->
    if metric.__struct__ not in [Counter, Summary, LastValue, Sum] do
      Logger.warning("#{metric.__struct__} is not supported by the Reporter #{__MODULE__}")
    end
  end)

  nil
end

@doc """
Empties the cache and returns `{emptied_cache, metric_data}`.

`metric_data` is a list of keyword lists, one per cached `{metric, tags}`
entry, with the keys `:metric_name`, `:value` (`:values` for summaries),
`:dimensions`, `:unit` and `:storage_resolution`. Entries are emitted type by
type in the order given by `@metric_names`.
"""
def pop_metrics(cache),
  do: Enum.reduce(@metric_names, {cache, []}, &pop/2)

# Drains one bucket of the cache, appending its data to the accumulated items.
defp pop(kind, {cache, items}) when kind in [:summaries, :counters, :sums, :last_values] do
  data =
    for {{metric, tags}, measured} <- Map.fetch!(cache, kind) do
      datum(kind, metric, tags, measured)
    end

  {%{cache | kind => %{}}, items ++ data}
end

# Builds the keyword list for one entry. The name suffix keeps the metric
# types apart under one base name; counters always use the "Count" unit.
defp datum(kind, metric, tags, measured) do
  {suffix, value_key, unit} =
    case kind do
      :summaries -> {".summary", :values, get_unit(metric.unit)}
      :counters -> {".count", :value, "Count"}
      :sums -> {".sum", :value, get_unit(metric.unit)}
      :last_values -> {".last_value", :value, get_unit(metric.unit)}
    end

  [
    {:metric_name, extract_string_name(metric) <> suffix},
    {value_key, measured},
    {:dimensions, tags},
    {:unit, unit},
    {:storage_resolution, get_storage_resolution(metric.reporter_options)}
  ]
end

# Maps a unit atom to its capitalised plural name (:byte -> "Bytes").
# Anything outside @valid_units becomes "None".
defp get_unit(unit) when unit in @valid_units do
  capitalized = unit |> to_string() |> String.capitalize()
  capitalized <> "s"
end

defp get_unit(_unit), do: "None"

# Translates the `:storage_resolution` reporter option into a number:
# :high -> 1, :standard (the default) -> 60. Raises on anything else.
defp get_storage_resolution(reporter_options) do
  case Keyword.get(reporter_options, :storage_resolution, :standard) do
    :high ->
      1

    :standard ->
      60

    other ->
      raise "Unsupported storage_resolution: #{inspect(other)}"
  end
end
5 | 6 | Provide a list of metric definitions to the `start_link/1` function. It's recommended to 7 | run TelemetryMetricsCloudwatch under a supervision tree, usually under Application. 8 | 9 | def start(_type, _args) do 10 | # List all child processes to be supervised 11 | children = [ 12 | {TelemetryMetricsCloudwatch, [metrics: metrics()]} 13 | ... 14 | ] 15 | 16 | opts = [strategy: :one_for_one, name: ExampleApp.Supervisor] 17 | Supervisor.start_link(children, opts) 18 | end 19 | 20 | defp metrics do 21 | [ 22 | counter("http.request.count"), 23 | last_value("vm.memory.total", unit: :byte), 24 | last_value("vm.total_run_queue_lengths.total") 25 | ] 26 | end 27 | 28 | You can also provide options for the namespace used in CloudWatch (by default, "Telemetry") 29 | and the minimum frequency (in milliseconds) with which data will be posted (see section 30 | below for posting rules). For instance: 31 | 32 | ... 33 | children = [ 34 | {TelemetryMetricsCloudwatch, metrics: metrics(), namespace: "Backend", push_interval: 30_000} 35 | ] 36 | ... 37 | 38 | ## Telemetry.Metrics Types Supported 39 | 40 | `TelemetryMetricsCloudwatch` supports 4 of the [Metrics](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#module-metrics): 41 | 42 | * [Counter](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#counter/2): 43 | Counter metric keeps track of the total number of specific events emitted. 44 | * [LastValue](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#last_value/2): 45 | Last value keeps track of the selected measurement found in the most recent event. 46 | * [Summary](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#summary/2): Summary 47 | aggregates measurement's values into statistics, e.g. minimum and maximum, mean, or percentiles. 48 | This sends every measurement to CloudWatch.
49 | * [Sum](https://hexdocs.pm/telemetry_metrics/Telemetry.Metrics.html#sum/2): Sum metric keeps track 50 | of the sum of selected measurement's values carried by specific events. If you are using Summary 51 | for a metric already, then CloudWatch can calculate a Sum using that Summary metric. If you 52 | only need a Sum (and no other summary metrics) then use this Sum metric instead. 53 | 54 | These metrics are sent to CloudWatch based on the rules described below. 55 | 56 | ## When Data is Sent 57 | 58 | Cloudwatch has [certain constraints](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html) 59 | on the number of metrics that can be sent up at any given time. `TelemetryMetricsCloudwatch` 60 | will send accumulated metric data at least every minute (configurable by the `:push_interval` 61 | option) or when the data cache has reached the maximum size that CloudWatch will accept. 62 | 63 | ### Event Sampling 64 | You can optionally send only a portion of reported events to CloudWatch via the `:sample_rate` option. This 65 | parameter should contain a value between 0.0 and 1.0 (inclusive), and represents the proportion of events that 66 | will be reported. For instance, a sample rate value of 0.0 will ensure no events are sent, whereas a sample 67 | rate value of 1.0 will ensure all events are sent. A sample rate of 0.25 will result in roughly a quarter of 68 | all events being sent. 69 | 70 | *Note:* Using a sampling rate of less than 1.0 will result in incorrect values for the "Sum" and "Counter" metric 71 | types (and some of the values like min/max for the "Summary" metric type) due to the under-reporting of events. 72 | You should only use a sampling rate of less than 1.0 if you have a guaranteed high rate of events and only care 73 | about summary statistics like averages and percentiles. 
74 | 75 | ## Units 76 | 77 | To be reported to CloudWatch, a metric's unit must be one of the following values: 78 | 79 | * Time units: `:second`, `:microsecond`, `:millisecond` 80 | * Byte sizes: `:byte`, `:kilobyte`, `:megabyte`, `:gigabyte`, `:terabyte` 81 | * Bit sizes: `:bit`, `:kilobit`, `:megabit`, `:gigabit`, `:terabit` 82 | 83 | For `Telemetry.Metrics.Counter`s, the unit sent to CloudWatch is always `Count`. Any unit not listed above is sent as `None`. 84 | 85 | ## Notes on AWS 86 | 87 | [`ExAws`](https://hexdocs.pm/ex_aws/ExAws.html) is the library used to send metrics to CloudWatch. Make sure your 88 | [keys are configured](https://hexdocs.pm/ex_aws/ExAws.html#module-aws-key-configuration) and that they have the 89 | [correct permissions](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/permissions-reference-cw.html) of `cloudwatch:PutMetricData`. 90 | 91 | Up to 10 tags are sent up to AWS as dimensions for a given metric. Every metric name will have a suffix added based on 92 | the metric type (CloudWatch doesn't allow different units / measurements with the same name). So, for instance, 93 | if your metrics are: 94 | 95 | summary("my_app.repo.query.total_time", unit: {:nanosecond, :millisecond}) 96 | counter("my_app.repo.query.total_time") 97 | 98 | Then the metric names in CloudWatch will be: 99 | 100 | * `my_app.repo.query.total_time.summary` (with all data points recorded) 101 | * `my_app.repo.query.total_time.count` (with the number of queries recorded) 102 | 103 | """ 104 | 105 | use GenServer 106 | require Logger 107 | alias TelemetryMetricsCloudwatch.{Cache, Cloudwatch} 108 | 109 | @doc """ 110 | Start the `TelemetryMetricsCloudwatch` `GenServer`. 111 | 112 | Available options: 113 | * `:name` - name of the reporter instance. 114 | * `:metrics` - a list of `Telemetry.Metrics` to track.
# https://docs.aws.amazon.com/cli/latest/reference/cloudwatch/put-metric-data.html
# We can publish up to 150 values per metric for up to 20 different metrics
@max_metrics_per_push 20
@max_values_per_metric 150

@doc """
Start the `TelemetryMetricsCloudwatch` `GenServer`.

Available options:
  * `:name` - name of the reporter instance.
  * `:metrics` - a list of `Telemetry.Metrics` to track. Required; an
    `ArgumentError` is raised when it is missing.
  * `:namespace` - Namespace to use in CloudWatch (default `"Telemetry"`)
  * `:push_interval` - The minimum interval that metrics are guaranteed to be pushed
    to cloudwatch (in milliseconds, default `60_000`)
  * `:sample_rate` - Sampling factor to apply to metrics. 0.0 will deny all events,
    1.0 will queue all events (default `1.0`).
"""
def start_link(opts) do
  server_opts = Keyword.take(opts, [:name])

  metrics =
    opts[:metrics] ||
      raise ArgumentError, "the :metrics option is required by #{inspect(__MODULE__)}"

  # Only warns about unsupported metric types; it never rejects the list.
  Cache.validate_metrics(metrics)

  init_arg = {
    metrics,
    Keyword.get(opts, :namespace, "Telemetry"),
    Keyword.get(opts, :push_interval, 60_000),
    Keyword.get(opts, :sample_rate, 1.0)
  }

  GenServer.start_link(__MODULE__, init_arg, server_opts)
end

# Attaches one telemetry handler per distinct event name and starts the
# periodic push timer. Exits are trapped so terminate/2 runs and can flush.
@impl true
def init({metrics, namespace, push_interval, sample_rate}) do
  Process.flag(:trap_exit, true)
  groups = Enum.group_by(metrics, & &1.event_name)

  for {event, event_metrics} <- groups do
    id = {__MODULE__, event, self()}
    :telemetry.attach(id, event, &__MODULE__.handle_telemetry_event/4, {self(), event_metrics})
  end

  state = %Cache{
    metric_names: Map.keys(groups),
    namespace: namespace,
    last_run: System.monotonic_time(:second),
    push_interval: push_interval,
    sample_rate: sample_rate
  }

  schedule_push_check(state)

  {:ok, state}
end

# Timer tick: re-arm the timer, then push if the cache is due.
@impl true
def handle_info(:push_check, state) do
  schedule_push_check(state)
  {:noreply, push_check(state)}
end

# One telemetry event, forwarded by handle_telemetry_event/4. Sampling is
# decided independently for each metric attached to the event, and the push
# rules are re-checked after every accepted measurement.
def handle_info({:handle_event, measurements, metadata, metrics}, state) do
  %Cache{sample_rate: sample_rate} = state

  new_state =
    Enum.reduce(metrics, state, fn metric, acc ->
      if sample_measurement?(sample_rate) do
        acc
        |> Cache.push_measurement(measurements, metadata, metric)
        |> push_check()
      else
        acc
      end
    end)

  {:noreply, new_state}
end

def handle_info(_message, state), do: {:noreply, state}

@doc """
Telemetry handler callback.

Runs in the process that emitted the event and only forwards the event to the
reporter process `pid`, where the aggregation takes place.
"""
def handle_telemetry_event(_event_name, measurements, metadata, {pid, metrics}),
  do: Kernel.send(pid, {:handle_event, measurements, metadata, metrics})

defp schedule_push_check(%Cache{push_interval: push_interval}),
  do: Process.send_after(self(), :push_check, push_interval)

# Pushes the cache when it is non-empty and older than the push interval, or
# when it has reached one of the size limits above. The limits are compared
# with >= so that a cache which somehow grew past a limit is still flushed.
defp push_check(%Cache{last_run: last_run, push_interval: push_interval} = state) do
  metric_count = Cache.metric_count(state)
  metric_age = System.monotonic_time(:second) - last_run
  # push_interval is in milliseconds, metric_age in seconds
  interval_seconds = push_interval / 1000

  cond do
    metric_count > 0 and metric_age >= interval_seconds ->
      push(state)

    metric_count >= @max_metrics_per_push ->
      push(state)

    Cache.max_values_per_metric(state) >= @max_values_per_metric ->
      push(state)

    true ->
      state
  end
end

# Empties the cache, sends its contents and records the time of this push.
defp push(%Cache{namespace: namespace} = state) do
  {emptied, metric_data} = Cache.pop_metrics(state)
  Cloudwatch.send_metrics(metric_data, namespace)
  %{emptied | last_run: System.monotonic_time(:second)}
end

# Detaches the telemetry handlers and flushes whatever is still cached.
@impl true
def terminate(_reason, %Cache{metric_names: events, namespace: namespace} = state) do
  for event <- events do
    :telemetry.detach({__MODULE__, event, self()})
  end

  case Cache.pop_metrics(state) do
    {_, []} ->
      :ok

    {_, metric_data} ->
      Logger.debug("#{__MODULE__} flushing metrics")
      Cloudwatch.send_metrics(metric_data, namespace)
      :ok
  end
end

# Decides whether a measurement is kept under the configured sample rate.
# Rates of exactly 1 and 0 short-circuit without drawing a random number.
@spec sample_measurement?(number()) :: boolean()
defp sample_measurement?(sample_rate) when sample_rate == 1, do: true
defp sample_measurement?(sample_rate) when sample_rate == 0, do: false
defp sample_measurement?(sample_rate), do: :rand.uniform() <= sample_rate
# A fresh, option-less counter for [:aname, :value]; built anew on every call
# so tests still exercise equality between separately constructed metrics.
defp plain_counter, do: Metrics.counter([:aname, :value])

# The datum expected from Cache.pop_metrics/1 for an untagged [:aname, :value] metric.
defp expected_datum(suffix, value, unit, resolution) do
  [
    metric_name: "aname.value." <> suffix,
    value: value,
    dimensions: [],
    unit: unit,
    storage_resolution: resolution
  ]
end

# Asserts that both size measures of the cache equal `size`.
defp assert_sizes(cache, size) do
  assert Cache.metric_count(cache) == size
  assert Cache.max_values_per_metric(cache) == size
end

describe "When handling counts, a cache" do
  test "should be able to coalesce a single count metric" do
    cache = Cache.push_measurement(%Cache{}, %{value: 112}, %{}, plain_counter())
    assert_sizes(cache, 1)

    # now pop all metrics
    {postcache, metrics} = Cache.pop_metrics(cache)

    assert metrics == [expected_datum("count", 1, "Count", 60)]
    assert_sizes(postcache, 0)
  end

  test "should be able to coalesce multiple count metrics" do
    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 133}, %{}, plain_counter())
      |> Cache.push_measurement(%{value: 100}, %{}, plain_counter())

    assert_sizes(cache, 1)

    # now pop all metrics
    {postcache, [metrics]} = Cache.pop_metrics(cache)

    assert metrics == expected_datum("count", 2, "Count", 60)
    assert_sizes(postcache, 0)
  end

  test "should be able to coalesce multiple sum metrics" do
    sum_metric = Metrics.sum([:aname, :value])

    cache =
      Enum.reduce([133, 100], %Cache{}, fn value, acc ->
        Cache.push_measurement(acc, %{value: value}, %{}, sum_metric)
      end)

    assert_sizes(cache, 1)

    # now pop all metrics
    {postcache, metrics} = Cache.pop_metrics(cache)

    assert metrics == [expected_datum("sum", 233, "None", 60)]
    assert_sizes(postcache, 0)
  end

  test "should keep values when given keep/1 function" do
    counter = Metrics.counter([:aname, :value], keep: &(&1.good == 1))

    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 112}, %{good: 1}, counter)
      |> Cache.push_measurement(%{value: 112}, %{good: 0}, counter)

    assert_sizes(cache, 1)

    {_postcache, [metrics]} = Cache.pop_metrics(cache)
    assert metrics == expected_datum("count", 1, "Count", 60)
  end

  test "should keep values when given keep/2 function" do
    counter = Metrics.counter([:aname, :value], keep: &(&1.good == 1 and &2.value <= 112))

    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 112}, %{good: 1}, counter)
      |> Cache.push_measurement(%{value: 112}, %{good: 0}, counter)
      |> Cache.push_measurement(%{value: 113}, %{good: 1}, counter)

    assert_sizes(cache, 1)

    {_postcache, [metrics]} = Cache.pop_metrics(cache)
    assert metrics == expected_datum("count", 1, "Count", 60)
  end

  test "should drop values when given drop/1 function" do
    counter = Metrics.counter([:aname, :value], drop: &(&1.bad == 1))

    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 112}, %{bad: 1}, counter)
      |> Cache.push_measurement(%{value: 112}, %{bad: 0}, counter)

    assert_sizes(cache, 1)

    {_postcache, [metrics]} = Cache.pop_metrics(cache)
    assert metrics == expected_datum("count", 1, "Count", 60)
  end

  test "should drop values when given drop/2 function" do
    counter = Metrics.counter([:aname, :value], drop: &(&1.bad == 1 or &2.value > 112))

    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 112}, %{bad: 1}, counter)
      |> Cache.push_measurement(%{value: 112}, %{bad: 0}, counter)
      |> Cache.push_measurement(%{value: 113}, %{bad: 0}, counter)

    assert_sizes(cache, 1)

    {_postcache, [metrics]} = Cache.pop_metrics(cache)
    assert metrics == expected_datum("count", 1, "Count", 60)
  end

  test "should be able to handle a nil value" do
    # a lone nil measurement leaves the cache empty
    assert 0 ==
             %Cache{}
             |> Cache.push_measurement(%{value: nil}, %{}, plain_counter())
             |> Cache.metric_count()

    # a nil between two real measurements is skipped, not counted
    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 133}, %{}, plain_counter())
      |> Cache.push_measurement(%{value: nil}, %{}, plain_counter())
      |> Cache.push_measurement(%{value: 100}, %{}, plain_counter())

    assert_sizes(cache, 1)

    # now pop all metrics
    {postcache, [metrics]} = Cache.pop_metrics(cache)

    assert metrics == expected_datum("count", 2, "Count", 60)
    assert_sizes(postcache, 0)
  end

  @tag capture_log: true
  test "should be able to handle a non-numeric, non-nil value" do
    # a lone non-numeric measurement leaves the cache empty
    assert 0 ==
             %Cache{}
             |> Cache.push_measurement(%{value: "hi"}, %{}, plain_counter())
             |> Cache.metric_count()

    # a non-numeric value between two real measurements is skipped, not counted
    cache =
      %Cache{}
      |> Cache.push_measurement(%{value: 133}, %{}, plain_counter())
      |> Cache.push_measurement(%{value: "hi"}, %{}, plain_counter())
      |> Cache.push_measurement(%{value: 100}, %{}, plain_counter())

    assert_sizes(cache, 1)

    # now pop all metrics
    {postcache, [metrics]} = Cache.pop_metrics(cache)

    assert metrics == expected_datum("count", 2, "Count", 60)
    assert_sizes(postcache, 0)
  end

  test "should respect the storage resolution option" do
    counter =
      Metrics.counter([:aname, :value],
        reporter_options: [storage_resolution: :high]
      )

    cache = Cache.push_measurement(%Cache{}, %{value: 112}, %{}, counter)
    assert_sizes(cache, 1)

    {_postcache, [metrics]} = Cache.pop_metrics(cache)
    assert metrics == expected_datum("count", 1, "Count", 1)
  end
end