├── .gitignore
├── .git-blame-ignore-revs
├── config.nims
├── tests
    ├── duplicate_coll_mod.nim
    ├── nim.cfg
    ├── test_shseq.nim
    ├── chronicles_tests.nim
    ├── chronos_server_tests.nim
    └── main_tests.nim
├── nim.cfg
├── .github
    └── workflows
    │   └── ci.yml
├── benchmarks
    ├── bench_collectors.nim
    └── bench_common.nim
├── LICENSE-MIT
├── metrics
    ├── common.nim
    ├── chronicles_support.nim
    ├── shseq.nim
    └── chronos_httpserver.nim
├── metrics.nimble
├── LICENSE-APACHEv2
├── README.md
└── metrics.nim


/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache/
2 | /build
3 | 
4 | nimble.develop
5 | nimble.paths
6 | vendor/
7 | 


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Formatted with nph v0.6.1-0-g0d8000e
2 | d5d616241559046461394ab0bab5970ec4beb4de
3 | 


--------------------------------------------------------------------------------
/config.nims:
--------------------------------------------------------------------------------
1 | # begin Nimble config (version 1)
2 | when fileExists("nimble.paths"):
3 |   include "nimble.paths"
4 | # end Nimble config
5 | 


--------------------------------------------------------------------------------
/tests/duplicate_coll_mod.nim:
--------------------------------------------------------------------------------
1 | import ../metrics
2 | 
3 | {.used.}
4 | # Fixture module: declares `duplicate_counter` and bumps it once when imported. NOTE(review): presumably exercises duplicate collector names - confirm in main_tests.nim.
5 | declareCounter duplicate_counter, "duplicate counter"
6 | duplicate_counter.inc()
7 | 


--------------------------------------------------------------------------------
/tests/nim.cfg:
--------------------------------------------------------------------------------
1 | # Avoid some rare stack corruption while using exceptions with a SEH-enabled
2 | # toolchain: https://github.com/status-im/nimbus-eth2/issues/3121
3 | @if windows and not vcc:
4 |   --define:nimRawSetjmp
5 | @end
6 | 


--------------------------------------------------------------------------------
/nim.cfg:
--------------------------------------------------------------------------------
1 | nimcache = "build/nimcache/$projectName"
2 | 
3 | # Avoid some rare stack corruption while using exceptions with a SEH-enabled
4 | # toolchain: https://github.com/status-im/nimbus-eth2/issues/3121
5 | @if windows and not vcc:
6 |   --define:nimRawSetjmp
7 | @end
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |   pull_request:
 7 |   workflow_dispatch:
 8 | 
 9 | jobs:
10 |   build:
11 |     uses: status-im/nimbus-common-workflow/.github/workflows/common.yml@main
12 |     with:
13 |       nimble-version: b920dad9ed76c6619be3ec0cfbf0dde6f9e39092
14 |       test-command: |
15 |         nimble --requires:unittest2 test
16 |         nimble --requires:unittest2 --requires:chronicles test_chronicles
17 | 
18 |   nph:
19 |     runs-on: ubuntu-latest
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |       - name: Check `nph` formatting
23 |         uses: arnetheduck/nph-action@v1
24 |         with:
25 |           version: 0.6.1
26 | 


--------------------------------------------------------------------------------
/tests/test_shseq.nim:
--------------------------------------------------------------------------------
 1 | {.used.}
 2 | 
 3 | import unittest2, ../metrics/shseq
 4 | 
 5 | suite "ShSeq": # exercises the shared-memory sequence from ../metrics/shseq
 6 |   test "basics":
 7 |     var s: ShSeq[int]
 8 |     # Start with [1, 2, 4], leaving gaps at the front, in the middle and at the end.
 9 |     s.add(1)
10 |     s.add(2)
11 |     s.add(4)
12 |     # insert takes (value, position): each call puts value n at index n.
13 |     s.insert(0, 0)
14 |     s.insert(3, 3)
15 |     s.insert(5, 5)
16 |     # Every element must now equal its own index.
17 |     for i in 0 ..< s.len:
18 |       check s[i] == i
19 | 
20 |   test "init":
21 |     let s = ShSeq.init([0, 1, 2])
22 |     check:
23 |       s.len == 3
24 |       s[1] == 1
25 | 
26 |   test "cross-thread init/destroy":
27 |     when defined(threads):
28 |       var s: ShSeq[int]
29 |       # `s` is still empty here, so its first allocation happens on the worker thread.
30 |       var t: Thread[ptr ShSeq[int]]
31 | 
32 |       proc threadFunc(s: ptr ShSeq[int]) {.thread.} =
33 |         s[].add(2)
34 |         s[].add(1)
35 |         s[].add(0)
36 | 
37 |       createThread(t, threadFunc, addr s)
38 | 
39 |       t.joinThread()
40 |       # After the join, the main thread reads what the worker wrote.
41 |       check:
42 |         s[0] == 2
43 |       # The main thread releases the buffer that the worker allocated.
44 |       s.destroy()
45 |       # destroy() resets the sequence, so it reports zero length.
46 |       check:
47 |         s.len == 0
48 |     else:
49 |       skip()
50 | 


--------------------------------------------------------------------------------
/benchmarks/bench_collectors.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019-2021 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | import ./bench_common, ../metrics
 8 | 
 9 | proc main(nb_samples: Natural) = # runs the collector micro-benchmarks
10 |   warmup()
11 |   # `nb_samples` looks unused here, but the dirty `bench` template reads it by name.
12 |   var res: float64
13 |   # `res` receives the counter's final value so that `bench` can print it afterwards.
14 |   bench("create a counter and increment it 3 times with different values", res):
15 |     declareCounter counter1, "help"
16 |     counter1.inc()
17 |     counter1.inc(2)
18 |     counter1.inc(2.1)
19 |     res = counter1.value
20 |     counter1.unregister()
21 |   # Same scenario with a two-label counter; the label values are built once, outside the timed body.
22 |   let labelValues = @["a", "b"]
23 |   bench(
24 |     "create a counter with 2 labels and increment it 3 times with different values", res
25 |   ):
26 |     declareCounter counter2, "help", @["foo", "bar"]
27 |     counter2.inc(labelValues = labelValues)
28 |     counter2.inc(2, labelValues)
29 |     counter2.inc(2.1, labelValues)
30 |     res = counter2.value(labelValues)
31 |     counter2.unregister()
32 | 
33 | when isMainModule:
34 |   main(10000) # number of samples collected per benchmark
35 | 


--------------------------------------------------------------------------------
/tests/chronicles_tests.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | import chronicles, unittest2, ../metrics, ../metrics/chronicles_support
 8 | 
 9 | suite "logging": # logs every collector kind, and the registries, through chronicles
10 |   test "info": # has no `check`s: it passes as long as every `info` call compiles and runs
11 |     var registry = newRegistry()
12 |     declareCounter myCounter, "help", registry = registry
13 |     myCounter.inc()
14 |     info "myCounter", myCounter
15 |     declareCounter lCounter, "l help", @["foo", "bar"], registry
16 |     let labelValues = @["a", "x \"y\" \n\\z"] # second value holds quotes, a newline and a backslash
17 |     lCounter.inc(4.5, labelValues = labelValues)
18 |     info "lCounter", lCounter
19 |     declareGauge myGauge, "help", registry = registry
20 |     myGauge.set(9.5)
21 |     info "myGauge", myGauge
22 |     declareSummary mySummary, "help", registry = registry
23 |     mySummary.observe(10)
24 |     info "mySummary", mySummary
25 |     declareHistogram myHistogram, "help", registry = registry
26 |     myHistogram.observe(10)
27 |     info "myHistogram", myHistogram
28 |     # Finally log whole registries: the private one built above and `defaultRegistry`.
29 |     info "registry", registry
30 |     info "default registry", defaultRegistry
31 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | nim-metrics is licensed under the MIT License
 2 | Copyright (c) 2019 Status Research & Development GmbH
 3 | -----------------------------------------------------
 4 | 
 5 | The MIT License (MIT)
 6 | 
 7 | Copyright (c) 2019 Status Research & Development GmbH
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a copy
10 | of this software and associated documentation files (the "Software"), to deal
11 | in the Software without restriction, including without limitation the rights
12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | copies of the Software, and to permit persons to whom the Software is
14 | furnished to do so, subject to the following conditions:
15 | 
16 | The above copyright notice and this permission notice shall be included in all
17 | copies or substantial portions of the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/metrics/common.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019-2021 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | when defined(posix):
 8 |   import os, posix
 9 | 
10 | # https://prometheus.io/docs/instrumenting/exposition_formats/#basic-info
11 | const CONTENT_TYPE* = "text/plain; version=0.0.4; charset=utf-8"
12 | 
13 | proc printError*(msg: string) = # best-effort: a failed write to stderr is ignored
14 |   try:
15 |     stderr.writeLine("metrics error: " & msg)
16 |   except IOError:
17 |     discard
18 | 
19 | proc ignoreSignalsInThread*() =
20 |   ## Blocks all signals in the calling thread, so we don't interfere with regular signal
21 |   ## handling elsewhere (no-op off POSIX). Failures go to stderr and quit with `QuitFailure`.
22 |   when defined(posix):
23 |     var signalMask, oldSignalMask: Sigset
24 |     if sigfillset(signalMask) != 0:
25 |       printError(osErrorMsg(osLastError()))
26 |       quit(QuitFailure)
27 |     when defined(boehmgc):
28 |       # https://www.hboehm.info/gc/debugging.html
29 |       const # NOTE(review): hard-coded numbers look like Linux values - verify on other POSIX targets
30 |         SIGPWR = 30
31 |         SIGXCPU = 24
32 |         SIGSEGV = 11
33 |         SIGBUS = 7
34 |       if sigdelset(signalMask, SIGPWR) != 0 or sigdelset(signalMask, SIGXCPU) != 0 or
35 |           sigdelset(signalMask, SIGSEGV) != 0 or sigdelset(signalMask, SIGBUS) != 0:
36 |         printError(osErrorMsg(osLastError()))
37 |         quit(QuitFailure)
38 |     # pthread_sigmask() is used because sigprocmask() doesn't work on macOS for multithreaded programs
39 |     let rc = pthread_sigmask(SIG_BLOCK, signalMask, oldSignalMask)
40 |     if rc != 0: # pthread functions return the error number instead of setting errno
41 |       printError(osErrorMsg(OSErrorCode(rc)))
42 |       quit(QuitFailure)
43 | 


--------------------------------------------------------------------------------
/metrics/chronicles_support.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | from chronicles import formatIt, expandIt
 8 | 
 9 | import ../metrics, std/[locks, times]
10 | 
11 | when defined(metrics):
12 |   import tables
13 |   # A single metric is logged through its `$` string conversion.
14 |   formatIt(Metric):
15 |     $it
16 |   # Stringifies every metric held by a collector while holding the collector's lock.
17 |   proc toLog(collector: SimpleCollector): seq[string] =
18 |     withLock collector.lock:
19 |       for metrics in collector.metrics:
20 |         for metric in metrics:
21 |           result.add($metric)
22 |   # Counter, Gauge, Summary and Histogram are all logged via `toLog`.
23 |   formatIt(Counter):
24 |     it.toLog
25 | 
26 |   formatIt(Gauge):
27 |     it.toLog
28 | 
29 |   formatIt(Summary):
30 |     it.toLog
31 | 
32 |   formatIt(Histogram):
33 |     it.toLog
34 |   # Renders each sample reported by `registry.collect` with `addText`, one string per sample.
35 |   proc toLog(registry: Registry): seq[string] =
36 |     var res: seq[string] # filled by the callback below and returned at the end
37 |     registry.collect(
38 |       proc(
39 |           name: string,
40 |           value: float64,
41 |           labels: openArray[string],
42 |           labelValues: openArray[string],
43 |           timestamp: Time,
44 |       ) =
45 |         var s: string
46 |         s.addText(name, value, labels, labelValues, timestamp)
47 |         res.add s
48 |     )
49 |     res
50 | 
51 |   formatIt(Registry):
52 |     it.toLog
53 | else:
54 |   # not defined(metrics): every type logs the same fixed placeholder string
55 |   formatIt(Metric):
56 |     "metrics disabled"
57 | 
58 |   formatIt(Counter):
59 |     "metrics disabled"
60 | 
61 |   formatIt(Gauge):
62 |     "metrics disabled"
63 | 
64 |   formatIt(Summary):
65 |     "metrics disabled"
66 | 
67 |   formatIt(Histogram):
68 |     "metrics disabled"
69 | 
70 |   formatIt(Registry):
71 |     "metrics disabled"
72 | 
73 | # ignored collector: logs a fixed `ignored` field, whether or not metrics are enabled
74 | expandIt(type IgnoredCollector):
75 |   ignored = "ignored"
76 | 


--------------------------------------------------------------------------------
/benchmarks/bench_common.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019-2021 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | import times, stats, strformat
 8 | export times, stats, strformat
 9 | 
10 | proc warmup*() =
11 |   ## Warmup - busy-loops so the CPU is at max perf before anything is measured.
12 |   let start = cpuTime()
13 |   var foo = 123'i64
14 |   for i in 0'i64 ..< 300_000_000'i64:
15 |     foo += i * i mod 456
16 |     foo = foo mod 789
17 | 
18 |   # The compiler shouldn't optimize away the result, as cpuTime relies on side effects
19 |   let stop = cpuTime()
20 |   echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)"
21 | 
22 | template printStats*(experiment_name: string, compute_result: typed) {.dirty.} = # dirty: reads `stats`, `global_start` and `global_stop` from the caller's scope
23 |   echo "#################################################################"
24 |   echo "\n" & experiment_name
25 |   echo &"Collected {stats.n} samples in {global_stop - global_start:>4.3f} seconds"
26 |   echo &"Average time: {stats.mean * 1000 :>4.3f} ms" # per-sample times are scaled by 1000 for display in ms
27 |   echo &"Stddev  time: {stats.standardDeviationS * 1000 :>4.3f} ms"
28 |   echo &"Min     time: {stats.min * 1000 :>4.3f} ms"
29 |   echo &"Max     time: {stats.max * 1000 :>4.3f} ms"
30 |   echo "\nDisplay computation result to make sure it's not optimized away"
31 |   echo compute_result # Prevents compiler from optimizing stuff away
32 |   echo '\n'
33 | 
34 | template bench*(name: string, compute_result: typed, body: untyped) {.dirty.} = # dirty: expects `nb_samples` to be visible at the call site
35 |   block: # Actual bench
36 |     var stats: RunningStat
37 |     let global_start = cpuTime() # total time across all samples, including loop overhead
38 |     for _ in 0 ..< nb_samples:
39 |       let start = cpuTime()
40 |       block: # the benchmarked code runs in its own scope
41 |         body
42 |       let stop = cpuTime()
43 |       stats.push stop - start # one timing per sample
44 |     let global_stop = cpuTime()
45 |     printStats(name, compute_result) # picks up `stats`, `global_start` and `global_stop` declared above
46 | 


--------------------------------------------------------------------------------
/metrics.nimble:
--------------------------------------------------------------------------------
 1 | mode = ScriptMode.Verbose
 2 | 
 3 | packageName = "metrics"
 4 | version = "0.2.0"
 5 | author = "Status Research & Development GmbH"
 6 | description = "Metrics client library supporting Prometheus"
 7 | license = "MIT or Apache License 2.0"
 8 | skipDirs = @["tests", "benchmarks"]
 9 | 
10 | ### Dependencies
11 | requires "nim >= 1.6.14", "chronos >= 4.0.3", "results", "stew"
12 | 
13 | let nimc = getEnv("NIMC", "nim") # Which nim compiler to use
14 | let lang = getEnv("NIMLANG", "c") # Which backend (c/cpp/js)
15 | let flags = getEnv("NIMFLAGS", "") # Extra flags for the compiler
16 | let verbose = getEnv("V", "") notin ["", "0"]
17 | 
18 | from os import quoteShell
19 | 
20 | let cfg =
21 |   " --styleCheck:usages --styleCheck:error" &
22 |   (if verbose: "" else: " --verbosity:0 --hints:off") &
23 |   " --skipParentCfg --skipUserCfg --outdir:build " &
24 |   quoteShell("--nimcache:build/nimcache/$projectName") & " -d:metricsTest"
25 | 
26 | proc build(args, path: string) = # runs: <nimc> <lang> <cfg> <flags> <args> <path>
27 |   exec nimc & " " & lang & " " & cfg & " " & flags & " " & args & " " & path
28 | 
29 | proc run(args, path: string) = # builds and runs (`-r`) `path` once per memory manager
30 |   build args & " --mm:refc -r", path
31 |   if (NimMajor, NimMinor) > (1, 6): # the orc pass only happens on Nim newer than 1.6
32 |     build args & " --mm:orc -r", path
33 | 
34 | ### tasks
35 | task test, "Main tests":
36 |   # build it with metrics disabled first (with and without threads), then run with metrics enabled
37 |   build "", "tests/main_tests"
38 |   build "--threads:on", "tests/main_tests"
39 |   run "-d:metrics --threads:on -d:useSysAssert -d:useGcAssert", "tests/main_tests"
40 |   # the benchmark is compiled without metrics and then run with metrics enabled
41 |   build "", "benchmarks/bench_collectors"
42 |   run "-d:metrics --threads:on", "benchmarks/bench_collectors"
43 |   # the HTTP server tests are run both without and with metrics
44 |   run "", "tests/chronos_server_tests"
45 |   run "-d:metrics --threads:on -d:nimTypeNames", "tests/chronos_server_tests"
46 | # On Nim older than 2.0 the chronicles task requires chronicles < 0.12.
47 | when (NimMajor, NimMinor) < (2, 0):
48 |   taskRequires "test_chronicles", "chronicles < 0.12"
49 | 
50 | task test_chronicles, "Chronicles tests":
51 |   build "", "tests/chronicles_tests"
52 |   run "-d:metrics --threads:on", "tests/chronicles_tests"
53 | 
54 | task benchmark, "Run benchmarks":
55 |   run "-d:metrics --debuginfo --threads:on -d:release", "benchmarks/bench_collectors"
56 | 


--------------------------------------------------------------------------------
/metrics/shseq.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Status Research & Development GmbH
 2 | # Licensed and distributed under either of
 3 | #   * MIT license: http://opensource.org/licenses/MIT
 4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
 6 | 
 7 | import std/typetraits
 8 | 
 9 | type ShSeq*[T] = object
10 |   ## Sequence whose elements reside in shared memory - only works for copyMem:able types
11 |   items: ptr UncheckedArray[T] # backing buffer; nil until the first allocation
12 |   capacity, len: int # allocated slots / slots currently in use
13 | 
14 | proc grow(s: var ShSeq, size: int) = # ensures room for at least `size` elements; never shrinks
15 |   type T = typeof(s).T
16 |   # Compile-time guard: elements are relocated with a raw copyMem below.
17 |   static:
18 |     doAssert supportsCopyMem(T)
19 |   # Already large enough: nothing to do.
20 |   if size <= s.capacity:
21 |     return
22 |   # Allocate an uninitialised shared buffer, copy the live elements over, then free the old buffer.
23 |   var tmp = cast[ptr UncheckedArray[T]](createSharedU(T, size))
24 |   if s.len > 0:
25 |     copyMem(addr tmp[0], addr s.items[0], s.len * sizeof(T))
26 |   s.capacity = size
27 |   if s.items != nil:
28 |     deallocShared(s.items)
29 |   s.items = tmp
30 | 
31 | proc destroy*(s: var ShSeq) = # frees the buffer and returns `s` to its empty state
32 |   if s.items != nil: # a never-allocated sequence is already all-zero
33 |     s.items.deallocShared()
34 |     s.reset()
35 | 
36 | proc init*[T](_: type ShSeq, v: openArray[T]): ShSeq[T] =
37 |   ## Builds a sequence holding a copy of the elements of `v`.
38 |   if v.len == 0:
39 |     return # empty input: the result stays unallocated with zero length
40 | 
41 |   result.grow(v.len)
42 |   copyMem(addr result.items[0], unsafeAddr v[0], v.len * sizeof(T))
43 |   result.len = v.len
44 | 
45 | proc add*(s: var ShSeq, v: auto) = # appends `v` at the end
46 |   if s.len == s.capacity: # full: grow by half the current length, to at least 64 slots
47 |     s.grow(max(64, s.len + s.len div 2))
48 |   s.items[s.len] = v
49 |   s.len += 1
50 | 
51 | func `[]`*(s: ShSeq, i: int): lent s.T = # read-only access to element `i`
52 |   doAssert i >= 0 and i < s.len, "Bounds check" # doAssert, so the check also runs in release builds
53 |   s.items[i]
54 | 
55 | func `[]`*(s: var ShSeq, i: int): var s.T = # mutable access to element `i`
56 |   doAssert i >= 0 and i < s.len, "Bounds check" # same bounds rule as the read-only overload
57 |   s.items[i]
58 | 
59 | proc insert*(s: var ShSeq, v: auto, pos: int) = # inserts `v` at index `pos`; note the (value, position) order
60 |   type T = typeof(s).T
61 |   # `pos == s.len` is allowed and behaves like an append.
62 |   doAssert pos >= 0 and pos <= s.len, "Bounds check"
63 |   # Make room for one more element, using the same growth rule as `add`.
64 |   if s.len == s.capacity:
65 |     s.grow(max(64, s.len + s.len div 2))
66 |   # Shift the tail one slot to the right; moveMem is used because source and destination overlap.
67 |   if pos < s.len:
68 |     moveMem(addr s.items[pos + 1], addr s.items[pos], (s.len - pos) * sizeof(T))
69 | 
70 |   s.items[pos] = v
71 |   s.len += 1
72 | 
73 | template len*(s: ShSeq): int = # number of elements in use (exposes the private `len` field)
74 |   s.len
75 | 
76 | template data*(s: ShSeq): openArray = # openArray view over the first `len` elements of the buffer
77 |   s.items.toOpenArray(0, s.len - 1)
78 | 
79 | iterator items*(s: ShSeq): lent s.T = # yields the elements in index order
80 |   for i in 0 ..< s.len:
81 |     yield s[i] # goes through the bounds-checked `[]`
82 | 


--------------------------------------------------------------------------------
/tests/chronos_server_tests.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2021-2022 Status Research & Development GmbH
  2 | # Licensed and distributed under either of
  3 | #   * MIT license: http://opensource.org/licenses/MIT
  4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
  5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
  6 | 
  7 | import std/[uri]
  8 | import chronos, chronos/apps/http/[httpclient, httpserver]
  9 | import chronos/unittest2/asynctests
 10 | import ../metrics, ../metrics/chronos_httpserver
 11 | 
 12 | suite "Chronos metrics HTTP server test suite":
 13 |   proc httpClient( # fetches `url` with a fresh HTTP session, which is always closed afterwards
 14 |       url: string
 15 |   ): Future[HttpResponseTuple] {.async: (raises: [CancelledError, HttpError]).} =
 16 |     let session = HttpSessionRef.new()
 17 |     try:
 18 |       await session.fetch(parseUri(url))
 19 |     finally:
 20 |       await session.closeWait() # runs on success and on failure alike
 21 | 
 22 |   asyncTest "new()/close() test": # a never-started server goes Stopped -> Closed
 23 |     when defined(metrics):
 24 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
 25 |       block:
 26 |         let status = await server.status()
 27 |         check status == MetricsHttpServerStatus.Stopped
 28 |       await server.close()
 29 |       block:
 30 |         let status = await server.status()
 31 |         check status == MetricsHttpServerStatus.Closed
 32 |     else: # without -d:metrics, creating the server is expected to return an error
 33 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
 34 | 
 35 |   asyncTest "new()/start()/stop()/close() test": # Stopped -> Running -> Stopped -> Closed
 36 |     when defined(metrics):
 37 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
 38 |       block:
 39 |         let status = await server.status()
 40 |         check status == MetricsHttpServerStatus.Stopped
 41 |       await server.start()
 42 |       block:
 43 |         let status = await server.status()
 44 |         check status == MetricsHttpServerStatus.Running
 45 |       await server.stop()
 46 |       block:
 47 |         let status = await server.status()
 48 |         check status == MetricsHttpServerStatus.Stopped
 49 |       await server.close()
 50 |       block:
 51 |         let status = await server.status()
 52 |         check status == MetricsHttpServerStatus.Closed
 53 |     else: # without -d:metrics, creating the server is expected to return an error
 54 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
 55 | 
 56 |   asyncTest "new()/start()/response/stop()/start()/response/stop()/close() " & "test":
 57 |     when defined(metrics): # checks that /health answers both before and after a stop()/start() cycle
 58 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
 59 |       block:
 60 |         let status = await server.status()
 61 |         check status == MetricsHttpServerStatus.Stopped
 62 |       await server.start()
 63 |       block:
 64 |         let status = await server.status()
 65 |         check status == MetricsHttpServerStatus.Running
 66 |       block: # first request, before the restart
 67 |         let resp = await httpClient("http://127.0.0.1:8080/health")
 68 |         check:
 69 |           resp.status == 200
 70 |           resp.data.bytesToString == "OK"
 71 |       await server.stop()
 72 |       block:
 73 |         let status = await server.status()
 74 |         check status == MetricsHttpServerStatus.Stopped
 75 |       await server.start()
 76 |       block:
 77 |         let status = await server.status()
 78 |         check status == MetricsHttpServerStatus.Running
 79 |       block: # second request, after the restart
 80 |         let resp = await httpClient("http://127.0.0.1:8080/health")
 81 |         check:
 82 |           resp.status == 200
 83 |           resp.data.bytesToString == "OK"
 84 |       await server.stop()
 85 |       block:
 86 |         let status = await server.status()
 87 |         check status == MetricsHttpServerStatus.Stopped
 88 |       await server.close()
 89 |       block:
 90 |         let status = await server.status()
 91 |         check status == MetricsHttpServerStatus.Closed
 92 |     else: # without -d:metrics, creating the server is expected to return an error
 93 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
 94 | 
 95 |   asyncTest "new()/start()/close() test": # close() on a running server, with no stop() in between
 96 |     when defined(metrics):
 97 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
 98 |       block:
 99 |         let status = await server.status()
100 |         check status == MetricsHttpServerStatus.Stopped
101 |       await server.start()
102 |       block:
103 |         let status = await server.status()
104 |         check status == MetricsHttpServerStatus.Running
105 |       await server.close()
106 |       block:
107 |         let status = await server.status()
108 |         check status == MetricsHttpServerStatus.Closed
109 |     else: # without -d:metrics, creating the server is expected to return an error
110 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
111 | 
112 |   asyncTest "HTTP 200/responses check test": # /metrics and /health must both answer 200
113 |     when defined(metrics):
114 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
115 |       block:
116 |         let status = await server.status()
117 |         check status == MetricsHttpServerStatus.Stopped
118 |       await server.start()
119 |       block:
120 |         let status = await server.status()
121 |         check status == MetricsHttpServerStatus.Running
122 |       block: # /metrics: only a non-empty body is required
123 |         let resp = await httpClient("http://127.0.0.1:8080/metrics")
124 |         check:
125 |           resp.status == 200
126 |           len(resp.data) > 0
127 | 
128 |       block: # /health: the body must be exactly "OK"
129 |         let resp = await httpClient("http://127.0.0.1:8080/health")
130 |         check:
131 |           resp.status == 200
132 |           resp.data.bytesToString() == "OK"
133 |       await server.stop()
134 |       block:
135 |         let status = await server.status()
136 |         check status == MetricsHttpServerStatus.Stopped
137 |       await server.close()
138 |       block:
139 |         let status = await server.status()
140 |         check status == MetricsHttpServerStatus.Closed
141 |     else: # without -d:metrics, creating the server is expected to return an error
142 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
143 | 
144 |   asyncTest "HTTP 404/response check test": # an unknown path must answer 404 with a non-empty body
145 |     when defined(metrics):
146 |       let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get()
147 |       block:
148 |         let status = await server.status()
149 |         check status == MetricsHttpServerStatus.Stopped
150 |       await server.start()
151 |       block:
152 |         let status = await server.status()
153 |         check status == MetricsHttpServerStatus.Running
154 | 
155 |       block:
156 |         let resp = await httpClient("http://127.0.0.1:8080/somePath")
157 |         check:
158 |           resp.status == 404
159 |           len(resp.data) > 0
160 | 
161 |       await server.stop()
162 |       block:
163 |         let status = await server.status()
164 |         check status == MetricsHttpServerStatus.Stopped
165 |       await server.close()
166 |       block:
167 |         let status = await server.status()
168 |         check status == MetricsHttpServerStatus.Closed
169 |     else: # without -d:metrics, creating the server is expected to return an error
170 |       check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true
171 | 
172 |   asyncTest "Chronos middleware test": # the metrics middleware installed in front of an application handler
173 |     when defined(metrics):
174 |       proc process( # application handler: answers only `/test`, everything else gets the default response
175 |           r: RequestFence
176 |       ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
177 |         if r.isOk():
178 |           let request = r.get()
179 |           if request.uri.path == "/test":
180 |             try:
181 |               await request.respond(Http200, "TESTOK")
182 |             except HttpWriteError as exc:
183 |               defaultResponse(exc)
184 |           else:
185 |             defaultResponse()
186 |         else:
187 |           defaultResponse()
188 | 
189 |       let
190 |         socketFlags = {ServerFlags.TcpNoDelay, ServerFlags.ReuseAddr}
191 |         middlewares = [MetricsHttpServerMiddlewareRef.new()]
192 |         res = HttpServerRef.new(
193 |           initTAddress("127.0.0.1:0"), # port 0; the actual address is read back from the server below
194 |           process,
195 |           middlewares = middlewares,
196 |           socketFlags = socketFlags,
197 |         )
198 |       check res.isOk()
199 |       let server = res.get()
200 |       server.start()
201 |       try:
202 |         let
203 |           address = server.instance.localAddress()
204 |           uri1 = "http://" & $address & "/metrics"
205 |           uri2 = "http://" & $address & "/health"
206 |           uri3 = "http://" & $address & "/test"
207 |           res1 = await httpClient(uri1)
208 |           res2 = await httpClient(uri2)
209 |           res3 = await httpClient(uri3)
210 |         check: # /metrics and /health must answer alongside the handler's own /test
211 |           res1.status == 200
212 |           len(res1.data) > 0
213 |           res2.status == 200
214 |           res2.data.bytesToString() == "OK"
215 |           res3.status == 200
216 |           res3.data.bytesToString() == "TESTOK"
217 |       finally: # always shut the server down, even when a request or check fails
218 |         await server.stop()
219 |         await server.closeWait()
220 |     else: # without -d:metrics the middleware can still be constructed
221 |       check not (isNil(MetricsHttpServerMiddlewareRef.new()))
222 | 


--------------------------------------------------------------------------------
/LICENSE-APACHEv2:
--------------------------------------------------------------------------------
  1 | nim-metrics is licensed under the Apache License version 2
  2 | Copyright (c) 2019 Status Research & Development GmbH
  3 | -----------------------------------------------------
  4 | 
  5 |                                  Apache License
  6 |                            Version 2.0, January 2004
  7 |                         http://www.apache.org/licenses/
  8 | 
  9 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 10 | 
 11 |    1. Definitions.
 12 | 
 13 |       "License" shall mean the terms and conditions for use, reproduction,
 14 |       and distribution as defined by Sections 1 through 9 of this document.
 15 | 
 16 |       "Licensor" shall mean the copyright owner or entity authorized by
 17 |       the copyright owner that is granting the License.
 18 | 
 19 |       "Legal Entity" shall mean the union of the acting entity and all
 20 |       other entities that control, are controlled by, or are under common
 21 |       control with that entity. For the purposes of this definition,
 22 |       "control" means (i) the power, direct or indirect, to cause the
 23 |       direction or management of such entity, whether by contract or
 24 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 25 |       outstanding shares, or (iii) beneficial ownership of such entity.
 26 | 
 27 |       "You" (or "Your") shall mean an individual or Legal Entity
 28 |       exercising permissions granted by this License.
 29 | 
 30 |       "Source" form shall mean the preferred form for making modifications,
 31 |       including but not limited to software source code, documentation
 32 |       source, and configuration files.
 33 | 
 34 |       "Object" form shall mean any form resulting from mechanical
 35 |       transformation or translation of a Source form, including but
 36 |       not limited to compiled object code, generated documentation,
 37 |       and conversions to other media types.
 38 | 
 39 |       "Work" shall mean the work of authorship, whether in Source or
 40 |       Object form, made available under the License, as indicated by a
 41 |       copyright notice that is included in or attached to the work
 42 |       (an example is provided in the Appendix below).
 43 | 
 44 |       "Derivative Works" shall mean any work, whether in Source or Object
 45 |       form, that is based on (or derived from) the Work and for which the
 46 |       editorial revisions, annotations, elaborations, or other modifications
 47 |       represent, as a whole, an original work of authorship. For the purposes
 48 |       of this License, Derivative Works shall not include works that remain
 49 |       separable from, or merely link (or bind by name) to the interfaces of,
 50 |       the Work and Derivative Works thereof.
 51 | 
 52 |       "Contribution" shall mean any work of authorship, including
 53 |       the original version of the Work and any modifications or additions
 54 |       to that Work or Derivative Works thereof, that is intentionally
 55 |       submitted to Licensor for inclusion in the Work by the copyright owner
 56 |       or by an individual or Legal Entity authorized to submit on behalf of
 57 |       the copyright owner. For the purposes of this definition, "submitted"
 58 |       means any form of electronic, verbal, or written communication sent
 59 |       to the Licensor or its representatives, including but not limited to
 60 |       communication on electronic mailing lists, source code control systems,
 61 |       and issue tracking systems that are managed by, or on behalf of, the
 62 |       Licensor for the purpose of discussing and improving the Work, but
 63 |       excluding communication that is conspicuously marked or otherwise
 64 |       designated in writing by the copyright owner as "Not a Contribution."
 65 | 
 66 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 67 |       on behalf of whom a Contribution has been received by Licensor and
 68 |       subsequently incorporated within the Work.
 69 | 
 70 |    2. Grant of Copyright License. Subject to the terms and conditions of
 71 |       this License, each Contributor hereby grants to You a perpetual,
 72 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 73 |       copyright license to reproduce, prepare Derivative Works of,
 74 |       publicly display, publicly perform, sublicense, and distribute the
 75 |       Work and such Derivative Works in Source or Object form.
 76 | 
 77 |    3. Grant of Patent License. Subject to the terms and conditions of
 78 |       this License, each Contributor hereby grants to You a perpetual,
 79 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 80 |       (except as stated in this section) patent license to make, have made,
 81 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 82 |       where such license applies only to those patent claims licensable
 83 |       by such Contributor that are necessarily infringed by their
 84 |       Contribution(s) alone or by combination of their Contribution(s)
 85 |       with the Work to which such Contribution(s) was submitted. If You
 86 |       institute patent litigation against any entity (including a
 87 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 88 |       or a Contribution incorporated within the Work constitutes direct
 89 |       or contributory patent infringement, then any patent licenses
 90 |       granted to You under this License for that Work shall terminate
 91 |       as of the date such litigation is filed.
 92 | 
 93 |    4. Redistribution. You may reproduce and distribute copies of the
 94 |       Work or Derivative Works thereof in any medium, with or without
 95 |       modifications, and in Source or Object form, provided that You
 96 |       meet the following conditions:
 97 | 
 98 |       (a) You must give any other recipients of the Work or
 99 |           Derivative Works a copy of this License; and
100 | 
101 |       (b) You must cause any modified files to carry prominent notices
102 |           stating that You changed the files; and
103 | 
104 |       (c) You must retain, in the Source form of any Derivative Works
105 |           that You distribute, all copyright, patent, trademark, and
106 |           attribution notices from the Source form of the Work,
107 |           excluding those notices that do not pertain to any part of
108 |           the Derivative Works; and
109 | 
110 |       (d) If the Work includes a "NOTICE" text file as part of its
111 |           distribution, then any Derivative Works that You distribute must
112 |           include a readable copy of the attribution notices contained
113 |           within such NOTICE file, excluding those notices that do not
114 |           pertain to any part of the Derivative Works, in at least one
115 |           of the following places: within a NOTICE text file distributed
116 |           as part of the Derivative Works; within the Source form or
117 |           documentation, if provided along with the Derivative Works; or,
118 |           within a display generated by the Derivative Works, if and
119 |           wherever such third-party notices normally appear. The contents
120 |           of the NOTICE file are for informational purposes only and
121 |           do not modify the License. You may add Your own attribution
122 |           notices within Derivative Works that You distribute, alongside
123 |           or as an addendum to the NOTICE text from the Work, provided
124 |           that such additional attribution notices cannot be construed
125 |           as modifying the License.
126 | 
127 |       You may add Your own copyright statement to Your modifications and
128 |       may provide additional or different license terms and conditions
129 |       for use, reproduction, or distribution of Your modifications, or
130 |       for any such Derivative Works as a whole, provided Your use,
131 |       reproduction, and distribution of the Work otherwise complies with
132 |       the conditions stated in this License.
133 | 
134 |    5. Submission of Contributions. Unless You explicitly state otherwise,
135 |       any Contribution intentionally submitted for inclusion in the Work
136 |       by You to the Licensor shall be under the terms and conditions of
137 |       this License, without any additional terms or conditions.
138 |       Notwithstanding the above, nothing herein shall supersede or modify
139 |       the terms of any separate license agreement you may have executed
140 |       with Licensor regarding such Contributions.
141 | 
142 |    6. Trademarks. This License does not grant permission to use the trade
143 |       names, trademarks, service marks, or product names of the Licensor,
144 |       except as required for reasonable and customary use in describing the
145 |       origin of the Work and reproducing the content of the NOTICE file.
146 | 
147 |    7. Disclaimer of Warranty. Unless required by applicable law or
148 |       agreed to in writing, Licensor provides the Work (and each
149 |       Contributor provides its Contributions) on an "AS IS" BASIS,
150 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
151 |       implied, including, without limitation, any warranties or conditions
152 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
153 |       PARTICULAR PURPOSE. You are solely responsible for determining the
154 |       appropriateness of using or redistributing the Work and assume any
155 |       risks associated with Your exercise of permissions under this License.
156 | 
157 |    8. Limitation of Liability. In no event and under no legal theory,
158 |       whether in tort (including negligence), contract, or otherwise,
159 |       unless required by applicable law (such as deliberate and grossly
160 |       negligent acts) or agreed to in writing, shall any Contributor be
161 |       liable to You for damages, including any direct, indirect, special,
162 |       incidental, or consequential damages of any character arising as a
163 |       result of this License or out of the use or inability to use the
164 |       Work (including but not limited to damages for loss of goodwill,
165 |       work stoppage, computer failure or malfunction, or any and all
166 |       other commercial damages or losses), even if such Contributor
167 |       has been advised of the possibility of such damages.
168 | 
169 |    9. Accepting Warranty or Additional Liability. While redistributing
170 |       the Work or Derivative Works thereof, You may choose to offer,
171 |       and charge a fee for, acceptance of support, warranty, indemnity,
172 |       or other liability obligations and/or rights consistent with this
173 |       License. However, in accepting such obligations, You may act only
174 |       on Your own behalf and on Your sole responsibility, not on behalf
175 |       of any other Contributor, and only if You agree to indemnify,
176 |       defend, and hold each Contributor harmless for any liability
177 |       incurred by, or claims asserted against, such Contributor by reason
178 |       of your accepting any such warranty or additional liability.
179 | 
180 |    END OF TERMS AND CONDITIONS
181 | 
182 |    APPENDIX: How to apply the Apache License to your work.
183 | 
184 |       To apply the Apache License to your work, attach the following
185 |       boilerplate notice, with the fields enclosed by brackets "[]"
186 |       replaced with your own identifying information. (Don't include
187 |       the brackets!)  The text should be enclosed in the appropriate
188 |       comment syntax for the file format. We also recommend that a
189 |       file or class name and description of purpose be included on the
190 |       same "printed page" as the copyright notice for easier
191 |       identification within third-party archives.
192 | 
193 |    Copyright 2019 Status Research & Development GmbH
194 | 
195 |    Licensed under the Apache License, Version 2.0 (the "License");
196 |    you may not use this file except in compliance with the License.
197 |    You may obtain a copy of the License at
198 | 
199 |        http://www.apache.org/licenses/LICENSE-2.0
200 | 
201 |    Unless required by applicable law or agreed to in writing, software
202 |    distributed under the License is distributed on an "AS IS" BASIS,
203 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
204 |    See the License for the specific language governing permissions and
205 |    limitations under the License.
206 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # nim-metrics
  2 | 
  3 | [![CI](https://github.com/status-im/nim-metrics/actions/workflows/ci.yml/badge.svg)](https://github.com/status-im/nim-metrics/actions/workflows/ci.yml)
  4 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
  5 | [![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
  6 | ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg)
  7 | 
  8 | ## Introduction
  9 | 
 10 | Nim metrics client library supporting the [Prometheus](https://prometheus.io/)
 11 | monitoring toolkit.
 12 | 
 13 | Designed to be thread-safe and efficient, it's disabled by default so libraries
 14 | can use it without any overhead for those library users not interested in
 15 | metrics.
 16 | 
 17 | ## Installation
 18 | 
 19 | You can install the development version of the library through Nimble with the
 20 | following command:
 21 | ```
 22 | nimble install metrics
 23 | ```
 24 | 
 25 | ## Usage
 26 | 
 27 | To enable metrics, compile your code with `-d:metrics --threads:on`.
 28 | 
 29 | ## Architectural overview
 30 | 
 31 | `Collector` objects holding various `Metric` objects are registered in one or
 32 | more `Registry` objects. There is a default registry being used for the most
 33 | common case.
 34 | 
 35 | Metric values are `float64`, but the API also accepts `int64` parameters which
 36 | are then cast to `float64`.
 37 | 
 38 | By starting an HTTP server, custom metrics (and some default ones) can be
 39 | pulled by Prometheus. They can also be serialised to strings for some quick and
 40 | dirty logging. Integration with the
 41 | [Chronicles](https://github.com/status-im/nim-chronicles) logging library is
 42 | available in a separate module.
 43 | 
 44 | The HTTP server used for pulling runs in its own thread, in order to minimise
 45 | the overhead. (Metric pushing, removed after `v0.1.2`, also used a dedicated thread.)
 46 | 
 47 | ## Collector types
 48 | 
 49 | ### Counter
 50 | 
 51 | A counter's value can only be incremented.
 52 | 
 53 | ```nim
 54 | # Declare a variable `myCounter` holding a `Counter` object with a `Metric`
 55 | # having the same name as the variable. The help string is mandatory. The initial
 56 | # value is 0 and it's automatically added to `defaultRegistry`.
 57 | declareCounter myCounter, "an example counter"
 58 | 
 59 | # increment it by 1
 60 | myCounter.inc()
 61 | 
 62 | # increment it by 10
 63 | myCounter.inc(10)
 64 | 
 65 | # count all exceptions in a block
 66 | someCounter.countExceptions:
 67 |   foo()
 68 | 
 69 | # or just an exception type
 70 | otherCounter.countExceptions(ValueError):
 71 |   bar()
 72 | 
 73 | # do you need a variable that's being exported from the module?
 74 | declarePublicCounter seenPeers, "number of seen peers"
 75 | # it's the equivalent of `var seenPeers* = ...`
 76 | 
 77 | # want to avoid declaring a variable, giving it a help string, or anything else for that matter?
 78 | counter("one_off_counter").inc()
 79 | # What this does is generate a {.global.} var, so as long as you use the same
 80 | # string, you're using the same counter. Using strings instead of identifiers
 81 | # skips any compiler protection in case of typos, so this API is not recommended
 82 | # for serious use.
 83 | ```
 84 | 
 85 | ### Gauge
 86 | 
 87 | Gauges can be incremented, decremented or set to a given value.
 88 | 
 89 | ```nim
 90 | declareGauge myGauge, "an example gauge" # or `declarePublicGauge` to export it
 91 | myGauge.inc(4.5)
 92 | myGauge.dec(2)
 93 | myGauge.set(10)
 94 | 
 95 | myGauge.setToCurrentTime() # Unix timestamp in seconds
 96 | 
 97 | myGauge.trackInProgress:
 98 |   # myGauge is incremented at the start of the block (a `myGauge.inc()` is being inserted here)
 99 |   foo()
100 |   # and decremented at the end (`myGauge.dec()`)
101 | 
102 | # set the gauge to the runtime of a block, in seconds
103 | myGauge.time:
104 |   bar()
105 | 
106 | # alternative, unrecommended API
107 | gauge("one_off_gauge").set(42)
108 | ```
109 | 
110 | ### Summary
111 | 
112 | Summaries sample observations and provide a total count and the sum of all observed values.
113 | 
114 | ```nim
115 | declareSummary mySummary, "an example summary" # or `declarePublicSummary` to export it
116 | mySummary.observe(10)
117 | mySummary.observe(0.5)
118 | echo mySummary
119 | ```
120 | 
121 | This will print out:
122 | 
123 | ```text
124 | # HELP mySummary an example summary
125 | # TYPE mySummary summary
126 | mySummary_sum 10.5 1569332171696
127 | mySummary_count 2.0 1569332171696
128 | mySummary_created 1569332171.0
129 | ```
130 | 
131 | ```nim
132 | # observe the execution duration of a block, in seconds
133 | mySummary.time:
134 |   foo()
135 | 
136 | # alternative, unrecommended API
137 | summary("one_off_summary").observe(10)
138 | ```
139 | 
140 | ### Histogram
141 | 
142 | These cumulative histograms store the count and total sum of observed values,
143 | just like summaries. Furthermore, they place the observed values in
144 | configurable buckets and provide per-bucket counts.
145 | 
146 | Note that an observed value will be counted in all buckets that have a size greater than or equal to it.
147 | 
148 | ```nim
149 | declareHistogram myHistogram, "an example histogram" # or `declarePublicHistogram` to export it
150 | # This uses the default bucket sizes: [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0,
151 | # 2.5, 5.0, 7.5, 10.0, Inf]
152 | 
153 | # You can customise the buckets:
154 | declareHistogram withCustomBuckets, "custom buckets", buckets = [0.0, 1.0, 2.0]
155 | # if you leave out the "Inf" bucket, it's added for you
156 | withCustomBuckets.observe(0.5)
157 | withCustomBuckets.observe(1)
158 | withCustomBuckets.observe(1.5)
159 | withCustomBuckets.observe(3.7)
160 | echo withCustomBuckets
161 | ```
162 | 
163 | This will print out:
164 | 
165 | ```text
166 | # HELP withCustomBuckets custom buckets
167 | # TYPE withCustomBuckets histogram
168 | withCustomBuckets_sum 6.7 1569334493506
169 | withCustomBuckets_count 4.0 1569334493506
170 | withCustomBuckets_created 1569334493.0
171 | withCustomBuckets_bucket{le="0.0"} 0.0
172 | withCustomBuckets_bucket{le="1.0"} 2.0 1569334493506
173 | withCustomBuckets_bucket{le="2.0"} 3.0 1569334493506
174 | withCustomBuckets_bucket{le="+Inf"} 4.0 1569334493506
175 | ```
176 | 
177 | ```nim
178 | # observe the execution duration of a block, in seconds
179 | myHistogram.time:
180 |   foo()
181 | 
182 | # alternative, unrecommended API
183 | histogram("one_off_histogram").observe(10)
184 | ```
185 | 
186 | ### Custom collectors
187 | 
188 | Sometimes you need to create metrics on the fly, with a custom `collect()`
189 | method of a custom collector type.
190 | 
191 | Let's say you have a USB-attached power meter and, for some reason, you want
192 | to read the power consumption every time Prometheus reads your metrics:
193 | 
194 | ```nim
195 | import metrics, times
196 | 
197 | when defined(metrics):
198 |   type PowerCollector = ref object of Collector
199 |   let powerCollector = PowerCollector.newCollector(name = "power_usage", help = "Instantaneous power usage - in watts.")
200 | 
201 |   method collect(collector: PowerCollector, output: MetricHandler): Metrics =
202 |     let timestamp = collector.now()
203 |     output(
204 |       name = "power_usage",
205 |       value = getPowerUsage(), # your power-meter reader
206 |       timestamp = timestamp,
207 |     )
208 | 
209 | ```
210 | 
211 | There's a bit of repetition in the collector and metric names, because we no
212 | longer have behind-the-scenes name copying/deriving there.
213 | 
214 | You can output multiple metrics from your custom `collect()` method. It's
215 | perfectly legal and we do that internally for our system/runtime metrics.
216 | 
217 | Try not to get creative with dynamic metric names - Prometheus has a hard time
218 | dealing with that.
219 | 
220 | ## Labels
221 | 
222 | Metric labels are supported for the Prometheus backend, as a way to add extra
223 | dimensions corresponding to each combination of metric name and label values.
224 | This can quickly get out of hand, as you can guess, so don't go overboard with
225 | this feature. (See also the [relevant warnings in Prometheus' docs](https://prometheus.io/docs/practices/instrumentation/#do-not-overuse-labels).)
226 | 
227 | You declare label names when defining the collector and label values each time
228 | you update it:
229 | 
230 | ```nim
231 | declareCounter lCounter, "example counter with labels", ["foo", "bar"]
232 | lCounter.inc(labelValues = ["1", "a"]) # the label values must be strings
233 | lCounter.inc(labelValues = ["2", "b"])
234 | # How many metrics are now in this collector? Two, because we used two sets of label values:
235 | echo lCounter
236 | ```
237 | 
238 | ```text
239 | # HELP lCounter example counter with labels
240 | # TYPE lCounter counter
241 | lCounter_total{foo="1",bar="a"} 1.0 1569340503703
242 | lCounter_created{foo="1",bar="a"} 1569340503.0
243 | lCounter_total{foo="2",bar="b"} 1.0 1569340503703
244 | lCounter_created{foo="2",bar="b"} 1569340503.0
245 | ```
246 | 
247 | (OK, there are four metrics in total, because each one gets a `*_created` buddy.)
248 | 
249 | So if you must use labels, make sure there's a finite and small number of
250 | possible label values being set.
251 | 
252 | ## Metric name and label name validation
253 | 
254 | We use Prometheus standards for that, so metric names must comply with the
255 | `^[a-zA-Z_:][a-zA-Z0-9_:]*$` regex while label names have to comply with
256 | `^[a-zA-Z_][a-zA-Z0-9_]*$`.
257 | 
258 | In the examples you've seen so far, all collectors declared with
259 | `declare<CollectorType>` had more stringent naming rules, since their names were
260 | also identifiers for Nim variables - which can't have colons in them.
261 | 
262 | To overcome this, without relying on the discouraged alternative API, use the `name` parameter:
263 | 
264 | ```nim
265 | declareCounter cCounter, "counter with colons in name", name = "foo:bar:baz"
266 | cCounter.inc()
267 | echo cCounter
268 | ```
269 | 
270 | ```text
271 | # HELP foo:bar:baz counter with colons in name
272 | # TYPE foo:bar:baz counter
273 | foo:bar:baz_total 1.0 1569341756504
274 | foo:bar:baz_created 1569341756.0
275 | ```
276 | 
277 | ## Logging
278 | 
279 | Metrics are not logs, but you might want to log them nonetheless. The `$`
280 | procedure is defined for collectors and registries, so you can just use the
281 | built-in string serialisation to print them:
282 | 
283 | ```nim
284 | echo myCounter, myGauge
285 | echo defaultRegistry
286 | ```
287 | 
288 | Integration with [Chronicles](https://github.com/status-im/nim-chronicles) is available in a separate module:
289 | 
290 | ```nim
291 | import chronicles, metrics, metrics/chronicles_support
292 | 
293 | # ...
294 | 
295 | info "myCounter", myCounter
296 | debug "default registry", defaultRegistry
297 | ```
298 | 
299 | ## Testing
300 | 
301 | When testing, you might want to isolate some collectors by registering them
302 | into a custom registry:
303 | 
304 | ```nim
305 | var myRegistry = newRegistry()
306 | declareCounter myCounter, "help", registry = myRegistry
307 | echo myRegistry
308 | 
309 | # this means that `myCounter` is no longer registered in `defaultRegistry`
310 | echo defaultRegistry
311 | ```
312 | 
313 | These unoptimised (read "very inefficient") `value()` and `valueByName()`
314 | procedures for accessing metric values should only be used inside test suites:
315 | 
316 | ```nim
317 | suite "counter":
318 |   test "basic":
319 |     declareCounter myCounter, "help"
320 |     check myCounter.value == 0
321 |     myCounter.inc()
322 |     check myCounter.value == 1
323 | 
324 |     declareSummary cSummary, "summary with colons in name", name = "foo:bar:baz"
325 |     cSummary.observe(10)
326 |     check cSummary.valueByName("foo:bar:baz_count") == 1
327 |     check cSummary.valueByName("foo:bar:baz_sum") == 10
328 | ```
329 | 
330 | ## Prometheus endpoint
331 | 
332 | First, you need to import the HTTP server module.
333 | 
334 | ### Chronos
335 | 
336 | Using [Chronos](https://github.com/status-im/nim-chronos/):
337 | 
338 | ```nim
339 | import metrics, metrics/chronos_httpserver
340 | ```
341 | 
342 | ### Starting the HTTP server
343 | 
344 | Start an HTTP server listening on `127.0.0.1:8000` from which the Prometheus
345 | daemon can pull the metrics from all collectors in `defaultRegistry` (plus the
346 | default metrics):
347 | 
348 | ```nim
349 | startMetricsHttpServer()
350 | ```
351 | 
352 | Or set your own address and port to listen to:
353 | 
354 | ```nim
355 | import net
356 | 
357 | startMetricsHttpServer("127.0.0.1", Port(8000))
358 | ```
359 | 
360 | The HTTP server will run in its own thread. It will expose two endpoints:
361 | 
362 | * http://127.0.0.1:8000/metrics - Returns the metrics consumed by Prometheus.
363 | * http://127.0.0.1:8000/health - Health check that returns the string `OK` and a 200 status code.
364 | 
365 | ### System metrics
366 | 
367 | Default metrics available (see also [the relevant Prometheus docs](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors)):
368 | 
369 | ```text
370 | process_cpu_seconds_total
371 | process_open_fds
372 | process_max_fds
373 | process_virtual_memory_bytes
374 | process_resident_memory_bytes
375 | process_start_time_seconds
376 | ```
377 | 
378 | The `process_*` metrics are only available on Linux, for now.
379 | 
380 | ### Nim runtime metrics
381 | 
382 | The following metrics are automatically exposed for the Nim runtime:
383 | 
384 | ```text
385 | nim_gc_mem_bytes[thread_id]
386 | nim_gc_mem_occupied_bytes[thread_id]
387 | nim_gc_heap_instance_occupied_bytes[type_name]
388 | nim_gc_heap_instance_occupied_summed_bytes
389 | ```
390 | 
391 | `nim_gc_heap_*` metrics are only available when compiling with
392 | `-d:nimTypeNames` and hold the top 10 instance types, in reverse order of
393 | their total heap usage (from all threads), at the time the metric is polled.
394 | Since this set changes with time, you'll see more than 10 types in Grafana.
395 | 
396 | The Nim garbage collector exposes some per-thread metrics whose value is only
397 | accessible from within the thread itself.
398 | 
399 | To update these, call `updateThreadMetrics` regularly from within each relevant
400 | thread - each such metric will include a `thread_id` label.
401 | 
402 | Thread metrics for the main application thread are automatically updated
403 | whenever a metric is updated from the main application thread, though only
404 | at specified intervals.
405 | 
406 | ```nim
407 | import times
408 | when defined(metrics):
409 |   # get the default minimal update interval
410 |   echo getSystemMetricsUpdateInterval()
411 |   # you can change it
412 |   setSystemMetricsUpdateInterval(initDuration(seconds = 2))
413 | ```
414 | 
415 | In performance-sensitive applications, it is recommended that you disable the
416 | piggy-backing and update system metrics manually:
417 | 
418 | ```nim
419 | # disable automatic updates
420 | setSystemMetricsAutomaticUpdate(false)
421 | # somewhere in your event loop, at an interval of your choice
422 | updateThreadMetrics()
423 | ```
424 | 
425 | Screenshot of [Grafana showing data from Prometheus that pulls it from Nimbus which uses nim-metrics](https://github.com/status-im/nimbus-eth1/#metric-visualisation):
426 | 
427 | ![Grafana screenshot](https://i.imgur.com/AdtavDA.png)
428 | 
429 | ## Historical notes
430 | 
431 | Versions up to `v0.1.2` also supported push metric servers such as
432 | [StatsD](https://github.com/statsd/statsd/wiki) and
433 | [Carbon](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) - this
434 | support has since been removed.
435 | 
436 | ## Contributing
437 | 
438 | When submitting pull requests, please add test cases for any new features or
439 | fixes and make sure `nimble test` is still able to execute the entire test
440 | suite successfully.
441 | 
442 | Code formatting is done using [nph](https://github.com/arnetheduck/nph). See
443 | [CI](./.github/workflows/ci.yml) for the correct version.
444 | 
445 | ## License
446 | 
447 | Licensed and distributed under either of
448 | 
449 | * MIT license: [LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT
450 | * Apache License, Version 2.0: [LICENSE-APACHEv2](LICENSE-APACHEv2) or http://www.apache.org/licenses/LICENSE-2.0
451 | 
452 | at your option. These files may not be copied, modified, or distributed except according to those terms.
453 | 


--------------------------------------------------------------------------------
/tests/main_tests.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019-2023 Status Research & Development GmbH
  2 | # Licensed and distributed under either of
  3 | #   * MIT license: http://opensource.org/licenses/MIT
  4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
  5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
  6 | 
  7 | import net, os, unittest2, ../metrics
  8 | 
  9 | import ./test_shseq
 10 | 
 11 | when defined(metrics):
 12 |   import times
 13 | 
 14 | declareCounter globalCounter, "help" # module-level collectors exercised by `gcSafetyTest` below
 15 | declarePublicCounter globalPublicCounter, "help" # exported variant of the declaration above
 16 | declareGauge globalGauge, "help"
 17 | declarePublicGauge globalPublicGauge, "help" # exported variant of the declaration above
 18 | 
 19 | const brokenGlobals = # true only on Nim 2.0.x built with ARC or ORC; used below to skip a test
 20 |   (defined(gcArc) or defined(gcOrc)) and (NimMajor, NimMinor) == (2, 0)
 21 | 
 22 | proc gcSafetyTest*() {.gcsafe.} = # compile-time check only: it passes if this `gcsafe` proc compiles
 23 |   globalCounter.inc 2 # command-call syntax
 24 |   globalPublicCounter.inc(2) # parenthesised call syntax
 25 |   globalGauge.set 10.0
 26 |   globalGauge.inc
 27 |   globalGauge.dec
 28 |   discard globalCounter.value # read accessors must be usable from `gcsafe` code as well
 29 |   discard globalCounter.valueByName("test") # NOTE(review): "test" looks like an arbitrary name; result is unused
 30 |   globalPublicGauge.set(1)
 31 | 
 32 | suite "counter":
 33 |   setup: # executed before each test of this suite
 34 |     var registry = newRegistry() # custom registry, so `myCounter` is not registered in `defaultRegistry`
 35 |     declareCounter myCounter, "help", registry = registry
 36 | 
 37 |   test "basic":
 38 |     check myCounter.value == 0
 39 |     myCounter.inc()
 40 |     check myCounter.value == 1
 41 |     myCounter.inc(7)
 42 |     check myCounter.value == 8
 43 |     myCounter.inc(0.5)
 44 |     check myCounter.value == 8.5
 45 |     # you shouldn't be doing this - but we don't want metrics to crash the app; the value stays unchanged
 46 |     myCounter.inc(-1)
 47 |     check myCounter.value == 8.5
 48 |     # name validation (have to use the internal API to get past Nim's identifier validation)
 49 |     when defined(metrics):
 50 |       expect ValueError:
 51 |         discard newCounter("1337", "invalid name") # only the raised error matters, not the result
 52 | 
 53 |   test "alternative API":
 54 |     when brokenGlobals: # decided at compile time, see the `brokenGlobals` constant
 55 |       skip()
 56 |     else:
 57 |       counter("one_off_counter").inc() # the same string always refers to the same counter
 58 |       check counter("one_off_counter").value == 1
 59 |       counter("one_off_counter").inc(0.5)
 60 |       check counter("one_off_counter").value == 1.5
 61 | 
 62 |       # Disabled: different collector types can't share a name, but unittest
 63 |       # can't catch an exception raised in the assignment to a {.global.}
 64 |       # variable, so the check below stays commented out.
 65 |       # expect RegistrationError:
 66 |       #   check gauge("one_off_counter").value == 0
 67 | 
 68 |       # colons are accepted in names given as strings
 69 |       counter("one:off:counter:colons").inc()
 70 |       check counter("one:off:counter:colons").value == 1
 71 | 
 72 |   test "exceptions":
 73 |     # `boom` always raises: ValueError when `catchable`, IndexDefect otherwise
 74 |     proc boom(catchable: bool) =
 75 |       if not catchable:
 76 |         raise newException(IndexDefect, "exc2")
 77 |       raise newException(ValueError, "exc1")
 78 | 
 79 |     expect IndexDefect: # filter is ValueError, so an IndexDefect is not counted
 80 |       myCounter.countExceptions(ValueError):
 81 |         boom(catchable = false)
 82 |     check myCounter.value == 0
 83 | 
 84 |     expect ValueError: # matching type: counted, and still propagated to the caller
 85 |       myCounter.countExceptions(ValueError):
 86 |         boom(catchable = true)
 87 |     check myCounter.value == 1
 88 | 
 89 |     expect IndexDefect: # no filter: this exception is counted too
 90 |       myCounter.countExceptions:
 91 |         boom(catchable = false)
 92 |     check myCounter.value == 2
 93 | 
 94 |     myCounter.countExceptions: # nothing raised, so nothing is counted
 95 |       discard
 96 |     check myCounter.value == 2
 97 | 
 98 |   test "labels":
 99 |     declareCounter lCounter, "l help", ["foo", "bar"], registry
100 |     expect KeyError: # reading a labelled counter without label values fails
101 |       discard lCounter.value
102 | 
103 |     # a labelled value can't be read before it has been initialised
104 |     expect KeyError:
105 |       discard lCounter.value(@["a", "x"])
106 | 
107 |     let seqLabels = @["a", "x \"y\" \n\\z"] # second value holds quotes, a newline and a backslash
108 |     lCounter.inc(labelValues = seqLabels)
109 |     check lCounter.value(seqLabels) == 1
110 | 
111 |     # invalid label names are rejected when the collector is declared
112 |     expect ValueError:
113 |       declareCounter invalid1, "invalid", ["123", "foo"]
114 |     expect ValueError:
115 |       declareCounter invalid2, "invalid", ["foo", "123"]
116 |     expect ValueError:
117 |       declareCounter invalid3, "invalid", ["foo", "__bar"]
118 | 
119 |     # label values passed as an array instead of a sequence
120 |     declareCounter lCounter2, "l2 help", ["foo", "bar"], registry
121 |     let arrayLabels = ["a", "x \"y\" \n\\z"]
122 |     lCounter2.inc(labelValues = arrayLabels)
123 |     check lCounter2.value(arrayLabels) == 1
124 | 
125 |     # several distinct values for a single label, each incremented four times
126 |     const singleLabels = ["d", "b", "c", "a", "e"]
127 |     declareCounter lCounter3, "l3 help", ["aaa"]
128 |     for _ in 0 ..< 4:
129 |       for label in singleLabels:
130 |         lCounter3.inc(1, [label])
131 |     for label in singleLabels:
132 |       check lCounter3.value([label]) == 4
133 | 
134 |   test "sample rate":
135 |     declareCounter sCounter, "counter with a sample rate set", registry = registry # NOTE(review): no sample-rate argument is passed here; confirm this is intended
136 |     sCounter.inc()
137 |     # No sampling done on our side, just in sending the increments to a StatsD server
138 |     check sCounter.value == 1
139 | 
140 |   test "names with colons":
141 |     declareCounter cCounter,
142 |       "counter with colons in name", registry = registry, name = "foo:bar:baz"
143 |     cCounter.inc()
144 |     check cCounter.value == 1
145 |     check cCounter.valueByName("foo:bar:baz_total") == 1 # looked up with a `_total` suffix
146 |     # echo cCounter
147 | 
148 |     var myName = "bla:bla" # same check, with the name coming from a variable
149 |     declareCounter cCounter2,
150 |       "another counter with colon in name", registry = registry, name = myName
151 |     cCounter2.inc()
152 |     check cCounter2.value == 1
153 |     check cCounter2.valueByName("bla:bla_total") == 1
154 |     # echo cCounter2
155 | 
156 | suite "gauge":
157 |   setup: # runs before each test: a fresh registry and a fresh `myGauge`
158 |     var registry = newRegistry()
159 |     declareGauge myGauge, "help", registry = registry
160 | 
161 |   test "basic":
162 |     check myGauge.value == 0
163 |     myGauge.inc()
164 |     check myGauge.value == 1
165 |     myGauge.dec(3)
166 |     check myGauge.value == -2.0 # weird Nim bug if it's "-2"
167 |     myGauge.dec(0.1)
168 |     check myGauge.value == -2.1
169 |     myGauge.set(9.5) # `set` replaces the value rather than adding to it
170 |     check myGauge.value == 9.5
171 |     myGauge.set(1)
172 |     check myGauge.value == 1
173 | 
174 |   test "GlobalGauge value":
175 |     check globalGauge.value == 0.0 # `globalGauge` is declared outside this suite
176 | 
177 |   test "alternative API":
178 |     when brokenGlobals: # compile-time switch defined outside this suite
179 |       skip()
180 |     else:
181 |       gauge("one_off_gauge").set(1)
182 |       check gauge("one_off_gauge").value == 1
183 |       gauge("one_off_gauge").inc(0.5) # the same name refers to the same gauge
184 |       check gauge("one_off_gauge").value == 1.5
185 | 
186 |   test "in progress":
187 |     myGauge.trackInProgress: # gauge reads 1 inside the block, 0 again after it
188 |       check myGauge.value == 1
189 |     check myGauge.value == 0
190 | 
191 |     declareGauge lgauge, "help", @["foobar"], registry = registry
192 |     let labelValues = @["b"]
193 |     lgauge.trackInProgress(labelValues): # same behaviour for a labelled gauge
194 |       check lgauge.value(labelValues) == 1
195 |     check lgauge.value(labelValues) == 0
196 |     # echo registry
197 | 
198 |   test "timing":
199 |     myGauge.time:
200 |       sleep(1000)
201 |       check myGauge.value == 0 # nothing is recorded while the block is still running
202 |     check myGauge.value >= 1 # may be 2 inside a macOS Travis job
203 |     # echo registry
204 | 
205 |   test "timing with labels":
206 |     declareGauge lgauge2, "help", @["foobar"], registry = registry
207 |     let labelValues = @["b"]
208 |     lgauge2.time(labelValues):
209 |       sleep(1000)
210 |     check lgauge2.value(labelValues) >= 1 # presumably elapsed seconds; confirm the unit
211 | 
212 |   test "names with colons":
213 |     declareGauge cGauge,
214 |       "gauge with colons in name", registry = registry, name = "foo:bar:baz"
215 |     cGauge.inc()
216 |     check cGauge.value == 1
217 |     check cGauge.valueByName("foo:bar:baz") == 1 # looked up under the plain name
218 |     # echo cGauge
219 | 
220 | suite "summary":
221 |   setup: # runs before each test: a fresh registry and a fresh `mySummary`
222 |     var registry = newRegistry()
223 |     declareSummary mySummary, "help", registry = registry
224 | 
225 |   test "basic":
226 |     check mySummary.valueByName("mySummary_count") == 0
227 |     check mySummary.valueByName("mySummary_sum") == 0
228 |     mySummary.observe(10) # each observation adds 1 to _count and its value to _sum
229 |     check mySummary.valueByName("mySummary_count") == 1
230 |     check mySummary.valueByName("mySummary_sum") == 10
231 |     mySummary.observe(0.5)
232 |     check mySummary.valueByName("mySummary_count") == 2
233 |     check mySummary.valueByName("mySummary_sum") == 10.5
234 | 
235 |   test "alternative API":
236 |     when brokenGlobals: # compile-time switch defined outside this suite
237 |       skip()
238 |     else:
239 |       summary("one_off_summary").observe(10)
240 |       check summary("one_off_summary").valueByName("one_off_summary_count") == 1
241 |       check summary("one_off_summary").valueByName("one_off_summary_sum") == 10
242 | 
243 |   test "timing":
244 |     mySummary.time:
245 |       sleep(1000)
246 |       check mySummary.valueByName("mySummary_sum") == 0 # not observed until the block ends
247 |     check mySummary.valueByName("mySummary_sum") >= 1
248 | 
249 |   test "timing with labels":
250 |     declareSummary lsummary, "help", ["foobar"], registry = registry
251 |     let labelValues = ["b"]
252 |     lsummary.time(labelValues):
253 |       sleep(1000)
254 |     check lsummary.valueByName("lsummary_sum", labelValues) >= 1
255 | 
256 |   test "names with colons":
257 |     declareSummary cSummary,
258 |       "summary with colons in name", registry = registry, name = "foo:bar:baz"
259 |     cSummary.observe(10)
260 |     check cSummary.valueByName("foo:bar:baz_count") == 1
261 |     check cSummary.valueByName("foo:bar:baz_sum") == 10
262 |     # echo cSummary
263 | 
264 | suite "histogram":
265 |   setup: # runs before each test: a fresh registry and a fresh `myHistogram`
266 |     var registry = newRegistry()
267 |     declareHistogram myHistogram, "help", registry = registry
268 | 
269 |   test "basic":
270 |     check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0
271 |     check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 0
272 |     check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 0
273 |     check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 0
274 |     check myHistogram.valueByName("myHistogram_count") == 0
275 |     check myHistogram.valueByName("myHistogram_sum") == 0
276 |     # 2 is counted in the 2.5 bucket and in every larger one, but not in 1.0
277 |     myHistogram.observe(2)
278 |     check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0
279 |     check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 1
280 |     check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 1
281 |     check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 1
282 |     check myHistogram.valueByName("myHistogram_count") == 1
283 |     check myHistogram.valueByName("myHistogram_sum") == 2
284 |     # a value equal to a bucket bound is counted in that bucket
285 |     myHistogram.observe(2.5)
286 |     check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0
287 |     check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 2
288 |     check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 2
289 |     check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 2
290 |     check myHistogram.valueByName("myHistogram_count") == 2
291 |     check myHistogram.valueByName("myHistogram_sum") == 4.5
292 |     # an infinite observation only reaches the +Inf bucket and makes the sum Inf
293 |     myHistogram.observe(Inf)
294 |     check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0
295 |     check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 2
296 |     check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 2
297 |     check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 3
298 |     check myHistogram.valueByName("myHistogram_count") == 3
299 |     check myHistogram.valueByName("myHistogram_sum") == Inf
300 |     # custom buckets without Inf: a +Inf bucket can still be queried
301 |     declareHistogram h1, "help", registry = registry, buckets = [0.0, 1.0, 2.0]
302 |     check h1.valueByName("h1_bucket", [], ["0.0"]) == 0
303 |     check h1.valueByName("h1_bucket", [], ["1.0"]) == 0
304 |     check h1.valueByName("h1_bucket", [], ["2.0"]) == 0
305 |     check h1.valueByName("h1_bucket", [], ["+Inf"]) == 0
306 |     # custom buckets that already end with Inf
307 |     declareHistogram h2, "help", registry = registry, buckets = [0.0, 1.0, 2.0, Inf]
308 |     check h2.valueByName("h2_bucket", [], ["0.0"]) == 0
309 |     check h2.valueByName("h2_bucket", [], ["1.0"]) == 0
310 |     check h2.valueByName("h2_bucket", [], ["2.0"]) == 0
311 |     check h2.valueByName("h2_bucket", [], ["+Inf"]) == 0
312 |     # rejected bucket lists: empty, Inf only, not in ascending order
313 |     expect ValueError:
314 |       declareHistogram h3, "help", registry = registry, buckets = []
315 |     expect ValueError:
316 |       declareHistogram h3, "help", registry = registry, buckets = [Inf]
317 |     expect ValueError:
318 |       declareHistogram h3, "help", registry = registry, buckets = [3.0, 1.0]
319 | 
320 |   test "alternative API":
321 |     when brokenGlobals: # compile-time switch defined outside this suite
322 |       skip()
323 |     else:
324 |       histogram("one_off_histogram").observe(2)
325 |       check histogram("one_off_histogram").valueByName(
326 |         "one_off_histogram_bucket", [], ["1.0"]
327 |       ) == 0
328 |       check histogram("one_off_histogram").valueByName(
329 |         "one_off_histogram_bucket", [], ["2.5"]
330 |       ) == 1
331 |       check histogram("one_off_histogram").valueByName(
332 |         "one_off_histogram_bucket", [], ["5.0"]
333 |       ) == 1
334 |       check histogram("one_off_histogram").valueByName(
335 |         "one_off_histogram_bucket", [], ["+Inf"]
336 |       ) == 1
337 |       check histogram("one_off_histogram").valueByName("one_off_histogram_count") == 1
338 |       check histogram("one_off_histogram").valueByName("one_off_histogram_sum") == 2
339 | 
340 |   test "timing":
341 |     myHistogram.time:
342 |       sleep(1000)
343 |       check myHistogram.valueByName("myHistogram_sum") == 0 # not observed until the block ends
344 |     check myHistogram.valueByName("myHistogram_sum") >= 1
345 |     check myHistogram.valueByName("myHistogram_count") == 1
346 |     check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 1
347 | 
348 |   test "timing with labels":
349 |     declareHistogram lhistogram, "help", ["foobar"], registry = registry
350 |     let labelValues = ["b"]
351 |     lhistogram.time(labelValues):
352 |       sleep(1000)
353 |     check lhistogram.valueByName("lhistogram_sum", labelValues) >= 1
354 |     check lhistogram.valueByName("lhistogram_count", labelValues) == 1
355 |     check lhistogram.valueByName("lhistogram_bucket", labelValues, ["+Inf"]) == 1
356 | 
357 |   test "names with colons":
358 |     declareHistogram cHistogram,
359 |       "histogram with colons in name", registry = registry, name = "foo:bar:baz"
360 |     cHistogram.observe(10)
361 |     check cHistogram.valueByName("foo:bar:baz_count") == 1
362 |     check cHistogram.valueByName("foo:bar:baz_sum") == 10
363 |     # echo cHistogram
364 | 
365 | import ./duplicate_coll_mod
366 | suite "registry":
367 |   test "duplicate collectors":
368 |     expect RegistrationError: # duplicate_coll_mod already declares `duplicate_counter`
369 |       declareCounter duplicate_counter, "duplicate counter"
370 |       duplicate_counter.inc()
371 | 
372 | when defined(metrics): # custom collectors are only exercised with -d:metrics
373 |   type MyCustomCollector = ref object of Gauge
374 |   var
375 |     myCustomCollector = MyCustomCollector.newCollector("my_custom_collector", "help") # no registry passed
376 |     registry2 = newRegistry()
377 |     myCustomCollector2 = MyCustomCollector.newCollector(
378 |       "my_custom_collector2", "help2", registry = registry2
379 |     )
380 | 
381 |   method collect(collector: MyCustomCollector, output: MetricHandler) = # one constant sample
382 |     let timestamp = collector.now()
383 |     output(name = "custom_metric", value = 42, timestamp = timestamp)
384 | 
385 |   suite "custom collectors":
386 |     test "42":
387 |       check myCustomCollector.value == 42 # the value produced by `collect` above
388 | 
389 |     test "custom registry":
390 |       var metrics: seq[float64] # filled by the `output` handler below
391 |       proc output(
392 |           name: string,
393 |           value: float64,
394 |           labels, labelValues: openArray[string],
395 |           timestamp: Time,
396 |       ) =
397 |         metrics.add(value)
398 | 
399 |       registry2.collect(output)
400 |       check: # only myCustomCollector2 was created with registry2
401 |         metrics.len == 1
402 |         metrics[0] == 42
403 | 
404 | suite "system metrics":
405 |   test "change update interval": # smoke test: it contains no `check`, it only calls the API
406 |     when defined(metrics):
407 |       declareGauge myGauge, "my gauge"
408 |       myGauge.set(1)
409 |       # echo defaultRegistry
410 |       echo getSystemMetricsUpdateInterval()
411 |       setSystemMetricsUpdateInterval(initDuration(seconds = 1))
412 |       echo getSystemMetricsUpdateInterval()
413 |       sleep(2) # NOTE(review): std/os sleep takes milliseconds, so this is 2 ms against the 1 s interval above; confirm intent
414 |       myGauge.set(2)
415 |       # echo defaultRegistry
416 |       echo getSystemMetricsAutomaticUpdate()
417 |       setSystemMetricsAutomaticUpdate(false)
418 |       updateSystemMetrics() # explicit updates after automatic updating was switched off
419 |       updateThreadMetrics()
420 | 


--------------------------------------------------------------------------------
/metrics/chronos_httpserver.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019-2021 Status Research & Development GmbH
  2 | # Licensed and distributed under either of
  3 | #   * MIT license: http://opensource.org/licenses/MIT
  4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
  5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
  6 | 
  7 | ################################
  8 | # HTTP server (for Prometheus) #
  9 | ################################
 10 | 
 11 | {.push raises: [].}
 12 | 
 13 | when defined(nimHasUsed):
 14 |   {.used.}
 15 | 
 16 | import results
 17 | import chronos, chronos/apps/http/httpserver
 18 | export chronos, results
 19 | 
 20 | type
 21 |   MetricsError* = object of CatchableError # raised by start/stop/status below
 22 | 
 23 |   MetricsHttpServerStatus* {.pure.} = enum # result of `status`
 24 |     Closed
 25 |     Running
 26 |     Stopped
 27 | 
 28 |   MetricsServerData = object # argument handed to the server thread
 29 |     when defined(metrics):
 30 |       address: TransportAddress # address the HTTP server is created with
 31 |       requestPipe: tuple[read: AsyncFD, write: AsyncFD] # owner writes, thread reads
 32 |       responsePipe: tuple[read: AsyncFD, write: AsyncFD] # thread writes, owner reads
 33 | 
 34 |   MetricsHttpServerRef* = ref object
 35 |     when defined(metrics):
 36 |       data: MetricsServerData
 37 |       thread: Thread[MetricsServerData] # runs serveMetricsServer
 38 |       reqTransp: StreamTransport # wraps requestPipe.write
 39 |       respTransp: StreamTransport # wraps responsePipe.read
 40 | 
 41 |   MetricsHttpServerMiddlewareRef* = ref object of HttpServerMiddlewareRef
 42 | 
 43 | when defined(metrics):
 44 |   import std/os
 45 |   import ../metrics, ./common
 46 | 
 47 |   var httpServerThread: Thread[TransportAddress]
 48 | 
 49 |   proc serveHttp(address: TransportAddress) {.thread.} =
 50 |     ## Thread body of the deprecated API: serves /metrics and /health on `address`.
 51 |     ignoreSignalsInThread()
 52 |     proc cb(
 53 |         r: RequestFence
 54 |     ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
 55 |       if r.isOk():
 56 |         let request = r.get()
 57 |         try:
 58 |           if request.uri.path == "/metrics":
 59 |             {.gcsafe.}:
 60 |               # Prometheus drops metrics in surprising ways if given timestamps: omit them.
 61 |               let
 62 |                 response = defaultRegistry.toText()
 63 |                 headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
 64 |               return await request.respond(Http200, response, headers)
 65 |           elif request.uri.path == "/health":
 66 |             return await request.respond(Http200, "OK")
 67 |           else:
 68 |             return await request.respond(Http404, "Try /metrics")
 69 |         except HttpWriteError as exc:
 70 |           return defaultResponse(exc)
 71 |       else:
 72 |         return defaultResponse() # explicit answer for a failed fence, not an implicit nil
 73 | 
 74 |     let res = HttpServerRef.new(address, cb, socketFlags = {ServerFlags.ReuseAddr})
 75 |     if res.isErr():
 76 |       printError(res.error()) # the server could not be created: report it and end the thread
 77 |       return
 78 |     let server = res.get()
 79 |     server.start()
 80 |     while true: # no exit path: the thread keeps waiting on the server
 81 |       try:
 82 |         waitFor server.join()
 83 |       except CatchableError as e:
 84 |         printError(e.msg)
 85 |         sleep(1000) # pause for a second before waiting again
 86 | 
 87 |   const
 88 |     ResponseOk = 0'u8 # status byte of a successful reply
 89 |     ResponseError = 1'u8 # status byte of a failed reply; the payload is the error text
 90 |     MessageSize = 255 # payload bytes that follow the status byte
 91 | 
 92 |   type
 93 |     MetricsRequest {.pure.} = enum # sent to the server thread as a single byte
 94 |       Status
 95 |       Start
 96 |       Stop
 97 |       Close
 98 | 
 99 |     MetricsResponse = object # decoded reply frame
100 |       status: byte
101 |       data: array[MessageSize, byte]
102 | 
103 |     MetricsThreadData = object # state used inside the server thread
104 |       reqTransp: StreamTransport # wraps requestPipe.read
105 |       respTransp: StreamTransport # wraps responsePipe.write
106 |       http: HttpServerRef # nil when the HTTP server could not be created
107 | 
108 |     MetricsErrorKind {.pure.} = enum # selects the message used by raiseMetricsError
109 |       Timeout
110 |       Transport
111 |       Communication
112 | 
113 |   proc raiseMetricsError(
114 |       msg: string, exc: ref Exception
115 |   ) {.noreturn, noinline, raises: [MetricsError].} =
116 |     ## Raises MetricsError: `msg` plus the name and text of `exc`, kept as parent.
117 |     raise newException(
118 |       MetricsError, msg & ", reason: [" & $exc.name & "]: " & $exc.msg, exc)
119 | 
120 |   proc raiseMetricsError(msg: string) {.noreturn, noinline, raises: [MetricsError].} =
121 |     raise newException(MetricsError, msg) # message only, no parent exception
122 | 
123 |   proc raiseMetricsError(
124 |       msg: MetricsErrorKind, exc: ref Exception
125 |   ) {.noreturn, noinline, raises: [MetricsError].} =
126 |     ## Raises MetricsError with the fixed text of the given kind and `exc` as reason.
127 |     case msg
128 |     of MetricsErrorKind.Timeout:
129 |       raiseMetricsError("Connection with metrics thread timed out", exc)
130 |     of MetricsErrorKind.Transport, MetricsErrorKind.Communication:
131 |       raiseMetricsError("Communication with metrics thread failed", exc)
132 | 
133 |   proc raiseMetricsError*(
134 |       msg: string, err: OSErrorCode
135 |   ) {.noreturn, noinline, raises: [MetricsError].} =
136 |     raise newException( # reason is the numeric OS error code and its osErrorMsg text
137 |       MetricsError, msg & ", reason: [OSError]: (" & $int(err) & ") " & osErrorMsg(err))
138 | 
139 |   proc respond(
140 |       m: MetricsThreadData, mtype: byte, message: string
141 |   ) {.async: (raises: [CancelledError, MetricsError, TransportError]).} =
142 |     ## Writes one fixed-size frame to `m.respTransp`: `mtype`, then up to
143 |     ## `MessageSize` bytes of `message` padded with zeros (longer text is cut).
144 |     var frame: array[MessageSize + 1, byte] # starts zeroed, which gives the padding
145 |     frame[0] = mtype
146 |     for i in 0 ..< min(message.len, MessageSize):
147 |       frame[i + 1] = byte(message[i])
148 |     let written = await m.respTransp.write(addr frame[0], frame.len)
149 |     if written != frame.len:
150 |       raiseMetricsError("Incomplete response has been sent")
151 | 
152 |   proc communicate(
153 |       m: MetricsHttpServerRef, req: MetricsRequest
154 |   ): Future[MetricsResponse] {.
155 |       async: (raises: [CancelledError, MetricsError, TransportError])
156 |   .} =
157 |     ## Sends `req` as a single byte, then reads and decodes one full reply frame.
158 |     var frame: array[MessageSize + 1, byte]
159 |     frame[0] = byte(req)
160 |     let sent = await m.reqTransp.write(addr frame[0], 1)
161 |     if sent != 1:
162 |       raiseMetricsError("Incomplete request has been sent")
163 |     await m.respTransp.readExactly(addr frame[0], frame.len) # reuses the same buffer
164 |     var reply = MetricsResponse(status: frame[0])
165 |     copyMem(addr reply.data[0], addr frame[1], sizeof(reply.data))
166 |     reply
167 | 
168 |   proc getMessage(m: MetricsResponse): string =
169 |     ## Returns the payload of `m` as text: every byte before the first zero
170 |     ## byte, or the whole payload when it contains none.
171 |     result = newStringOfCap(MessageSize + 1)
172 |     for b in m.data:
173 |       if b == 0x00'u8:
174 |         break
175 |       result.add(char(b))
176 | 
177 |   proc asyncStep(
178 |       server: MetricsServerData, data: MetricsThreadData, lastError: string
179 |   ): Future[bool] {.async: (raises: []).} = # handles one request; false ends the caller's loop
180 |     var buffer: array[1, byte] # a request is a single byte; `server` is not used in this body
181 |     try:
182 |       await data.reqTransp.readExactly(addr buffer[0], len(buffer))
183 | 
184 |       if len(lastError) > 0: # a stored error is the answer to every request
185 |         await data.respond(ResponseError, lastError)
186 |         return true
187 | 
188 |       if isNil(data.http): # no HTTP server object to operate on
189 |         await data.respond(ResponseError, "HTTP server is not bound!")
190 |         return true
191 | 
192 |       case buffer[0]
193 |       of byte(MetricsRequest.Status):
194 |         let message = # these strings are matched again by `status`
195 |           case data.http.state()
196 |           of ServerStopped: "STOPPED"
197 |           of ServerClosed: "CLOSED"
198 |           of ServerRunning: "RUNNING"
199 |         await data.respond(ResponseOk, message)
200 |         true
201 |       of byte(MetricsRequest.Start):
202 |         if data.http.state() != HttpServerState.ServerStopped: # only a stopped server is started
203 |           let message =
204 |             if data.http.state() == HttpServerState.ServerClosed:
205 |               "HTTP server is already closed"
206 |             else:
207 |               "HTTP server is already running"
208 |           await data.respond(ResponseError, message)
209 |         else:
210 |           data.http.start()
211 |           await data.respond(ResponseOk, "")
212 |         true
213 |       of byte(MetricsRequest.Stop):
214 |         if data.http.state() != HttpServerState.ServerRunning: # only a running server is stopped
215 |           let message =
216 |             if data.http.state() == HttpServerState.ServerClosed:
217 |               "HTTP server is already closed"
218 |             else:
219 |               "HTTP server is already stopped"
220 |           await data.respond(ResponseError, message)
221 |         else:
222 |           await data.http.stop()
223 |           await data.respond(ResponseOk, "")
224 |         true
225 |       else: # every other byte, MetricsRequest.Close included, is a close request
226 |         if data.http.state() == HttpServerState.ServerClosed:
227 |           await data.respond(ResponseError, "HTTP server is already closed")
228 |           true
229 |         else:
230 |           await data.http.closeWait()
231 |           await data.respond(ResponseOk, "")
232 |           false # closed on request: the loop ends
233 |     except MetricsError: # each handler below closes the HTTP server, if any, and ends the loop
234 |       if not (isNil(data.http)):
235 |         await data.http.closeWait()
236 |       return false
237 |     except TransportError:
238 |       if not (isNil(data.http)):
239 |         await data.http.closeWait()
240 |       return false
241 |     except HttpError:
242 |       if not (isNil(data.http)):
243 |         await data.http.closeWait()
244 |       return false
245 |     except CancelledError:
246 |       # We did not use cancellation.
247 |       if not (isNil(data.http)):
248 |         await data.http.closeWait()
249 |       return false
250 | 
251 |   proc asyncLoop(server: MetricsServerData) {.async: (raises: []).} =
252 |     ## Runs in the server thread: creates the HTTP server, wraps the control
253 |     ## pipes and answers control requests until `asyncStep` ends the loop.
254 |     var lastError = "" # set when the HTTP server could not be created
255 |     proc cb(
256 |         r: RequestFence
257 |     ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
258 |       if r.isOk():
259 |         let request = r.get()
260 |         try:
261 |           if request.uri.path == "/metrics":
262 |             # Prometheus drops metrics in surprising ways if given timestamps: omit them.
263 |             let
264 |               response = block:
265 |                 {.gcsafe.}:
266 |                   defaultRegistry.toText()
267 |               headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
268 |             await request.respond(Http200, response, headers)
269 |           elif request.uri.path == "/health":
270 |             await request.respond(Http200, "OK")
271 |           else:
272 |             await request.respond(Http404, "Try /metrics")
273 |         except HttpError as exc:
274 |           defaultResponse(exc)
275 |       else:
276 |         defaultResponse()
277 | 
278 |     let
279 |       http = block:
280 |         let
281 |           socketFlags = {ServerFlags.ReuseAddr}
282 |           res = HttpServerRef.new(server.address, cb, socketFlags = socketFlags)
283 |         if res.isErr():
284 |           lastError = res.error() # kept, so asyncStep can report it to the owner
285 |           nil
286 |         else:
287 |           res.get()
288 |       reqTransp = fromPipe2(server.requestPipe.read).valueOr:
289 |         if not isNil(http): await http.closeWait() # http is nil if creation failed
290 |         return
291 |       respTransp = fromPipe2(server.responsePipe.write).valueOr:
292 |         if not isNil(http): await http.closeWait() # same guard as above
293 |         await reqTransp.closeWait()
294 |         return
295 |       threadData =
296 |         MetricsThreadData(reqTransp: reqTransp, respTransp: respTransp, http: http)
297 | 
298 |     while true: # one control request per iteration
299 |       let res = await asyncStep(server, threadData, lastError)
300 |       if not (res):
301 |         break
302 | 
303 |     await noCancel allFutures(reqTransp.closeWait(), respTransp.closeWait())
304 | 
305 |   proc serveMetricsServer(server: MetricsServerData) {.thread.} = # thread entry point used by `new`
306 |     ignoreSignalsInThread()
307 |     let loop {.used.} = getThreadDispatcher() # result unused; presumably called to set up this thread's dispatcher (confirm)
308 |     waitFor asyncLoop(server) # the thread ends when the control loop returns
309 | 
310 | proc startMetricsHttpServer*(
311 |     address = "127.0.0.1", port = Port(8000)
312 | ) {.raises: [Exception], deprecated: "Please use MetricsHttpServerRef API".} =
313 |   when defined(metrics): # without -d:metrics this proc has an empty body
314 |     httpServerThread.createThread(serveHttp, initTAddress(address, port)) # serveHttp runs in the module-level thread
315 | 
316 | proc new*(
317 |     t: typedesc[MetricsHttpServerRef], address: string, port: Port
318 | ): Result[MetricsHttpServerRef, cstring] {.raises: [].} =
319 |   ## Initialize new instance of MetricsHttpServerRef.
320 |   ##
321 |   ## This involves creation of new thread and new processing loop in the new
322 |   ## thread.
323 |   when defined(metrics):
324 |     template closePipe(b: untyped): untyped =
325 |       closeHandle(b.read)
326 |       closeHandle(b.write)
327 | 
328 |     let taddress =
329 |       try:
330 |         initTAddress(address, port)
331 |       except TransportAddressError:
332 |         return err("Invalid server address")
333 |     var
334 |       request = block:
335 |         let res = createAsyncPipe()
336 |         if (res.read == asyncInvalidPipe) or (res.write == asyncInvalidPipe):
337 |           return err("Unable to create communication request pipe")
338 |         res
339 |       cleanupRequest = true
340 |     defer: # from here on every early return closes the request pipe
341 |       if cleanupRequest:
342 |         request.closePipe()
343 | 
344 |     var
345 |       response = block:
346 |         let res = createAsyncPipe()
347 |         if (res.read == asyncInvalidPipe) or (res.write == asyncInvalidPipe):
348 |           # `request` is closed by the defer above; closing it here would close it twice.
349 |           return err("Unable to create communication response pipe")
350 |         res
351 |       cleanupResponse = true
352 |     defer: # same for the response pipe
353 |       if cleanupResponse:
354 |         response.closePipe()
355 | 
356 |     let data =
357 |       MetricsServerData(address: taddress, requestPipe: request, responsePipe: response)
358 |     var server = MetricsHttpServerRef(data: data)
359 |     try:
360 |       createThread(server.thread, serveMetricsServer, data)
361 |     except ResourceExhaustedError: # the specific type must come first to be reachable
362 |       return err("Unable to spawn metrics server's thread")
363 |     except Exception:
364 |       return err("Unexpected error while spawning metrics server's thread")
365 | 
366 |     server.reqTransp = # the owner writes requests ...
367 |       try:
368 |         fromPipe(request.write)
369 |       except CatchableError:
370 |         return err(
371 |           "Unable to establish communication channel with " & "metrics server thread"
372 |         )
373 |     server.respTransp = # ... and reads replies
374 |       try:
375 |         fromPipe(response.read)
376 |       except CatchableError:
377 |         return err(
378 |           "Unable to establish communication channel with " & "metrics server thread"
379 |         )
380 | 
381 |     cleanupRequest = false # success: the pipes stay open, so both defers do nothing
382 |     cleanupResponse = false
383 |     ok(server)
384 |   else:
385 |     err("Could not initialize metrics server, because metrics are disabled")
386 | 
387 | proc start*(
388 |     server: MetricsHttpServerRef
389 | ) {.async: (raises: [MetricsError, CancelledError]).} =
390 |   ## Start metrics HTTP server; fails with MetricsError if the thread or request fails.
391 |   when defined(metrics):
392 |     if not server.thread.running():
393 |       raiseMetricsError("Metrics server is not running")
394 |     let reply =
395 |       try:
396 |         await communicate(server, MetricsRequest.Start).wait(5.seconds)
397 |       except AsyncTimeoutError as e:
398 |         raiseMetricsError(MetricsErrorKind.Timeout, e)
399 |       except MetricsError as e:
400 |         raiseMetricsError(MetricsErrorKind.Communication, e)
401 |       except TransportError as e:
402 |         raiseMetricsError(MetricsErrorKind.Transport, e)
403 |     if reply.status != ResponseOk: # the payload carries the thread's error text
404 |       raiseMetricsError("Metrics server returns an error: " & getMessage(reply))
405 | 
406 | proc stop*(
407 |     server: MetricsHttpServerRef
408 | ) {.async: (raises: [MetricsError, CancelledError]).} =
409 |   ## Force metrics HTTP server to stop accepting new connections.
410 |   when defined(metrics):
411 |     if not server.thread.running():
412 |       raiseMetricsError("Metrics server is not running")
413 |     let reply =
414 |       try:
415 |         await communicate(server, MetricsRequest.Stop).wait(5.seconds)
416 |       except AsyncTimeoutError as e:
417 |         raiseMetricsError(MetricsErrorKind.Timeout, e)
418 |       except MetricsError as e:
419 |         raiseMetricsError(MetricsErrorKind.Communication, e)
420 |       except TransportError as e:
421 |         raiseMetricsError(MetricsErrorKind.Transport, e)
422 |     if reply.status != ResponseOk: # the payload carries the thread's error text
423 |       raiseMetricsError("Metrics server returns an error: " & getMessage(reply))
424 | 
425 | proc close*(server: MetricsHttpServerRef) {.async: (raises: []).} =
426 |   ## Close metrics HTTP server and release all the resources.
427 |   ##
428 |   ## Returns at once when the server thread is not running. Otherwise asks the
429 |   ## thread to close (waiting up to 5 seconds for its reply), closes both local
430 |   ## pipe transports and joins the thread. Errors of the close request are
431 |   ## ignored, because there is no way to report them.
432 |   when defined(metrics):
433 |     if not server.thread.running():
434 |       return
435 | 
436 |     try:
437 |       discard await communicate(server, MetricsRequest.Close).wait(5.seconds)
438 |     except AsyncTimeoutError, MetricsError, TransportError, CancelledError:
439 |       # Best effort: the cleanup below runs whatever happened to the request.
440 |       discard
441 | 
442 |     # Closing pipes, other pipe ends should be closed by foreign thread.
443 |     await noCancel allFutures(
444 |       server.reqTransp.closeWait(), server.respTransp.closeWait()
445 |     )
446 |     # Thread should exit very soon.
447 |     # NOTE: joinThread blocks the caller until it has.
448 |     server.thread.joinThread()
449 | 
450 | proc status*(
451 |     server: MetricsHttpServerRef
452 | ): Future[MetricsHttpServerStatus] {.async: (raises: [CancelledError, MetricsError]).} =
453 |   ## Returns current status of metrics HTTP server.
454 |   ##
455 |   ## Note, that if `metrics` variable is not defined this procedure will return
456 |   ## ``MetricsHttpServerStatus.Closed``.
457 |   when defined(metrics):
458 |     if not server.thread.running():
459 |       return MetricsHttpServerStatus.Closed # no thread left to ask
460 | 
461 |     let reply =
462 |       try:
463 |         await communicate(server, MetricsRequest.Status).wait(5.seconds)
464 |       except AsyncTimeoutError as e:
465 |         raiseMetricsError(MetricsErrorKind.Timeout, e)
466 |       except MetricsError as e:
467 |         raiseMetricsError(MetricsErrorKind.Communication, e)
468 |       except TransportError as e:
469 |         raiseMetricsError(MetricsErrorKind.Transport, e)
470 | 
471 |     if reply.status != ResponseOk:
472 |       raiseMetricsError("Metrics server returns an error: " & getMessage(reply))
473 |     # Map the state name sent by the server thread back to the public enum.
474 |     let text = getMessage(reply)
475 |     if text == "STOPPED":
476 |       MetricsHttpServerStatus.Stopped
477 |     elif text == "CLOSED":
478 |       MetricsHttpServerStatus.Closed
479 |     elif text == "RUNNING":
480 |       MetricsHttpServerStatus.Running
481 |     else:
482 |       raiseMetricsError("Metrics server returns unsupported status!")
483 |   else:
484 |     MetricsHttpServerStatus.Closed
485 | 
486 | proc new*(t: typedesc[MetricsHttpServerMiddlewareRef]): HttpServerMiddlewareRef = # middleware answering /metrics and /health
487 |   proc middlewareCallback(
488 |       middleware: HttpServerMiddlewareRef,
489 |       reqfence: RequestFence,
490 |       handler: HttpProcessCallback2,
491 |   ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
492 |     if reqfence.isOk():
493 |       let request = reqfence.get()
494 |       try:
495 |         if request.uri.path == "/metrics":
496 |           when defined(metrics):
497 |             # Prometheus will drop our metrics in surprising ways if we give
498 |             # it timestamps, so we don't.
499 |             let
500 |               response = block:
501 |                 {.gcsafe.}:
502 |                   defaultRegistry.toText()
503 |               headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
504 |             await request.respond(Http200, response, headers)
505 |           else: # built without -d:metrics: still 200, with an explanatory body
506 |             await request.respond(
507 |               Http200, "Metrics are not enabled, build your application with -d:metrics"
508 |             )
509 |         elif request.uri.path == "/health":
510 |           await request.respond(Http200, "OK")
511 |         else: # any other path is passed on to the wrapped handler
512 |           await handler(reqfence)
513 |       except HttpWriteError as exc:
514 |         defaultResponse(exc)
515 |     else: # failed fences are passed on to the wrapped handler as well
516 |       await handler(reqfence)
517 | 
518 |   let middleware = MetricsHttpServerMiddlewareRef(handler: middlewareCallback)
519 |   HttpServerMiddlewareRef(middleware)
520 | 


--------------------------------------------------------------------------------
/metrics.nim:
--------------------------------------------------------------------------------
   1 | # Copyright (c) 2019-2023 Status Research & Development GmbH
   2 | # Licensed and distributed under either of
   3 | #   * MIT license: http://opensource.org/licenses/MIT
   4 | #   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
   5 | # at your option. This file may not be copied, modified, or distributed except according to those terms.
   6 | 
   7 | # The API is roughly based on the Prometheus client library recommendations:
   8 | # https://prometheus.io/docs/instrumenting/writing_clientlibs/
   9 | #
  10 | # The Prometheus text exposition format is also tightly coupled:
  11 | # https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format
  12 | 
  13 | {.push raises: [].}
  14 | 
when defined(metricsTest):
  # Test builds: `testOnly` is an empty marker pragma, so helpers tagged with
  # it can be called without any diagnostics.
  {.pragma: testOnly.}
else:
  # Regular builds: anything tagged `testOnly` is marked deprecated with the
  # message below.
  {.pragma: testOnly, deprecated: "slow helpers used for tests only".}
  19 | 
  20 | import std/[locks, monotimes, os, sets, times], metrics/shseq
  21 | 
  22 | export shseq
  23 | 
  24 | when defined(metrics):
  25 |   import std/[algorithm, hashes, strutils, sequtils], stew/ptrops, metrics/common
  26 | 
type
  CStringArr = object # Fixed-size array of cstrings - ownership is managed manually
    items: ptr UncheckedArray[cstring]
    len: int

  StringArrView = object # Pointer + length over strings owned by someone else
    items: ptr UncheckedArray[string]
    len: int

  LabelKey = object # Helper type for heterogeneous lookups in the keys table
    # `data` is filled for keys stored in a collector (see `LabelKey.init`),
    # `refs` for temporary lookup keys built over caller strings (see
    # `LabelKey.view`). Accessors prefer `data` whenever it is non-empty.
    data: CStringArr
    refs: StringArrView

  Metric* = object
    # Metric needs to be trivial because it's stored in a cross-thread seq and
    # therefore cannot use GC types
    name*: cstring
    value*: float64
    labels*: CStringArr
    labelValues*: CStringArr
    timestamp*: Time

  # Callback receiving one sample at a time during `collect`.
  MetricHandler* = proc(
    name: string,
    value: float64,
    labels: openArray[string] = [],
    labelValues: openArray[string] = [],
    timestamp: Time,
  ) {.gcsafe, raises: [].}

  # NOTE(review): the name is spelled "Collecor" - it is exported, so renaming
  # it would break users.
  CollecorHandler* = proc(collector: Collector)

  Collector* = ref object of RootObj
    lock*: Lock
    name*: string
    # `help` and `typ` hold the ready-made "# HELP ..." / "# TYPE ..." lines
    # when the collector is created through `newCollector`.
    help*: string
    typ*: string
    labels*: seq[string]
    timestamp*: bool ## Whether or not we're collecting timestamps for this collector

  SimpleCollector* = ref object of Collector
    # Parallel sequences: `metrics[i]` is the sample family for the label
    # values in `metricKeys[i]`; `withLabelValues` keeps the keys sorted.
    metricKeys*: ShSeq[LabelKey]
    metrics*: ShSeq[ShSeq[Metric]]

  # Stand-in used by the `declare*` templates when metrics are disabled.
  IgnoredCollector* = object

  Counter* = ref object of SimpleCollector
  Gauge* = ref object of SimpleCollector
  Summary* = ref object of SimpleCollector
  Histogram* = ref object of SimpleCollector # a cumulative histogram, not a regular one
    buckets*: seq[float64]

  Registry* = ref object of RootObj
    lock*: Lock
    collectors*: OrderedSet[Collector]
    # Id of the thread that called `newRegistry`; checked by `register`.
    creationThreadId*: int

  RegistrationError* = object of CatchableError
  85 | 
  86 | #########
  87 | # utils #
  88 | #########
  89 | 
  90 | when defined(metrics):
  91 |   # TODO the shared memory allocated below is never freed - this is fine as long
  92 |   #      as registries / metrics never go away (ie they're globals whose lifetime
  93 |   #      matches that of the application) but to do things properly, this shared
  94 |   #      memory should be released at some point
  95 |   from system/ansi_c import c_strcmp
  proc createShared(_: type cstring, v: string): cstring =
    ## Returns a NUL-terminated copy of `v` allocated with `createSharedU`.
    ## Nothing here releases the allocation - that is up to the caller.
    let size = v.len
    result = cast[cstring](createSharedU(char, size + 1))
    if size > 0:
      copyMem(result, baseAddr v, size)
    result[size] = '\0'
 104 | 
 105 |   proc createShared(_: type CStringArr, v: openArray[string]): CStringArr =
 106 |     if v.len > 0:
 107 |       var p = cast[ptr UncheckedArray[cstring]](createSharedU(cstring, v.len))
 108 |       for i in 0 ..< v.len:
 109 |         p[i] = cstring.createShared(v[i])
 110 | 
 111 |       CStringArr(items: p, len: v.len)
 112 |     else:
 113 |       CStringArr()
 114 | 
  proc `[]`(s: CStringArr, i: int): cstring =
    ## Element access; `i` is not checked against `s.len`.
    s.items[i]
 117 | 
 118 |   proc toStringSeq(v: CStringArr): seq[string] =
 119 |     for i in 0 ..< v.len:
 120 |       result.add $v[i]
 121 | 
 122 |   proc len(a: LabelKey): int =
 123 |     if a.data.len > 0: a.data.len else: a.refs.len
 124 | 
 125 |   template `[]`(a: LabelKey, i: int): cstring =
 126 |     if a.data.len > 0:
 127 |       a.data[i]
 128 |     else:
 129 |       cstring(a.refs.items[i])
 130 | 
 131 |   proc `==`(a, b: LabelKey): bool =
 132 |     if a.len == b.len:
 133 |       for i in 0 ..< a.len:
 134 |         if c_strcmp(a[i], b[i]) != 0:
 135 |           return false
 136 |       true
 137 |     else:
 138 |       false
 139 | 
 140 |   proc cmp(a, b: LabelKey): int =
 141 |     # TODO https://github.com/nim-lang/Nim/issues/24941
 142 |     for i in 0 ..< min(a.len, b.len):
 143 |       let c = c_strcmp(a[i], b[i])
 144 |       if c != 0:
 145 |         return c
 146 | 
 147 |     cmp(a.len, b.len)
 148 | 
 149 |   proc init(T: type LabelKey, values: openArray[string]): T =
 150 |     # TODO Avoid leaking this shared array, in case we were to clean up the
 151 |     #      registry
 152 |     LabelKey(data: CStringArr.createShared(values))
 153 | 
 154 |   proc view(T: type LabelKey, values: openArray[string]): T =
 155 |     # TODO some day, we might get view types - until then..
 156 |     LabelKey(
 157 |       refs:
 158 |         StringArrView(items: baseAddr(values).makeUncheckedArray(), len: values.len())
 159 |     )
 160 | 
 161 |   proc toMilliseconds*(time: times.Time): int64 =
 162 |     convert(Seconds, Milliseconds, time.toUnix()) +
 163 |       convert(Nanoseconds, Milliseconds, time.nanosecond())
 164 | 
 165 |   template nameOrIdentifier*(identifier: untyped, name: string): string =
 166 |     if name.len == 0:
 167 |       astToStr(identifier)
 168 |     else:
 169 |       name
 170 | 
 171 |   proc processHelp(name, help: string): string =
 172 |     "# HELP " & name & " " & help.multiReplace([("\\", "\\\\"), ("\n", "\\n")]) & "\n"
 173 | 
 174 |   proc processType(name, typ: string): string =
 175 |     "# TYPE " & name & " " & typ & "\n"
 176 | 
 177 |   proc addText*(
 178 |       res: var string,
 179 |       name: auto,
 180 |       value: float64,
 181 |       labels, labelValues: auto,
 182 |       timestamp: Time,
 183 |   ) =
 184 |     # A bit convoluted to mostly avoid pointless memory allocations - there's no
 185 |     # (trivial) way however to append a float to an existing string
 186 |     res.add name
 187 |     if labels.len > 0:
 188 |       res.add('{')
 189 |       for i in 0 ..< labels.len:
 190 |         if i > 0:
 191 |           res.add ","
 192 |         res.add labels[i]
 193 |         res.add "=\""
 194 |         if labelValues.len > i:
 195 |           for c in labelValues[i]:
 196 |             case c
 197 |             of '\\':
 198 |               res.add "\\\\"
 199 |             of '\n':
 200 |               res.add "\\\n"
 201 |             of '"':
 202 |               res.add "\\\""
 203 |             else:
 204 |               res.add c
 205 |         res.add "\""
 206 |       res.add('}')
 207 |     res.add(" ")
 208 |     res.add($value)
 209 |     if toMilliseconds(timestamp) > 0:
 210 |       res.add(" " & $toMilliseconds(timestamp))
 211 | 
 212 |   proc addText(res: var string, metric: Metric) =
 213 |     addText(
 214 |       res, metric.name, metric.value, metric.labels, metric.labelValues,
 215 |       metric.timestamp,
 216 |     )
 217 | 
  proc `$`*(metric: Metric): string =
    ## Text rendering of a single sample, without a trailing newline.
    addText(result, metric)
 220 | 
  const
    # The regex strings are only used in error messages; the actual checks
    # are done with the character sets below.
    nameRegexStr = r"^[a-zA-Z_:][a-zA-Z0-9_:]*$"
    labelRegexStr = r"^[a-zA-Z_][a-zA-Z0-9_]*$"

    # Label names: a letter or underscore, followed by letters, digits or
    # underscores. Metric names additionally allow ':' in any position.
    labelStartChars = {'a' .. 'z', 'A' .. 'Z', '_'}
    labelChars = labelStartChars + {'0' .. '9'}
    nameStartChars = labelStartChars + {':'}
    nameChars = labelChars + {':'}
 229 | 
 230 |   template validate(ident: string, startChars, chars: typed): bool =
 231 |     ident.len > 0 and ident[0] in startChars and ident.allIt(it in chars)
 232 | 
 233 |   proc validateName(name: string) {.raises: [ValueError].} =
 234 |     if not validate(name, nameStartChars, nameChars):
 235 |       raise newException(
 236 |         ValueError,
 237 |         "Invalid name: '" & name & "'. It should match the regex: " & nameRegexStr,
 238 |       )
 239 | 
 240 |   proc validateLabels(
 241 |       labels: openArray[string], invalidLabelNames: openArray[string] = []
 242 |   ) {.raises: [ValueError].} =
 243 |     for label in labels:
 244 |       if not validate(label, labelStartChars, labelChars):
 245 |         raise newException(
 246 |           ValueError,
 247 |           "Invalid label: '" & label & "'. It should match the regex: '" & labelRegexStr &
 248 |             "'.",
 249 |         )
 250 |       if label.startsWith("__"):
 251 |         raise newException(
 252 |           ValueError, "Invalid label: '" & label & "'. It should not start with '__'."
 253 |         )
 254 |       if label in invalidLabelNames:
 255 |         raise newException(
 256 |           ValueError,
 257 |           "Invalid label: '" & label & "'. It should not be one of: " &
 258 |             $invalidLabelNames & ".",
 259 |         )
 260 | 
 261 | ######################
 262 | # generic collectors #
 263 | ######################
 264 | 
 265 | when defined(metrics):
  template withLabelValues(
      collector: SimpleCollector,
      labelValues: openArray[string],
      metricSym, keySym, body, construct: untyped,
  ) =
    ## Runs `body` with `metricSym` bound to the sample family of `collector`
    ## that belongs to `labelValues`, creating the family first if needed.
    ##
    ## * `metricSym` - identifier under which `body` accesses the family.
    ## * `keySym` - identifier under which `construct` sees the newly created
    ##   (shared-memory) key.
    ## * `construct` - expression producing the new family; only evaluated
    ##   when no entry exists yet.
    ##
    ## A non-empty `labelValues` whose length differs from the collector's
    ## label count is reported via `printError` and `body` is not run.
    if labelValues.len > 0 and labelValues.len != collector.labels.len:
      printError(
        "The number of label values doesn't match the number of labels: " &
          collector.name
      )
    else:
      # Lookup, insertion and `body` all happen under the collector lock.
      withLock(collector.lock):
        # Binary search with a non-owning view over the caller's strings, so
        # the common "already exists" path does not allocate a key.
        let pos =
          collector.metricKeys.data().lowerBound(LabelKey.view(labelValues), cmp)
        if pos == collector.metricKeys.len or
            collector.metricKeys[pos] != LabelKey.view(labelValues):
          # Not present: make an owned key and insert key and family at the
          # same index so both sequences stay aligned and sorted.
          let keySym = LabelKey.init(labelValues)
          collector.metricKeys.insert(keySym, pos)
          collector.metrics.insert(construct, pos)

        template metricSym(): untyped =
          collector.metrics[pos]

        body
 290 | 
 291 |   method hash*(collector: Collector): Hash {.base.} =
 292 |     result = result !& collector.name.hash
 293 |     for label in collector.labels:
 294 |       result = result !& label.hash
 295 |     result = !$result
 296 | 
 297 |   # `hash` and equals must match
 298 |   method `==`*(x, y: Collector): bool {.base.} =
 299 |     x.name == y.name and x.labels == y.labels
 300 | 
 301 |   proc now*(collector: Collector): Time =
 302 |     if collector.timestamp:
 303 |       getTime()
 304 |     else:
 305 |       Time()
 306 | 
 307 |   proc call(output: MetricHandler, metric: Metric) =
 308 |     output(
 309 |       $metric.name,
 310 |       metric.value,
 311 |       toStringSeq(metric.labels),
 312 |       toStringSeq(metric.labelValues),
 313 |       metric.timestamp,
 314 |     )
 315 | 
  method collect*(collector: Collector, output: MetricHandler) {.base.} =
    ## Base implementation: reports nothing. Collector types override this to
    ## feed their samples to `output`.
    discard

  method collect*(collector: SimpleCollector, output: MetricHandler) =
    ## Calls `output` once per stored sample, family by family, while holding
    ## the collector's lock.
    {.warning[LockLevel]: off.}
    withLock(collector.lock):
      for family in collector.metrics:
        for metric in family:
          call(output, metric)

  proc collect*(registry: Registry, output: MetricHandler) =
    ## Collects from every registered collector, in the registry's iteration
    ## order, while holding the registry lock.
    withLock registry.lock:
      for collector in registry.collectors:
        collector.collect(output)
 330 | 
 331 |   proc addText(res: var string, collector: Collector) =
 332 |     res.add collector.help
 333 |     res.add collector.typ
 334 | 
 335 |     let resPtr = addr res
 336 | 
 337 |     proc addMetric(
 338 |         name: string,
 339 |         value: float64,
 340 |         labels, labelValues: openArray[string],
 341 |         timestamp: Time,
 342 |     ) =
 343 |       addText(resPtr[], name, value, labels, labelValues, timestamp)
 344 |       resPtr[].add "\n"
 345 | 
 346 |     collect(collector, addMetric)
 347 | 
  proc `$`*(collector: Collector): string =
    ## Text rendering of the collector: help line, type line and all samples.
    addText(result, collector)
 350 | 
proc `$`*(collector: type IgnoredCollector): string =
  ## Ignored collectors render as the empty string.
  ""
 353 | 
 354 | when defined(metrics):
  template localGlobal(init: untyped): untyped =
    ## Expands to a `{.global.}` variable of the type of `init` that is
    ## assigned `init` the first time it is found to be nil, and yields that
    ## variable. Compilation is refused on Nim 2.0 with ORC/ARC.
    when (NimMajor, NimMinor) == (2, 0) and (defined(gcOrc) or defined(gcArc)):
      {.error: "Globals are too broken in Nim 2.0/ORC/ARC".}

    # https://github.com/status-im/nim-metrics/pull/5#discussion_r304687474
    # https://github.com/nim-lang/Nim/issues/24940
    var res {.global.}: typeof(init)
    # Explicit nil check instead of an initialiser on the declaration - see
    # the issues linked above.
    if isNil(res):
      res = init
    res
 365 | 
 366 |   proc valueImpl*(
 367 |       collector: Collector, labelValues: openArray[string] = []
 368 |   ): float64 {.gcsafe, raises: [KeyError].} =
 369 |     var res = NaN
 370 |     # Don't access the "metrics" field directly, so we can support custom
 371 |     # collectors.
 372 |     {.gcsafe.}:
 373 |       proc findMetric(
 374 |           name: string,
 375 |           value: float64,
 376 |           labels, labelValues: openArray[string],
 377 |           timestamp: Time,
 378 |       ) =
 379 |         if res != res and labelValues == labelValues:
 380 |           res = value
 381 | 
 382 |       collect(collector, findMetric)
 383 |       if res != res: # NaN
 384 |         raise newException(
 385 |           KeyError,
 386 |           "No such metric for this collector (label values = " & $(@labelValues) & ").",
 387 |         )
 388 |     res
 389 | 
template value*(
    collector: Collector | type IgnoredCollector,
    labelValuesParam: openArray[string] = [],
): float64 {.testOnly.} =
  ## Test helper: looks up the current value for `labelValuesParam` through
  ## `valueImpl`. Evaluates to `0.0` when metrics are disabled or `collector`
  ## is an `IgnoredCollector`.
  when defined(metrics) and collector is not IgnoredCollector:
    {.gcsafe.}:
      valueImpl(collector, labelValuesParam)
  else:
    0.0'f64
 399 | 
 400 | proc valueByNameInternal*(
 401 |     collector: Collector | type IgnoredCollector,
 402 |     metricName: string,
 403 |     labelValues: openArray[string] = [],
 404 |     extraLabelValues: openArray[string] = [],
 405 | ): float64 {.raises: [ValueError].} =
 406 |   when defined(metrics) and collector is not IgnoredCollector:
 407 |     var res = NaN
 408 |     let allLabelValues = @labelValues & @extraLabelValues
 409 |     proc findMetric(
 410 |         name: string,
 411 |         value: float64,
 412 |         labels, labelValues: openArray[string],
 413 |         timestamp: Time,
 414 |     ) =
 415 |       if res != res and name == metricName and labelValues == allLabelValues:
 416 |         res = value
 417 | 
 418 |     collect(collector, findMetric)
 419 |     if res == res:
 420 |       return res
 421 | 
 422 |     raise newException(
 423 |       KeyError,
 424 |       "No such metric name for this collector: '" & metricName & "' (label values = " &
 425 |         $allLabelValues & ").",
 426 |     )
 427 | 
template valueByName*(
    collector: Collector | type IgnoredCollector,
    metricName: string,
    labelValues: openArray[string] = [],
    extraLabelValues: openArray[string] = [],
): float64 {.testOnly.} =
  ## Test helper: forwards to `valueByNameInternal` inside a `gcsafe` override
  ## block.
  {.gcsafe.}:
    valueByNameInternal(collector, metricName, labelValues, extraLabelValues)
 436 | 
 437 | ############
 438 | # registry #
 439 | ############
 440 | 
 441 | proc newRegistry*(): Registry =
 442 |   when defined(metrics):
 443 |     new(result)
 444 |     result.lock.initLock()
 445 |     result.creationThreadId = getThreadId()
 446 | 
 447 | # needs to be {.global.} because of the alternative API's usage of {.global.} collector vars
 448 | let defaultRegistry* {.global.} = newRegistry()
 449 | 
 450 | # We use a generic type here in order to avoid the hidden type casting of
 451 | # Collector child types to the parent type.
 452 | proc register*[T](
 453 |     collector: T, registry = defaultRegistry
 454 | ) {.raises: [RegistrationError].} =
 455 |   when defined(metrics):
 456 |     # TODO To relax this, collectors can no longer be `ref object`
 457 |     if registry.creationThreadId != getThreadId():
 458 |       printError(
 459 |         "New collectors / metrics must be added from same thread as the registry was created from: " &
 460 |           collector.name
 461 |       )
 462 | 
 463 |     withLock registry.lock:
 464 |       if collector in registry.collectors:
 465 |         raise newException(
 466 |           RegistrationError, "Collector already registered: " & collector.name
 467 |         )
 468 | 
 469 |       registry.collectors.incl(collector)
 470 | 
 471 | proc unregister*[T](
 472 |     collector: T, registry = defaultRegistry
 473 | ) {.raises: [RegistrationError].} =
 474 |   when defined(metrics) and collector is not IgnoredCollector:
 475 |     withLock registry.lock:
 476 |       if collector notin registry.collectors:
 477 |         raise newException(RegistrationError, "Collector not registered.")
 478 | 
 479 |       registry.collectors.excl(collector)
 480 | 
 481 | proc unregister*(collector: type IgnoredCollector, registry = defaultRegistry) =
 482 |   discard
 483 | 
 484 | proc len(registry: Registry): int =
 485 |   when defined(metrics):
 486 |     withLock registry.lock:
 487 |       return registry.collectors.len()
 488 |   else:
 489 |     0
 490 | 
 491 | proc addText(res: var string, registry: Registry) =
 492 |   when defined(metrics):
 493 |     withLock registry.lock:
 494 |       for collector in registry.collectors:
 495 |         res.addText(collector)
 496 |         res.add("\n")
 497 | 
 498 | proc toText*(registry: Registry): string =
 499 |   result = newStringOfCap(registry.len() * 64)
 500 |   result.addText(registry)
 501 | 
proc `$`*(registry: Registry): string =
  ## Text rendering of all collectors in the registry.
  addText(result, registry)
 504 | 
 505 | #####################
 506 | # custom collectors #
 507 | #####################
 508 | 
 509 | when defined(metrics):
 510 |   # Used for custom collectors, to shield the API user from having to deal with
 511 |   # internal details like lock initialisation.
 512 |   # Also used internally, for creating standard collectors, to avoid code
 513 |   # duplication.
 514 |   proc newCollector*[T](
 515 |       typ: typedesc[T],
 516 |       name: string,
 517 |       help: string,
 518 |       labels: openArray[string] = [],
 519 |       registry = defaultRegistry,
 520 |       standardType = "gauge",
 521 |       timestamp = false,
 522 |   ): T {.raises: [ValueError, RegistrationError].} =
 523 |     validateName(name)
 524 |     validateLabels(labels)
 525 |     result = T(
 526 |       name: name,
 527 |       help: processHelp(name, help),
 528 |       typ: processType(name, standardType),
 529 |         # Prometheus does not support a non-standard value here
 530 |       labels: @labels,
 531 |       timestamp: timestamp,
 532 |     )
 533 |     result.lock.initLock()
 534 |     result.register(registry)
 535 | 
 536 | when defined(metrics):
  proc updateSystemMetrics*() {.gcsafe.} # defined later in this file
  var systemMetricsAutomaticUpdate = true
    # whether to piggy-back on changes of user-defined metrics

  proc getSystemMetricsAutomaticUpdate*(): bool =
    ## Current value of the automatic system-metrics update flag.
    systemMetricsAutomaticUpdate

  proc setSystemMetricsAutomaticUpdate*(value: bool) =
    ## Sets the automatic system-metrics update flag.
    systemMetricsAutomaticUpdate = value
 546 | 
 547 | ###########
 548 | # counter #
 549 | ###########
 550 | 
 551 | when defined(metrics):
 552 |   proc newCounterMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] =
 553 |     ShSeq.init(
 554 |       [
 555 |         Metric(
 556 |           name: cstring.createShared(name & "_total"),
 557 |           labels: labels,
 558 |           labelValues: labelValues,
 559 |         ),
 560 |         Metric(
 561 |           name: cstring.createShared(name & "_created"),
 562 |           labels: labels,
 563 |           labelValues: labelValues,
 564 |           value: getTime().toUnix().float64,
 565 |         ),
 566 |       ]
 567 |     )
 568 | 
 569 |   # don't document this one, even if we're forced to make it public, because it
 570 |   # won't work when all (or some) collectors are disabled
 571 |   proc newCounter*(
 572 |       name: string,
 573 |       help: string,
 574 |       labels: openArray[string] = [],
 575 |       registry = defaultRegistry,
 576 |       timestamp = false,
 577 |   ): Counter {.raises: [ValueError, RegistrationError].} =
 578 |     result = Counter.newCollector(name, help, labels, registry, "counter", timestamp)
 579 |     if labels.len == 0:
 580 |       result.metrics.add newCounterMetrics(name, CStringArr(), CStringArr())
 581 |       result.metricKeys.add LabelKey.init(labels)
 582 | 
 583 |   proc incCounter(counter: Counter, amount: float64, labelValues: openArray[string]) =
 584 |     if amount < 0:
 585 |       printError(
 586 |         "Counter.inc() cannot be used with negative amounts: " & $counter.name & "=" &
 587 |           $amount
 588 |       )
 589 |       return
 590 | 
 591 |     let timestamp = counter.now()
 592 |     withLabelValues(counter, labelValues, valueSym, keySym):
 593 |       valueSym[0].value += amount
 594 |       valueSym[0].timestamp = timestamp
 595 |     do:
 596 |       newCounterMetrics(
 597 |         counter.name, CStringArr.createShared(counter.labels), keySym.data
 598 |       )
 599 | 
 600 |     updateSystemMetrics()
 601 | 
template declareCounter*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    name = "",
    timestamp = false,
) {.dirty.} =
  ## Declares `identifier` as a counter created with `newCounter`. The metric
  ## name is `name`, or the identifier's spelling when `name` is empty.
  ## Without `-d:metrics`, `identifier` becomes an alias of `IgnoredCollector`.
  # fine-grained collector disabling will go in here, turning disabled
  # collectors into type aliases for IgnoredCollector
  when defined(metrics):
    let identifier =
      newCounter(nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
  else:
    type identifier = IgnoredCollector
 617 | 
template declarePublicCounter*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    name = "",
    timestamp = false,
) {.dirty.} =
  ## Same as `declareCounter`, but the declared symbol is exported (`*`).
  when defined(metrics):
    let identifier* =
      newCounter(nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
  else:
    type identifier* = IgnoredCollector
 631 | 
 632 | #- alternative API (without support for custom help strings, labels or custom registries)
 633 | #- different collector types with the same names are allowed
when defined(metrics):
  proc counter*(
      name: static string
  ): Counter {.raises: [ValueError, RegistrationError].} =
    ## Alternative API: returns the counter called `name`, created with an
    ## empty help string on first use.
    # This {.global.} var assignment is lifted from the procedure and placed in a
    # special module init section that's guaranteed to run only once per program.
    # Calls to this proc will just return the globally initialised variable.
    localGlobal(newCounter(name, ""))

else:
  template counter*(name: static string): untyped =
    ## Metrics disabled: evaluates to the `IgnoredCollector` type.
    IgnoredCollector
 646 | 
template inc*(
    counter: Counter | type IgnoredCollector,
    amount: int64 | float64 = 1,
    labelValues: openArray[string] = [],
) =
  ## Increments `counter` by `amount` for the given label values. Expands to
  ## nothing when metrics are disabled or `counter` is an `IgnoredCollector`.
  when defined(metrics) and counter is not IgnoredCollector:
    {.gcsafe.}:
      incCounter(counter, amount.float64, labelValues)
 655 | 
template countExceptions*(
    counter: Counter | type IgnoredCollector,
    typ: typedesc,
    labelValues: openArray[string],
    body: untyped,
) =
  ## Runs `body`; when it raises an exception of type `typ`, `counter` is
  ## incremented by 1 for `labelValues` and the exception is raised again.
  ## With metrics disabled or an `IgnoredCollector`, `body` runs unwrapped.
  when defined(metrics) and counter is not IgnoredCollector:
    try:
      body
    except typ as exc:
      counter.inc(1, labelValues)
      raise exc
  else:
    body

template countExceptions*(
    counter: Counter | type IgnoredCollector, typ: typedesc, body: untyped
) =
  ## Variant without label values.
  when defined(metrics) and counter is not IgnoredCollector:
    counter.countExceptions(typ, []):
      body
  else:
    body

template countExceptions*(
    counter: Counter | type IgnoredCollector,
    labelValues: openArray[string],
    body: untyped,
) =
  ## Variant counting every `Exception`.
  countExceptions(counter, Exception, labelValues, body)

template countExceptions*(counter: Counter | type IgnoredCollector, body: untyped) =
  ## Variant counting every `Exception`, without label values.
  when defined(metrics) and counter is not IgnoredCollector:
    counter.countExceptions([]):
      body
  else:
    body
 693 | 
 694 | #########
 695 | # gauge #
 696 | #########
 697 | 
 698 | when defined(metrics):
 699 |   proc newGaugeMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] =
 700 |     ShSeq.init([Metric(name: name, labels: labels, labelValues: labelValues)])
 701 | 
 702 |   proc newGauge*(
 703 |       name: string,
 704 |       help: string,
 705 |       labels: openArray[string] = [],
 706 |       registry = defaultRegistry,
 707 |       timestamp = false,
 708 |   ): Gauge {.raises: [ValueError, RegistrationError].} =
 709 |     result = Gauge.newCollector(name, help, labels, registry, "gauge", timestamp)
 710 |     if labels.len == 0:
 711 |       result.metrics.add newGaugeMetrics(name, CStringArr(), CStringArr())
 712 |       result.metricKeys.add LabelKey.init(labels)
 713 | 
 714 |   proc incGauge(gauge: Gauge, amount: float64, labelValues: openArray[string]) =
 715 |     let timestamp = gauge.now()
 716 | 
 717 |     withLabelValues(gauge, labelValues, valueSym, keySym):
 718 |       valueSym[0].value += amount
 719 |       valueSym[0].timestamp = timestamp
 720 |     do:
 721 |       newGaugeMetrics(gauge.name, CStringArr.createShared(gauge.labels), keySym.data)
 722 | 
 723 |     updateSystemMetrics()
 724 | 
 725 |   proc setGauge(
 726 |       gauge: Gauge,
 727 |       value: float64,
 728 |       labelValues: openArray[string],
 729 |       doUpdateSystemMetrics: bool,
 730 |   ) =
 731 |     let timestamp = gauge.now()
 732 | 
 733 |     withLabelValues(gauge, labelValues, valueSym, keySym):
 734 |       valueSym[0].value = value.float64
 735 |       valueSym[0].timestamp = timestamp
 736 |     do:
 737 |       newGaugeMetrics(gauge.name, CStringArr.createShared(gauge.labels), keySym.data)
 738 | 
 739 |     if doUpdateSystemMetrics:
 740 |       updateSystemMetrics()
 741 | 
template declareGauge*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    name = "",
    timestamp = false,
) {.dirty.} =
  ## Declares `identifier` as a gauge created with `newGauge`. The metric name
  ## is `name`, or the identifier's spelling when `name` is empty. Without
  ## `-d:metrics`, `identifier` becomes an alias of `IgnoredCollector`.
  when defined(metrics):
    var identifier =
      newGauge(nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
  else:
    type identifier = IgnoredCollector
 755 | 
 756 | # alternative API
when defined(metrics):
  proc gauge*(name: static string): Gauge {.raises: [ValueError, RegistrationError].} =
    ## Alternative API: returns the gauge called `name`, created with an empty
    ## help string on first use.
    localGlobal(newGauge(name, ""))

else:
  template gauge*(name: static string): untyped =
    ## Metrics disabled: evaluates to the `IgnoredCollector` type.
    IgnoredCollector
 764 | 
template declarePublicGauge*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    name = "",
    timestamp = false,
) {.dirty.} =
  ## Same as `declareGauge`, but the declared symbol is exported (`*`).
  when defined(metrics):
    var identifier* =
      newGauge(nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
  else:
    type identifier* = IgnoredCollector
 778 | 
 779 | # the "type IgnoredCollector" case is covered by Counter.inc()
template inc*(
    gauge: Gauge, amount: int64 | float64 = 1, labelValues: openArray[string] = []
) =
  ## Increments `gauge` by `amount` for the given label values. Expands to
  ## nothing without `-d:metrics`.
  when defined(metrics):
    {.gcsafe.}:
      incGauge(gauge, amount.float64, labelValues)
 786 | 
template dec*(
    gauge: Gauge | type IgnoredCollector,
    amount: int64 | float64 = 1,
    labelValues: openArray[string] = [],
) =
  ## Decrements `gauge` by `amount`, implemented as `inc` with the negated
  ## amount. Expands to nothing when metrics are disabled or `gauge` is an
  ## `IgnoredCollector`.
  when defined(metrics) and gauge is not IgnoredCollector:
    inc(gauge, -amount, labelValues)
 794 | 
template set*(
    gauge: Gauge | type IgnoredCollector,
    value: int64 | float64,
    labelValues: openArray[string] = [],
    doUpdateSystemMetrics = true,
) =
  ## Sets `gauge` to `value` for the given label values. Pass
  ## `doUpdateSystemMetrics = false` to skip the `updateSystemMetrics` call
  ## that normally follows. Expands to nothing when metrics are disabled or
  ## `gauge` is an `IgnoredCollector`.
  when defined(metrics) and gauge is not IgnoredCollector:
    {.gcsafe.}:
      setGauge(gauge, value.float64, labelValues, doUpdateSystemMetrics)
 804 | 
 805 | # in seconds
 806 | proc setToCurrentTime*(
 807 |     gauge: Gauge | type IgnoredCollector, labelValues: openArray[string] = []
 808 | ) =
 809 |   when defined(metrics) and gauge is not IgnoredCollector:
 810 |     gauge.set(getTime().toUnix(), labelValues)
 811 | 
template trackInProgress*(
    gauge: Gauge | type IgnoredCollector, labelValues: openArray[string], body: untyped
) =
  ## Increments `gauge` by 1, runs `body`, then decrements it by 1 again.
  ##
  ## NOTE: there is no `try`/`finally` around `body`, so the decrement is
  ## skipped when `body` raises or leaves the enclosing routine early.
  when defined(metrics) and gauge is not IgnoredCollector:
    gauge.inc(1, labelValues)
    body
    gauge.dec(1, labelValues)
  else:
    body

template trackInProgress*(gauge: Gauge | type IgnoredCollector, body: untyped) =
  ## Variant without label values.
  when defined(metrics) and gauge is not IgnoredCollector:
    gauge.trackInProgress([]):
      body
  else:
    body
 828 | 
 829 | template time*(
 830 |     gauge: Gauge | type IgnoredCollector, labelValues: openArray[string], body: untyped
 831 | ) =
 832 |   ## Runs `body`, then sets `gauge` to its duration in fractional seconds,
 833 |   ## taken from the monotonic clock so sub-second runs are not truncated.
 834 |   when defined(metrics) and gauge is not IgnoredCollector:
 835 |     let start = getMonoTime()
 836 |     body
 837 |     gauge.set(inNanoseconds(getMonoTime() - start).float64 / 1e9, labelValues)
 838 |   else: body
 839 | 
 840 | template time*(
 841 |     collector: Gauge | Summary | Histogram | type IgnoredCollector, body: untyped
 842 | ) =
 843 |   ## Times `body` without labels by delegating to the `labelValues` overloads
 844 |   ## with `[]`; for an ignored collector only `body` itself is emitted.
 845 |   when defined(metrics) and collector is not IgnoredCollector:
 846 |     collector.time([]): body
 847 |   else: body
 848 | 
 849 | ###########
 850 | # summary #
 851 | ###########
 852 | 
 853 | when defined(metrics):
 854 |   proc newSummaryMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] =
 855 |     ## Builds the three metrics backing one label combination of a summary,
 856 |     ## in the order the observer indexes them: `_sum`, `_count`, `_created`.
 857 |     ## Only `_created` is given a value: the current Unix time in seconds.
 858 |     result = ShSeq.init(
 859 |       [
 860 |         Metric( # [0]
 861 |           name: cstring.createShared(name & "_sum"),
 862 |           labels: labels, labelValues: labelValues,
 863 |         ),
 864 |         Metric( # [1]
 865 |           name: cstring.createShared(name & "_count"),
 866 |           labels: labels, labelValues: labelValues,
 867 |         ),
 868 |         Metric( # [2]
 869 |           name: cstring.createShared(name & "_created"),
 870 |           labels: labels, labelValues: labelValues,
 871 |           value: getTime().toUnix().float64,
 872 |         ),
 873 |       ]
 874 |     )
 875 | 
 876 |   proc newSummary*(
 877 |       name: string, help: string, labels: openArray[string] = [],
 878 |       registry = defaultRegistry, timestamp = false,
 879 |   ): Summary {.raises: [ValueError, RegistrationError].} =
 880 |     ## Creates a summary collector via `newCollector()` after validating
 881 |     ## `labels` with "quantile" passed as an invalid label name. A label-less
 882 |     ## summary gets its single metric set (and matching key) right away.
 883 |     validateLabels(labels, invalidLabelNames = ["quantile"])
 884 |     result = newCollector(Summary, name, help, labels, registry, "summary", timestamp)
 885 |     if labels.len != 0: return
 886 |     result.metrics.add(newSummaryMetrics(name, CStringArr(), CStringArr()))
 887 |     result.metricKeys.add(LabelKey.init(labels))
 888 | 
 889 |   proc observeSummary(
 890 |       summary: Summary, amount: float64, labelValues: openArray[string]
 891 |   ) =
 892 |     let timestamp = summary.now()
 893 |     # One observation: bump `_sum` and `_count`, stamping both with `timestamp`.
 894 |     withLabelValues(summary, labelValues, valueSym, keySym):
 895 |       valueSym[0].value += amount # _sum: total of all observed amounts
 896 |       valueSym[0].timestamp = timestamp
 897 |       valueSym[1].value += 1.float64 # _count: number of observations
 898 |       valueSym[1].timestamp = timestamp
 899 |     do: # presumably used by withLabelValues() for a not-yet-seen label set - confirm
 900 |       newSummaryMetrics(
 901 |         summary.name, CStringArr.createShared(summary.labels), keySym.data
 902 |       )
 903 | 
 904 | template declareSummary*(
 905 |     identifier: untyped, help: static string, labels: openArray[string] = [],
 906 |     registry = defaultRegistry, name = "", timestamp = false,
 907 | ) {.dirty.} =
 908 |   ## Declares the module-private summary `identifier`; with `metrics` undefined
 909 |   ## it becomes an alias of `IgnoredCollector`. `timestamp` is forwarded to
 910 |   ## `newSummary()`, mirroring `declareHistogram`.
 911 |   when defined(metrics):
 912 |     let identifier = newSummary(
 913 |       nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
 914 |   else:
 915 |     type identifier = IgnoredCollector
 916 | 
 917 | template declarePublicSummary*(
 918 |     identifier: untyped, help: static string, labels: openArray[string] = [],
 919 |     registry = defaultRegistry, name = "", timestamp = false,
 920 | ) {.dirty.} =
 921 |   ## Declares the exported summary `identifier`; with `metrics` undefined it
 922 |   ## becomes an exported alias of `IgnoredCollector`. `timestamp` is forwarded
 923 |   ## to `newSummary()`, mirroring `declarePublicHistogram`.
 924 |   when defined(metrics):
 925 |     let identifier* = newSummary(
 926 |       nameOrIdentifier(identifier, name), help, labels, registry, timestamp)
 927 |   else:
 928 |     type identifier* = IgnoredCollector
 929 | 
 930 | when defined(metrics):
 931 |   proc summary*(
 932 |       name: static string
 933 |   ): Summary {.raises: [ValueError, RegistrationError].} =
 934 |     ## Returns `localGlobal(newSummary(name, ""))`: a summary with empty help.
 935 |     localGlobal(newSummary(name, ""))
 936 | else: # without `metrics`, `summary(...)` is just the IgnoredCollector type
 937 |   template summary*(name: static string): untyped =
 938 |     IgnoredCollector
 939 | 
 940 | template observe*(
 941 |     summary: Summary | type IgnoredCollector, amount: int64 | float64 = 1,
 942 |     labelValues: openArray[string] = [],
 943 | ) =
 944 |   ## Records one observation of `amount` (as float64) in `summary`.
 945 |   when defined(metrics) and summary is not IgnoredCollector:
 946 |     {.gcsafe.}:
 947 |       observeSummary(summary, float64(amount), labelValues)
 948 | 
 949 | # The "type IgnoredCollector" case and the label-less version: see Gauge.time().
 950 | template time*(
 951 |     collector: Summary | Histogram, labelValues: openArray[string], body: untyped
 952 | ) =
 953 |   ## Runs `body`, then observes its duration in fractional seconds. The
 954 |   ## monotonic clock is used, so sub-second runs are not truncated to 0 or 1.
 955 |   when defined(metrics):
 956 |     let start = getMonoTime()
 957 |     body
 958 |     collector.observe(inNanoseconds(getMonoTime() - start).float64 / 1e9, labelValues)
 959 |   else: body
 960 | 
 961 | #############
 962 | # histogram #
 963 | #############
 964 | 
 965 | const defaultHistogramBuckets* = # ascending upper bounds ("le"); the last is Inf
 966 |   [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, Inf]
 967 | when defined(metrics):
 968 |   proc newHistogramMetrics(
 969 |       name: string, labels, labelValues: CStringArr, buckets: seq[float64]
 970 |   ): ShSeq[Metric] =
 971 |     ## Builds the metrics of one histogram label combination: `_sum`, `_count`
 972 |     ## and `_created` at indices 0..2, then one `_bucket` metric per entry of
 973 |     ## `buckets`, each carrying the extra label "le" (shown as "+Inf" for Inf).
 974 |     result = ShSeq.init(
 975 |       [
 976 |         Metric( # [0]
 977 |           name: cstring.createShared(name & "_sum"),
 978 |           labels: labels, labelValues: labelValues,
 979 |         ),
 980 |         Metric( # [1]
 981 |           name: cstring.createShared(name & "_count"),
 982 |           labels: labels, labelValues: labelValues,
 983 |         ),
 984 |         Metric( # [2] creation time, Unix seconds
 985 |           name: cstring.createShared(name & "_created"),
 986 |           labels: labels, labelValues: labelValues,
 987 |           value: getTime().toUnix().float64,
 988 |         ),
 989 |       ]
 990 |     )
 991 |     # All bucket metrics share one label-name array: the given labels + "le".
 992 |     let
 993 |       leLabels = CStringArr.createShared(labels.toStringSeq & "le")
 994 |       baseValues = labelValues.toStringSeq()
 995 |     # Each bucket gets its own value array: the base values + its "le" text.
 996 |     for upperBound in buckets:
 997 |       let leValue =
 998 |         if upperBound == Inf: "+Inf" # exposition spelling of the infinite bound
 999 |         else: $upperBound
1000 |       result.add(
1001 |         Metric(
1002 |           name: cstring.createShared(name & "_bucket"),
1003 |           labels: leLabels,
1004 |           labelValues: CStringArr.createShared(@baseValues & leValue),
1005 |         )
1006 |       )
1007 | 
1008 |   proc newHistogram*(
1009 |       name: string, help: string, labels: openArray[string] = [],
1010 |       registry = defaultRegistry,
1011 |       buckets: openArray[float64] = defaultHistogramBuckets, timestamp = false,
1012 |   ): Histogram {.raises: [ValueError, RegistrationError].} =
1013 |     ## Creates a histogram collector via `newCollector()`. `Inf` is appended to
1014 |     ## a non-empty `buckets` list that does not already end with it.
1015 |     ## Raises `ValueError` when fewer than two buckets remain afterwards or
1016 |     ## when they are not sorted in ascending order.
1017 |     validateLabels(labels, invalidLabelNames = ["le"])
1018 |     var bucketsSeq = @buckets
1019 |     if bucketsSeq.len > 0 and bucketsSeq[^1] != Inf:
1020 |       bucketsSeq.add(Inf)
1021 |     if bucketsSeq.len <= 1:
1022 |       raise newException(
1023 |         ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. At least 2 required."
1024 |       )
1025 |     if not isSorted(bucketsSeq, system.cmp[float64]):
1026 |       raise newException(
1027 |         ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. Must be sorted."
1028 |       )
1029 |     result =
1030 |       Histogram.newCollector(name, help, labels, registry, "histogram", timestamp)
1031 |     result.buckets = bucketsSeq
1032 |     if labels.len != 0: return
1033 |     # Without labels there is exactly one metric set, so create it right away.
1034 |     result.metrics.add newHistogramMetrics(name, CStringArr(), CStringArr(), bucketsSeq)
1035 |     result.metricKeys.add(LabelKey.init(labels))
1036 | 
1037 |   proc observeHistogram(
1038 |       histogram: Histogram, amount: float64, labelValues: openArray[string]
1039 |   ) =
1040 |     ## Records `amount`: adds it to `_sum`, adds 1 to `_count` and to every
1041 |     ## bucket whose bound is >= `amount`, stamping each touched metric.
1042 |     let timestamp = histogram.now()
1043 |     withLabelValues(histogram, labelValues, valueSym, keySym):
1044 |       valueSym[0].value += amount # _sum
1045 |       valueSym[0].timestamp = timestamp
1046 |       valueSym[1].value += 1.0 # _count
1047 |       valueSym[1].timestamp = timestamp
1048 |       # Buckets are cumulative ("le" = less or equal), so one observation may
1049 |       # count in several of them. Bucket `i` lives at index `i + 3`, after
1050 |       # `_sum`, `_count` and `_created`.
1051 |       for i, upperBound in histogram.buckets:
1052 |         if amount <= upperBound:
1053 |           valueSym[i + 3].value += 1.0 # _bucket{le="<bucket value>"}
1054 |           valueSym[i + 3].timestamp = timestamp
1055 |     do:
1056 |       newHistogramMetrics(
1057 |         histogram.name, CStringArr.createShared(histogram.labels),
1058 |         keySym.data, histogram.buckets,
1059 |       )
1060 | 
1061 | template declareHistogram*(
1062 |     identifier: untyped, help: static string, labels: openArray[string] = [],
1063 |     registry = defaultRegistry,
1064 |     buckets: openArray[float64] = defaultHistogramBuckets,
1065 |     name = "", timestamp = false,
1066 | ) {.dirty.} =
1067 |   ## Declares the module-private histogram `identifier`. Its metric name comes
1068 |   ## from `nameOrIdentifier(identifier, name)`. With `metrics` undefined,
1069 |   ## `identifier` becomes an alias of the `IgnoredCollector` type instead.
1070 |   when not defined(metrics):
1071 |     type identifier = IgnoredCollector
1072 |   else:
1073 |     let identifier = newHistogram(
1074 |       nameOrIdentifier(identifier, name), help, labels, registry, buckets, timestamp
1075 |     )
1076 | 
1077 | template declarePublicHistogram*(
1078 |     identifier: untyped, help: static string, labels: openArray[string] = [],
1079 |     registry = defaultRegistry,
1080 |     buckets: openArray[float64] = defaultHistogramBuckets,
1081 |     name = "", timestamp = false,
1082 | ) {.dirty.} =
1083 |   ## Declares the exported histogram `identifier`. Its metric name comes from
1084 |   ## `nameOrIdentifier(identifier, name)`. With `metrics` undefined,
1085 |   ## `identifier` becomes an exported alias of `IgnoredCollector` instead.
1086 |   when not defined(metrics):
1087 |     type identifier* = IgnoredCollector
1088 |   else:
1089 |     let identifier* = newHistogram(
1090 |       nameOrIdentifier(identifier, name), help, labels, registry, buckets, timestamp
1091 |     )
1092 | 
1093 | when defined(metrics):
1094 |   proc histogram*(
1095 |       name: static string
1096 |   ): Histogram {.raises: [ValueError, RegistrationError].} =
1097 |     ## Returns `localGlobal(newHistogram(name, ""))`: empty help, default buckets.
1098 |     localGlobal(newHistogram(name, ""))
1099 | else: # without `metrics`, `histogram(...)` is just the IgnoredCollector type
1100 |   template histogram*(name: static string): untyped =
1101 |     IgnoredCollector
1102 | 
1103 | template observe*(
1104 |     histogram: Histogram, amount: int64 | float64 = 1,
1105 |     labelValues: openArray[string] = [],
1106 | ) =
1107 |   ## Records one observation of `amount` (as float64) in `histogram`.
1108 |   ## The "type IgnoredCollector" case is covered by Summary.observe().
1109 |   when defined(metrics):
1110 |     {.gcsafe.}:
1111 |       observeHistogram(histogram, float64(amount), labelValues)
1112 | 
1113 | #########################
1114 | # update system metrics #
1115 | #########################
1116 | 
1117 | when defined(metrics):
1118 |   let mainThreadID = getThreadId() # ID of the thread that runs this module's init
1119 |   var
1120 |     systemMetricsUpdateInterval = initDuration(seconds = 10) # see the get/set procs
1121 |     systemMetricsLastUpdated = getMonoTime() # advanced by updateSystemMetrics()
1122 | 
1123 |   proc getSystemMetricsUpdateInterval*(): Duration =
1124 |     systemMetricsUpdateInterval # the interval consulted by updateSystemMetrics()
1125 | 
1126 |   proc setSystemMetricsUpdateInterval*(value: Duration) =
1127 |     systemMetricsUpdateInterval = value # read back by updateSystemMetrics()
1128 | 
1129 |   proc updateThreadMetrics*() {.gcsafe.}
1130 |     ## Should be called regularly from within every thread for which
1131 |     ## per-thread metrics are desired; currently these are limited to GC heap
1132 |     ## statistics. (Forward declaration; the body follows the gauge declarations.)
1133 | 
1134 |   proc updateSystemMetrics*() =
1135 |     ## Refreshes the thread metrics of the main application thread. Does
1136 |     ## nothing unless automatic updates are enabled, the caller is the main
1137 |     ## thread and at least `systemMetricsUpdateInterval` has elapsed.
1138 |     if not systemMetricsAutomaticUpdate or getThreadId() != mainThreadID:
1139 |       return
1140 |     let monoNow = getMonoTime()
1141 |     if monoNow < systemMetricsLastUpdated + systemMetricsUpdateInterval:
1142 |       return # rate limit: the previous update is still fresh
1143 |     # Remember when we ran, then refresh this (main) thread's GC statistics.
1144 |     systemMetricsLastUpdated = monoNow
1145 |     updateThreadMetrics()
1146 | 
1147 | ################
1148 | # process info #
1149 | ################
1150 | 
1151 | when defined(metrics) and defined(linux):
1152 |   from posix import sysconf, SC_CLK_TCK, SC_PAGESIZE
1153 |   var
1154 |     btime {.global.}: float64 = 0 # "btime" value from /proc/stat; 0 = unknown
1155 |     ticks {.global.}: float64 # clock ticks per second
1156 |     pagesize {.global.}: float64 # page size in bytes
1157 | 
1158 |   if btime == 0:
1159 |     try:
1160 |       for line in lines("/proc/stat"):
1161 |         if line.startsWith("btime"):
1162 |           btime = line.split(' ')[1].parseFloat()
1163 |           break # the value has been found; no need to read the rest
1164 |     except IOError, ValueError:
1165 |       discard # /proc not mounted, or unparsable "btime": btime stays 0
1166 |     ticks = sysconf(SC_CLK_TCK).float64
1167 |     pagesize = sysconf(SC_PAGESIZE).float64
1168 | 
1169 |   type ProcessInfo = ref object of Gauge # samples are produced by `collect` below
1170 |   var processInfo* {.global.} =
1171 |     ProcessInfo.newCollector("process_info", "CPU and memory usage")
1172 | 
1173 |   method collect*(collector: ProcessInfo, output: MetricHandler) =
1174 |     ## Emits process metrics read from `/proc/self/stat`, `/proc/self/limits`
1175 |     ## and `/proc/self/fd`. Returns without output while `btime` is 0; any
1176 |     ## `CatchableError` ends the collection and is passed to `printError()`.
1177 |     let timestamp = collector.now()
1178 | 
1179 |     try:
1180 |       if btime == 0:
1181 |         return # we couldn't access /proc
1182 | 
1183 |       # the content of /proc/self/stat looks like this (the command name may contain spaces):
1184 |       #
1185 |       # $ cat /proc/self/stat
1186 |       # 30494 (cat) R 3022 30494 3022 34830 30494 4210688 98 0 0 0 0 0 0 0 20 0 1 0 73800491 10379264 189 18446744073709551615 94060049248256 94060049282149 140735229395104 0 0 0 0 0 0 0 0 0 17 6 0 0 0 0 0 94060049300560 94060049302112 94060076990464 140735229397011 140735229397031 140735229397031 140735229403119 0
1187 |       # Everything up to the last ") " is dropped, so index 0 is the state field.
1188 |       let statFields = readFile("/proc/self/stat").split(") ")[^1].split(' ')
1189 |       output(
1190 |         name = "process_virtual_memory_bytes", # Virtual memory size in bytes.
1191 |         value = parseFloat(statFields[20]),
1192 |         timestamp = timestamp,
1193 |       )
1194 | 
1195 |       output(
1196 |         name = "process_resident_memory_bytes", # Resident memory size in bytes.
1197 |         value = parseFloat(statFields[21]) * pagesize,
1198 |         timestamp = timestamp,
1199 |       )
1200 |       output(
1201 |         # Start time of the process since unix epoch in seconds.
1202 |         name = "process_start_time_seconds",
1203 |         value = parseFloat(statFields[19]) / ticks + btime,
1204 |         timestamp = timestamp,
1205 |       )
1206 |       output(
1207 |         # Total user and system CPU time spent in seconds.
1208 |         name = "process_cpu_seconds_total",
1209 |         value = (parseFloat(statFields[11]) + parseFloat(statFields[12])) / ticks,
1210 |         timestamp = timestamp,
1211 |       )
1212 | 
1213 |       for limitLine in lines("/proc/self/limits"):
1214 |         if not limitLine.startsWith("Max open files"):
1215 |           continue
1216 |         # a simple `split()` does not combine adjacent whitespace
1217 |         let maxOpenFiles = limitLine.splitWhitespace()[3].parseFloat()
1218 |         # Maximum number of open file descriptors.
1219 |         output(name = "process_max_fds", value = maxOpenFiles, timestamp = timestamp)
1220 |         break
1221 | 
1222 |       output(
1223 |         name = "process_open_fds", # Number of open file descriptors.
1224 |         value = toSeq(walkDir("/proc/self/fd")).len.float64,
1225 |         timestamp = timestamp,
1226 |       )
1227 |     except CatchableError as e:
1228 |       printError(e.msg)
1227 | 
1228 | ####################
1229 | # Nim runtime info #
1230 | ####################
1231 | 
1232 | when defined(metrics):
1233 |   type NimRuntimeInfo = ref object of Collector # samples come from `collect` below
1234 |   let nimRuntimeInfo* {.global.} =
1235 |     NimRuntimeInfo.newCollector("nim_runtime_info", "Nim runtime info")
1236 | 
1237 |   method collect*(collector: NimRuntimeInfo, output: MetricHandler) =
1238 |     ## Emits the heap size of the (up to) `labelsLimit` largest instance types
1239 |     ## plus the total over all instances. Only compiled in with
1240 |     ## `-d:nimTypeNames` and when `dumpHeapInstances` is declared.
1241 |     try:
1242 |       when defined(nimTypeNames) and declared(dumpHeapInstances):
1243 |         # Too high cardinality causes performance issues in Prometheus.
1244 |         const labelsLimit = 10
1245 |         let timestamp = collector.now()
1246 |         var
1247 |           # Higher size than in the loop for adding metrics
1248 |           # to avoid missing same name metrics far apart with low values.
1249 |           heapSizes: array[100, (cstring, int)]
1250 |           heapSum: int # total size of all instances
1251 |         for data in dumpHeapInstances():
1252 |           heapSum += data.sizes
1253 |           var smallest = 0
1254 |           var dedupe = false
1255 |           for i in 0 ..< heapSizes.len:
1256 |             if heapSizes[i][0] == data.name:
1257 |               heapSizes[i][1] += data.sizes
1258 |               dedupe = true
1259 |               break
1260 |             if heapSizes[smallest][1] >= heapSizes[i][1]:
1261 |               smallest = i
1262 |           # A new type evicts the smallest slot, but only if it is larger.
1263 |           if not dedupe and data.sizes > heapSizes[smallest][1]:
1264 |             heapSizes[smallest] = (data.name, data.sizes)
1265 |         sort(heapSizes, proc(a, b: auto): auto = b[1] - a[1]) # largest first
1266 |         # Lower the number of metrics to reduce metric cardinality.
1267 |         for i in 0 ..< labelsLimit:
1268 |           let (typeName, size) = heapSizes[i]
1269 |           if typeName.isNil:
1270 |             break # unused slots sort last; don't emit empty `type_name` samples
1271 |           output(
1272 |             name = "nim_gc_heap_instance_occupied_bytes",
1273 |               # total bytes occupied, by instance type (all threads)
1274 |             value = size.float64,
1275 |             timestamp = timestamp,
1276 |             labels = ["type_name"],
1277 |             labelValues = [$typeName],
1278 |           )
1279 | 
1280 |         output(
1281 |           name = "nim_gc_heap_instance_occupied_summed_bytes",
1282 |             # total bytes occupied by all instance types, in all threads - should be equal to 'sum(nim_gc_mem_occupied_bytes)' when 'updateThreadMetrics()' is being called in all threads, but it's somewhat smaller
1283 |           value = heapSum.float64,
1284 |           timestamp = timestamp,
1285 |         )
1286 |     except CatchableError as e:
1287 |       printError(e.msg)
1288 | 
1289 |   declareGauge nim_gc_mem_bytes,
1290 |     "the number of bytes that are owned by a thread's GC", ["thread_id"] # set by updateThreadMetrics()
1291 |   declareGauge nim_gc_mem_occupied_bytes,
1292 |     "the number of bytes that are owned by a thread's GC and hold data", ["thread_id"] # set by updateThreadMetrics()
1293 | 
1294 |   proc updateThreadMetrics() =
1295 |     ## Publishes the calling thread's GC heap figures under its thread ID. A
1296 |     ## figure is skipped when `getTotalMem` / `getOccupiedMem` is not declared.
1297 |     try:
1298 |       let threadLabel = $getThreadId()
1299 | 
1300 |       when declared(getTotalMem):
1301 |         nim_gc_mem_bytes.set(
1302 |           float64(getTotalMem()),
1303 |           labelValues = @[threadLabel],
1304 |           doUpdateSystemMetrics = false,
1305 |         )
1306 |       when declared(getOccupiedMem):
1307 |         nim_gc_mem_occupied_bytes.set(
1308 |           float64(getOccupiedMem()),
1309 |           labelValues = @[threadLabel],
1310 |           doUpdateSystemMetrics = false,
1311 |         )
1312 |       # TODO: parse the output of `GC_getStatistics()` for more stats
1313 |     except CatchableError as e:
1314 |       printError(e.msg)
1315 | 


--------------------------------------------------------------------------------