├── .git-blame-ignore-revs ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE-APACHEv2 ├── LICENSE-MIT ├── README.md ├── benchmarks ├── bench_collectors.nim └── bench_common.nim ├── config.nims ├── metrics.nim ├── metrics.nimble ├── metrics ├── chronicles_support.nim ├── chronos_httpserver.nim ├── common.nim └── shseq.nim ├── nim.cfg └── tests ├── chronicles_tests.nim ├── chronos_server_tests.nim ├── duplicate_coll_mod.nim ├── main_tests.nim ├── nim.cfg └── test_shseq.nim /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Formatted with nph v0.6.1-0-g0d8000e 2 | d5d616241559046461394ab0bab5970ec4beb4de 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build: 11 | uses: status-im/nimbus-common-workflow/.github/workflows/common.yml@main 12 | with: 13 | test-command: | 14 | nimble install -y chronicles@#head 15 | nimble install -y unittest2 16 | nimble test 17 | nimble test_chronicles 18 | 19 | nph: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Check `nph` formatting 24 | uses: arnetheduck/nph-action@v1 25 | with: 26 | version: 0.6.1 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache/ 2 | /build 3 | 4 | nimble.develop 5 | nimble.paths 6 | vendor/ 7 | -------------------------------------------------------------------------------- /LICENSE-APACHEv2: -------------------------------------------------------------------------------- 1 | nim-metrics is licensed under the Apache License version 2 2 | Copyright (c) 2019 Status Research & Development GmbH 3 | 
----------------------------------------------------- 4 | 5 | Apache License 6 | Version 2.0, January 2004 7 | http://www.apache.org/licenses/ 8 | 9 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 10 | 11 | 1. Definitions. 12 | 13 | "License" shall mean the terms and conditions for use, reproduction, 14 | and distribution as defined by Sections 1 through 9 of this document. 15 | 16 | "Licensor" shall mean the copyright owner or entity authorized by 17 | the copyright owner that is granting the License. 18 | 19 | "Legal Entity" shall mean the union of the acting entity and all 20 | other entities that control, are controlled by, or are under common 21 | control with that entity. For the purposes of this definition, 22 | "control" means (i) the power, direct or indirect, to cause the 23 | direction or management of such entity, whether by contract or 24 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 25 | outstanding shares, or (iii) beneficial ownership of such entity. 26 | 27 | "You" (or "Your") shall mean an individual or Legal Entity 28 | exercising permissions granted by this License. 29 | 30 | "Source" form shall mean the preferred form for making modifications, 31 | including but not limited to software source code, documentation 32 | source, and configuration files. 33 | 34 | "Object" form shall mean any form resulting from mechanical 35 | transformation or translation of a Source form, including but 36 | not limited to compiled object code, generated documentation, 37 | and conversions to other media types. 38 | 39 | "Work" shall mean the work of authorship, whether in Source or 40 | Object form, made available under the License, as indicated by a 41 | copyright notice that is included in or attached to the work 42 | (an example is provided in the Appendix below). 
43 | 44 | "Derivative Works" shall mean any work, whether in Source or Object 45 | form, that is based on (or derived from) the Work and for which the 46 | editorial revisions, annotations, elaborations, or other modifications 47 | represent, as a whole, an original work of authorship. For the purposes 48 | of this License, Derivative Works shall not include works that remain 49 | separable from, or merely link (or bind by name) to the interfaces of, 50 | the Work and Derivative Works thereof. 51 | 52 | "Contribution" shall mean any work of authorship, including 53 | the original version of the Work and any modifications or additions 54 | to that Work or Derivative Works thereof, that is intentionally 55 | submitted to Licensor for inclusion in the Work by the copyright owner 56 | or by an individual or Legal Entity authorized to submit on behalf of 57 | the copyright owner. For the purposes of this definition, "submitted" 58 | means any form of electronic, verbal, or written communication sent 59 | to the Licensor or its representatives, including but not limited to 60 | communication on electronic mailing lists, source code control systems, 61 | and issue tracking systems that are managed by, or on behalf of, the 62 | Licensor for the purpose of discussing and improving the Work, but 63 | excluding communication that is conspicuously marked or otherwise 64 | designated in writing by the copyright owner as "Not a Contribution." 65 | 66 | "Contributor" shall mean Licensor and any individual or Legal Entity 67 | on behalf of whom a Contribution has been received by Licensor and 68 | subsequently incorporated within the Work. 69 | 70 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 71 | this License, each Contributor hereby grants to You a perpetual, 72 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 73 | copyright license to reproduce, prepare Derivative Works of, 74 | publicly display, publicly perform, sublicense, and distribute the 75 | Work and such Derivative Works in Source or Object form. 76 | 77 | 3. Grant of Patent License. Subject to the terms and conditions of 78 | this License, each Contributor hereby grants to You a perpetual, 79 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 80 | (except as stated in this section) patent license to make, have made, 81 | use, offer to sell, sell, import, and otherwise transfer the Work, 82 | where such license applies only to those patent claims licensable 83 | by such Contributor that are necessarily infringed by their 84 | Contribution(s) alone or by combination of their Contribution(s) 85 | with the Work to which such Contribution(s) was submitted. If You 86 | institute patent litigation against any entity (including a 87 | cross-claim or counterclaim in a lawsuit) alleging that the Work 88 | or a Contribution incorporated within the Work constitutes direct 89 | or contributory patent infringement, then any patent licenses 90 | granted to You under this License for that Work shall terminate 91 | as of the date such litigation is filed. 92 | 93 | 4. Redistribution. 
You may reproduce and distribute copies of the 94 | Work or Derivative Works thereof in any medium, with or without 95 | modifications, and in Source or Object form, provided that You 96 | meet the following conditions: 97 | 98 | (a) You must give any other recipients of the Work or 99 | Derivative Works a copy of this License; and 100 | 101 | (b) You must cause any modified files to carry prominent notices 102 | stating that You changed the files; and 103 | 104 | (c) You must retain, in the Source form of any Derivative Works 105 | that You distribute, all copyright, patent, trademark, and 106 | attribution notices from the Source form of the Work, 107 | excluding those notices that do not pertain to any part of 108 | the Derivative Works; and 109 | 110 | (d) If the Work includes a "NOTICE" text file as part of its 111 | distribution, then any Derivative Works that You distribute must 112 | include a readable copy of the attribution notices contained 113 | within such NOTICE file, excluding those notices that do not 114 | pertain to any part of the Derivative Works, in at least one 115 | of the following places: within a NOTICE text file distributed 116 | as part of the Derivative Works; within the Source form or 117 | documentation, if provided along with the Derivative Works; or, 118 | within a display generated by the Derivative Works, if and 119 | wherever such third-party notices normally appear. The contents 120 | of the NOTICE file are for informational purposes only and 121 | do not modify the License. You may add Your own attribution 122 | notices within Derivative Works that You distribute, alongside 123 | or as an addendum to the NOTICE text from the Work, provided 124 | that such additional attribution notices cannot be construed 125 | as modifying the License. 
126 | 127 | You may add Your own copyright statement to Your modifications and 128 | may provide additional or different license terms and conditions 129 | for use, reproduction, or distribution of Your modifications, or 130 | for any such Derivative Works as a whole, provided Your use, 131 | reproduction, and distribution of the Work otherwise complies with 132 | the conditions stated in this License. 133 | 134 | 5. Submission of Contributions. Unless You explicitly state otherwise, 135 | any Contribution intentionally submitted for inclusion in the Work 136 | by You to the Licensor shall be under the terms and conditions of 137 | this License, without any additional terms or conditions. 138 | Notwithstanding the above, nothing herein shall supersede or modify 139 | the terms of any separate license agreement you may have executed 140 | with Licensor regarding such Contributions. 141 | 142 | 6. Trademarks. This License does not grant permission to use the trade 143 | names, trademarks, service marks, or product names of the Licensor, 144 | except as required for reasonable and customary use in describing the 145 | origin of the Work and reproducing the content of the NOTICE file. 146 | 147 | 7. Disclaimer of Warranty. Unless required by applicable law or 148 | agreed to in writing, Licensor provides the Work (and each 149 | Contributor provides its Contributions) on an "AS IS" BASIS, 150 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 151 | implied, including, without limitation, any warranties or conditions 152 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 153 | PARTICULAR PURPOSE. You are solely responsible for determining the 154 | appropriateness of using or redistributing the Work and assume any 155 | risks associated with Your exercise of permissions under this License. 156 | 157 | 8. Limitation of Liability. 
In no event and under no legal theory, 158 | whether in tort (including negligence), contract, or otherwise, 159 | unless required by applicable law (such as deliberate and grossly 160 | negligent acts) or agreed to in writing, shall any Contributor be 161 | liable to You for damages, including any direct, indirect, special, 162 | incidental, or consequential damages of any character arising as a 163 | result of this License or out of the use or inability to use the 164 | Work (including but not limited to damages for loss of goodwill, 165 | work stoppage, computer failure or malfunction, or any and all 166 | other commercial damages or losses), even if such Contributor 167 | has been advised of the possibility of such damages. 168 | 169 | 9. Accepting Warranty or Additional Liability. While redistributing 170 | the Work or Derivative Works thereof, You may choose to offer, 171 | and charge a fee for, acceptance of support, warranty, indemnity, 172 | or other liability obligations and/or rights consistent with this 173 | License. However, in accepting such obligations, You may act only 174 | on Your own behalf and on Your sole responsibility, not on behalf 175 | of any other Contributor, and only if You agree to indemnify, 176 | defend, and hold each Contributor harmless for any liability 177 | incurred by, or claims asserted against, such Contributor by reason 178 | of your accepting any such warranty or additional liability. 179 | 180 | END OF TERMS AND CONDITIONS 181 | 182 | APPENDIX: How to apply the Apache License to your work. 183 | 184 | To apply the Apache License to your work, attach the following 185 | boilerplate notice, with the fields enclosed by brackets "[]" 186 | replaced with your own identifying information. (Don't include 187 | the brackets!) The text should be enclosed in the appropriate 188 | comment syntax for the file format. 
We also recommend that a 189 | file or class name and description of purpose be included on the 190 | same "printed page" as the copyright notice for easier 191 | identification within third-party archives. 192 | 193 | Copyright 2019 Status Research & Development GmbH 194 | 195 | Licensed under the Apache License, Version 2.0 (the "License"); 196 | you may not use this file except in compliance with the License. 197 | You may obtain a copy of the License at 198 | 199 | http://www.apache.org/licenses/LICENSE-2.0 200 | 201 | Unless required by applicable law or agreed to in writing, software 202 | distributed under the License is distributed on an "AS IS" BASIS, 203 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 204 | See the License for the specific language governing permissions and 205 | limitations under the License. 206 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | nim-metrics is licensed under the MIT License 2 | Copyright (c) 2019 Status Research & Development GmbH 3 | ----------------------------------------------------- 4 | 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2019 Status Research & Development GmbH 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nim-metrics 2 | 3 | [![CI](https://github.com/status-im/nim-metrics/actions/workflows/ci.yml/badge.svg)](https://github.com/status-im/nim-metrics/actions/workflows/ci.yml) 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 5 | [![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 6 | ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg) 7 | 8 | ## Introduction 9 | 10 | Nim metrics client library supporting the [Prometheus](https://prometheus.io/) 11 | monitoring toolkit. 12 | 13 | Designed to be thread-safe and efficient, it's disabled by default so libraries 14 | can use it without any overhead for those library users not interested in 15 | metrics. 16 | 17 | ## Installation 18 | 19 | You can install the development version of the library through Nimble with the 20 | following command: 21 | ``` 22 | nimble install metrics 23 | ``` 24 | 25 | ## Usage 26 | 27 | To enable metrics, compile your code with `-d:metrics --threads:on`. 28 | 29 | ## Architectural overview 30 | 31 | `Collector` objects holding various `Metric` objects are registered in one or 32 | more `Registry` objects. 
There is a default registry being used for the most 33 | common case. 34 | 35 | Metric values are `float64`, but the API also accepts `int64` parameters which 36 | are then cast to `float64`. 37 | 38 | By starting an HTTP server, custom metrics (and some default ones) can be 39 | pulled by Prometheus. They can also be serialised to strings for some quick and 40 | dirty logging. Integration with the 41 | [Chronicles](https://github.com/status-im/nim-chronicles) logging library is 42 | available in a separate module. 43 | 44 | That HTTP server used for pulling is running in its own thread. Metric pushing 45 | also uses a dedicated thread for networking, in order to minimise the overhead. 46 | 47 | ## Collector types 48 | 49 | ### Counter 50 | 51 | A counter's value can only be incremented. 52 | 53 | ```nim 54 | # Declare a variable `myCounter` holding a `Counter` object with a `Metric` 55 | # having the same name as the variable. The help string is mandatory. The initial 56 | # value is 0 and it's automatically added to `defaultRegistry`. 57 | declareCounter myCounter, "an example counter" 58 | 59 | # increment it by 1 60 | myCounter.inc() 61 | 62 | # increment it by 10 63 | myCounter.inc(10) 64 | 65 | # count all exceptions in a block 66 | someCounter.countExceptions: 67 | foo() 68 | 69 | # or just an exception type 70 | otherCounter.countExceptions(ValueError): 71 | bar() 72 | 73 | # do you need a variable that's being exported from the module? 74 | declarePublicCounter seenPeers, "number of seen peers" 75 | # it's the equivalent of `var seenPeers* = ...` 76 | 77 | # want to avoid declaring a variable, giving it a help string, or anything else for that matter? 78 | counter("one_off_counter").inc() 79 | # What this does is generate a {.global.} var, so as long as you use the same 80 | # string, you're using the same counter. 
Using strings instead of identifiers 81 | # skips any compiler protection in case of typos, so this API is not recommended 82 | # for serious use. 83 | ``` 84 | 85 | ### Gauge 86 | 87 | Gauges can be incremented, decremented or set to a given value. 88 | 89 | ```nim 90 | declareGauge myGauge, "an example gauge" # or `declarePublicGauge` to export it 91 | myGauge.inc(4.5) 92 | myGauge.dec(2) 93 | myGauge.set(10) 94 | 95 | myGauge.setToCurrentTime() # Unix timestamp in seconds 96 | 97 | myGauge.trackInProgress: 98 | # myGauge is incremented at the start of the block (a `myGauge.inc()` is being inserted here) 99 | foo() 100 | # and decremented at the end (`myGauge.dec()`) 101 | 102 | # set the gauge to the runtime of a block, in seconds 103 | myGauge.time: 104 | bar() 105 | 106 | # alternative, unrecommended API 107 | gauge("one_off_gauge").set(42) 108 | ``` 109 | 110 | ### Summary 111 | 112 | Summaries sample observations and provide a total count and the sum of all observed values. 113 | 114 | ```nim 115 | declareSummary mySummary, "an example summary" # or `declarePublicSummary` to export it 116 | mySummary.observe(10) 117 | mySummary.observe(0.5) 118 | echo mySummary 119 | ``` 120 | 121 | This will print out: 122 | 123 | ```text 124 | # HELP mySummary an example summary 125 | # TYPE mySummary summary 126 | mySummary_sum 10.5 1569332171696 127 | mySummary_count 2.0 1569332171696 128 | mySummary_created 1569332171.0 129 | ``` 130 | 131 | ```nim 132 | # observe the execution duration of a block, in seconds 133 | mySummary.time: 134 | foo() 135 | 136 | # alternative, unrecommended API 137 | summary("one_off_summary").observe(10) 138 | ``` 139 | 140 | ### Histogram 141 | 142 | These cumulative histograms store the count and total sum of observed values, 143 | just like summaries. Furthermore, they place the observed values in 144 | configurable buckets and provide per-bucket counts. 
145 | 146 | Note that an observed value will be counted in all buckets that have a size greater or equal to it. 147 | 148 | ```nim 149 | declareHistogram myHistogram, "an example histogram" # or `declarePublicHistogram` to export it 150 | # This uses the default bucket sizes: [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 151 | # 2.5, 5.0, 7.5, 10.0, Inf] 152 | 153 | # You can customise the buckets: 154 | declareHistogram withCustomBuckets, "custom buckets", buckets = [0.0, 1.0, 2.0] 155 | # if you leave out the "Inf" bucket, it's added for you 156 | withCustomBuckets.observe(0.5) 157 | withCustomBuckets.observe(1) 158 | withCustomBuckets.observe(1.5) 159 | withCustomBuckets.observe(3.7) 160 | echo withCustomBuckets 161 | ``` 162 | 163 | This will print out: 164 | 165 | ```text 166 | # HELP withCustomBuckets custom buckets 167 | # TYPE withCustomBuckets histogram 168 | withCustomBuckets_sum 6.7 1569334493506 169 | withCustomBuckets_count 4.0 1569334493506 170 | withCustomBuckets_created 1569334493.0 171 | withCustomBuckets_bucket{le="0.0"} 0.0 172 | withCustomBuckets_bucket{le="1.0"} 2.0 1569334493506 173 | withCustomBuckets_bucket{le="2.0"} 3.0 1569334493506 174 | withCustomBuckets_bucket{le="+Inf"} 4.0 1569334493506 175 | ``` 176 | 177 | ```nim 178 | # observe the execution duration of a block, in seconds 179 | myHistogram.time: 180 | foo() 181 | 182 | # alternative, unrecommended API 183 | histogram("one_off_histogram").observe(10) 184 | ``` 185 | 186 | ### Custom collectors 187 | 188 | Sometimes you need to create metrics on the fly, with a custom `collect()` 189 | method of a custom collector type. 
190 | 191 | Let's say you have a USB-attached power meter and, for some reason, you want 192 | to read the power consumption every time Prometheus reads your metrics: 193 | 194 | ```nim 195 | import metrics, times 196 | 197 | when defined(metrics): 198 | type PowerCollector = ref object of Collector 199 | let powerCollector = PowerCollector.newCollector(name = "power_usage", help = "Instantaneous power usage - in watts.") 200 | 201 | method collect(collector: PowerCollector, output: MetricHandler): Metrics = 202 | let timestamp = collector.now() 203 | output( 204 | name = "power_usage", 205 | value = getPowerUsage(), # your power-meter reader 206 | timestamp = timestamp, 207 | ) 208 | 209 | ``` 210 | 211 | There's a bit of repetition in the collector and metric names, because we no 212 | longer have behind-the-scenes name copying/deriving there. 213 | 214 | You can output multiple metrics from your custom `collect()` method. It's 215 | perfectly legal and we do that internally for our system/runtime metrics. 216 | 217 | Try not to get creative with dynamic metric names - Prometheus has a hard time 218 | dealing with that. 219 | 220 | ## Labels 221 | 222 | Metric labels are supported for the Prometheus backend, as a way to add extra 223 | dimensions corresponding to each combination of metric name and label values. 224 | This can quickly get out of hand, as you can guess, so don't go overboard with 225 | this feature. (See also the [relevant warnings in Prometheus' docs](https://prometheus.io/docs/practices/instrumentation/#do-not-overuse-labels).) 226 | 227 | You declare label names when defining the collector and label values each time 228 | you update it: 229 | 230 | ```nim 231 | declareCounter lCounter, "example counter with labels", ["foo", "bar"] 232 | lCounter.inc(labelValues = ["1", "a"]) # the label values must be strings 233 | lCounter.inc(labelValues = ["2", "b"]) 234 | # How many metrics are now in this collector? 
Two, because we used two sets of label values: 235 | echo lCounter 236 | ``` 237 | 238 | ```text 239 | # HELP lCounter example counter with labels 240 | # TYPE lCounter counter 241 | lCounter_total{foo="1",bar="a"} 1.0 1569340503703 242 | lCounter_created{foo="1",bar="a"} 1569340503.0 243 | lCounter_total{foo="2",bar="b"} 1.0 1569340503703 244 | lCounter_created{foo="2",bar="b"} 1569340503.0 245 | ``` 246 | 247 | (OK, there are four metrics in total, because each one gets a `*_created` buddy.) 248 | 249 | So if you must use labels, make sure there's a finite and small number of 250 | possible label values being set. 251 | 252 | ## Metric name and label name validation 253 | 254 | We use Prometheus standards for that, so metric names must comply with the 255 | `^[a-zA-Z_:][a-zA-Z0-9_:]*$` regex while label names have to comply with 256 | `^[a-zA-Z_][a-zA-Z0-9_]*$`. 257 | 258 | In the examples you've seen so far, all collectors declared with 259 | `declare` had more stringent naming rules, since their names were 260 | also identifiers for Nim variables - which can't have colons in them. 261 | 262 | To overcome this, without relying on the discouraged alternative API, use the `name` parameter: 263 | 264 | ```nim 265 | declareCounter cCounter, "counter with colons in name", name = "foo:bar:baz" 266 | cCounter.inc() 267 | echo cCounter 268 | ``` 269 | 270 | ```text 271 | # HELP foo:bar:baz counter with colons in name 272 | # TYPE foo:bar:baz counter 273 | foo:bar:baz_total 1.0 1569341756504 274 | foo:bar:baz_created 1569341756.0 275 | ``` 276 | 277 | ## Logging 278 | 279 | Metrics are not logs, but you might want to log them nonetheless. 
The `$` 280 | procedure is defined for collectors and registries, so you can just use the 281 | built-in string serialisation to print them: 282 | 283 | ```nim 284 | echo myCounter, myGauge 285 | echo defaultRegistry 286 | ``` 287 | 288 | Integration with [Chronicles](https://github.com/status-im/nim-chronicles) is available in a separate module: 289 | 290 | ```nim 291 | import chronicles, metrics, metrics/chronicles_support 292 | 293 | # ... 294 | 295 | info "myCounter", myCounter 296 | debug "default registry", defaultRegistry 297 | ``` 298 | 299 | ## Testing 300 | 301 | When testing, you might want to isolate some collectors by registering them 302 | into a custom registry: 303 | 304 | ```nim 305 | var myRegistry = newRegistry() 306 | declareCounter myCounter, "help", registry = myRegistry 307 | echo myRegistry 308 | 309 | # this means that `myCounter` is no longer registered in `defaultRegistry` 310 | echo defaultRegistry 311 | ``` 312 | 313 | These unoptimised (read "very inefficient") `value()` and `valueByName()` 314 | procedures for accessing metric values should only be used inside test suites: 315 | 316 | ```nim 317 | suite "counter": 318 | test "basic": 319 | declareCounter myCounter, "help" 320 | check myCounter.value == 0 321 | myCounter.inc() 322 | check myCounter.value == 1 323 | 324 | declareSummary cSummary, "summary with colons in name", name = "foo:bar:baz" 325 | cSummary.observe(10) 326 | check cSummary.valueByName("foo:bar:baz_count") == 1 327 | check cSummary.valueByName("foo:bar:baz_sum") == 10 328 | ``` 329 | 330 | ## Prometheus endpoint 331 | 332 | First, you need to import the http server module 333 | 334 | ### Chronos 335 | 336 | Using [Chronos](https://github.com/status-im/nim-chronos/): 337 | 338 | ```nim 339 | import metrics, metrics/chronos_httpserver 340 | ``` 341 | 342 | ### Starting the HTTP server 343 | 344 | Start an HTTP server listening on `127.0.0.1:8000` from which the Prometheus 345 | daemon can pull the metrics from all 
collectors in `defaultRegistry` (plus the 346 | default metrics): 347 | 348 | ```nim 349 | startMetricsHttpServer() 350 | ``` 351 | 352 | Or set your own address and port to listen to: 353 | 354 | ```nim 355 | import net 356 | 357 | startMetricsHttpServer("127.0.0.1", Port(8000)) 358 | ``` 359 | 360 | The HTTP server will run in its own thread. It will expose two endpoints: 361 | 362 | * http://127.0.0.1:8000/metrics - Returns the metrics consumed by Prometheus. 363 | * http://127.0.0.1:8000/health - Healthcheck that returns `OK` string and 200 code. 364 | 365 | ### System metrics 366 | 367 | Default metrics available (see also [the relevant Prometheus docs](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors)): 368 | 369 | ```text 370 | process_cpu_seconds_total 371 | process_open_fds 372 | process_max_fds 373 | process_virtual_memory_bytes 374 | process_resident_memory_bytes 375 | process_start_time_seconds 376 | ``` 377 | 378 | The `process_*` metrics are only available on Linux, for now. 379 | 380 | ### Nim runtime metrics 381 | 382 | The following metrics are automatically exposed for the Nim runtime: 383 | 384 | ```text 385 | nim_gc_mem_bytes[thread_id] 386 | nim_gc_mem_occupied_bytes[thread_id] 387 | nim_gc_heap_instance_occupied_bytes[type_name] 388 | nim_gc_heap_instance_occupied_summed_bytes 389 | ``` 390 | 391 | `nim_gc_heap_*` metrics are only available when compiling with 392 | `-d:nimTypeNames` and hold the top 10 instance types, in reverse order of 393 | their total heap usage (from all threads), at the time the metric is polled. 394 | Since this set changes with time, you'll see more than 10 types in Grafana. 395 | 396 | The Nim garbage collector exposes some per-thread metrics whose value is only 397 | accessible from within the thread itself. 398 | 399 | To update these, call `updateThreadMetrics` regularly from within each relevant 400 | thread - each such metric will include a `thread_id` label. 
401 | 402 | Thread metrics for the main application thread are automatically updated 403 | whenever a metric is updated from the main application thread, though only 404 | at specified intervals. 405 | 406 | ```nim 407 | import times 408 | when defined(metrics): 409 | # get the default minimal update interval 410 | echo getSystemMetricsUpdateInterval() 411 | # you can change it 412 | setSystemMetricsUpdateInterval(initDuration(seconds = 2)) 413 | ``` 414 | 415 | In performance-sensitive applications, it is recommended that you disable the 416 | piggy-backing and update system metrics manually: 417 | 418 | ```nim 419 | # disable automatic updates 420 | setSystemMetricsAutomaticUpdate(false) 421 | # somewhere in your event loop, at an interval of your choice 422 | updateThreadMetrics() 423 | ``` 424 | 425 | Screenshot of [Grafana showing data from Prometheus that pulls it from Nimbus which uses nim-metrics](https://github.com/status-im/nimbus-eth1/#metric-visualisation): 426 | 427 | ![Grafana screenshot](https://i.imgur.com/AdtavDA.png) 428 | 429 | ## Historical notes 430 | 431 | Versions up to `v0.1.2` also supported push metric servers such as 432 | [StatsD](https://github.com/statsd/statsd/wiki) and 433 | [Carbon](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) - this 434 | support has since been removed. 435 | 436 | ## Contributing 437 | 438 | When submitting pull requests, please add test cases for any new features or 439 | fixes and make sure `nimble test` is still able to execute the entire test 440 | suite successfully. 441 | 442 | Code formatting is done using [nph](https://github.com/arnetheduck/nph). See 443 | [CI](./.github/workflows/ci.yml) for the correct version. 
444 | 445 | ## License 446 | 447 | Licensed and distributed under either of 448 | 449 | * MIT license: [LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT 450 | * Apache License, Version 2.0, ([LICENSE-APACHEv2](LICENSE-APACHEv2) or http://www.apache.org/licenses/LICENSE-2.0) 451 | 452 | at your option. These files may not be copied, modified, or distributed except according to those terms. 453 | -------------------------------------------------------------------------------- /benchmarks/bench_collectors.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | import ./bench_common, ../metrics 8 | 9 | proc main(nb_samples: Natural) = 10 | warmup() 11 | 12 | var res: float64 13 | 14 | bench("create a counter and increment it 3 times with different values", res): 15 | declareCounter counter1, "help" 16 | counter1.inc() 17 | counter1.inc(2) 18 | counter1.inc(2.1) 19 | res = counter1.value 20 | counter1.unregister() 21 | 22 | let labelValues = @["a", "b"] 23 | bench( 24 | "create a counter with 2 labels and increment it 3 times with different values", res 25 | ): 26 | declareCounter counter2, "help", @["foo", "bar"] 27 | counter2.inc(labelValues = labelValues) 28 | counter2.inc(2, labelValues) 29 | counter2.inc(2.1, labelValues) 30 | res = counter2.value(labelValues) 31 | counter2.unregister() 32 | 33 | when isMainModule: 34 | main(10000) 35 | -------------------------------------------------------------------------------- /benchmarks/bench_common.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2021 Status
Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | import times, stats, strformat 8 | export times, stats, strformat 9 | 10 | proc warmup*() = 11 | # Warmup - make sure cpu is on max perf 12 | let start = cpuTime() 13 | var foo = 123'i64 14 | for i in 0'i64 ..< 300_000_000'i64: 15 | foo += i * i mod 456 16 | foo = foo mod 789 17 | 18 | # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects 19 | let stop = cpuTime() 20 | echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)" 21 | 22 | template printStats*(experiment_name: string, compute_result: typed) {.dirty.} = 23 | echo "#################################################################" 24 | echo "\n" & experiment_name 25 | echo &"Collected {stats.n} samples in {global_stop - global_start:>4.3f} seconds" 26 | echo &"Average time: {stats.mean * 1000 :>4.3f} ms" 27 | echo &"Stddev time: {stats.standardDeviationS * 1000 :>4.3f} ms" 28 | echo &"Min time: {stats.min * 1000 :>4.3f} ms" 29 | echo &"Max time: {stats.max * 1000 :>4.3f} ms" 30 | echo "\nDisplay computation result to make sure it's not optimized away" 31 | echo compute_result # Prevents compiler from optimizing stuff away 32 | echo '\n' 33 | 34 | template bench*(name: string, compute_result: typed, body: untyped) {.dirty.} = 35 | block: # Actual bench 36 | var stats: RunningStat 37 | let global_start = cpuTime() 38 | for _ in 0 ..< nb_samples: 39 | let start = cpuTime() 40 | block: 41 | body 42 | let stop = cpuTime() 43 | stats.push stop - start 44 | let global_stop = cpuTime() 45 | printStats(name, compute_result) 46 | -------------------------------------------------------------------------------- 
/config.nims: -------------------------------------------------------------------------------- 1 | # begin Nimble config (version 1) 2 | when fileExists("nimble.paths"): 3 | include "nimble.paths" 4 | # end Nimble config 5 | -------------------------------------------------------------------------------- /metrics.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2023 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | # The API is roughly based on the Prometheus client library recommendations: 8 | # https://prometheus.io/docs/instrumenting/writing_clientlibs/ 9 | # 10 | # The Prometheus text exposition format is also tightly coupled: 11 | # https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format 12 | 13 | {.push raises: [].} 14 | 15 | when defined(metricsTest): 16 | {.pragma: testOnly.} 17 | else: 18 | {.pragma: testOnly, deprecated: "slow helpers used for tests only".} 19 | 20 | import std/[locks, monotimes, os, sets, times], metrics/shseq 21 | 22 | export shseq 23 | 24 | when defined(metrics): 25 | import std/[algorithm, hashes, strutils, sequtils], stew/ptrops, metrics/common 26 | 27 | type 28 | CStringArr = object # Fixed-size array of cstrings - ownership is managed manually 29 | items: ptr UncheckedArray[cstring] 30 | len: int 31 | 32 | StringArrView = object 33 | items: ptr UncheckedArray[string] 34 | len: int 35 | 36 | LabelKey = object # Helper type for heterogeneous lookups in the keys table 37 | data: CStringArr 38 | refs: StringArrView 39 | 40 | Metric* = object 41 | # Metric needs to be trivial because it's stored in a cross-thread seq and 42 | # therefore cannot use GC types 43 | name*: 
cstring 44 | value*: float64 45 | labels*: CStringArr 46 | labelValues*: CStringArr 47 | timestamp*: Time 48 | 49 | MetricHandler* = proc( 50 | name: string, 51 | value: float64, 52 | labels: openArray[string] = [], 53 | labelValues: openArray[string] = [], 54 | timestamp: Time, 55 | ) {.gcsafe, raises: [].} 56 | 57 | CollecorHandler* = proc(collector: Collector) 58 | 59 | Collector* = ref object of RootObj 60 | lock*: Lock 61 | name*: string 62 | help*: string 63 | typ*: string 64 | labels*: seq[string] 65 | timestamp*: bool ## Whether or not we're collecting timestamps for this collector 66 | 67 | SimpleCollector* = ref object of Collector 68 | metricKeys*: ShSeq[LabelKey] 69 | metrics*: ShSeq[ShSeq[Metric]] 70 | 71 | IgnoredCollector* = object 72 | 73 | Counter* = ref object of SimpleCollector 74 | Gauge* = ref object of SimpleCollector 75 | Summary* = ref object of SimpleCollector 76 | Histogram* = ref object of SimpleCollector # a cumulative histogram, not a regular one 77 | buckets*: seq[float64] 78 | 79 | Registry* = ref object of RootObj 80 | lock*: Lock 81 | collectors*: OrderedSet[Collector] 82 | creationThreadId*: int 83 | 84 | RegistrationError* = object of CatchableError 85 | 86 | ######### 87 | # utils # 88 | ######### 89 | 90 | when defined(metrics): 91 | # TODO the shared memory allocated below is never freed - this is fine as long 92 | # as registries / metrics never go away (ie they're globals whose lifetime 93 | # matches that of the application) but to do things properly, this shared 94 | # memory should be released at some point 95 | from system/ansi_c import c_strcmp 96 | proc createShared(_: type cstring, v: string): cstring = 97 | # Create a shared-memory copy of the given string that later must be manually 98 | # deallocated 99 | var p = cast[cstring](createSharedU(char, v.len + 1)) 100 | if v.len > 0: 101 | copyMem(p, baseAddr v, v.len) 102 | p[v.len] = '\0' 103 | p 104 | 105 | proc createShared(_: type CStringArr, v: openArray[string]): 
CStringArr = 106 | if v.len > 0: 107 | var p = cast[ptr UncheckedArray[cstring]](createSharedU(cstring, v.len)) 108 | for i in 0 ..< v.len: 109 | p[i] = cstring.createShared(v[i]) 110 | 111 | CStringArr(items: p, len: v.len) 112 | else: 113 | CStringArr() 114 | 115 | proc `[]`(s: CStringArr, i: int): cstring = 116 | s.items[i] 117 | 118 | proc toStringSeq(v: CStringArr): seq[string] = 119 | for i in 0 ..< v.len: 120 | result.add $v[i] 121 | 122 | proc len(a: LabelKey): int = 123 | if a.data.len > 0: a.data.len else: a.refs.len 124 | 125 | template `[]`(a: LabelKey, i: int): cstring = 126 | if a.data.len > 0: 127 | a.data[i] 128 | else: 129 | cstring(a.refs.items[i]) 130 | 131 | proc `==`(a, b: LabelKey): bool = 132 | if a.len == b.len: 133 | for i in 0 ..< a.len: 134 | if c_strcmp(a[i], b[i]) != 0: 135 | return false 136 | true 137 | else: 138 | false 139 | 140 | proc cmp(a, b: LabelKey): int = 141 | # TODO https://github.com/nim-lang/Nim/issues/24941 142 | for i in 0 ..< min(a.len, b.len): 143 | let c = c_strcmp(a[i], b[i]) 144 | if c != 0: 145 | return c 146 | 147 | cmp(a.len, b.len) 148 | 149 | proc init(T: type LabelKey, values: openArray[string]): T = 150 | # TODO Avoid leaking this shared array, in case we were to clean up the 151 | # registry 152 | LabelKey(data: CStringArr.createShared(values)) 153 | 154 | proc view(T: type LabelKey, values: openArray[string]): T = 155 | # TODO some day, we might get view types - until then.. 
156 | LabelKey( 157 | refs: 158 | StringArrView(items: baseAddr(values).makeUncheckedArray(), len: values.len()) 159 | ) 160 | 161 | proc toMilliseconds*(time: times.Time): int64 = 162 | convert(Seconds, Milliseconds, time.toUnix()) + 163 | convert(Nanoseconds, Milliseconds, time.nanosecond()) 164 | 165 | template nameOrIdentifier*(identifier: untyped, name: string): string = 166 | if name.len == 0: 167 | astToStr(identifier) 168 | else: 169 | name 170 | 171 | proc processHelp(name, help: string): string = 172 | "# HELP " & name & " " & help.multiReplace([("\\", "\\\\"), ("\n", "\\n")]) & "\n" 173 | 174 | proc processType(name, typ: string): string = 175 | "# TYPE " & name & " " & typ & "\n" 176 | 177 | proc addText*( 178 | res: var string, 179 | name: auto, 180 | value: float64, 181 | labels, labelValues: auto, 182 | timestamp: Time, 183 | ) = 184 | # A bit convoluted to mostly avoid pointless memory allocations - there's no 185 | # (trivial) way however to append a float to an existing string 186 | res.add name 187 | if labels.len > 0: 188 | res.add('{') 189 | for i in 0 ..< labels.len: 190 | if i > 0: 191 | res.add "," 192 | res.add labels[i] 193 | res.add "=\"" 194 | if labelValues.len > i: 195 | for c in labelValues[i]: 196 | case c 197 | of '\\': 198 | res.add "\\\\" 199 | of '\n': 200 | res.add "\\n" # a line feed is escaped as backslash + 'n'; a raw newline would end the sample line 201 | of '"': 202 | res.add "\\\"" 203 | else: 204 | res.add c 205 | res.add "\"" 206 | res.add('}') 207 | res.add(" ") 208 | res.add($value) 209 | if toMilliseconds(timestamp) > 0: 210 | res.add(" " & $toMilliseconds(timestamp)) 211 | 212 | proc addText(res: var string, metric: Metric) = 213 | addText( 214 | res, metric.name, metric.value, metric.labels, metric.labelValues, 215 | metric.timestamp, 216 | ) 217 | 218 | proc `$`*(metric: Metric): string = 219 | addText(result, metric) 220 | 221 | const 222 | nameRegexStr = r"^[a-zA-Z_:][a-zA-Z0-9_:]*$" 223 | labelRegexStr = r"^[a-zA-Z_][a-zA-Z0-9_]*$" 224 | 225 | labelStartChars = {'a' .. 'z', 'A' ..
'Z', '_'} 226 | labelChars = labelStartChars + {'0' .. '9'} 227 | nameStartChars = labelStartChars + {':'} 228 | nameChars = labelChars + {':'} 229 | 230 | template validate(ident: string, startChars, chars: typed): bool = 231 | ident.len > 0 and ident[0] in startChars and ident.allIt(it in chars) 232 | 233 | proc validateName(name: string) {.raises: [ValueError].} = 234 | if not validate(name, nameStartChars, nameChars): 235 | raise newException( 236 | ValueError, 237 | "Invalid name: '" & name & "'. It should match the regex: " & nameRegexStr, 238 | ) 239 | 240 | proc validateLabels( 241 | labels: openArray[string], invalidLabelNames: openArray[string] = [] 242 | ) {.raises: [ValueError].} = 243 | for label in labels: 244 | if not validate(label, labelStartChars, labelChars): 245 | raise newException( 246 | ValueError, 247 | "Invalid label: '" & label & "'. It should match the regex: '" & labelRegexStr & 248 | "'.", 249 | ) 250 | if label.startsWith("__"): 251 | raise newException( 252 | ValueError, "Invalid label: '" & label & "'. It should not start with '__'." 253 | ) 254 | if label in invalidLabelNames: 255 | raise newException( 256 | ValueError, 257 | "Invalid label: '" & label & "'. 
It should not be one of: " & 258 | $invalidLabelNames & ".", 259 | ) 260 | 261 | ###################### 262 | # generic collectors # 263 | ###################### 264 | 265 | when defined(metrics): 266 | template withLabelValues( 267 | collector: SimpleCollector, 268 | labelValues: openArray[string], 269 | metricSym, keySym, body, construct: untyped, 270 | ) = 271 | if labelValues.len > 0 and labelValues.len != collector.labels.len: 272 | printError( 273 | "The number of label values doesn't match the number of labels: " & 274 | collector.name 275 | ) 276 | else: 277 | withLock(collector.lock): 278 | let pos = 279 | collector.metricKeys.data().lowerBound(LabelKey.view(labelValues), cmp) 280 | if pos == collector.metricKeys.len or 281 | collector.metricKeys[pos] != LabelKey.view(labelValues): 282 | let keySym = LabelKey.init(labelValues) 283 | collector.metricKeys.insert(keySym, pos) 284 | collector.metrics.insert(construct, pos) 285 | 286 | template metricSym(): untyped = 287 | collector.metrics[pos] 288 | 289 | body 290 | 291 | method hash*(collector: Collector): Hash {.base.} = 292 | result = result !& collector.name.hash 293 | for label in collector.labels: 294 | result = result !& label.hash 295 | result = !$result 296 | 297 | # `hash` and equals must match 298 | method `==`*(x, y: Collector): bool {.base.} = 299 | x.name == y.name and x.labels == y.labels 300 | 301 | proc now*(collector: Collector): Time = 302 | if collector.timestamp: 303 | getTime() 304 | else: 305 | Time() 306 | 307 | proc call(output: MetricHandler, metric: Metric) = 308 | output( 309 | $metric.name, 310 | metric.value, 311 | toStringSeq(metric.labels), 312 | toStringSeq(metric.labelValues), 313 | metric.timestamp, 314 | ) 315 | 316 | method collect*(collector: Collector, output: MetricHandler) {.base.} = 317 | discard 318 | 319 | method collect*(collector: SimpleCollector, output: MetricHandler) = 320 | {.warning[LockLevel]: off.} 321 | withLock(collector.lock): 322 | for family in 
collector.metrics: 323 | for metric in family: 324 | call(output, metric) 325 | 326 | proc collect*(registry: Registry, output: MetricHandler) = 327 | withLock registry.lock: 328 | for collector in registry.collectors: 329 | collector.collect(output) 330 | 331 | proc addText(res: var string, collector: Collector) = 332 | res.add collector.help 333 | res.add collector.typ 334 | 335 | let resPtr = addr res 336 | 337 | proc addMetric( 338 | name: string, 339 | value: float64, 340 | labels, labelValues: openArray[string], 341 | timestamp: Time, 342 | ) = 343 | addText(resPtr[], name, value, labels, labelValues, timestamp) 344 | resPtr[].add "\n" 345 | 346 | collect(collector, addMetric) 347 | 348 | proc `$`*(collector: Collector): string = 349 | addText(result, collector) 350 | 351 | proc `$`*(collector: type IgnoredCollector): string = 352 | "" 353 | 354 | when defined(metrics): 355 | template localGlobal(init: untyped): untyped = 356 | when (NimMajor, NimMinor) == (2, 0) and (defined(gcOrc) or defined(gcArc)): 357 | {.error: "Globals are too broken in Nim 2.0/ORC/ARC".} 358 | 359 | # https://github.com/status-im/nim-metrics/pull/5#discussion_r304687474 360 | # https://github.com/nim-lang/Nim/issues/24940 361 | var res {.global.}: typeof(init) 362 | if isNil(res): 363 | res = init 364 | res 365 | 366 | proc valueImpl*( 367 | collector: Collector, labelValues: openArray[string] = [] 368 | ): float64 {.gcsafe, raises: [KeyError].} = 369 | var res = NaN 370 | # Don't access the "metrics" field directly, so we can support custom 371 | # collectors. 
372 | {.gcsafe.}: 373 | let wantedLabelValues = @labelValues # seq copy of the requested label values; the handler's own `labelValues` parameter shadows the outer one 374 | proc findMetric( 375 | name: string, 376 | value: float64, 377 | labels, labelValues: openArray[string], 378 | timestamp: Time, 379 | ) = 380 | if res != res and labelValues == wantedLabelValues: # keep only the first metric whose label values match the request 381 | res = value 382 | collect(collector, findMetric) 383 | if res != res: # NaN 384 | raise newException( 385 | KeyError, 386 | "No such metric for this collector (label values = " & $(@labelValues) & ").", 387 | ) 388 | res 389 | 390 | template value*( 391 | collector: Collector | type IgnoredCollector, 392 | labelValuesParam: openArray[string] = [], 393 | ): float64 {.testOnly.} = 394 | when defined(metrics) and collector is not IgnoredCollector: 395 | {.gcsafe.}: 396 | valueImpl(collector, labelValuesParam) 397 | else: 398 | 0.0'f64 399 | 400 | proc valueByNameInternal*( 401 | collector: Collector | type IgnoredCollector, 402 | metricName: string, 403 | labelValues: openArray[string] = [], 404 | extraLabelValues: openArray[string] = [], 405 | ): float64 {.raises: [ValueError].} = 406 | when defined(metrics) and collector is not IgnoredCollector: 407 | var res = NaN 408 | let allLabelValues = @labelValues & @extraLabelValues 409 | proc findMetric( 410 | name: string, 411 | value: float64, 412 | labels, labelValues: openArray[string], 413 | timestamp: Time, 414 | ) = 415 | if res != res and name == metricName and labelValues == allLabelValues: 416 | res = value 417 | 418 | collect(collector, findMetric) 419 | if res == res: 420 | return res 421 | 422 | raise newException( 423 | KeyError, 424 | "No such metric name for this collector: '" & metricName & "' (label values = " & 425 | $allLabelValues & ").", 426 | ) 427 | 428 | template valueByName*( 429 | collector: Collector | type IgnoredCollector, 430 | metricName: string, 431 | labelValues: openArray[string] = [], 432 | extraLabelValues: openArray[string] = [], 433 | ): float64 {.testOnly.} = 434 | {.gcsafe.}: 435 | valueByNameInternal(collector, metricName, labelValues, extraLabelValues) 436 | 
437 | ############ 438 | # registry # 439 | ############ 440 | 441 | proc newRegistry*(): Registry = 442 | when defined(metrics): 443 | new(result) 444 | result.lock.initLock() 445 | result.creationThreadId = getThreadId() 446 | 447 | # needs to be {.global.} because of the alternative API's usage of {.global.} collector vars 448 | let defaultRegistry* {.global.} = newRegistry() 449 | 450 | # We use a generic type here in order to avoid the hidden type casting of 451 | # Collector child types to the parent type. 452 | proc register*[T]( 453 | collector: T, registry = defaultRegistry 454 | ) {.raises: [RegistrationError].} = 455 | when defined(metrics): 456 | # TODO To relax this, collectors can no longer be `ref object` 457 | if registry.creationThreadId != getThreadId(): 458 | printError( 459 | "New collectors / metrics must be added from same thread as the registry was created from: " & 460 | collector.name 461 | ) 462 | 463 | withLock registry.lock: 464 | if collector in registry.collectors: 465 | raise newException( 466 | RegistrationError, "Collector already registered: " & collector.name 467 | ) 468 | 469 | registry.collectors.incl(collector) 470 | 471 | proc unregister*[T]( 472 | collector: T, registry = defaultRegistry 473 | ) {.raises: [RegistrationError].} = 474 | when defined(metrics) and collector is not IgnoredCollector: 475 | withLock registry.lock: 476 | if collector notin registry.collectors: 477 | raise newException(RegistrationError, "Collector not registered.") 478 | 479 | registry.collectors.excl(collector) 480 | 481 | proc unregister*(collector: type IgnoredCollector, registry = defaultRegistry) = 482 | discard 483 | 484 | proc len(registry: Registry): int = 485 | when defined(metrics): 486 | withLock registry.lock: 487 | return registry.collectors.len() 488 | else: 489 | 0 490 | 491 | proc addText(res: var string, registry: Registry) = 492 | when defined(metrics): 493 | withLock registry.lock: 494 | for collector in registry.collectors: 495 | 
res.addText(collector) 496 | res.add("\n") 497 | 498 | proc toText*(registry: Registry): string = 499 | result = newStringOfCap(registry.len() * 64) 500 | result.addText(registry) 501 | 502 | proc `$`*(registry: Registry): string = 503 | addText(result, registry) 504 | 505 | ##################### 506 | # custom collectors # 507 | ##################### 508 | 509 | when defined(metrics): 510 | # Used for custom collectors, to shield the API user from having to deal with 511 | # internal details like lock initialisation. 512 | # Also used internally, for creating standard collectors, to avoid code 513 | # duplication. 514 | proc newCollector*[T]( 515 | typ: typedesc[T], 516 | name: string, 517 | help: string, 518 | labels: openArray[string] = [], 519 | registry = defaultRegistry, 520 | standardType = "gauge", 521 | timestamp = false, 522 | ): T {.raises: [ValueError, RegistrationError].} = 523 | validateName(name) 524 | validateLabels(labels) 525 | result = T( 526 | name: name, 527 | help: processHelp(name, help), 528 | typ: processType(name, standardType), 529 | # Prometheus does not support a non-standard value here 530 | labels: @labels, 531 | timestamp: timestamp, 532 | ) 533 | result.lock.initLock() 534 | result.register(registry) 535 | 536 | when defined(metrics): 537 | proc updateSystemMetrics*() {.gcsafe.} # defined later in this file 538 | var systemMetricsAutomaticUpdate = true 539 | # whether to piggy-back on changes of user-defined metrics 540 | 541 | proc getSystemMetricsAutomaticUpdate*(): bool = 542 | systemMetricsAutomaticUpdate 543 | 544 | proc setSystemMetricsAutomaticUpdate*(value: bool) = 545 | systemMetricsAutomaticUpdate = value 546 | 547 | ########### 548 | # counter # 549 | ########### 550 | 551 | when defined(metrics): 552 | proc newCounterMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] = 553 | ShSeq.init( 554 | [ 555 | Metric( 556 | name: cstring.createShared(name & "_total"), 557 | labels: labels, 558 | labelValues: 
labelValues, 559 | ), 560 | Metric( 561 | name: cstring.createShared(name & "_created"), 562 | labels: labels, 563 | labelValues: labelValues, 564 | value: getTime().toUnix().float64, 565 | ), 566 | ] 567 | ) 568 | 569 | # don't document this one, even if we're forced to make it public, because it 570 | # won't work when all (or some) collectors are disabled 571 | proc newCounter*( 572 | name: string, 573 | help: string, 574 | labels: openArray[string] = [], 575 | registry = defaultRegistry, 576 | timestamp = false, 577 | ): Counter {.raises: [ValueError, RegistrationError].} = 578 | result = Counter.newCollector(name, help, labels, registry, "counter", timestamp) 579 | if labels.len == 0: 580 | result.metrics.add newCounterMetrics(name, CStringArr(), CStringArr()) 581 | result.metricKeys.add LabelKey.init(labels) 582 | 583 | proc incCounter(counter: Counter, amount: float64, labelValues: openArray[string]) = 584 | if amount < 0: 585 | printError( 586 | "Counter.inc() cannot be used with negative amounts: " & $counter.name & "=" & 587 | $amount 588 | ) 589 | return 590 | 591 | let timestamp = counter.now() 592 | withLabelValues(counter, labelValues, valueSym, keySym): 593 | valueSym[0].value += amount 594 | valueSym[0].timestamp = timestamp 595 | do: 596 | newCounterMetrics( 597 | counter.name, CStringArr.createShared(counter.labels), keySym.data 598 | ) 599 | 600 | updateSystemMetrics() 601 | 602 | template declareCounter*( 603 | identifier: untyped, 604 | help: static string, 605 | labels: openArray[string] = [], 606 | registry = defaultRegistry, 607 | name = "", 608 | timestamp = false, 609 | ) {.dirty.} = 610 | # fine-grained collector disabling will go in here, turning disabled 611 | # collectors into type aliases for IgnoredCollector 612 | when defined(metrics): 613 | let identifier = 614 | newCounter(nameOrIdentifier(identifier, name), help, labels, registry, timestamp) 615 | else: 616 | type identifier = IgnoredCollector 617 | 618 | template 
declarePublicCounter*( 619 | identifier: untyped, 620 | help: static string, 621 | labels: openArray[string] = [], 622 | registry = defaultRegistry, 623 | name = "", 624 | timestamp = false, 625 | ) {.dirty.} = 626 | when defined(metrics): 627 | let identifier* = 628 | newCounter(nameOrIdentifier(identifier, name), help, labels, registry, timestamp) 629 | else: 630 | type identifier* = IgnoredCollector 631 | 632 | #- alternative API (without support for custom help strings, labels or custom registries) 633 | #- different collector types with the same names are allowed 634 | when defined(metrics): 635 | proc counter*( 636 | name: static string 637 | ): Counter {.raises: [ValueError, RegistrationError].} = 638 | # This {.global.} var assignment is lifted from the procedure and placed in a 639 | # special module init section that's guaranteed to run only once per program. 640 | # Calls to this proc will just return the globally initialised variable. 641 | localGlobal(newCounter(name, "")) 642 | 643 | else: 644 | template counter*(name: static string): untyped = 645 | IgnoredCollector 646 | 647 | template inc*( 648 | counter: Counter | type IgnoredCollector, 649 | amount: int64 | float64 = 1, 650 | labelValues: openArray[string] = [], 651 | ) = 652 | when defined(metrics) and counter is not IgnoredCollector: 653 | {.gcsafe.}: 654 | incCounter(counter, amount.float64, labelValues) 655 | 656 | template countExceptions*( 657 | counter: Counter | type IgnoredCollector, 658 | typ: typedesc, 659 | labelValues: openArray[string], 660 | body: untyped, 661 | ) = 662 | when defined(metrics) and counter is not IgnoredCollector: 663 | try: 664 | body 665 | except typ as exc: 666 | counter.inc(1, labelValues) 667 | raise exc 668 | else: 669 | body 670 | 671 | template countExceptions*( 672 | counter: Counter | type IgnoredCollector, typ: typedesc, body: untyped 673 | ) = 674 | when defined(metrics) and counter is not IgnoredCollector: 675 | counter.countExceptions(typ, []): 676 | 
body 677 | else: 678 | body 679 | 680 | template countExceptions*( 681 | counter: Counter | type IgnoredCollector, 682 | labelValues: openArray[string], 683 | body: untyped, 684 | ) = 685 | countExceptions(counter, Exception, labelValues, body) 686 | 687 | template countExceptions*(counter: Counter | type IgnoredCollector, body: untyped) = 688 | when defined(metrics) and counter is not IgnoredCollector: 689 | counter.countExceptions([]): 690 | body 691 | else: 692 | body 693 | 694 | ######### 695 | # gauge # 696 | ######### 697 | 698 | when defined(metrics): 699 | proc newGaugeMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] = 700 | ShSeq.init([Metric(name: cstring.createShared(name), labels: labels, labelValues: labelValues)]) # shared-memory copy of the name: Metric cannot use GC types, so it must not point into the `name` string 701 | 702 | proc newGauge*( 703 | name: string, 704 | help: string, 705 | labels: openArray[string] = [], 706 | registry = defaultRegistry, 707 | timestamp = false, 708 | ): Gauge {.raises: [ValueError, RegistrationError].} = 709 | result = Gauge.newCollector(name, help, labels, registry, "gauge", timestamp) 710 | if labels.len == 0: 711 | result.metrics.add newGaugeMetrics(name, CStringArr(), CStringArr()) 712 | result.metricKeys.add LabelKey.init(labels) 713 | 714 | proc incGauge(gauge: Gauge, amount: float64, labelValues: openArray[string]) = 715 | let timestamp = gauge.now() 716 | 717 | withLabelValues(gauge, labelValues, valueSym, keySym): 718 | valueSym[0].value += amount 719 | valueSym[0].timestamp = timestamp 720 | do: 721 | newGaugeMetrics(gauge.name, CStringArr.createShared(gauge.labels), keySym.data) 722 | 723 | updateSystemMetrics() 724 | 725 | proc setGauge( 726 | gauge: Gauge, 727 | value: float64, 728 | labelValues: openArray[string], 729 | doUpdateSystemMetrics: bool, 730 | ) = 731 | let timestamp = gauge.now() 732 | 733 | withLabelValues(gauge, labelValues, valueSym, keySym): 734 | valueSym[0].value = value.float64 735 | valueSym[0].timestamp = timestamp 736 | do: 737 | newGaugeMetrics(gauge.name,
CStringArr.createShared(gauge.labels), keySym.data) 738 | 739 | if doUpdateSystemMetrics: 740 | updateSystemMetrics() 741 | 742 | template declareGauge*( 743 | identifier: untyped, 744 | help: static string, 745 | labels: openArray[string] = [], 746 | registry = defaultRegistry, 747 | name = "", 748 | timestamp = false, 749 | ) {.dirty.} = 750 | when defined(metrics): 751 | var identifier = 752 | newGauge(nameOrIdentifier(identifier, name), help, labels, registry, timestamp) 753 | else: 754 | type identifier = IgnoredCollector 755 | 756 | # alternative API 757 | when defined(metrics): 758 | proc gauge*(name: static string): Gauge {.raises: [ValueError, RegistrationError].} = 759 | localGlobal(newGauge(name, "")) 760 | 761 | else: 762 | template gauge*(name: static string): untyped = 763 | IgnoredCollector 764 | 765 | template declarePublicGauge*( 766 | identifier: untyped, 767 | help: static string, 768 | labels: openArray[string] = [], 769 | registry = defaultRegistry, 770 | name = "", 771 | timestamp = false, 772 | ) {.dirty.} = 773 | when defined(metrics): 774 | var identifier* = 775 | newGauge(nameOrIdentifier(identifier, name), help, labels, registry, timestamp) 776 | else: 777 | type identifier* = IgnoredCollector 778 | 779 | # the "type IgnoredCollector" case is covered by Counter.inc() 780 | template inc*( 781 | gauge: Gauge, amount: int64 | float64 = 1, labelValues: openArray[string] = [] 782 | ) = 783 | when defined(metrics): 784 | {.gcsafe.}: 785 | incGauge(gauge, amount.float64, labelValues) 786 | 787 | template dec*( 788 | gauge: Gauge | type IgnoredCollector, 789 | amount: int64 | float64 = 1, 790 | labelValues: openArray[string] = [], 791 | ) = 792 | when defined(metrics) and gauge is not IgnoredCollector: 793 | inc(gauge, -amount, labelValues) 794 | 795 | template set*( 796 | gauge: Gauge | type IgnoredCollector, 797 | value: int64 | float64, 798 | labelValues: openArray[string] = [], 799 | doUpdateSystemMetrics = true, 800 | ) = 801 | when 
defined(metrics) and gauge is not IgnoredCollector: 802 | {.gcsafe.}: 803 | setGauge(gauge, value.float64, labelValues, doUpdateSystemMetrics) 804 | 805 | # in seconds 806 | proc setToCurrentTime*( 807 | gauge: Gauge | type IgnoredCollector, labelValues: openArray[string] = [] 808 | ) = 809 | when defined(metrics) and gauge is not IgnoredCollector: 810 | gauge.set(getTime().toUnix(), labelValues) 811 | 812 | template trackInProgress*( 813 | gauge: Gauge | type IgnoredCollector, labelValues: openArray[string], body: untyped 814 | ) = 815 | when defined(metrics) and gauge is not IgnoredCollector: 816 | gauge.inc(1, labelValues) 817 | body 818 | gauge.dec(1, labelValues) 819 | else: 820 | body 821 | 822 | template trackInProgress*(gauge: Gauge | type IgnoredCollector, body: untyped) = 823 | when defined(metrics) and gauge is not IgnoredCollector: 824 | gauge.trackInProgress([]): 825 | body 826 | else: 827 | body 828 | 829 | # in seconds 830 | template time*( 831 | gauge: Gauge | type IgnoredCollector, labelValues: openArray[string], body: untyped 832 | ) = 833 | when defined(metrics) and gauge is not IgnoredCollector: 834 | let start = times.toUnix(getTime()) 835 | body 836 | gauge.set(times.toUnix(getTime()) - start, labelValues) 837 | else: 838 | body 839 | 840 | template time*( 841 | collector: Gauge | Summary | Histogram | type IgnoredCollector, body: untyped 842 | ) = 843 | when defined(metrics) and collector is not IgnoredCollector: 844 | collector.time([]): 845 | body 846 | else: 847 | body 848 | 849 | ########### 850 | # summary # 851 | ########### 852 | 853 | when defined(metrics): 854 | proc newSummaryMetrics(name: string, labels, labelValues: CStringArr): ShSeq[Metric] = 855 | ShSeq.init( 856 | [ 857 | Metric( 858 | name: cstring.createShared(name & "_sum"), 859 | labels: labels, 860 | labelValues: labelValues, 861 | ), 862 | Metric( 863 | name: cstring.createShared(name & "_count"), 864 | labels: labels, 865 | labelValues: labelValues, 866 | ), 867 | 
Metric( 868 | name: cstring.createShared(name & "_created"), 869 | labels: labels, 870 | labelValues: labelValues, 871 | value: getTime().toUnix().float64, 872 | ), 873 | ] 874 | ) 875 | 876 | proc newSummary*( 877 | name: string, 878 | help: string, 879 | labels: openArray[string] = [], 880 | registry = defaultRegistry, 881 | timestamp = false, 882 | ): Summary {.raises: [ValueError, RegistrationError].} = 883 | validateLabels(labels, invalidLabelNames = ["quantile"]) 884 | result = Summary.newCollector(name, help, labels, registry, "summary", timestamp) 885 | if labels.len == 0: 886 | result.metrics.add newSummaryMetrics(name, CStringArr(), CStringArr()) 887 | result.metricKeys.add LabelKey.init(labels) 888 | 889 | proc observeSummary( 890 | summary: Summary, amount: float64, labelValues: openArray[string] 891 | ) = 892 | let timestamp = summary.now() 893 | 894 | withLabelValues(summary, labelValues, valueSym, keySym): 895 | valueSym[0].value += amount # _sum 896 | valueSym[0].timestamp = timestamp 897 | valueSym[1].value += 1.float64 # _count 898 | valueSym[1].timestamp = timestamp 899 | do: 900 | newSummaryMetrics( 901 | summary.name, CStringArr.createShared(summary.labels), keySym.data 902 | ) 903 | 904 | template declareSummary*( 905 | identifier: untyped, 906 | help: static string, 907 | labels: openArray[string] = [], 908 | registry = defaultRegistry, 909 | name = "", 910 | ) {.dirty.} = 911 | when defined(metrics): 912 | let identifier = 913 | newSummary(nameOrIdentifier(identifier, name), help, labels, registry) 914 | else: 915 | type identifier = IgnoredCollector 916 | 917 | template declarePublicSummary*( 918 | identifier: untyped, 919 | help: static string, 920 | labels: openArray[string] = [], 921 | registry = defaultRegistry, 922 | name = "", 923 | ) {.dirty.} = 924 | when defined(metrics): 925 | let identifier* = 926 | newSummary(nameOrIdentifier(identifier, name), help, labels, registry) 927 | else: 928 | type identifier* = IgnoredCollector 929 | 

when defined(metrics):
  proc summary*(
      name: static string
  ): Summary {.raises: [ValueError, RegistrationError].} =
    ## Returns a label-less summary called `name` with an empty help string,
    ## wrapped in `localGlobal` around `newSummary`.
    localGlobal(newSummary(name, ""))

else:
  template summary*(name: static string): untyped =
    IgnoredCollector

template observe*(
    summary: Summary | type IgnoredCollector,
    amount: int64 | float64 = 1,
    labelValues: openArray[string] = [],
) =
  ## Records `amount` in `summary` (adds it to `_sum`, bumps `_count`).
  ## Expands to nothing when metrics are disabled or the collector is ignored.
  when defined(metrics) and summary is not IgnoredCollector:
    {.gcsafe.}:
      observeSummary(summary, amount.float64, labelValues)

# in seconds
# the "type IgnoredCollector" case and the version without labels are covered by Gauge.time()
template time*(
    collector: Summary | Histogram, labelValues: openArray[string], body: untyped
) =
  ## Runs `body` and observes its wall-clock duration, in (fractional) seconds.
  when defined(metrics):
    # `toUnixFloat` keeps sub-second resolution; `toUnix` would truncate to
    # whole seconds and report every short-lived body as 0.
    let start = times.toUnixFloat(getTime())
    body
    collector.observe(times.toUnixFloat(getTime()) - start, labelValues)
  else:
    body

#############
# histogram #
#############

const defaultHistogramBuckets* =
  [0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, Inf]
when defined(metrics):
  proc newHistogramMetrics(
      name: string, labels, labelValues: CStringArr, buckets: seq[float64]
  ): ShSeq[Metric] =
    ## Builds the metric series for one label combination of a histogram:
    ## index 0 is `_sum`, 1 is `_count`, 2 is `_created`, followed by one
    ## `_bucket` entry per element of `buckets` (index `i + 3`).
    result = ShSeq.init(
      [
        Metric(
          name: cstring.createShared(name & "_sum"),
          labels: labels,
          labelValues: labelValues,
        ),
        Metric(
          name: cstring.createShared(name & "_count"),
          labels: labels,
          labelValues: labelValues,
        ),
        Metric(
          name: cstring.createShared(name & "_created"),
          labels: labels,
          labelValues: labelValues,
          value: getTime().toUnix().float64,
        ),
      ]
    )
    let
      # bucket series carry one extra label, "le", appended after the user labels
      bucketLabels = CStringArr.createShared(labels.toStringSeq & "le")
      labelValues = labelValues.toStringSeq()
    for bucket in buckets:
      let bucketStr =
        if bucket == Inf:
          "+Inf"
        else:
          $bucket
      result.add(
        Metric(
          name: cstring.createShared(name & "_bucket"),
          labels: bucketLabels,
          labelValues: CStringArr.createShared(@labelValues & bucketStr),
        )
      )

  proc newHistogram*(
      name: string,
      help: string,
      labels: openArray[string] = [],
      registry = defaultRegistry,
      buckets: openArray[float64] = defaultHistogramBuckets,
      timestamp = false,
  ): Histogram {.raises: [ValueError, RegistrationError].} =
    ## Creates a histogram collector.
    ##
    ## `buckets` are upper bounds; `Inf` is appended when the last bound is
    ## not already `Inf`. Raises `ValueError` when fewer than two bounds
    ## remain or when the bounds are not sorted. "le" is rejected as a label
    ## name because it is used for the bucket series.
    validateLabels(labels, invalidLabelNames = ["le"])
    var bucketsSeq = @buckets
    if bucketsSeq.len > 0 and bucketsSeq[^1] != Inf:
      bucketsSeq.add(Inf)
    if bucketsSeq.len < 2:
      raise newException(
        ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. At least 2 required."
      )
    if not bucketsSeq.isSorted(system.cmp[float64]):
      raise newException(
        ValueError, "Invalid buckets list: '" & $bucketsSeq & "'. Must be sorted."
      )
    result =
      Histogram.newCollector(name, help, labels, registry, "histogram", timestamp)
    result.buckets = bucketsSeq
    if labels.len == 0:
      # a label-less histogram has exactly one series set, created up front
      result.metrics.add newHistogramMetrics(
        name, CStringArr(), CStringArr(), bucketsSeq
      )
      result.metricKeys.add LabelKey.init(labels)

  proc observeHistogram(
      histogram: Histogram, amount: float64, labelValues: openArray[string]
  ) =
    ## Adds `amount` to `_sum`, increments `_count` and increments every
    ## bucket whose upper bound is >= `amount`.
    let timestamp = histogram.now()
    withLabelValues(histogram, labelValues, valueSym, keySym):
      valueSym[0].value += amount # _sum
      valueSym[0].timestamp = timestamp
      valueSym[1].value += 1.float64 # _count
      valueSym[1].timestamp = timestamp
      for i, bucket in histogram.buckets:
        if amount.float64 <= bucket:
          #- "le" probably stands for "less or equal"
          #- the same observed value can increase multiple buckets, because this is
          #  a cumulative histogram
          valueSym[i + 3].value += 1.float64 # _bucket{le="<bucket value>"}
          valueSym[i + 3].timestamp = timestamp
    do:
      newHistogramMetrics(
        histogram.name,
        CStringArr.createShared(histogram.labels),
        keySym.data,
        histogram.buckets,
      )

template declareHistogram*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    buckets: openArray[float64] = defaultHistogramBuckets,
    name = "",
    timestamp = false,
) {.dirty.} =
  # Declares `identifier` as a histogram, or as an alias of `IgnoredCollector`
  # when metrics are disabled.
  when defined(metrics):
    let identifier = newHistogram(
      nameOrIdentifier(identifier, name), help, labels, registry, buckets, timestamp
    )
  else:
    type identifier = IgnoredCollector

template declarePublicHistogram*(
    identifier: untyped,
    help: static string,
    labels: openArray[string] = [],
    registry = defaultRegistry,
    buckets: openArray[float64] = defaultHistogramBuckets,
    name = "",
    timestamp = false,
) {.dirty.} =
  # Same as `declareHistogram`, but the declared symbol is exported.
  when defined(metrics):
    let identifier* = newHistogram(
      nameOrIdentifier(identifier, name), help, labels, registry, buckets, timestamp
    )
  else:
    type identifier* = IgnoredCollector

when defined(metrics):
  proc histogram*(
      name: static string
  ): Histogram {.raises: [ValueError, RegistrationError].} =
    ## Returns a label-less histogram called `name` with an empty help string
    ## and the default buckets, wrapped in `localGlobal` around `newHistogram`.
    localGlobal(newHistogram(name, ""))

else:
  template histogram*(name: static string): untyped =
    IgnoredCollector

# the "type IgnoredCollector" case is covered by Summary.observe()
template observe*(
    histogram: Histogram,
    amount: int64 | float64 = 1,
    labelValues: openArray[string] = [],
) =
  when defined(metrics):
    {.gcsafe.}:
      observeHistogram(histogram, amount.float64, labelValues)

#########################
# update system metrics #
#########################

when defined(metrics):
  let mainThreadID = getThreadId()
  var
    systemMetricsUpdateInterval = initDuration(seconds = 10)
    systemMetricsLastUpdated = getMonoTime()

  proc getSystemMetricsUpdateInterval*(): Duration =
    ## Returns the interval used by `updateSystemMetrics` between automatic
    ## refreshes.
    return systemMetricsUpdateInterval

  proc setSystemMetricsUpdateInterval*(value: Duration) =
    ## Sets the interval used by `updateSystemMetrics` between automatic
    ## refreshes.
    systemMetricsUpdateInterval = value

  proc updateThreadMetrics*() {.gcsafe.}
    ## Function that should regularly be called from within each thread for
    ## which per-thread metrics are desired - currently, this is limited to
    ## GC heap statistics.
1133 | 1134 | proc updateSystemMetrics*() = 1135 | ## Update metrics related to the main application thread 1136 | if systemMetricsAutomaticUpdate: 1137 | # Update system metrics if at least systemMetricsUpdateInterval seconds 1138 | # have passed and if we are being called from the main thread. 1139 | if getThreadId() == mainThreadID: 1140 | let currTime = getMonoTime() 1141 | if currTime >= (systemMetricsLastUpdated + systemMetricsUpdateInterval): 1142 | systemMetricsLastUpdated = currTime 1143 | # Update thread metrics, only when automation is on and we're in the 1144 | # main thread. 1145 | updateThreadMetrics() 1146 | 1147 | ################ 1148 | # process info # 1149 | ################ 1150 | 1151 | when defined(metrics) and defined(linux): 1152 | from posix import sysconf, SC_CLK_TCK, SC_PAGESIZE 1153 | var 1154 | btime {.global.}: float64 = 0 1155 | ticks {.global.}: float64 # clock ticks per second 1156 | pagesize {.global.}: float64 # page size in bytes 1157 | 1158 | if btime == 0: 1159 | try: 1160 | for line in lines("/proc/stat"): 1161 | if line.startsWith("btime"): 1162 | btime = line.split(' ')[1].parseFloat() 1163 | except IOError: 1164 | # /proc not mounted? 
1165 | discard 1166 | ticks = sysconf(SC_CLK_TCK).float64 1167 | pagesize = sysconf(SC_PAGESIZE).float64 1168 | 1169 | type ProcessInfo = ref object of Gauge 1170 | var processInfo* {.global.} = 1171 | ProcessInfo.newCollector("process_info", "CPU and memory usage") 1172 | 1173 | method collect*(collector: ProcessInfo, output: MetricHandler) = 1174 | let timestamp = collector.now() 1175 | 1176 | try: 1177 | if btime == 0: 1178 | # we couldn't access /proc 1179 | return 1180 | 1181 | # the content of /proc/self/stat looks like this (the command name may contain spaces): 1182 | # 1183 | # $ cat /proc/self/stat 1184 | # 30494 (cat) R 3022 30494 3022 34830 30494 4210688 98 0 0 0 0 0 0 0 20 0 1 0 73800491 10379264 189 18446744073709551615 94060049248256 94060049282149 140735229395104 0 0 0 0 0 0 0 0 0 17 6 0 0 0 0 0 94060049300560 94060049302112 94060076990464 140735229397011 140735229397031 140735229397031 140735229403119 0 1185 | let selfStat = readFile("/proc/self/stat").split(") ")[^1].split(' ') 1186 | output( 1187 | name = "process_virtual_memory_bytes", # Virtual memory size in bytes. 1188 | value = selfStat[20].parseFloat(), 1189 | timestamp = timestamp, 1190 | ) 1191 | 1192 | output( 1193 | name = "process_resident_memory_bytes", # Resident memory size in bytes. 1194 | value = selfStat[21].parseFloat() * pagesize, 1195 | timestamp = timestamp, 1196 | ) 1197 | output( 1198 | name = "process_start_time_seconds", 1199 | # Start time of the process since unix epoch in seconds. 1200 | value = selfStat[19].parseFloat() / ticks + btime, 1201 | timestamp = timestamp, 1202 | ) 1203 | output( 1204 | name = "process_cpu_seconds_total", 1205 | # Total user and system CPU time spent in seconds. 
1206 | value = (selfStat[11].parseFloat() + selfStat[12].parseFloat()) / ticks, 1207 | timestamp = timestamp, 1208 | ) 1209 | 1210 | for line in lines("/proc/self/limits"): 1211 | if line.startsWith("Max open files"): 1212 | output( 1213 | name = "process_max_fds", # Maximum number of open file descriptors. 1214 | value = line.splitWhitespace()[3].parseFloat(), 1215 | # a simple `split()` does not combine adjacent whitespace 1216 | timestamp = timestamp, 1217 | ) 1218 | break 1219 | 1220 | output( 1221 | name = "process_open_fds", # Number of open file descriptors. 1222 | value = toSeq(walkDir("/proc/self/fd")).len.float64, 1223 | timestamp = timestamp, 1224 | ) 1225 | except CatchableError as e: 1226 | printError(e.msg) 1227 | 1228 | #################### 1229 | # Nim runtime info # 1230 | #################### 1231 | 1232 | when defined(metrics): 1233 | type NimRuntimeInfo = ref object of Collector 1234 | let nimRuntimeInfo* {.global.} = 1235 | NimRuntimeInfo.newCollector("nim_runtime_info", "Nim runtime info") 1236 | 1237 | method collect*(collector: NimRuntimeInfo, output: MetricHandler) = 1238 | try: 1239 | when defined(nimTypeNames) and declared(dumpHeapInstances): 1240 | # Too high cardinality causes performance issues in Prometheus. 1241 | const labelsLimit = 10 1242 | let timestamp = collector.now() 1243 | var 1244 | # Higher size than in the loop for adding metrics 1245 | # to avoid missing same name metrics far apart with low values. 
1246 | heapSizes: array[100, (cstring, int)] 1247 | counter: int 1248 | heapSum: int # total size of all instances 1249 | for data in dumpHeapInstances(): 1250 | counter += 1 1251 | heapSum += data.sizes 1252 | var smallest = 0 1253 | var dedupe = false 1254 | for i in 0 ..< heapSizes.len: 1255 | if heapSizes[i][0] == data.name: 1256 | heapSizes[i][1] += data.sizes 1257 | dedupe = true 1258 | break 1259 | if heapSizes[smallest][1] >= heapSizes[i][1]: 1260 | smallest = i 1261 | if not dedupe and data.sizes > heapSizes[smallest][1]: 1262 | heapSizes[smallest] = (data.name, data.sizes) 1263 | sort( 1264 | heapSizes, 1265 | proc(a, b: auto): auto = 1266 | b[1] - a[1], 1267 | ) 1268 | # Lower the number of metrics to reduce metric cardinality. 1269 | for i in 0 ..< labelsLimit: 1270 | let (typeName, size) = heapSizes[i] 1271 | output( 1272 | name = "nim_gc_heap_instance_occupied_bytes", 1273 | # total bytes occupied, by instance type (all threads) 1274 | value = size.float64, 1275 | timestamp = timestamp, 1276 | labels = ["type_name"], 1277 | labelValues = [$typeName], 1278 | ) 1279 | 1280 | output( 1281 | name = "nim_gc_heap_instance_occupied_summed_bytes", 1282 | # total bytes occupied by all instance types, in all threads - should be equal to 'sum(nim_gc_mem_occupied_bytes)' when 'updateThreadMetrics()' is being called in all threads, but it's somewhat smaller 1283 | value = heapSum.float64, 1284 | timestamp = timestamp, 1285 | ) 1286 | except CatchableError as e: 1287 | printError(e.msg) 1288 | 1289 | declareGauge nim_gc_mem_bytes, 1290 | "the number of bytes that are owned by a thread's GC", ["thread_id"] 1291 | declareGauge nim_gc_mem_occupied_bytes, 1292 | "the number of bytes that are owned by a thread's GC and hold data", ["thread_id"] 1293 | 1294 | proc updateThreadMetrics() = 1295 | try: 1296 | let threadID = getThreadId() 1297 | 1298 | when declared(getTotalMem): 1299 | nim_gc_mem_bytes.set( 1300 | getTotalMem().float64, 1301 | labelValues = @[$threadID], 
1302 | doUpdateSystemMetrics = false, 1303 | ) 1304 | 1305 | when declared(getOccupiedMem): 1306 | nim_gc_mem_occupied_bytes.set( 1307 | getOccupiedMem().float64, 1308 | labelValues = @[$threadID], 1309 | doUpdateSystemMetrics = false, 1310 | ) 1311 | 1312 | # TODO: parse the output of `GC_getStatistics()` for more stats 1313 | except CatchableError as e: 1314 | printError(e.msg) 1315 | -------------------------------------------------------------------------------- /metrics.nimble: -------------------------------------------------------------------------------- 1 | mode = ScriptMode.Verbose 2 | 3 | packageName = "metrics" 4 | version = "0.2.0" 5 | author = "Status Research & Development GmbH" 6 | description = "Metrics client library supporting Prometheus" 7 | license = "MIT or Apache License 2.0" 8 | skipDirs = @["tests", "benchmarks"] 9 | 10 | ### Dependencies 11 | requires "nim >= 1.6.14", "chronos >= 4.0.3", "results", "stew" 12 | 13 | let nimc = getEnv("NIMC", "nim") # Which nim compiler to use 14 | let lang = getEnv("NIMLANG", "c") # Which backend (c/cpp/js) 15 | let flags = getEnv("NIMFLAGS", "") # Extra flags for the compiler 16 | let verbose = getEnv("V", "") notin ["", "0"] 17 | 18 | from os import quoteShell 19 | 20 | let cfg = 21 | " --styleCheck:usages --styleCheck:error" & 22 | (if verbose: "" else: " --verbosity:0 --hints:off") & 23 | " --skipParentCfg --skipUserCfg --outdir:build " & 24 | quoteShell("--nimcache:build/nimcache/$projectName") & " -d:metricsTest" 25 | 26 | proc build(args, path: string) = 27 | exec nimc & " " & lang & " " & cfg & " " & flags & " " & args & " " & path 28 | 29 | proc run(args, path: string) = 30 | build args & " --mm:refc -r", path 31 | if (NimMajor, NimMinor) > (1, 6): 32 | build args & " --mm:orc -r", path 33 | 34 | ### tasks 35 | task test, "Main tests": 36 | # build it with metrics disabled, first 37 | build "", "tests/main_tests" 38 | build "--threads:on", "tests/main_tests" 39 | run "-d:metrics --threads:on 
-d:useSysAssert -d:useGcAssert", "tests/main_tests" 40 | 41 | build "", "benchmarks/bench_collectors" 42 | run "-d:metrics --threads:on", "benchmarks/bench_collectors" 43 | 44 | run "", "tests/chronos_server_tests" 45 | run "-d:metrics --threads:on -d:nimTypeNames", "tests/chronos_server_tests" 46 | 47 | task test_chronicles, "Chronicles tests": 48 | build "", "tests/chronicles_tests" 49 | run "-d:metrics --threads:on", "tests/chronicles_tests" 50 | 51 | task benchmark, "Run benchmarks": 52 | run "-d:metrics --debuginfo --threads:on -d:release", "benchmarks/bench_collectors" 53 | -------------------------------------------------------------------------------- /metrics/chronicles_support.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 

from chronicles import formatIt, expandIt

import ../metrics, std/[locks, times]

when defined(metrics):
  import tables

  formatIt(Metric):
    $it

  proc toLog(collector: SimpleCollector): seq[string] =
    # Renders every metric held by the collector, one string per metric,
    # while holding the collector's lock.
    withLock collector.lock:
      for series in collector.metrics:
        for sample in series:
          result.add($sample)

  formatIt(Counter):
    it.toLog

  formatIt(Gauge):
    it.toLog

  formatIt(Summary):
    it.toLog

  formatIt(Histogram):
    it.toLog

  proc toLog(registry: Registry): seq[string] =
    # Collects the whole registry and renders each reported sample through
    # `addText`, one string per sample.
    var rendered: seq[string]
    registry.collect(
      proc(
          name: string,
          value: float64,
          labels: openArray[string],
          labelValues: openArray[string],
          timestamp: Time,
      ) =
        var entry: string
        entry.addText(name, value, labels, labelValues, timestamp)
        rendered.add entry
    )
    rendered

  formatIt(Registry):
    it.toLog
else:
  # not defined(metrics)
  formatIt(Metric):
    "metrics disabled"

  formatIt(Counter):
    "metrics disabled"

  formatIt(Gauge):
    "metrics disabled"

  formatIt(Summary):
    "metrics disabled"

  formatIt(Histogram):
    "metrics disabled"

  formatIt(Registry):
    "metrics disabled"

# ignored collector
expandIt(type IgnoredCollector):
  ignored = "ignored"

--------------------------------------------------------------------------------
/metrics/chronos_httpserver.nim:
--------------------------------------------------------------------------------
# Copyright (c) 2019-2021 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license: http://opensource.org/licenses/MIT
#   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
# at your option. This file may not be copied, modified, or distributed except according to those terms.

################################
# HTTP server (for Prometheus) #
################################

{.push raises: [].}

when defined(nimHasUsed):
  {.used.}

import results
import chronos, chronos/apps/http/httpserver
export chronos, results

type
  MetricsError* = object of CatchableError

  MetricsHttpServerStatus* {.pure.} = enum
    Closed
    Running
    Stopped

  MetricsServerData = object
    when defined(metrics):
      address: TransportAddress
      requestPipe: tuple[read: AsyncFD, write: AsyncFD]
      responsePipe: tuple[read: AsyncFD, write: AsyncFD]

  MetricsHttpServerRef* = ref object
    when defined(metrics):
      data: MetricsServerData
      thread: Thread[MetricsServerData]
      reqTransp: StreamTransport
      respTransp: StreamTransport

  MetricsHttpServerMiddlewareRef* = ref object of HttpServerMiddlewareRef

when defined(metrics):
  import std/os
  import ../metrics, ./common

  var httpServerThread: Thread[TransportAddress]

  proc serveHttp(address: TransportAddress) {.thread.} =
    # Thread body of the deprecated `startMetricsHttpServer` API: binds an
    # HTTP server on `address` and serves "/metrics" and "/health" forever.
    ignoreSignalsInThread()

    proc cb(
        r: RequestFence
    ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
      if r.isOk():
        let request = r.get()
        try:
          if request.uri.path == "/metrics":
            {.gcsafe.}:
              # Prometheus will drop our metrics in surprising ways if we give
              # it timestamps, so we don't.
              let
                response = defaultRegistry.toText()
                headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
              return await request.respond(Http200, response, headers)
          elif request.uri.path == "/health":
            return await request.respond(Http200, "OK")
          else:
            return await request.respond(Http404, "Try /metrics")
        except HttpWriteError as exc:
          return defaultResponse(exc)
      else:
        # same handling of a failed request as the callback in `asyncLoop`
        return defaultResponse()

    let socketFlags = {ServerFlags.ReuseAddr}
    let res = HttpServerRef.new(address, cb, socketFlags = socketFlags)
    if res.isErr():
      printError(res.error())
      return
    let server = res.get()
    server.start()
    while true:
      try:
        waitFor server.join()
      except CatchableError as e:
        printError(e.msg)
        sleep(1000)

  const
    ResponseOk = 0'u8
    ResponseError = 1'u8
    MessageSize = 255

  type
    MetricsRequest {.pure.} = enum
      Status
      Start
      Stop
      Close

    MetricsResponse = object
      status: byte
      data: array[MessageSize, byte]

    MetricsThreadData = object
      reqTransp: StreamTransport
      respTransp: StreamTransport
      http: HttpServerRef

    MetricsErrorKind {.pure.} = enum
      Timeout
      Transport
      Communication

  proc raiseMetricsError(
      msg: string, exc: ref Exception
  ) {.noreturn, noinline, raises: [MetricsError].} =
    # Raises a MetricsError that wraps `exc` and mentions its name and message.
    let message = msg & ", reason: [" & $exc.name & "]: " & $exc.msg
    raise (ref MetricsError)(msg: message, parent: exc)

  proc raiseMetricsError(msg: string) {.noreturn, noinline, raises: [MetricsError].} =
    raise (ref MetricsError)(msg: msg)

  proc raiseMetricsError(
      msg: MetricsErrorKind, exc: ref Exception
  ) {.noreturn, noinline, raises: [MetricsError].} =
    # Maps an error kind to its user-facing text and raises it, wrapping `exc`.
    case msg
    of MetricsErrorKind.Timeout:
      raiseMetricsError("Connection with metrics thread timed out", exc)
    of MetricsErrorKind.Transport:
      raiseMetricsError("Communication with metrics thread failed", exc)
    of MetricsErrorKind.Communication:
      raiseMetricsError("Communication with metrics thread failed", exc)

  proc raiseMetricsError*(
      msg: string, err: OSErrorCode
  ) {.noreturn, noinline, raises: [MetricsError].} =
    let message = msg & ", reason: [OSError]: (" & $int(err) & ") " & osErrorMsg(err)
    raise (ref MetricsError)(msg: message)

  proc respond(
      m: MetricsThreadData, mtype: byte, message: string
  ) {.async: (raises: [CancelledError, MetricsError, TransportError]).} =
    # Sends one fixed-size response frame over the response pipe: a status
    # byte followed by `MessageSize` bytes holding `message`, zero padded
    # (longer messages are truncated).
    var buffer: array[MessageSize + 1, byte]
    let length = min(len(message), len(buffer) - 1)
    zeroMem(cast[pointer](addr buffer[0]), len(buffer))
    buffer[0] = mtype
    if length > 0:
      copyMem(addr buffer[1], unsafeAddr message[0], length)
    let res = await m.respTransp.write(addr buffer[0], len(buffer))
    if res != len(buffer):
      raiseMetricsError("Incomplete response has been sent")

  proc communicate(
      m: MetricsHttpServerRef, req: MetricsRequest
  ): Future[MetricsResponse] {.
      async: (raises: [CancelledError, MetricsError, TransportError])
  .} =
    # Sends a one byte request to the server thread and waits for its
    # fixed-size response frame.
    var buffer: array[MessageSize + 1, byte]
    buffer[0] = byte(req)
    block:
      let res = await m.reqTransp.write(addr buffer[0], 1)
      if res != 1:
        raiseMetricsError("Incomplete request has been sent")
    await m.respTransp.readExactly(addr buffer[0], len(buffer))
    var res = MetricsResponse(status: buffer[0])
    copyMem(addr res.data[0], addr buffer[1], sizeof(res.data))
    res

  proc getMessage(m: MetricsResponse): string =
    # Extracts the text of a response, up to the first zero byte.
    var res = newStringOfCap(MessageSize + 1)
    for i in 0 ..< len(m.data):
      let ch = m.data[i]
      if ch == 0x00'u8:
        break
      res.add(char(ch))
    res

  proc asyncStep(
      server: MetricsServerData, data: MetricsThreadData, lastError: string
  ): Future[bool] {.async: (raises: []).} =
    # Handles a single request coming from the owning thread. Returns `false`
    # when the processing loop has to terminate (close request or failure).
    var buffer: array[1, byte]
    try:
      await data.reqTransp.readExactly(addr buffer[0], len(buffer))

      if len(lastError) > 0:
        await data.respond(ResponseError, lastError)
        return true

      if isNil(data.http):
        await data.respond(ResponseError, "HTTP server is not bound!")
        return true

      case buffer[0]
      of byte(MetricsRequest.Status):
        let message =
          case data.http.state()
          of ServerStopped: "STOPPED"
          of ServerClosed: "CLOSED"
          of ServerRunning: "RUNNING"
        await data.respond(ResponseOk, message)
        true
      of byte(MetricsRequest.Start):
        if data.http.state() != HttpServerState.ServerStopped:
          let message =
            if data.http.state() == HttpServerState.ServerClosed:
              "HTTP server is already closed"
            else:
              "HTTP server is already running"
          await data.respond(ResponseError, message)
        else:
          data.http.start()
          await data.respond(ResponseOk, "")
        true
      of byte(MetricsRequest.Stop):
        if data.http.state() != HttpServerState.ServerRunning:
          let message =
            if data.http.state() == HttpServerState.ServerClosed:
              "HTTP server is already closed"
            else:
              "HTTP server is already stopped"
          await data.respond(ResponseError, message)
        else:
          await data.http.stop()
          await data.respond(ResponseOk, "")
        true
      else:
        # every other request byte (including `Close`) is handled as a close
        if data.http.state() == HttpServerState.ServerClosed:
          await data.respond(ResponseError, "HTTP server is already closed")
          true
        else:
          await data.http.closeWait()
          await data.respond(ResponseOk, "")
          false
    except MetricsError:
      if not (isNil(data.http)):
        await data.http.closeWait()
      return false
    except TransportError:
      if not (isNil(data.http)):
        await data.http.closeWait()
      return false
    except HttpError:
      if not (isNil(data.http)):
        await data.http.closeWait()
      return false
    except CancelledError:
      # We did not use cancellation.
      if not (isNil(data.http)):
        await data.http.closeWait()
      return false

  proc asyncLoop(server: MetricsServerData) {.async: (raises: []).} =
    # Main loop of the server thread: binds the HTTP server, attaches to the
    # pipes and processes requests until `asyncStep` asks to stop.
    var lastError = ""

    proc cb(
        r: RequestFence
    ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
      if r.isOk():
        let request = r.get()
        try:
          if request.uri.path == "/metrics":
            # Prometheus will drop our metrics in surprising ways if we give
            # it timestamps, so we don't.
            let
              response = block:
                {.gcsafe.}:
                  defaultRegistry.toText()
              headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
            await request.respond(Http200, response, headers)
          elif request.uri.path == "/health":
            await request.respond(Http200, "OK")
          else:
            await request.respond(Http404, "Try /metrics")
        except HttpError as exc:
          defaultResponse(exc)
      else:
        defaultResponse()

    let
      http = block:
        let
          socketFlags = {ServerFlags.ReuseAddr}
          res = HttpServerRef.new(server.address, cb, socketFlags = socketFlags)
        if res.isErr():
          lastError = res.error()
          nil
        else:
          res.get()
      reqTransp = fromPipe2(server.requestPipe.read).valueOr:
        # `http` is nil when binding failed, so it must not be closed blindly
        if not (isNil(http)):
          await http.closeWait()
        return
      respTransp = fromPipe2(server.responsePipe.write).valueOr:
        if not (isNil(http)):
          await http.closeWait()
        await reqTransp.closeWait()
        return
      threadData =
        MetricsThreadData(reqTransp: reqTransp, respTransp: respTransp, http: http)

    while true:
      let res = await asyncStep(server, threadData, lastError)
      if not (res):
        break

    await noCancel allFutures(reqTransp.closeWait(), respTransp.closeWait())

  proc serveMetricsServer(server: MetricsServerData) {.thread.} =
    ignoreSignalsInThread()
    let loop {.used.} = getThreadDispatcher()
    waitFor asyncLoop(server)

proc startMetricsHttpServer*(
    address = "127.0.0.1", port = Port(8000)
) {.raises: [Exception], deprecated: "Please use MetricsHttpServerRef API".} =
  when defined(metrics):
    httpServerThread.createThread(serveHttp, initTAddress(address, port))

proc new*(
    t: typedesc[MetricsHttpServerRef], address: string, port: Port
): Result[MetricsHttpServerRef, cstring] {.raises: [].} =
  ## Initialize new instance of MetricsHttpServerRef.
  ##
  ## This involves creation of new thread and new processing loop in the new
  ## thread.
  when defined(metrics):
    template closePipe(b: untyped): untyped =
      closeHandle(b.read)
      closeHandle(b.write)

    let taddress =
      try:
        initTAddress(address, port)
      except TransportAddressError:
        return err("Invalid server address")
    var
      request = block:
        let res = createAsyncPipe()
        if (res.read == asyncInvalidPipe) or (res.write == asyncInvalidPipe):
          return err("Unable to create communication request pipe")
        res
      cleanupRequest = true
    defer:
      if cleanupRequest:
        request.closePipe()

    var
      response = block:
        let res = createAsyncPipe()
        if (res.read == asyncInvalidPipe) or (res.write == asyncInvalidPipe):
          # the request pipe is released by the `defer` above; closing it here
          # as well would close the same handles twice
          return err("Unable to create communication response pipe")
        res
      cleanupResponse = true
    defer:
      if cleanupResponse:
        response.closePipe()

    let data =
      MetricsServerData(address: taddress, requestPipe: request, responsePipe: response)
    var server = MetricsHttpServerRef(data: data)
    try:
      createThread(server.thread, serveMetricsServer, data)
    except ResourceExhaustedError:
      # must precede the generic handler, otherwise this branch is unreachable
      return err("Unable to spawn metrics server's thread")
    except Exception:
      return err("Unexpected error while spawning metrics server's thread")

    server.reqTransp =
      try:
        fromPipe(request.write)
      except CatchableError:
        return err(
          "Unable to establish communication channel with " & "metrics server thread"
        )
    server.respTransp =
      try:
        fromPipe(response.read)
      except CatchableError:
        return err(
          "Unable to establish communication channel with " & "metrics server thread"
        )

    # ownership of the pipes now belongs to `server` and its thread
    cleanupRequest = false
    cleanupResponse = false
    ok(server)
  else:
    err("Could not initialize metrics server, because metrics are disabled")

proc start*(
    server: MetricsHttpServerRef
) {.async: (raises: [MetricsError, CancelledError]).} =
  ## Start metrics HTTP server.
  when defined(metrics):
    if not (server.thread.running()):
      raiseMetricsError("Metrics server is not running")
    let resp =
      try:
        await communicate(server, MetricsRequest.Start).wait(5.seconds)
      except AsyncTimeoutError as exc:
        raiseMetricsError(MetricsErrorKind.Timeout, exc)
      except MetricsError as exc:
        raiseMetricsError(MetricsErrorKind.Communication, exc)
      except TransportError as exc:
        raiseMetricsError(MetricsErrorKind.Transport, exc)
    if resp.status != 0x00'u8:
      raiseMetricsError("Metrics server returns an error: " & resp.getMessage())

proc stop*(
    server: MetricsHttpServerRef
) {.async: (raises: [MetricsError, CancelledError]).} =
  ## Force metrics HTTP server to stop accepting new connections.
  when defined(metrics):
    if not (server.thread.running()):
      raiseMetricsError("Metrics server is not running")
    let resp =
      try:
        await communicate(server, MetricsRequest.Stop).wait(5.seconds)
      except AsyncTimeoutError as exc:
        raiseMetricsError(MetricsErrorKind.Timeout, exc)
      except MetricsError as exc:
        raiseMetricsError(MetricsErrorKind.Communication, exc)
      except TransportError as exc:
        raiseMetricsError(MetricsErrorKind.Transport, exc)
    if resp.status != 0x00'u8:
      raiseMetricsError("Metrics server returns an error: " & resp.getMessage())

proc close*(server: MetricsHttpServerRef) {.async: (raises: []).} =
  ## Close metrics HTTP server and release all the resources.
  when defined(metrics):
    # We ignore all the exception because there is no way to report error.
    if not (server.thread.running()):
      return

    try:
      discard await communicate(server, MetricsRequest.Close).wait(5.seconds)
    except AsyncTimeoutError:
      discard
    except MetricsError:
      discard
    except TransportError:
      discard
    except CancelledError:
      discard

    # Closing pipes, other pipe ends should be closed by foreign thread.
    await noCancel allFutures(
      server.reqTransp.closeWait(), server.respTransp.closeWait()
    )
    # Thread should exit very soon.
    server.thread.joinThread()

proc status*(
    server: MetricsHttpServerRef
): Future[MetricsHttpServerStatus] {.async: (raises: [CancelledError, MetricsError]).} =
  ## Returns current status of metrics HTTP server.
  ##
  ## Note, that if `metrics` variable is not defined this procedure will return
  ## ``MetricsHttpServerStatus.Closed``.
  when defined(metrics):
    if not (server.thread.running()):
      return MetricsHttpServerStatus.Closed

    let resp =
      try:
        await communicate(server, MetricsRequest.Status).wait(5.seconds)
      except AsyncTimeoutError as exc:
        raiseMetricsError(MetricsErrorKind.Timeout, exc)
      except MetricsError as exc:
        raiseMetricsError(MetricsErrorKind.Communication, exc)
      except TransportError as exc:
        raiseMetricsError(MetricsErrorKind.Transport, exc)

    if resp.status != 0x00'u8:
      raiseMetricsError("Metrics server returns an error: " & resp.getMessage())

    case resp.getMessage()
    of "STOPPED":
      MetricsHttpServerStatus.Stopped
    of "CLOSED":
      MetricsHttpServerStatus.Closed
    of "RUNNING":
      MetricsHttpServerStatus.Running
    else:
      raiseMetricsError("Metrics server returns unsupported status!")
  else:
    MetricsHttpServerStatus.Closed

proc new*(t: typedesc[MetricsHttpServerMiddlewareRef]): HttpServerMiddlewareRef =
  ## Creates a middleware that answers "/metrics" and "/health" itself and
  ## forwards every other request to the next handler.
  proc middlewareCallback(
      middleware: HttpServerMiddlewareRef,
      reqfence: RequestFence,
      handler: HttpProcessCallback2,
  ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} =
    if reqfence.isOk():
      let request = reqfence.get()
      try:
        if request.uri.path == "/metrics":
          when defined(metrics):
            # Prometheus will drop our metrics in surprising ways if we give
            # it timestamps, so we don't.
            let
              response = block:
                {.gcsafe.}:
                  defaultRegistry.toText()
              headers = HttpTable.init([("Content-Type", CONTENT_TYPE)])
            await request.respond(Http200, response, headers)
          else:
            await request.respond(
              Http200, "Metrics are not enabled, build your application with -d:metrics"
            )
        elif request.uri.path == "/health":
          await request.respond(Http200, "OK")
        else:
          await handler(reqfence)
      except HttpWriteError as exc:
        defaultResponse(exc)
    else:
      await handler(reqfence)

  let middleware = MetricsHttpServerMiddlewareRef(handler: middlewareCallback)
  HttpServerMiddlewareRef(middleware)

--------------------------------------------------------------------------------
/metrics/common.nim:
--------------------------------------------------------------------------------
# Copyright (c) 2019-2021 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license: http://opensource.org/licenses/MIT
#   * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
# at your option. This file may not be copied, modified, or distributed except according to those terms.
6 | 7 | when defined(posix): 8 | import os, posix 9 | 10 | # https://prometheus.io/docs/instrumenting/exposition_formats/#basic-info 11 | const CONTENT_TYPE* = "text/plain; version=0.0.4; charset=utf-8" 12 | 13 | proc printError*(msg: string) = 14 | try: 15 | writeLine(stderr, "metrics error: " & msg) 16 | except IOError: 17 | discard 18 | 19 | proc ignoreSignalsInThread*() = 20 | # Block all signals in this thread, so we don't interfere with regular signal 21 | # handling elsewhere. 22 | when defined(posix): 23 | var signalMask, oldSignalMask: Sigset 24 | 25 | # sigprocmask() doesn't work on macOS, for multithreaded programs 26 | if sigfillset(signalMask) != 0: 27 | echo osErrorMsg(osLastError()) 28 | quit(QuitFailure) 29 | when defined(boehmgc): 30 | # https://www.hboehm.info/gc/debugging.html 31 | const 32 | SIGPWR = 30 33 | SIGXCPU = 24 34 | SIGSEGV = 11 35 | SIGBUS = 7 36 | if sigdelset(signalMask, SIGPWR) != 0 or sigdelset(signalMask, SIGXCPU) != 0 or 37 | sigdelset(signalMask, SIGSEGV) != 0 or sigdelset(signalMask, SIGBUS) != 0: 38 | echo osErrorMsg(osLastError()) 39 | quit(QuitFailure) 40 | if pthread_sigmask(SIG_BLOCK, signalMask, oldSignalMask) != 0: 41 | echo osErrorMsg(osLastError()) 42 | quit(QuitFailure) 43 | -------------------------------------------------------------------------------- /metrics/shseq.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 

import std/typetraits

type ShSeq*[T] = object
  # Growable sequence whose backing buffer is obtained from the shared
  # allocator (`createSharedU` / `deallocShared`). Elements are relocated with
  # `copyMem` / `moveMem`, hence only `supportsCopyMem` types are accepted.
  items: ptr UncheckedArray[T] # backing buffer, `nil` until the first growth
  capacity, len: int # allocated slots / slots currently in use

proc grow(s: var ShSeq, size: int) =
  ## Make sure the buffer can hold at least `size` elements. A request that
  ## already fits is a no-op; otherwise a new uninitialised buffer is
  ## allocated, the live elements are copied over and the old buffer is freed.
  type T = typeof(s).T

  static:
    doAssert supportsCopyMem(T)

  if size > s.capacity:
    var fresh = cast[ptr UncheckedArray[T]](createSharedU(T, size))
    if s.len > 0:
      # Only the first `len` slots carry data worth preserving.
      copyMem(addr fresh[0], addr s.items[0], s.len * sizeof(T))
    if not isNil(s.items):
      deallocShared(s.items)
    s.items = fresh
    s.capacity = size

proc destroy*(s: var ShSeq) =
  ## Release the backing buffer (when one was allocated) and reset `s` to its
  ## default, empty state.
  if s.items != nil:
    deallocShared(s.items)
  reset(s)

proc init*[T](_: type ShSeq, v: openArray[T]): ShSeq[T] =
  ## Create a sequence holding a byte-wise copy of the elements of `v`.
  ## An empty `v` yields a sequence without any allocation.
  var created: ShSeq[T]
  let count = v.len
  if count > 0:
    created.grow(count)
    copyMem(addr created.items[0], unsafeAddr v[0], count * sizeof(T))
    created.len = count

  created

proc add*(s: var ShSeq, v: auto) =
  ## Append `v`. When the buffer is full it grows by half its current length,
  ## with a floor of 64 slots.
  if s.capacity == s.len:
    s.grow(max(64, s.len + s.len div 2))
  s.items[s.len] = v
  inc s.len

func `[]`*(s: ShSeq, i: int): lent s.T =
  ## Bounds-checked read access to the element at index `i`.
  doAssert i >= 0 and i < s.len, "Bounds check"
  s.items[i]

func `[]`*(s: var ShSeq, i: int): var s.T =
  ## Bounds-checked mutable access to the element at index `i`.
  doAssert i >= 0 and i < s.len, "Bounds check"
  s.items[i]

proc insert*(s: var ShSeq, v: auto, pos: int) =
  ## Insert `v` so that it ends up at index `pos`, shifting the elements from
  ## `pos` onwards one slot up. `pos == len` is allowed and appends.
  type T = typeof(s).T

  doAssert pos >= 0 and pos <= s.len, "Bounds check"

  if s.capacity == s.len:
    s.grow(max(64, s.len + s.len div 2))

  # Number of elements that have to make room for the new one.
  let tail = s.len - pos
  if tail > 0:
    # Source and destination overlap, so `moveMem` is required here.
    moveMem(addr s.items[pos + 1], addr s.items[pos], tail * sizeof(T))

  s.items[pos] = v
  inc s.len

template len*(s: ShSeq): int =
  ## Number of elements currently stored.
  s.len

template data*(s: ShSeq): openArray =
  ## View of the stored elements as an `openArray`.
  s.items.toOpenArray(0, s.len - 1)

iterator items*(s: ShSeq): lent s.T =
  ## Yield the stored elements in index order.
  for idx in 0 ..< s.len:
    yield s[idx]

-------------------------------------------------------------------------------- /nim.cfg: -------------------------------------------------------------------------------- 1 | nimcache = "build/nimcache/$projectName" 2 | 3 | # Avoid some rare stack corruption while using exceptions with a SEH-enabled 4 | # toolchain: https://github.com/status-im/nimbus-eth2/issues/3121 5 | @if windows and not vcc: 6 | --define:nimRawSetjmp 7 | @end 8 | -------------------------------------------------------------------------------- /tests/chronicles_tests.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | import chronicles, unittest2, ../metrics, ../metrics/chronicles_support 8 | 9 | suite "logging": 10 | test "info": 11 | var registry = newRegistry() 12 | declareCounter myCounter, "help", registry = registry 13 | myCounter.inc() 14 | info "myCounter", myCounter 15 | declareCounter lCounter, "l help", @["foo", "bar"], registry 16 | let labelValues = @["a", "x \"y\" \n\\z"] 17 | lCounter.inc(4.5, labelValues = labelValues) 18 | info "lCounter", lCounter 19 | declareGauge myGauge, "help", registry = registry 20 | myGauge.set(9.5) 21 | info "myGauge", myGauge 22 | declareSummary mySummary, "help", registry = registry 23 | mySummary.observe(10) 24 | info "mySummary", mySummary 25 | declareHistogram myHistogram, "help", registry = registry 26 | myHistogram.observe(10) 27 | info "myHistogram", myHistogram 28 | 29 | info "registry", registry 30 | info "default registry", defaultRegistry 31 | --------------------------------------------------------------------------------
/tests/chronos_server_tests.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2022 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | import std/[uri] 8 | import chronos, chronos/apps/http/[httpclient, httpserver] 9 | import chronos/unittest2/asynctests 10 | import ../metrics, ../metrics/chronos_httpserver 11 | 12 | suite "Chronos metrics HTTP server test suite": 13 | proc httpClient( 14 | url: string 15 | ): Future[HttpResponseTuple] {.async: (raises: [CancelledError, HttpError]).} = 16 | let session = HttpSessionRef.new() 17 | try: 18 | await session.fetch(parseUri(url)) 19 | finally: 20 | await session.closeWait() 21 | 22 | asyncTest "new()/close() test": 23 | when defined(metrics): 24 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 25 | block: 26 | let status = await server.status() 27 | check status == MetricsHttpServerStatus.Stopped 28 | await server.close() 29 | block: 30 | let status = await server.status() 31 | check status == MetricsHttpServerStatus.Closed 32 | else: 33 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 34 | 35 | asyncTest "new()/start()/stop()/close() test": 36 | when defined(metrics): 37 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 38 | block: 39 | let status = await server.status() 40 | check status == MetricsHttpServerStatus.Stopped 41 | await server.start() 42 | block: 43 | let status = await server.status() 44 | check status == MetricsHttpServerStatus.Running 45 | await server.stop() 46 | block: 47 | let status = await server.status() 48 | check status == MetricsHttpServerStatus.Stopped 49 | await server.close() 50 | block: 
51 | let status = await server.status() 52 | check status == MetricsHttpServerStatus.Closed 53 | else: 54 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 55 | 56 | asyncTest "new()/start()/response/stop()/start()/response/stop()/close() " & "test": 57 | when defined(metrics): 58 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 59 | block: 60 | let status = await server.status() 61 | check status == MetricsHttpServerStatus.Stopped 62 | await server.start() 63 | block: 64 | let status = await server.status() 65 | check status == MetricsHttpServerStatus.Running 66 | block: 67 | let resp = await httpClient("http://127.0.0.1:8080/health") 68 | check: 69 | resp.status == 200 70 | resp.data.bytesToString == "OK" 71 | await server.stop() 72 | block: 73 | let status = await server.status() 74 | check status == MetricsHttpServerStatus.Stopped 75 | await server.start() 76 | block: 77 | let status = await server.status() 78 | check status == MetricsHttpServerStatus.Running 79 | block: 80 | let resp = await httpClient("http://127.0.0.1:8080/health") 81 | check: 82 | resp.status == 200 83 | resp.data.bytesToString == "OK" 84 | await server.stop() 85 | block: 86 | let status = await server.status() 87 | check status == MetricsHttpServerStatus.Stopped 88 | await server.close() 89 | block: 90 | let status = await server.status() 91 | check status == MetricsHttpServerStatus.Closed 92 | else: 93 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 94 | 95 | asyncTest "new()/start()/close() test": 96 | when defined(metrics): 97 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 98 | block: 99 | let status = await server.status() 100 | check status == MetricsHttpServerStatus.Stopped 101 | await server.start() 102 | block: 103 | let status = await server.status() 104 | check status == MetricsHttpServerStatus.Running 105 | await server.close() 106 | block: 107 | let status = await server.status() 108 
| check status == MetricsHttpServerStatus.Closed 109 | else: 110 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 111 | 112 | asyncTest "HTTP 200/responses check test": 113 | when defined(metrics): 114 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 115 | block: 116 | let status = await server.status() 117 | check status == MetricsHttpServerStatus.Stopped 118 | await server.start() 119 | block: 120 | let status = await server.status() 121 | check status == MetricsHttpServerStatus.Running 122 | block: 123 | let resp = await httpClient("http://127.0.0.1:8080/metrics") 124 | check: 125 | resp.status == 200 126 | len(resp.data) > 0 127 | 128 | block: 129 | let resp = await httpClient("http://127.0.0.1:8080/health") 130 | check: 131 | resp.status == 200 132 | resp.data.bytesToString() == "OK" 133 | await server.stop() 134 | block: 135 | let status = await server.status() 136 | check status == MetricsHttpServerStatus.Stopped 137 | await server.close() 138 | block: 139 | let status = await server.status() 140 | check status == MetricsHttpServerStatus.Closed 141 | else: 142 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 143 | 144 | asyncTest "HTTP 404/response check test": 145 | when defined(metrics): 146 | let server = MetricsHttpServerRef.new("127.0.0.1", Port(8080)).get() 147 | block: 148 | let status = await server.status() 149 | check status == MetricsHttpServerStatus.Stopped 150 | await server.start() 151 | block: 152 | let status = await server.status() 153 | check status == MetricsHttpServerStatus.Running 154 | 155 | block: 156 | let resp = await httpClient("http://127.0.0.1:8080/somePath") 157 | check: 158 | resp.status == 404 159 | len(resp.data) > 0 160 | 161 | await server.stop() 162 | block: 163 | let status = await server.status() 164 | check status == MetricsHttpServerStatus.Stopped 165 | await server.close() 166 | block: 167 | let status = await server.status() 168 | check status == 
MetricsHttpServerStatus.Closed 169 | else: 170 | check MetricsHttpServerRef.new("127.0.0.1", Port(8080)).isErr() == true 171 | 172 | asyncTest "Chronos middleware test": 173 | when defined(metrics): 174 | proc process( 175 | r: RequestFence 176 | ): Future[HttpResponseRef] {.async: (raises: [CancelledError]).} = 177 | if r.isOk(): 178 | let request = r.get() 179 | if request.uri.path == "/test": 180 | try: 181 | await request.respond(Http200, "TESTOK") 182 | except HttpWriteError as exc: 183 | defaultResponse(exc) 184 | else: 185 | defaultResponse() 186 | else: 187 | defaultResponse() 188 | 189 | let 190 | socketFlags = {ServerFlags.TcpNoDelay, ServerFlags.ReuseAddr} 191 | middlewares = [MetricsHttpServerMiddlewareRef.new()] 192 | res = HttpServerRef.new( 193 | initTAddress("127.0.0.1:0"), 194 | process, 195 | middlewares = middlewares, 196 | socketFlags = socketFlags, 197 | ) 198 | check res.isOk() 199 | let server = res.get() 200 | server.start() 201 | try: 202 | let 203 | address = server.instance.localAddress() 204 | uri1 = "http://" & $address & "/metrics" 205 | uri2 = "http://" & $address & "/health" 206 | uri3 = "http://" & $address & "/test" 207 | res1 = await httpClient(uri1) 208 | res2 = await httpClient(uri2) 209 | res3 = await httpClient(uri3) 210 | check: 211 | res1.status == 200 212 | len(res1.data) > 0 213 | res2.status == 200 214 | res2.data.bytesToString() == "OK" 215 | res3.status == 200 216 | res3.data.bytesToString() == "TESTOK" 217 | finally: 218 | await server.stop() 219 | await server.closeWait() 220 | else: 221 | check not (isNil(MetricsHttpServerMiddlewareRef.new())) 222 | -------------------------------------------------------------------------------- /tests/duplicate_coll_mod.nim: -------------------------------------------------------------------------------- 1 | import ../metrics 2 | 3 | {.used.} 4 | 5 | declareCounter duplicate_counter, "duplicate counter" 6 | duplicate_counter.inc() 7 | 
-------------------------------------------------------------------------------- /tests/main_tests.nim: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019-2023 Status Research & Development GmbH 2 | # Licensed and distributed under either of 3 | # * MIT license: http://opensource.org/licenses/MIT 4 | # * Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 5 | # at your option. This file may not be copied, modified, or distributed except according to those terms. 6 | 7 | import net, os, unittest2, ../metrics 8 | 9 | import ./test_shseq 10 | 11 | when defined(metrics): 12 | import times 13 | 14 | declareCounter globalCounter, "help" 15 | declarePublicCounter globalPublicCounter, "help" 16 | declareGauge globalGauge, "help" 17 | declarePublicGauge globalPublicGauge, "help" 18 | 19 | const brokenGlobals = 20 | (NimMajor, NimMinor) == (2, 0) and (defined(gcOrc) or defined(gcArc)) 21 | 22 | proc gcSafetyTest*() {.gcsafe.} = # The test is successful if this proc compiles 23 | globalCounter.inc 2 24 | globalPublicCounter.inc(2) 25 | globalGauge.set 10.0 26 | globalGauge.inc 27 | globalGauge.dec 28 | discard globalCounter.value 29 | discard globalCounter.valueByName("test") 30 | globalPublicGauge.set(1) 31 | 32 | suite "counter": 33 | setup: 34 | var registry = newRegistry() 35 | declareCounter myCounter, "help", registry = registry 36 | 37 | test "basic": 38 | check myCounter.value == 0 39 | myCounter.inc() 40 | check myCounter.value == 1 41 | myCounter.inc(7) 42 | check myCounter.value == 8 43 | myCounter.inc(0.5) 44 | check myCounter.value == 8.5 45 | myCounter.inc(-1) 46 | # you shouldn't be doing this - but we don't want metrics to crash the app 47 | check myCounter.value == 8.5 48 | # name validation (have to use the internal API to get past Nim's identifier validation) 49 | when defined(metrics): 50 | expect ValueError: 51 | var tmp = newCounter("1337", "invalid name") 52 | 53 | test 
"alternative API": 54 | when brokenGlobals: 55 | skip() 56 | else: 57 | counter("one_off_counter").inc() 58 | check counter("one_off_counter").value == 1 59 | counter("one_off_counter").inc(0.5) 60 | check counter("one_off_counter").value == 1.5 61 | 62 | # # Can't have different collector types with the same name, but unittest 63 | # # can't catch an exception raised in the assignment to a {.global.} 64 | # # variable. 65 | # expect RegistrationError: 66 | # check gauge("one_off_counter").value == 0 67 | 68 | # colons in name 69 | counter("one:off:counter:colons").inc() 70 | check counter("one:off:counter:colons").value == 1 71 | 72 | test "exceptions": 73 | proc f(switch: bool) = 74 | if switch: 75 | raise newException(ValueError, "exc1") 76 | else: 77 | raise newException(IndexDefect, "exc2") 78 | 79 | expect IndexDefect: 80 | myCounter.countExceptions(ValueError): 81 | f(false) 82 | check myCounter.value == 0 83 | 84 | expect ValueError: 85 | myCounter.countExceptions(ValueError): 86 | f(true) 87 | check myCounter.value == 1 88 | 89 | expect IndexDefect: 90 | myCounter.countExceptions: 91 | f(false) 92 | check myCounter.value == 2 93 | 94 | myCounter.countExceptions: 95 | discard 96 | check myCounter.value == 2 97 | 98 | test "labels": 99 | declareCounter lCounter, "l help", ["foo", "bar"], registry 100 | expect KeyError: 101 | discard lCounter.value 102 | 103 | # you can't access a labelled value before it was initialised 104 | expect KeyError: 105 | discard lCounter.value(@["a", "x"]) 106 | 107 | let labelValues = @["a", "x \"y\" \n\\z"] 108 | lCounter.inc(labelValues = labelValues) 109 | check lCounter.value(labelValues) == 1 110 | # echo registry 111 | 112 | # label validation 113 | expect ValueError: 114 | declareCounter invalid1, "invalid", ["123", "foo"] 115 | expect ValueError: 116 | declareCounter invalid2, "invalid", ["foo", "123"] 117 | expect ValueError: 118 | declareCounter invalid3, "invalid", ["foo", "__bar"] 119 | 120 | # label names: array 
instead of sequence 121 | declareCounter lCounter2, "l2 help", ["foo", "bar"], registry 122 | let labelValues2 = ["a", "x \"y\" \n\\z"] 123 | lCounter2.inc(labelValues = labelValues2) 124 | check lCounter2.value(labelValues2) == 1 125 | 126 | declareCounter lCounter3, "l3 help", ["aaa"] 127 | for i in 0 ..< 4: 128 | for j in ["d", "b", "c", "a", "e"]: 129 | lCounter3.inc(1, [j]) 130 | 131 | for j in ["d", "b", "c", "a", "e"]: 132 | check lCounter3.value([j]) == 4 133 | 134 | test "sample rate": 135 | declareCounter sCounter, "counter with a sample rate set", registry = registry 136 | sCounter.inc() 137 | # No sampling done on our side, just in sending the increments to a StatsD server 138 | check sCounter.value == 1 139 | 140 | test "names with colons": 141 | declareCounter cCounter, 142 | "counter with colons in name", registry = registry, name = "foo:bar:baz" 143 | cCounter.inc() 144 | check cCounter.value == 1 145 | check cCounter.valueByName("foo:bar:baz_total") == 1 146 | # echo cCounter 147 | 148 | var myName = "bla:bla" 149 | declareCounter cCounter2, 150 | "another counter with colon in name", registry = registry, name = myName 151 | cCounter2.inc() 152 | check cCounter2.value == 1 153 | check cCounter2.valueByName("bla:bla_total") == 1 154 | # echo cCounter2 155 | 156 | suite "gauge": 157 | setup: 158 | var registry = newRegistry() 159 | declareGauge myGauge, "help", registry = registry 160 | 161 | test "basic": 162 | check myGauge.value == 0 163 | myGauge.inc() 164 | check myGauge.value == 1 165 | myGauge.dec(3) 166 | check myGauge.value == -2.0 # weird Nim bug if it's "-2" 167 | myGauge.dec(0.1) 168 | check myGauge.value == -2.1 169 | myGauge.set(9.5) 170 | check myGauge.value == 9.5 171 | myGauge.set(1) 172 | check myGauge.value == 1 173 | 174 | test "GlobalGauge value": 175 | check globalGauge.value == 0.0 176 | 177 | test "alternative API": 178 | when brokenGlobals: 179 | skip() 180 | else: 181 | gauge("one_off_gauge").set(1) 182 | check 
gauge("one_off_gauge").value == 1 183 | gauge("one_off_gauge").inc(0.5) 184 | check gauge("one_off_gauge").value == 1.5 185 | 186 | test "in progress": 187 | myGauge.trackInProgress: 188 | check myGauge.value == 1 189 | check myGauge.value == 0 190 | 191 | declareGauge lgauge, "help", @["foobar"], registry = registry 192 | let labelValues = @["b"] 193 | lgauge.trackInProgress(labelValues): 194 | check lgauge.value(labelValues) == 1 195 | check lgauge.value(labelValues) == 0 196 | # echo registry 197 | 198 | test "timing": 199 | myGauge.time: 200 | sleep(1000) 201 | check myGauge.value == 0 202 | check myGauge.value >= 1 # may be 2 inside a macOS Travis job 203 | # echo registry 204 | 205 | test "timing with labels": 206 | declareGauge lgauge2, "help", @["foobar"], registry = registry 207 | let labelValues = @["b"] 208 | lgauge2.time(labelValues): 209 | sleep(1000) 210 | check lgauge2.value(labelValues) >= 1 211 | 212 | test "names with colons": 213 | declareGauge cGauge, 214 | "gauge with colons in name", registry = registry, name = "foo:bar:baz" 215 | cGauge.inc() 216 | check cGauge.value == 1 217 | check cGauge.valueByName("foo:bar:baz") == 1 218 | # echo cGauge 219 | 220 | suite "summary": 221 | setup: 222 | var registry = newRegistry() 223 | declareSummary mySummary, "help", registry = registry 224 | 225 | test "basic": 226 | check mySummary.valueByName("mySummary_count") == 0 227 | check mySummary.valueByName("mySummary_sum") == 0 228 | mySummary.observe(10) 229 | check mySummary.valueByName("mySummary_count") == 1 230 | check mySummary.valueByName("mySummary_sum") == 10 231 | mySummary.observe(0.5) 232 | check mySummary.valueByName("mySummary_count") == 2 233 | check mySummary.valueByName("mySummary_sum") == 10.5 234 | 235 | test "alternative API": 236 | when brokenGlobals: 237 | skip() 238 | else: 239 | summary("one_off_summary").observe(10) 240 | check summary("one_off_summary").valueByName("one_off_summary_count") == 1 241 | check 
summary("one_off_summary").valueByName("one_off_summary_sum") == 10 242 | 243 | test "timing": 244 | mySummary.time: 245 | sleep(1000) 246 | check mySummary.valueByName("mySummary_sum") == 0 247 | check mySummary.valueByName("mySummary_sum") >= 1 248 | 249 | test "timing with labels": 250 | declareSummary lsummary, "help", ["foobar"], registry = registry 251 | let labelValues = ["b"] 252 | lsummary.time(labelValues): 253 | sleep(1000) 254 | check lsummary.valueByName("lsummary_sum", labelValues) >= 1 255 | 256 | test "names with colons": 257 | declareSummary cSummary, 258 | "summary with colons in name", registry = registry, name = "foo:bar:baz" 259 | cSummary.observe(10) 260 | check cSummary.valueByName("foo:bar:baz_count") == 1 261 | check cSummary.valueByName("foo:bar:baz_sum") == 10 262 | # echo cSummary 263 | 264 | suite "histogram": 265 | setup: 266 | var registry = newRegistry() 267 | declareHistogram myHistogram, "help", registry = registry 268 | 269 | test "basic": 270 | check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0 271 | check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 0 272 | check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 0 273 | check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 0 274 | check myHistogram.valueByName("myHistogram_count") == 0 275 | check myHistogram.valueByName("myHistogram_sum") == 0 276 | 277 | myHistogram.observe(2) 278 | check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0 279 | check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 1 280 | check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 1 281 | check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 1 282 | check myHistogram.valueByName("myHistogram_count") == 1 283 | check myHistogram.valueByName("myHistogram_sum") == 2 284 | 285 | myHistogram.observe(2.5) 286 | check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0 287 | 
check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 2 288 | check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 2 289 | check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 2 290 | check myHistogram.valueByName("myHistogram_count") == 2 291 | check myHistogram.valueByName("myHistogram_sum") == 4.5 292 | 293 | myHistogram.observe(Inf) 294 | check myHistogram.valueByName("myHistogram_bucket", [], ["1.0"]) == 0 295 | check myHistogram.valueByName("myHistogram_bucket", [], ["2.5"]) == 2 296 | check myHistogram.valueByName("myHistogram_bucket", [], ["5.0"]) == 2 297 | check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 3 298 | check myHistogram.valueByName("myHistogram_count") == 3 299 | check myHistogram.valueByName("myHistogram_sum") == Inf 300 | 301 | declareHistogram h1, "help", registry = registry, buckets = [0.0, 1.0, 2.0] 302 | check h1.valueByName("h1_bucket", [], ["0.0"]) == 0 303 | check h1.valueByName("h1_bucket", [], ["1.0"]) == 0 304 | check h1.valueByName("h1_bucket", [], ["2.0"]) == 0 305 | check h1.valueByName("h1_bucket", [], ["+Inf"]) == 0 306 | 307 | declareHistogram h2, "help", registry = registry, buckets = [0.0, 1.0, 2.0, Inf] 308 | check h2.valueByName("h2_bucket", [], ["0.0"]) == 0 309 | check h2.valueByName("h2_bucket", [], ["1.0"]) == 0 310 | check h2.valueByName("h2_bucket", [], ["2.0"]) == 0 311 | check h2.valueByName("h2_bucket", [], ["+Inf"]) == 0 312 | 313 | expect ValueError: 314 | declareHistogram h3, "help", registry = registry, buckets = [] 315 | expect ValueError: 316 | declareHistogram h3, "help", registry = registry, buckets = [Inf] 317 | expect ValueError: 318 | declareHistogram h3, "help", registry = registry, buckets = [3.0, 1.0] 319 | 320 | test "alternative API": 321 | when brokenGlobals: 322 | skip() 323 | else: 324 | histogram("one_off_histogram").observe(2) 325 | check histogram("one_off_histogram").valueByName( 326 | "one_off_histogram_bucket", [], ["1.0"] 327 
| ) == 0 328 | check histogram("one_off_histogram").valueByName( 329 | "one_off_histogram_bucket", [], ["2.5"] 330 | ) == 1 331 | check histogram("one_off_histogram").valueByName( 332 | "one_off_histogram_bucket", [], ["5.0"] 333 | ) == 1 334 | check histogram("one_off_histogram").valueByName( 335 | "one_off_histogram_bucket", [], ["+Inf"] 336 | ) == 1 337 | check histogram("one_off_histogram").valueByName("one_off_histogram_count") == 1 338 | check histogram("one_off_histogram").valueByName("one_off_histogram_sum") == 2 339 | 340 | test "timing": 341 | myHistogram.time: 342 | sleep(1000) 343 | check myHistogram.valueByName("myHistogram_sum") == 0 344 | check myHistogram.valueByName("myHistogram_sum") >= 1 345 | check myHistogram.valueByName("myHistogram_count") == 1 346 | check myHistogram.valueByName("myHistogram_bucket", [], ["+Inf"]) == 1 347 | 348 | test "timing with labels": 349 | declareHistogram lhistogram, "help", ["foobar"], registry = registry 350 | let labelValues = ["b"] 351 | lhistogram.time(labelValues): 352 | sleep(1000) 353 | check lhistogram.valueByName("lhistogram_sum", labelValues) >= 1 354 | check lhistogram.valueByName("lhistogram_count", labelValues) == 1 355 | check lhistogram.valueByName("lhistogram_bucket", labelValues, ["+Inf"]) == 1 356 | 357 | test "names with colons": 358 | declareHistogram cHistogram, 359 | "histogram with colons in name", registry = registry, name = "foo:bar:baz" 360 | cHistogram.observe(10) 361 | check cHistogram.valueByName("foo:bar:baz_count") == 1 362 | check cHistogram.valueByName("foo:bar:baz_sum") == 10 363 | # echo cHistogram 364 | 365 | import ./duplicate_coll_mod 366 | suite "registry": 367 | test "duplicate collectors": 368 | expect RegistrationError: 369 | declareCounter duplicate_counter, "duplicate counter" 370 | duplicate_counter.inc() 371 | 372 | when defined(metrics): 373 | type MyCustomCollector = ref object of Gauge 374 | var 375 | myCustomCollector = 
MyCustomCollector.newCollector("my_custom_collector", "help") 376 | registry2 = newRegistry() 377 | myCustomCollector2 = MyCustomCollector.newCollector( 378 | "my_custom_collector2", "help2", registry = registry2 379 | ) 380 | 381 | method collect(collector: MyCustomCollector, output: MetricHandler) = 382 | let timestamp = collector.now() 383 | output(name = "custom_metric", value = 42, timestamp = timestamp) 384 | 385 | suite "custom collectors": 386 | test "42": 387 | check myCustomCollector.value == 42 388 | 389 | test "custom registry": 390 | var metrics: seq[float64] 391 | proc output( 392 | name: string, 393 | value: float64, 394 | labels, labelValues: openArray[string], 395 | timestamp: Time, 396 | ) = 397 | metrics.add(value) 398 | 399 | registry2.collect(output) 400 | check: 401 | metrics.len == 1 402 | metrics[0] == 42 403 | 404 | suite "system metrics": 405 | test "change update interval": 406 | when defined(metrics): 407 | declareGauge myGauge, "my gauge" 408 | myGauge.set(1) 409 | # echo defaultRegistry 410 | echo getSystemMetricsUpdateInterval() 411 | setSystemMetricsUpdateInterval(initDuration(seconds = 1)) 412 | echo getSystemMetricsUpdateInterval() 413 | sleep(2) 414 | myGauge.set(2) 415 | # echo defaultRegistry 416 | echo getSystemMetricsAutomaticUpdate() 417 | setSystemMetricsAutomaticUpdate(false) 418 | updateSystemMetrics() 419 | updateThreadMetrics() 420 | -------------------------------------------------------------------------------- /tests/nim.cfg: -------------------------------------------------------------------------------- 1 | # Avoid some rare stack corruption while using exceptions with a SEH-enabled 2 | # toolchain: https://github.com/status-im/nimbus-eth2/issues/3121 3 | @if windows and not vcc: 4 | --define:nimRawSetjmp 5 | @end 6 | -------------------------------------------------------------------------------- /tests/test_shseq.nim: -------------------------------------------------------------------------------- 1 | {.used.} 2 | 

import unittest2, ../metrics/shseq

# Tests for `ShSeq`. Every test that makes a sequence allocate calls
# `destroy()` when it is done: the buffer comes from the shared allocator and
# is not released automatically.

suite "ShSeq":
  test "basics":
    var s: ShSeq[int]

    s.add(1)
    s.add(2)
    s.add(4)

    # `insert(value, position)`: fill the gaps at the front, in the middle and
    # at the very end (position == len), ending up with 0..5 in order.
    s.insert(0, 0)
    s.insert(3, 3)
    s.insert(5, 5)

    check s.len == 6
    for i in 0 ..< s.len:
      check s[i] == i

    # Release the shared buffer so the test does not leak it.
    s.destroy()
    check s.len == 0

  test "init":
    # `var` rather than `let`: `destroy` takes its argument by `var`.
    var s = ShSeq.init([0, 1, 2])
    check:
      s.len == 3
      s[1] == 1

    s.destroy()
    check s.len == 0

  test "cross-thread init/destroy":
    when defined(threads):
      var s: ShSeq[int]

      var t: Thread[ptr ShSeq[int]]

      # The buffer is allocated by the worker thread ...
      proc threadFunc(s: ptr ShSeq[int]) {.thread.} =
        s[].add(2)
        s[].add(1)
        s[].add(0)

      createThread(t, threadFunc, addr s)

      t.joinThread()

      # ... and is read and freed by the thread running the test.
      check:
        s.len == 3
        s[0] == 2
        s[1] == 1
        s[2] == 0

      s.destroy()

      check:
        s.len == 0
    else:
      skip()
