├── .github └── workflows │ └── automated-build.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── api_endpoints.yaml ├── config.yaml.example ├── docker-compose.yaml ├── requirements.txt └── routeros-rest-exporter.py /.github/workflows/automated-build.yaml: -------------------------------------------------------------------------------- 1 | name: Automated Build 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - '*' 8 | 9 | jobs: 10 | automated-build: 11 | uses: jpetazzo/workflows/.github/workflows/automated-build.yaml@main 12 | secrets: 13 | DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }} 14 | with: 15 | DOCKER_HUB_USERNAME: enixsas 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | config.yaml 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN mkdir /routeros-rest-exporter 4 | WORKDIR /routeros-rest-exporter 5 | 6 | COPY ./requirements.txt ./ 7 | RUN pip install -r requirements.txt 8 | 9 | COPY ./* ./ 10 | 11 | EXPOSE 9100/tcp 12 | 13 | ENTRYPOINT ["python", "/routeros-rest-exporter/routeros-rest-exporter.py", "-e", "/routeros-rest-exporter/api_endpoints.yaml"] 14 | CMD ["-c", "/routeros-rest-exporter/config.yaml"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Enix SAS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RouterOS REST Exporter 2 | 3 | [![License MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 4 | [![Brought by 
Enix](https://img.shields.io/badge/Brought%20to%20you%20by-ENIX-%23377dff?labelColor=888&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAQAAAC1QeVaAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAAAmJLR0QA/4ePzL8AAAAHdElNRQfkBAkQIg/iouK/AAABZ0lEQVQY0yXBPU8TYQDA8f/zcu1RSDltKliD0BKNECYZmpjgIAOLiYtubn4EJxI/AImzg3E1+AGcYDIMJA7lxQQQQRAiSSFG2l457+655x4Gfz8B45zwipWJ8rPCQ0g3+p9Pj+AlHxHjnLHAbvPW2+GmLoBN+9/+vNlfGeU2Auokd8Y+VeYk/zk6O2fP9fcO8hGpN/TUbxpiUhJiEorTgy+6hUlU5N1flK+9oIJHiKNCkb5wMyOFw3V9o+zN69o0Exg6ePh4/GKr6s0H72Tc67YsdXbZ5gENNjmigaXbMj0tzEWrZNtqigva5NxjhFP6Wfw1N1pjqpFaZQ7FAY6An6zxTzHs0BGqY/NQSnxSBD6WkDRTf3O0wG2Ztl/7jaQEnGNxZMdy2yET/B2xfGlDagQE1OgRRvL93UOHqhLnesPKqJ4NxLLn2unJgVka/HBpbiIARlHFq1n/cWlMZMne1ZfyD5M/Aa4BiyGSwP4Jl3UAAAAldEVYdGRhdGU6Y3JlYXRlADIwMjAtMDQtMDlUMTQ6MzQ6MTUrMDI6MDDBq8/nAAAAJXRFWHRkYXRlOm1vZGlmeQAyMDIwLTA0LTA5VDE0OjM0OjE1KzAyOjAwsPZ3WwAAAABJRU5ErkJggg==)](https://enix.io) 5 | 6 | A Prometheus exporter for Mikrotik's RouterOS that uses the recent REST API and can be easily extended to support more metrics. 7 | 8 | ## How does this compare to other Mikrotik RouterOS exporters ? 9 | 10 | Unlike other exporters available, this exporter allows you to easily customize which data are queried on the target, and thus exported to Promeheus. 11 | 12 | The goal is to cover specific use-cases where you need an obscure metric, and to reduce the load on routers by allowing you to remove unneeded queries. 13 | 14 | Additionnaly, this exporter uses the more recent REST API and not Mikrotik's custom binary API. Therefore, the code does not depend on any client library other than Python's well-known `requests` to query a target. 15 | 16 | ## Usage 17 | 18 | ``` 19 | $ ./routeros-rest-exporter.py --help 20 | usage: routeros-rest-exporter.py [-h] [-c CONFIG] [-e ENDPOINTS] 21 | 22 | Launch a Prometheus Exporter exposing metrics from Mikrotik RouterOS devices via their REST API. 
23 | 24 | options: 25 | -h, --help show this help message and exit 26 | -c CONFIG, --config CONFIG 27 | YAML config file containing targets and credentials 28 | -e ENDPOINTS, --endpoints ENDPOINTS 29 | YAML config file containing API endpoints to query and what metrics to export 30 | ``` 31 | 32 | ## Configuration 33 | 34 | The exporter is configured using a YAML file. Here is an example : 35 | 36 | ```yaml 37 | global: 38 | listen_port: 9100 # TCP port the exporter will bind to and expose the HTTP interface 39 | interval: 300 # Polling interval, in seconds 40 | custom_host_labels: # Custom host-level labels, see below 41 | - tenant 42 | - role 43 | defaults: 44 | username: prometheus # The user to use to connect to the API 45 | password: supersecurep4ssw0rd # The password to use to connect to the API 46 | password_file: /etc/routeros-rest-exporter/password # File where the password will be retrieved. `password` takes precedence over this. 47 | port: 443 # HTTPS port where the routerOS API listens 48 | allow_insecure: false # Allow self-signed API certificate 49 | timeout: 5 # API query timeout, in seconds 50 | targets: # List of Mikrotik RouterOS devices to query 51 | - name: router-1.example.com # Name of the device 52 | hostname: 198.51.100.1 # IP or hostname to connect to. If absent, `name` will be used 53 | tenant: customer1 # Value of the custom host-level label `tenant` 54 | role: primary # Value of the custom host-level label `role` 55 | ``` 56 | 57 | ### Defaults 58 | 59 | The following parameters can be defined at target level or in the `defaults` section : 60 | - `username` 61 | - `password` 62 | - `password_file` 63 | - `port` 64 | - `allow_insecure` 65 | - `timeout` 66 | 67 | They are all mandatory. 68 | 69 | ### Custom host labels 70 | 71 | This exporter supports adding arbitrary labels to metrics, with values identical for all metrics of a single host. 
The labels' names must be defined in `global.custom_host_labels`, and their values must be set either in each target or in the `defaults`. 72 | 73 | Custom host labels is the appropriate place to add host metadata such as a `tenant`, or whether the target is a primary or secondary device when active-passive redundancy is used. This is useful when later designing alerting rules for instance. 74 | 75 | ## RouterOS configuration 76 | 77 | Here is the required configuration to enter your RouterOS device. Please note that the `service` corresponding to the REST API used by this exporter is `www-ssl` and not the older `api` or `api-ssl`, which can be disabled. 78 | 79 | The `www-ssl` service requires a TLS certificate. These commands generate a self-signed certificate. You should consider using a real one, but it is outside the scope of this documentation. 80 | 81 | ``` 82 | /user/group/add name=api policy=read,api,rest-api,!local,!telnet,!ssh,!ftp,!reboot,!write,!policy,!test,!winbox,!password,!web,!sniff,!sensitive,!romon,!dude 83 | 84 | /user/add name=prometheus password="CHANGEME" group=api 85 | 86 | /certificate add name=LocalCA common-name=LocalCA key-usage=key-cert-sign,crl-sign 87 | /certificate sign LocalCA 88 | /certificate add name=Mikrotik common-name=Mikrotik key-usage=tls-server 89 | /certificate sign ca=LocalCA Mikrotik 90 | /certificate set trusted=yes LocalCA 91 | /certificate set trusted=yes Mikrotik 92 | 93 | /ip/service/set www-ssl certificate=Mikrotik disabled=no 94 | ``` 95 | 96 | ## API endpoints and metrics 97 | 98 | Queried API endpoints and their corresponding metrics are defined in the YAML files passed as `-e` or `--endpoints` (with a default in `/etc/routeros-rest-exporter/api_endpoints.yaml`). 99 | The provided `api_endpoints.yaml` contains a decent starting set of metrics that can be easily customized if needed. 
100 | 101 | Each metric is defined as an element of the dictionary `endpoints`, as such : 102 | 103 | ```yaml 104 | system/resource/cpu: # RouterOS REST API HTTP endpoint 105 | metrics: # values to retrieve from the REST response and to expose as prometheus metrics 106 | - name: load 107 | - name: disk 108 | - name: irq 109 | labels: # values to retrieve from the REST response and to expose as metric labels 110 | - name: cpu 111 | ``` 112 | 113 | Here is an example of the corresponding REST API response from a RouterOS device : 114 | 115 | ``` 116 | [{'.id': '*0', 'cpu': 'cpu0', 'disk': '0', 'irq': '31', 'load': '31'}, 117 | {'.id': '*1', 'cpu': 'cpu1', 'disk': '0', 'irq': '30', 'load': '33'}] 118 | ``` 119 | And the corresponding exported prometheus metrics : 120 | 121 | ``` 122 | # HELP routeros_system_resource_cpu_load Mikrotik RouterOS metric 'load' under 'system/resource/cpu' 123 | # TYPE routeros_system_resource_cpu_load gauge 124 | routeros_system_resource_cpu_load{cpu="cpu0",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 31.0 125 | routeros_system_resource_cpu_load{cpu="cpu1",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 33.0 126 | # HELP routeros_system_resource_cpu_disk Mikrotik RouterOS metric 'disk' under 'system/resource/cpu' 127 | # TYPE routeros_system_resource_cpu_disk gauge 128 | routeros_system_resource_cpu_disk{cpu="cpu0",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 0.0 129 | routeros_system_resource_cpu_disk{cpu="cpu1",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 0.0 130 | # HELP routeros_system_resource_cpu_irq Mikrotik RouterOS metric 'irq' under 'system/resource/cpu' 131 | # TYPE routeros_system_resource_cpu_irq gauge 132 | routeros_system_resource_cpu_irq{cpu="cpu0",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 31.0 133 | 
routeros_system_resource_cpu_irq{cpu="cpu1",hostname="198.51.100.1",name="router-1.example.com",role="primary",tenant="customer1"} 30.0 134 | ``` 135 | 136 | You can easily see what an API response looks like by starting a Python shell and querying a target like this : 137 | 138 | ```python 139 | import requests; requests.get("https://198.51.100.1:443/rest/system/resource/cpu", auth=('user','pass'), verify=False, timeout=5).json() 140 | ``` 141 | 142 | Single-item API endpoints (such as `ip/ipsec/statistics`), i.e. responses that do not take the form of a list (list of CPUs, list of interfaces...), are automatically handled. Internally, they are converted to a list with a single item. Metric-level labels may not be appropriate for these metrics since there is nothing to discriminate. 143 | 144 | You can get more information on RouterOS' REST API in the [documentation](https://help.mikrotik.com/docs/display/ROS/REST+API). 145 | 146 | ### Metric types 147 | 148 | Metrics can have different types, depending on what they represent, and how they should be exported to prometheus. 149 | 150 | #### Gauge (default) 151 | 152 | ```yaml 153 | system/resource: 154 | metrics: 155 | - name: free-hdd-space 156 | type: gauge 157 | ``` 158 | 159 | The default metric type, gauge, is suitable for a simple integer counter. It produces a prometheus metric of the same type. 160 | 161 | #### Enum 162 | 163 | ```yaml 164 | ip/ipsec/policy: 165 | metrics: 166 | - name: ph2-state 167 | type: enum 168 | enum: 169 | - established 170 | - expired 171 | - no-phase2 172 | labels: 173 | - name: .id 174 | prom_name: policy_id 175 | - name: dst-address 176 | prom_name: dst_address 177 | - name: src-address 178 | prom_name: src_address 179 | ``` 180 | 181 | Suitable for an API response with text values. Creates a metric of type "Enum", with fixed possible values defined in `enum`, effectively exposing one prometheus metric per possible value, one of which has a value of `1.0` and the others `0.0`. 
182 | 183 | ``` 184 | # HELP routeros_ip_ipsec_policy_ph2_state Mikrotik RouterOS metric 'ph2-state' under 'ip/ipsec/policy' 185 | # TYPE routeros_ip_ipsec_policy_ph2_state gauge 186 | routeros_ip_ipsec_policy_ph2_state{dst_address="10.1.0.0/16",hostname="198.51.100.1",name="router-1.example.com",policy_id="*1000000",routeros_ip_ipsec_policy_ph2_state="established",src_address="10.2.0.0/24"} 1.0 187 | routeros_ip_ipsec_policy_ph2_state{dst_address="10.1.0.0/16",hostname="198.51.100.1",name="router-1.example.com",policy_id="*1000000",routeros_ip_ipsec_policy_ph2_state="expired",src_address="10.2.0.0/24"} 0.0 188 | routeros_ip_ipsec_policy_ph2_state{dst_address="10.1.0.0/16",hostname="198.51.100.1",name="router-1.example.com",policy_id="*1000000",routeros_ip_ipsec_policy_ph2_state="no-phase2",src_address="10.2.0.0/24"} 0.0 189 | ``` 190 | 191 | #### Mapping 192 | 193 | ```yaml 194 | ip/ipsec/policy: 195 | metrics: 196 | - name: ph2-state 197 | type: mapping 198 | mapping: 199 | established: 0 200 | expired: 1 201 | no-phase2: 2 202 | labels: 203 | - name: .id 204 | prom_name: policy_id 205 | - name: dst-address 206 | prom_name: dst_address 207 | - name: src-address 208 | prom_name: src_address 209 | ``` 210 | 211 | Also suitable for API response with text value, maybe easier than an enum to integrate into a Grafana dashboard, creates one metric of type Gauge, where each possible value is represented by a different integer. These text-to-integer mappings are defined in `mapping`. 
212 | 213 | ``` 214 | # HELP routeros_ip_ipsec_policy_ph2_state Mikrotik RouterOS metric 'ph2-state' under 'ip/ipsec/policy' 215 | # TYPE routeros_ip_ipsec_policy_ph2_state gauge 216 | routeros_ip_ipsec_policy_ph2_state{dst_address="10.1.0.0/16",hostname="198.51.100.1",name="router-1.example.com",policy_id="*1000000",src_address="10.2.0.0/24"} 0.0 217 | ``` 218 | 219 | ### Labels 220 | 221 | #### prom_name 222 | 223 | ```yaml 224 | ip/firewall/nat: 225 | metrics: 226 | - name: bytes 227 | labels: 228 | - name: .id 229 | prom_name: rule_id 230 | - name: log-prefix 231 | prom_name: log_prefix 232 | ``` 233 | 234 | Sometimes API response items destined to be used as label values can have non explicit names (such as `.id` or `name`) and/or contain forbidden characters (such as `-`). In that case, you can specify a `prom_name` besides the label's `name` to be used as the label name in the exported metrics. 235 | 236 | #### special: index 237 | 238 | ```yaml 239 | ip/firewall/filter: 240 | metrics: 241 | - name: packets 242 | labels: 243 | - name: order 244 | special: index 245 | ``` 246 | 247 | This label is not derived from the API response's values, but from the position of the item in the returned list. In this `ip/firewall/filter` example, it is used to denote the order of each firewalling rule (unfortunately, `.id` is not useful in this matter) as they are presented by the API (and as they are evaluated by RouterOS). 248 | 249 | ### API reachability metric 250 | 251 | This exporter also generates one metric, `routeros_api_unreachable`, which is a counter of each time an HTTPS query was unsuccessful (regardless of the reason) on the target. 252 | 253 | ## Using docker 254 | 255 | ### Building an image 256 | 257 | You can build and run a docker image of this exporter using the provided dockerfile. It will embed the `api_endpoints.yaml` present in the repository. 
You may also create a `config.yaml` file at the root of the repository if you want to embed a config into the image. Alternatively, you can provide a configuration file with another mechanism (e.g. bind mount, Kubernetes configmap, etc.). 258 | 259 | ### Using automatically built images 260 | 261 | Images available on the Docker Hub (`enix/routeros-rest-exporter`) and on GitHub Container Registry (`ghcr.io/enix/routeros-rest-exporter`) are automatically built on each tagged version of this repository. They use the provided `api_endpoints.yaml` but do not embed any configuration. 262 | 263 | To run it, you can use the provided `docker-compose.yaml` file, which mounts a `config.yaml` it expects to find alongside itself. 264 | 265 | To start the latest version of the exporter in the background and immediately start displaying its log output : 266 | ``` 267 | docker compose pull 268 | docker compose up -d && docker compose logs -f 269 | ``` 270 | 271 | To stop it : 272 | ``` 273 | docker compose down 274 | ``` 275 | 276 | Currently, the exporter cannot be configured using environment variables. 
-------------------------------------------------------------------------------- /api_endpoints.yaml: -------------------------------------------------------------------------------- 1 | endpoints: 2 | system/resource/cpu: 3 | metrics: 4 | - name: load 5 | - name: disk 6 | - name: irq 7 | labels: 8 | - name: cpu 9 | system/resource: 10 | metrics: 11 | - name: free-hdd-space 12 | - name: total-hdd-space 13 | - name: free-memory 14 | - name: total-memory 15 | interface: 16 | metrics: 17 | - name: fp-rx-byte 18 | - name: fp-rx-packet 19 | - name: fp-tx-byte 20 | - name: fp-tx-packet 21 | - name: rx-byte 22 | - name: rx-drop 23 | - name: rx-error 24 | - name: rx-packet 25 | - name: tx-byte 26 | - name: tx-drop 27 | - name: tx-error 28 | - name: tx-packet 29 | - name: tx-queue-drop 30 | labels: 31 | - name: name 32 | prom_name: interface_name 33 | - name: comment 34 | ip/firewall/filter: 35 | metrics: 36 | - name: bytes 37 | - name: packets 38 | labels: 39 | - name: .id 40 | prom_name: rule_id 41 | - name: chain 42 | - name: action 43 | - name: comment 44 | - name: log-prefix 45 | prom_name: log_prefix 46 | - name: order 47 | special: index 48 | ip/firewall/nat: 49 | metrics: 50 | - name: bytes 51 | - name: packets 52 | labels: 53 | - name: .id 54 | prom_name: rule_id 55 | - name: chain 56 | - name: action 57 | - name: comment 58 | - name: log-prefix 59 | prom_name: log_prefix 60 | - name: order 61 | special: index 62 | ip/ipsec/policy: 63 | metrics: 64 | - name: ph2-count 65 | - name: ph2-state 66 | type: mapping 67 | mapping: 68 | established: 0 69 | expired: 1 70 | no-phase2: 2 71 | - name: active 72 | type: mapping 73 | mapping: 74 | "true": 0 75 | "false": 1 76 | labels: 77 | - name: .id 78 | prom_name: policy_id 79 | - name: dst-address 80 | prom_name: dst_address 81 | - name: src-address 82 | prom_name: src_address 83 | ip/ipsec/statistics: 84 | metrics: 85 | - name: in-buffer-errors 86 | - name: in-errors 87 | - name: in-header-errors 88 | - name: in-no-policies 
89 | - name: in-no-states 90 | - name: in-policy-blocked 91 | - name: in-policy-errors 92 | - name: in-state-expired 93 | - name: in-state-invalid 94 | - name: in-state-mismatches 95 | - name: in-state-mode-errors 96 | - name: in-state-protocol-errors 97 | - name: in-state-sequence-errors 98 | - name: in-template-mismatches 99 | - name: out-bundle-check-errors 100 | - name: out-bundle-errors 101 | - name: out-errors 102 | - name: out-no-states 103 | - name: out-policy-blocked 104 | - name: out-policy-dead 105 | - name: out-policy-errors 106 | - name: out-state-expired 107 | - name: out-state-mode-errors 108 | - name: out-state-protocol-errors 109 | - name: out-state-sequence-errors 110 | -------------------------------------------------------------------------------- /config.yaml.example: -------------------------------------------------------------------------------- 1 | global: 2 | listen_port: 9100 3 | interval: 300 4 | custom_host_labels: 5 | - tenant 6 | - role 7 | defaults: 8 | username: prometheus 9 | password: supersecurep4ssw0rd 10 | port: 443 11 | allow_insecure: false 12 | timeout: 5 13 | targets: 14 | - name: router-1.example.com 15 | hostname: 198.51.100.1 16 | tenant: customer1 17 | role: primary 18 | - name: router-2.example.com 19 | hostname: 198.51.100.2 20 | tenant: customer2 21 | role: secondary -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | 4 | routeros-rest-exporter: 5 | image: enix/routeros-rest-exporter 6 | ports: 7 | - 9100:9100 8 | volumes: 9 | - ${PWD}/config.yaml:/routeros-rest-exporter/config.yaml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | prometheus_client 3 | pyyaml 
#!/usr/bin/env python
# coding: utf-8
"""Prometheus exporter for Mikrotik RouterOS devices, polling their REST API.

Targets and credentials are read from a YAML config file, and the API
endpoints to query (with the metrics and labels to extract from each
response) from a second YAML file. Every configured target is polled in a
loop, and the results are exposed through the prometheus_client HTTP server.
"""
from argparse import ArgumentParser
import sys
import logging
from signal import signal, SIGTERM

from time import sleep, time
import requests
from urllib3 import disable_warnings
from urllib3.exceptions import InsecureRequestWarning
import yaml
from prometheus_client import start_http_server
from prometheus_client import Counter, Gauge, Enum


PROM_PREFIX = "routeros_"  # Every metric name will be prefixed with this

# One of these two is enough to authenticate, so each of them may be left unset
_CREDENTIAL_PARAMETERS = ("password", "password_file")


logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def terminate(*_):
    """Signal handler: log the reception of SIGTERM and exit with status 0."""
    logger.info("Received SIGTERM, exiting.")
    sys.exit(0)


def get_metric_prom_name(api_path, api_name):
    """Return the Prometheus metric name for `api_name` found under `api_path`.

    The name is PROM_PREFIX, then the API path with "/" and "-" replaced by
    "_", then "_", then the metric name with "-" replaced by "_".
    """
    return (
        PROM_PREFIX
        + api_path.replace("/", "_").replace("-", "_")
        + "_"
        + api_name.replace("-", "_")
    )


def _load_yaml_file(path, description):
    """Parse the YAML file at `path`; raise ValueError if it holds no document."""
    # Use the module logger: the root logger is left at WARNING, so
    # logging.info() would silently drop this message.
    logger.info("Loading %s at %s", description, path)
    with open(path, "r", encoding="utf-8") as file:
        content = yaml.safe_load(file)
    if content is None:
        raise ValueError(f"{description[0].upper()}{description[1:]} {path} is empty")
    return content


def _read_password_file(path):
    """Return the first line of the file at `path`; raise ValueError if it is empty."""
    with open(path, "r", encoding="utf-8") as file:
        lines = file.read().splitlines()
    if not lines:
        raise ValueError(f"Password file {path} is empty")
    return lines[0]


def _build_targets(config, custom_host_labels):
    """Build the list of targets, filling unset parameters from the defaults.

    Returns one dict per entry of config["targets"], holding "name",
    "hostname" (falling back to the name), the connection parameters and the
    custom host labels. Raises ValueError when a parameter is set neither on
    the target nor in the defaults, or when no password can be determined.
    """
    defaults = config.get("defaults") or {}

    # These are the metadata (i.e. connection parameters, host labels, etc.)
    # that can be defined either in the defaults or in each target
    defaultable_parameters = [
        "username",
        "password_file",
        "password",
        "port",
        "allow_insecure",
        "timeout",
    ] + custom_host_labels

    targets = []
    for target_config in config["targets"]:
        target_name = target_config["name"]
        logger.info("Building parameters and metadata for %s", target_name)
        target = {
            "name": target_name,
            "hostname": target_config.get("hostname", target_name),
        }

        for parameter in defaultable_parameters:
            value = target_config.get(parameter)
            if value is None:
                if parameter in defaults:
                    value = defaults[parameter]
                    logger.debug(
                        "%s : setting %s from defaults", target_name, parameter
                    )
                elif parameter in _CREDENTIAL_PARAMETERS:
                    # Handled below, since one can be unset if the other is set
                    continue
                else:
                    raise ValueError(
                        f"You need to set the config attribute '{parameter}', on the target {target_name} or in the defaults"
                    )
            target[parameter] = value

        # `password` takes precedence over `password_file`
        if target.get("password") is None:
            if target.get("password_file") is None:
                raise ValueError(
                    f'You need to set either the config attributes "password" or "password_file", on the target {target_name} or in the defaults'
                )
            target["password"] = _read_password_file(target["password_file"])

        targets.append(target)

    return targets


def _create_metrics(endpoints, host_labels):
    """Create one Prometheus metric per metric declared in `endpoints`.

    Returns a dict mapping the Prometheus metric name to a Gauge (types
    "gauge" and "mapping") or an Enum (type "enum"). Raises ValueError on any
    other metric type, so a typo is reported at startup.
    """
    exported_metrics = {}
    for path, endpoint in endpoints.items():
        # Labels carrying a "prom_name" are exported under that name instead of "name"
        normalized_labels = [
            label.get("prom_name", label["name"])
            for label in endpoint.get("labels", [])
        ]
        labelnames = host_labels + normalized_labels

        for metric in endpoint["metrics"]:
            metric_name = get_metric_prom_name(path, metric["name"])
            metric_type = metric.get("type", "gauge")  # Default metric type is a Gauge
            documentation = f"Mikrotik RouterOS metric '{metric['name']}' under '{path}'"

            if metric_type in ("gauge", "mapping"):
                exported_metrics[metric_name] = Gauge(
                    metric_name, documentation, labelnames=labelnames
                )
            elif metric_type == "enum":
                exported_metrics[metric_name] = Enum(
                    metric_name,
                    documentation,
                    labelnames=labelnames,
                    states=metric["enum"],
                )
            else:
                raise ValueError(
                    f"Unknown metric type '{metric_type}' for metric '{metric['name']}' under '{path}'"
                )
    return exported_metrics


def _extract_labels(endpoint, target_labels, index, data):
    """Return the full labelset (host labels + endpoint labels) of one response item."""
    extracted_labels = target_labels.copy()
    for label in endpoint.get("labels", []):
        # If we have a label name more suitable for prom, use it
        label_prom_name = label.get("prom_name", label["name"])
        if label.get("special") == "index":
            # "Meta-label": the position of the item in the returned list
            extracted_labels[label_prom_name] = index
        else:
            # If the label value is not present in the API response, default to ""
            extracted_labels[label_prom_name] = data.get(label["name"], "")
    return extracted_labels


def _update_metric(prom_metric, metric, labels, value, path, target_name):
    """Store `value` in `prom_metric` for the given labelset.

    The value is validated before `labels()` is called, so a rejected value
    does not leave an empty child behind. Returns True if the metric was
    updated, False if the value was rejected (an error is logged).
    """
    metric_type = metric.get("type", "gauge")

    if metric_type == "enum":
        if value not in metric["enum"]:
            logger.error(
                "Unknown state for %s - %s from %s : got '%s' which is not in the enum",
                path,
                metric["name"],
                target_name,
                value,
            )
            return False
        prom_metric.labels(**labels).state(value)
        return True

    if metric_type == "mapping":
        mapped_value = metric["mapping"].get(value)
        if mapped_value is None:
            logger.error(
                "Unknown mapping for %s - %s from %s : got '%s' which is not in the mappings",
                path,
                metric["name"],
                target_name,
                value,
            )
            return False
        value = mapped_value

    try:
        numeric_value = float(value)
    except (TypeError, ValueError):
        logger.error(
            "Non-numeric value for %s - %s from %s : got '%s'",
            path,
            metric["name"],
            target_name,
            value,
        )
        return False
    prom_metric.labels(**labels).set(numeric_value)
    return True


def _poll_target(
    target, endpoints, host_labels, exported_metrics, api_unreachable, labelsets_current
):
    """Query every endpoint of one target and update the exported metrics.

    Each updated labelset is appended to `labelsets_current`. On the first
    failed request (connection error, HTTP error status or non-JSON body) the
    `api_unreachable` counter is incremented and the remaining endpoints of
    this target are skipped.
    """
    logger.info("Starting polling for %s", target["name"])

    # Extract host-level labels with their values
    target_labels = {label: target[label] for label in host_labels}

    auth = (target["username"], target["password"])
    verify = not target["allow_insecure"]

    for path, endpoint in endpoints.items():
        url = f"https://{target['hostname']}:{target['port']}/rest/{path}"
        logger.info("Polling %s", url)
        try:
            resp = requests.get(
                url, auth=auth, verify=verify, timeout=target["timeout"]
            )
            resp.raise_for_status()
            items = resp.json()
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: whatever went wrong, a single target must not stop the exporter
            logger.error(exc)
            logger.error(
                "Error while requesting %s, skipping this target.",
                target["name"],
            )
            api_unreachable.labels(**target_labels).inc()
            break

        # If we are at a single endpoint (e.g. ip/ipsec/statistics), simulate a list
        if not isinstance(items, list):
            items = [items]

        # Loop through all the items (interfaces, cpus, firewall rules, etc)
        for index, data in enumerate(items):
            extracted_labels = _extract_labels(endpoint, target_labels, index, data)

            for metric in endpoint["metrics"]:
                # If the item does not contain our desired metric, just skip it
                # E.g. the default IPSEC policy does not have phase 2 count
                if metric["name"] not in data:
                    continue

                metric_name = get_metric_prom_name(path, metric["name"])
                updated = _update_metric(
                    exported_metrics[metric_name],
                    metric,
                    extracted_labels,
                    data[metric["name"]],
                    path,
                    target["name"],
                )
                if updated:
                    labelsets_current[metric_name].append(extracted_labels)

    logger.info("Finished polling %s", target["name"])


def _prune_stale_labelsets(exported_metrics, labelsets_known, labelsets_current):
    """Remove from the exported metrics every known labelset not seen this cycle.

    `labelsets_known` is updated in place so that it only keeps the labelsets
    still present in `labelsets_current`.
    """
    for metric_name, known in labelsets_known.items():
        current = labelsets_current[metric_name]
        still_valid = []
        for known_labelset in known:
            if known_labelset in current:
                still_valid.append(known_labelset)
                continue
            logger.info(
                "Removing labelset %s for metric %s", known_labelset, metric_name
            )
            # Values are in insertion order, i.e. host labels then endpoint
            # labels, which is the order the metric's labelnames were declared in
            exported_metrics[metric_name].remove(*known_labelset.values())
        # Rebuild the list rather than calling remove() on it while iterating,
        # which would skip the entry following each removed one
        known[:] = still_valid


def _remember_labelsets(labelsets_known, labelsets_current):
    """Add the labelsets retrieved during this cycle to the known ones."""
    for metric_name, labelsets in labelsets_current.items():
        known = labelsets_known[metric_name]
        for current_labelset in labelsets:
            if current_labelset not in known:
                known.append(current_labelset)


def main():
    """Parse the arguments, load the configuration and poll the targets forever."""
    signal(SIGTERM, terminate)

    parser = ArgumentParser(
        description="Launch a Prometheus Exporter exposing metrics from Mikrotik RouterOS devices via their REST API."
    )
    parser.add_argument(
        "-c",
        "--config",
        default="/etc/routeros-rest-exporter/config.yaml",
        help="YAML config file containing targets and credentials",
    )
    parser.add_argument(
        "-e",
        "--endpoints",
        default="/etc/routeros-rest-exporter/api_endpoints.yaml",
        help="YAML config file containing API endpoints to query and what metrics to export",
    )
    args = parser.parse_args()

    # Disable unverified certificate warning. If we request a self-signed API, that's on purpose
    disable_warnings(InsecureRequestWarning)

    config = _load_yaml_file(args.config, "config file")
    endpoints = _load_yaml_file(args.endpoints, "API endpoints file")["endpoints"]

    interval = int(config["global"]["interval"])
    prom_port = int(config["global"]["listen_port"])
    custom_host_labels = list(config["global"].get("custom_host_labels") or [])

    # These are the host labels, i.e. what target metadata will be exposed to prometheus
    host_labels = ["hostname", "name"] + custom_host_labels

    targets = _build_targets(config, custom_host_labels)
    exported_metrics = _create_metrics(endpoints, host_labels)

    # This holds each labelset PREVIOUSLY KNOWN for each metric. At each poll cycle it is compared with
    # the retrieved labelsets, in order to clear metrics for removed FW rules, interfaces, etc.
    # The reachability counter is kept out of it by design, so that it is never cleared.
    labelsets_known = {key: [] for key in exported_metrics}

    api_unreachable = Counter(
        PROM_PREFIX + "api_unreachable", "Number of failed API requests", host_labels
    )

    logger.info("Starting the HTTP server on port %s", prom_port)
    start_http_server(prom_port)

    # Fetch metrics from routers
    while True:
        start_time = time()

        # Same as labelsets_known but only holds labelsets retrieved during this poll cycle
        labelsets_current = {key: [] for key in exported_metrics}

        for target in targets:
            _poll_target(
                target,
                endpoints,
                host_labels,
                exported_metrics,
                api_unreachable,
                labelsets_current,
            )

        _prune_stale_labelsets(exported_metrics, labelsets_known, labelsets_current)
        _remember_labelsets(labelsets_known, labelsets_current)

        elapsed_time = int(time() - start_time)
        sleep_time = max(interval - elapsed_time, 0)

        logger.info(
            "Polling finished for all devices. It took %s secs, so going to sleep for %s secs",
            elapsed_time,
            sleep_time,
        )
        sleep(sleep_time)


if __name__ == "__main__":
    main()