#!/usr/bin/python3
"""Generate Prometheus file_sd target files and a netbox_meta metrics file
from devices and virtual machines stored in a Netbox database."""
import os
import pynetbox
import re
import sys
import yaml

# Map pynetbox record class names to the short type names used in labels.
CLASS_MAP = {
    "Devices": "device",
    "VirtualMachines": "vm",
}

class ConfigBuilder:
    """Collects scrape targets and metadata metrics from Netbox objects
    and writes them out as Prometheus file_sd YAML and exposition text."""

    def __init__(self, nb, filter=None):
        """
        nb: a pynetbox.api instance.
        filter: extra keyword arguments applied to every Netbox query.
                (Name kept for backward compatibility although it shadows
                the builtin; default changed from a shared mutable {}.)
        """
        self.nb = nb
        self.filter = filter if filter is not None else {}
        self.metrics = {}  # {(instance, kind) => {label => value}} static metadata
        self.targets = {}  # {filename => {(labels) => [target]}} targets to be scraped

    def add_target(self, item, filename, labels=None):
        """Record one device/VM as a scrape target in `filename` and
        collect its metadata labels for the netbox_meta metric."""
        if labels is None:
            labels = {}
        if not item.name:
            print("Unnamed item %r" % item, file=sys.stderr)
            return
        kind = CLASS_MAP.get(item.__class__.__name__, item.__class__.__name__)

        # add to prometheus scraping target, keyed by (type + extra labels)
        target_key = tuple([("netbox_type", kind)] + sorted(labels.items()))
        tt = self.targets.setdefault(filename, {}).setdefault(target_key, [])

        if item.primary_ip:
            # Strip the CIDR prefix length; bracket IPv6 addresses.
            addr = re.sub(r'/\d+$', '', item.primary_ip.address)
            if ":" in addr:
                addr = "[" + addr + "]"
            tt.append(item.name + " " + addr)
        else:
            tt.append(item.name)

        # add netbox_meta metric (labels with role, site etc)
        meta = self.metrics.setdefault((item.name, kind), {})
        tenant = getattr(item, "tenant", None)
        if tenant:
            meta["tenant"] = tenant.slug
        # Devices expose "device_role", virtual machines expose "role".
        role = getattr(item, "device_role", getattr(item, "role", None))
        if role:
            meta["role"] = role.slug
        site = getattr(item, "site", None)
        if site:
            meta["site"] = site.slug
        rack = getattr(item, "rack", None)
        if rack:
            meta["rack"] = rack.name  # rack has no slug
        cluster = getattr(item, "cluster", None)
        if cluster:
            meta["cluster"] = cluster.name  # cluster has no slug
        for tag in item.tags:
            meta["tags_" + str(tag)] = "1"

    def add_targets(self, items, filename, labels=None):
        """Add each item once, all with the same fixed set of labels."""
        for item in items:
            self.add_target(item, filename, labels if labels is not None else {})

    def add_targets_cf(self, items, filename, cf_name, param_name):
        """Add a target for each value in a given custom field (the field
        may hold a single value or, for multi-selection, a list)."""
        for item in items:
            cf = getattr(item, 'custom_fields')
            if not cf:
                # Diagnostics go to stderr, consistent with add_target.
                print("Item %r: missing or empty custom_fields" % item, file=sys.stderr)
                continue
            cv = cf.get(cf_name)
            if not cv:
                print("Item %r: missing or empty %s" % (item, cf_name), file=sys.stderr)
                continue
            if not isinstance(cv, list):
                cv = [cv]
            for mod in cv:
                self.add_target(item, filename, {param_name: mod})

    def build(self):
        """
        Here you assemble the netbox things you wish to query and which files to add them to.
        Add queries for the different types of object to be polled.
        """
        self.add_targets(self.nb.dcim.devices.filter(tag="prom_node", **self.filter), "node_targets.yml")
        self.add_targets(self.nb.virtualization.virtual_machines.filter(tag="prom_node", **self.filter), "node_targets.yml")
        self.add_targets_cf(self.nb.dcim.devices.filter(tag="prom_snmp", **self.filter), "snmp_targets.yml", "snmp_module", "module")
        self.add_targets_cf(self.nb.virtualization.virtual_machines.filter(tag="prom_snmp", **self.filter), "snmp_targets.yml", "snmp_module", "module")
        self.add_targets(self.nb.dcim.devices.filter(tag="prom_windows", **self.filter), "windows_targets.yml")
        self.add_targets(self.nb.virtualization.virtual_machines.filter(tag="prom_windows", **self.filter), "windows_targets.yml")
        # TODO: blackbox_targets: should this be on Device/VM or on IPAddress object? And/or Service?

    def replace_file(self, filename, content):
        """Atomically replace `filename` with `content`; do nothing if the
        existing content is already identical (avoids mtime churn that
        would make Prometheus re-read an unchanged file)."""
        try:
            with open(filename) as f:
                if f.read() == content:
                    return
        except FileNotFoundError:
            pass
        with open(filename + ".new", "w") as f:
            f.write(content)
        os.rename(filename + ".new", filename)

    def gen_target_file(self, data):
        """data is a dict of (labels) => [target].
        Sort it so that the output is repeatable."""
        content = [{"labels": dict(labels), "targets": sorted(targets)}
                   for labels, targets in sorted(data.items())]
        return ("# Auto-generated from Netbox, do not edit as your changes will be overwritten!\n"
                + yaml.dump(content, default_flow_style=False))

    def write_targets(self, dir):
        """Write one file_sd YAML file per collected filename into `dir`."""
        for filename, data in self.targets.items():
            self.replace_file(os.path.join(dir, filename), self.gen_target_file(data))

    def write_metrics(self, filename):
        """Write the netbox_meta metrics in Prometheus exposition format."""
        content = ""
        for (instance, kind), labels in sorted(self.metrics.items()):
            content += 'netbox_meta{instance="%s",netbox_type="%s"' % (instance, kind)
            for k, v in labels.items():
                # Sanitize label names; escape backslashes *before* quotes in
                # values, as required by the exposition format (the original
                # escaped only quotes, emitting invalid output for values
                # containing backslashes).
                label = re.sub(r'[^a-zA-Z0-9_]', '_', k)
                value = v.replace('\\', '\\\\').replace('"', '\\"')
                content += ',%s="%s"' % (label, value)
            content += "} 1\n"
        self.replace_file(filename, content)

if __name__ == "__main__":
    API_URL = "https://netbox.example.net"
    API_TOKEN = "XXXXXXXX"
    SITE_TAG = "prometheus"  # we will poll devices in all sites with this tag (and VMs in clusters where the cluster's site has this tag)
    DIR = "/etc/prometheus/targets.d"
    METRICS = "/var/www/html/metrics/netbox"
    # Uncomment when testing:
    #DIR = "/tmp"
    #METRICS = "/tmp/netbox.prom"

    nb = pynetbox.api(API_URL, token=API_TOKEN)
    builder = ConfigBuilder(
        nb=nb,
        filter={
            "exclude": "config_context",
            "site_id": [s.id for s in nb.dcim.sites.filter(tag=SITE_TAG)],
            # This changed in 2.7: https://github.com/netbox-community/netbox/issues/3569
            "status": "active",  # "status": 1,
        },
    )
    builder.build()
    builder.write_targets(DIR)
    builder.write_metrics(METRICS)
https://github.com/netbox-community/netbox/issues/3569 147 | "status": "active", # "status": 1, 148 | }, 149 | ) 150 | builder.build() 151 | builder.write_targets(DIR) 152 | builder.write_metrics(METRICS) 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This project is now obsolete 2 | 3 | *Use [netbox-plugin-prometheus-sd](https://github.com/FlxPeters/netbox-plugin-prometheus-sd) 4 | instead, together with Prometheus 2.28.0+ using HTTP SD and target 5 | relabelling* 6 | 7 | # Netbox Prometheus configuration generator 8 | 9 | This script generates targets files for prometheus from devices and VMs in 10 | the Netbox database. Example: 11 | 12 | ``` 13 | # Auto-generated from Netbox, do not edit as your changes will be overwritten! 14 | - labels: 15 | module: if_mib_secret 16 | netbox_type: device 17 | targets: 18 | - sw1 192.168.1.2 19 | - sw2 192.168.1.3 20 | - labels: 21 | module: mikrotik_secret 22 | netbox_type: device 23 | targets: 24 | - gw 192.168.1.1 25 | ``` 26 | 27 | It writes separate files for each type of target: `node_targets.yml`, 28 | `snmp_targets.yml`, `windows_targets.yml`. 29 | 30 | It also generates synthetic metrics which can be used for 31 | [machine role queries](https://www.robustperception.io/how-to-have-labels-for-machine-roles) 32 | and to add extra labels to alerts: 33 | 34 | ``` 35 | netbox_meta{instance="gw",netbox_type="device",rack="r1",site="dc",tags_prom_snmp="1",role="router"} 1 36 | netbox_meta{instance="sw1",netbox_type="device",rack="r1",site="dc1",tags_prom_snmp="1",role="core-switch"} 1 37 | netbox_meta{instance="sw2",netbox_type="device",rack="r2",site="dc1",tags_prom_snmp="1",role="core-switch"} 1 38 | ``` 39 | 40 | # Installation 41 | 42 | Normally you would install script this on your prometheus server, so that it 43 | can write the targets files directly. 
44 | 45 | Copy the python source file to your prometheus server, e.g. as 46 | `/usr/local/bin/netbox_prometheus.py` 47 | 48 | ## Dependencies 49 | 50 | ``` 51 | apt-get install python3-pip 52 | pip3 install pynetbox 53 | ``` 54 | 55 | ## Netbox Configuration 56 | 57 | ### API token 58 | 59 | In Netbox, create an API token with write disabled. 60 | 61 | Inside the python source file, set API_URL and API_TOKEN to be able to 62 | communicate with Netbox REST API. 63 | 64 | ### Tags 65 | 66 | In your Netbox instance: 67 | 68 | * Add tag "prometheus" onto each of the site(s) where you have things to to poll (*) 69 | * Add tag "prom_node" to each Linux device/VM that you want to poll 70 | * Add tag "prom_windows" to each Windows device/VM that you want to poll 71 | * Add tag "prom_snmp" to each network device that you want to poll 72 | * Ensure that each device or VM that you want to poll has status "Active", 73 | and either has a primary IP address assigned, or its name is resolvable 74 | 75 | Note: the script *requires* all those tags to exist, even if there are no 76 | devices with them, because the Netbox API gives an error if you try to query 77 | non-existent tags. 78 | 79 | Therefore if you don't need `prom_windows` or `prom_snmp`, you still need to 80 | create an unused tag in Netbox (prior to v2.9.0 you had to add it to a 81 | device then remove it again), or else comment out the relevant lines in the 82 | script. 83 | 84 | (*) To scrape Virtual Machines, the *cluster* must be associated with a 85 | site, and that site must have the label "prometheus". Site Groups are 86 | currently not tested, but you can adjust the filter yourself if you wish. 
87 | 88 | ### SNMP configuration 89 | 90 | If you have any SNMP devices to poll, then you need to create a new custom 91 | field as follows: 92 | 93 | * Type: Selection (or Multiple Selection) 94 | * Name: `snmp_module` 95 | * Label: `SNMP Module` 96 | * Content Types: `DCIM > device` and `Virtualization > virtual machine` 97 | * Choices: list of SNMP modules as required, e.g. `if_mib,apcups,synology` 98 | (these refer to modules in your snmp_exporter `snmp.yml`) 99 | 100 | Then select one or more of these choices on each device or VM that you wish 101 | to poll, as well as setting the `prom_snmp` tag. 102 | 103 | (The tag is required to minimise the data returned in the API query; Netbox 104 | does not yet have 105 | [custom field filters](https://github.com/netbox-community/netbox/issues/6615) 106 | such as `cf_snmp_module__empty=0`) 107 | 108 | ## Script setup 109 | 110 | ### Create the output directories 111 | 112 | ``` 113 | mkdir -p /etc/prometheus/targets.d 114 | mkdir -p /var/www/html/metrics 115 | ``` 116 | 117 | If you want the output to go somewhere else, then modify the 118 | relevant constants in the script. 119 | 120 | ### Run the script 121 | 122 | Run the script, check for no errors, and that it creates output files in the 123 | given directories. 124 | 125 | ### Add cronjob 126 | 127 | Create `/etc/cron.d/netbox_prometheus` to keep the files up-to-date: 128 | 129 | ``` 130 | */5 * * * * /usr/local/bin/netbox_prometheus.py 131 | ``` 132 | 133 | Prometheus `file_sd` automatically detects files which change, and doesn't 134 | need to be reloaded. 
135 | 136 | ## Prometheus scrape configuration 137 | 138 | ### Targets 139 | 140 | This script can output targets of the following forms: 141 | 142 | ``` 143 | - foo # name only 144 | - x.x.x.x # IPv4 address only 145 | - foo x.x.x.x # name and IPv4 address 146 | - [dead:beef::] # IPv6 address only 147 | - foo [dead:beef::] # name and IPv6 address 148 | ``` 149 | 150 | The IP addresses come from the "primary" IP address defined in Netbox, and 151 | the name from the device/VM name. This approach allows you to have 152 | [meaningful instance labels](https://www.robustperception.io/controlling-the-instance-label) 153 | like `{instance="foo"}` whilst using IP addresses for targets, avoiding 154 | the need for DNS resolution. 155 | 156 | To use these target files, you will need some relabelling configuration. 157 | 158 | Node Exporter: 159 | 160 | ``` 161 | - job_name: node 162 | scrape_interval: 1m 163 | file_sd_configs: 164 | - files: 165 | - /etc/prometheus/targets.d/node_targets.yml 166 | metrics_path: /metrics 167 | relabel_configs: 168 | # When __address__ consists of just a name or IP address, 169 | # copy it to the "instance" label. Doing this explicitly 170 | # keeps the port number out of the instance label. 
171 | - source_labels: [__address__] 172 | regex: '([^ ]+)' 173 | target_label: instance 174 | 175 | # When __address__ is of the form "name address", extract 176 | # name to "instance" label and address to "__address__" 177 | - source_labels: [__address__] 178 | regex: '(.+) (.+)' 179 | target_label: instance 180 | replacement: '${1}' 181 | - source_labels: [__address__] 182 | regex: '(.+) (.+)' 183 | target_label: __address__ 184 | replacement: '${2}' 185 | 186 | # Append port number to __address__ so that scrape gets 187 | # sent to the right port 188 | - source_labels: [__address__] 189 | target_label: __address__ 190 | replacement: '${1}:9100' 191 | ``` 192 | 193 | Windows exporter is similar (just change the job_name, the filename, and the 194 | replacement port number to 9182). 195 | 196 | SNMP exporter is slightly trickier because the target parameter 197 | cannot contain square brackets around IPv6 addresses. 198 | 199 | ``` 200 | - job_name: snmp 201 | scrape_interval: 1m 202 | file_sd_configs: 203 | - files: 204 | - /etc/prometheus/targets.d/snmp_targets.yml 205 | metrics_path: /snmp 206 | relabel_configs: 207 | # When __address__ consists of just a name or IP address, 208 | # copy it to both the "instance" label (visible to user) 209 | # and "__param_target" (where snmp_exporter sends SNMP) 210 | - source_labels: [__address__] 211 | regex: '([^ ]+)' 212 | target_label: instance 213 | - source_labels: [__address__] 214 | regex: '([^ ]+)' 215 | target_label: __param_target 216 | 217 | # When __address__ is of the form "name address", extract 218 | # name to "instance" label and address to "__param_target" 219 | - source_labels: [__address__] 220 | regex: '(.+) (.+)' 221 | target_label: instance 222 | replacement: '${1}' 223 | - source_labels: [__address__] 224 | regex: '(.+) (.+)' 225 | target_label: __param_target 226 | replacement: '${2}' 227 | 228 | # If __param_target is enclosed by square brackets, remove them 229 | - source_labels: [__param_target] 
230 | regex: '\[(.+)\]' 231 | target_label: __param_target 232 | replacement: '${1}' 233 | 234 | # Copy "module" label to "__param_module" so that snmp_exporter 235 | # receives it as part of the scrape URL 236 | - source_labels: [module] 237 | target_label: __param_module 238 | 239 | # Send the actual scrape to SNMP exporter 240 | - target_label: __address__ 241 | replacement: 127.0.0.1:9116 242 | ``` 243 | 244 | Reload prometheus config and check there are no errors: 245 | 246 | ``` 247 | killall -HUP prometheus 248 | journalctl -eu prometheus 249 | ``` 250 | 251 | See also: 252 | 253 | * https://www.robustperception.io/controlling-the-instance-label 254 | * https://www.robustperception.io/target-labels-are-for-life-not-just-for-christmas/ 255 | * https://www.robustperception.io/reloading-prometheus-configuration 256 | 257 | ### Metadata 258 | 259 | In order to use the metadata metrics, you'll need to expose them using http 260 | (`apt-get install apache2`) and add a scrape job: 261 | 262 | ``` 263 | # Pick up netbox_meta metrics exported from netbox database 264 | - job_name: netbox 265 | metrics_path: /metrics/netbox 266 | scrape_interval: 5m 267 | honor_labels: true 268 | static_configs: 269 | - targets: 270 | - 127.0.0.1:80 271 | ``` 272 | 273 | You can then use queries and alerting rules with extra labels from Netbox, e.g. 274 | 275 | ``` 276 | # Filter based on Netbox attributes 277 | (up == 1) * on (instance) group_left netbox_meta{role="core-switch"} 278 | 279 | # Add extra labels from Netbox 280 | (up == 1) * on (instance) group_left(tenant,role,site,rack,cluster) netbox_meta 281 | ``` 282 | 283 | You can modify the python code to add extra labels, e.g. "platform". 
284 | 285 | See also: 286 | 287 | * [How to have labels for machine roles](https://www.robustperception.io/how-to-have-labels-for-machine-roles) 288 | * [Exposing the software version to prometheus](https://www.robustperception.io/exposing-the-software-version-to-prometheus) 289 | * [Many-to-one and one-to-one vector matches](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches) 290 | 291 | # Complex deployments 292 | 293 | ## Multiple prometheus instances 294 | 295 | You might have multiple prometheus instances. Say prometheus1 should poll 296 | sites A, B and C, while prometheus2 polls sites A (for redundancy), D and E. 297 | 298 | You can control this with the SITE_TAG setting. On the two prometheus 299 | instances run the same script, but one configured with 300 | 301 | ``` 302 | SITE_TAG = "prometheus1" 303 | ``` 304 | 305 | and the other with 306 | 307 | ``` 308 | SITE_TAG = "prometheus2" 309 | ``` 310 | 311 | Then in Netbox, tag sites A, B and C with "prometheus1", and sites A, D and 312 | E with "prometheus2". The correct targets will be generated for each 313 | prometheus instance. 314 | --------------------------------------------------------------------------------