├── netbox_prometheus.py
└── README.md


/netbox_prometheus.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | import os
  3 | import pynetbox
  4 | import re
  5 | import sys
  6 | import yaml
  7 | 
  8 | CLASS_MAP = {
  9 |     "Devices": "device",
 10 |     "VirtualMachines": "vm",
 11 | }
 12 | 
 13 | class ConfigBuilder:
 14 |     def __init__(self, nb, filter={}):
 15 |         self.nb = nb
 16 |         self.filter = filter
 17 |         self.metrics = {}   #  {(instance, kind) => {label=>value}}  # static metadata
 18 |         self.targets = {}   #  {filename => (labels) => [target]}    # targets to be scraped
 19 | 
 20 |     def add_target(self, item, filename, labels={}):
 21 |         if not item.name:
 22 |             print("Unnamed item %r" % item, file=sys.stderr)
 23 |             return
 24 |         kind = CLASS_MAP.get(item.__class__.__name__, item.__class__.__name__)
 25 | 
 26 |         # add to prometheus scraping target
 27 |         target_key = tuple([("netbox_type",kind)] + sorted(labels.items()))
 28 |         self.targets.setdefault(filename, {})
 29 |         tf = self.targets[filename]
 30 |         tf.setdefault(target_key, [])
 31 |         tt = tf[target_key]
 32 | 
 33 |         if item.primary_ip:
 34 |             addr = re.sub(r'/\d+$', '', item.primary_ip.address)
 35 |             if ":" in addr:
 36 |                 addr = "[" + addr + "]"
 37 |             tt.append(item.name + " " + addr)
 38 |         else:
 39 |             tt.append(item.name)
 40 | 
 41 |         # add netbox_meta metric (label with role, site etc)
 42 |         metric_key = (item.name, kind)
 43 |         self.metrics.setdefault(metric_key, {})
 44 |         tenant = getattr(item, "tenant", None)
 45 |         if tenant:
 46 |             self.metrics[metric_key]["tenant"] = tenant.slug
 47 |         role = getattr(item, "device_role", getattr(item, "role", None))
 48 |         if role:
 49 |             self.metrics[metric_key]["role"] = role.slug
 50 |         site = getattr(item, "site", None)
 51 |         if site:
 52 |             self.metrics[metric_key]["site"] = site.slug
 53 |         rack = getattr(item, "rack", None)
 54 |         if rack:
 55 |             self.metrics[metric_key]["rack"] = rack.name # rack has no slug
 56 |         cluster = getattr(item, "cluster", None)
 57 |         if cluster:
 58 |             self.metrics[metric_key]["cluster"] = cluster.name # cluster has no slug
 59 |         for tag in item.tags:
 60 |             self.metrics[metric_key]["tags_"+str(tag)] = "1"
 61 | 
 62 |     def add_targets(self, items, filename, labels={}):
 63 |         """Add a target once"""
 64 |         for item in items:
 65 |             self.add_target(item, filename, labels)
 66 | 
 67 |     def add_targets_cf(self, items, filename, cf_name, param_name):
 68 |         """Add a target for each value in a given custom field"""
 69 |         for item in items:
 70 |             cf = getattr(item, 'custom_fields')
 71 |             if not cf:
 72 |                 print("Item %r: missing or empty custom_fields" % item)
 73 |                 continue
 74 |             cv = cf.get(cf_name)
 75 |             if not cv:
 76 |                 print("Item %r: missing or empty %s" % (item, cf_name))
 77 |                 continue
 78 |             if not isinstance(cv, list):
 79 |                 cv = [cv]
 80 |             for mod in cv:
 81 |                 self.add_target(item, filename, {param_name: mod})
 82 | 
 83 |     def build(self):
 84 |         """
 85 |         Here you assemble the netbox things you wish to query and which files to add them to.
 86 |         Add queries for the different types of object to be polled.
 87 |         """
 88 |         self.add_targets(self.nb.dcim.devices.filter(tag="prom_node", **self.filter), "node_targets.yml")
 89 |         self.add_targets(self.nb.virtualization.virtual_machines.filter(tag="prom_node", **self.filter), "node_targets.yml")
 90 |         self.add_targets_cf(self.nb.dcim.devices.filter(tag="prom_snmp", **self.filter), "snmp_targets.yml", "snmp_module", "module")
 91 |         self.add_targets_cf(self.nb.virtualization.virtual_machines.filter(tag="prom_snmp", **self.filter), "snmp_targets.yml", "snmp_module", "module")
 92 |         self.add_targets(self.nb.dcim.devices.filter(tag="prom_windows", **self.filter), "windows_targets.yml")
 93 |         self.add_targets(self.nb.virtualization.virtual_machines.filter(tag="prom_windows", **self.filter), "windows_targets.yml")
 94 |         # TODO: blackbox_targets: should this be on Device/VM or on IPAddress object? And/or Service?
 95 | 
 96 |     def replace_file(self, filename, content):
 97 |         try:
 98 |             with open(filename) as f:
 99 |                 oldconf = f.read()
100 |             if oldconf == content:
101 |                 return
102 |         except FileNotFoundError:
103 |             pass
104 |         with open(filename+".new", "w") as f:
105 |             f.write(content)
106 |         os.rename(filename+".new", filename)
107 | 
108 |     def gen_target_file(self, data):
109 |         """ data is a dict of (labels) => [target]
110 |         Sort it so that it's repeatable
111 |         """
112 |         content = []
113 |         for labels, targets in sorted(data.items()):
114 |             content.append({"labels": dict(labels), "targets": sorted(targets)})
115 |         return "# Auto-generated from Netbox, do not edit as your changes will be overwritten!\n" + yaml.dump(content, default_flow_style=False)
116 | 
117 |     def write_targets(self, dir):
118 |         for filename, data in self.targets.items():
119 |             self.replace_file(dir+"/"+filename, self.gen_target_file(data))
120 | 
121 |     def write_metrics(self, filename):
122 |         content = ""
123 |         for (instance, kind), labels in sorted(self.metrics.items()):
124 |             content += "netbox_meta{instance=\"%s\",netbox_type=\"%s\"" % (instance, kind)
125 |             for k, v in labels.items():
126 |                 content += ",%s=\"%s\"" % (re.sub(r'[^a-zA-Z0-9_]', '_', k), re.sub(r'"', r'\\"', v))
127 |             content += "} 1\n"
128 |         self.replace_file(filename, content)
129 | 
130 | if __name__ == "__main__":
131 |     API_URL = "https://netbox.example.net"
132 |     API_TOKEN = "XXXXXXXX"
133 |     SITE_TAG = "prometheus"  # we will poll devices in all sites with this tag (and VMs in clusters where the cluster's site has this tag)
134 |     DIR = "/etc/prometheus/targets.d"
135 |     METRICS = "/var/www/html/metrics/netbox"
136 |     # Uncomment when testing:
137 |     #DIR = "/tmp"
138 |     #METRICS = "/tmp/netbox.prom"
139 | 
140 |     nb = pynetbox.api(API_URL, token=API_TOKEN)
141 |     builder = ConfigBuilder(
142 |         nb=nb,
143 |         filter={
144 |             "exclude": "config_context",
145 |             "site_id": [s.id for s in nb.dcim.sites.filter(tag=SITE_TAG)],
146 |             # This changed in 2.7: https://github.com/netbox-community/netbox/issues/3569
147 |             "status": "active",  # "status": 1,
148 |         },
149 |     )
150 |     builder.build()
151 |     builder.write_targets(DIR)
152 |     builder.write_metrics(METRICS)
153 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # This project is now obsolete
  2 | 
  3 | *Use [netbox-plugin-prometheus-sd](https://github.com/FlxPeters/netbox-plugin-prometheus-sd)
  4 | instead, together with Prometheus 2.28.0+ using HTTP SD and target
  5 | relabelling*
  6 | 
  7 | # Netbox Prometheus configuration generator
  8 | 
  9 | This script generates targets files for prometheus from devices and VMs in
 10 | the Netbox database.  Example:
 11 | 
 12 | ```
 13 | # Auto-generated from Netbox, do not edit as your changes will be overwritten!
 14 | - labels:
 15 |     module: if_mib_secret
 16 |     netbox_type: device
 17 |   targets:
 18 |   - sw1 192.168.1.2
 19 |   - sw2 192.168.1.3
 20 | - labels:
 21 |     module: mikrotik_secret
 22 |     netbox_type: device
 23 |   targets:
 24 |   - gw 192.168.1.1
 25 | ```
 26 | 
 27 | It writes separate files for each type of target: `node_targets.yml`,
 28 | `snmp_targets.yml`, `windows_targets.yml`.
 29 | 
 30 | It also generates synthetic metrics which can be used for
 31 | [machine role queries](https://www.robustperception.io/how-to-have-labels-for-machine-roles)
 32 | and to add extra labels to alerts:
 33 | 
 34 | ```
 35 | netbox_meta{instance="gw",netbox_type="device",rack="r1",site="dc",tags_prom_snmp="1",role="router"} 1
 36 | netbox_meta{instance="sw1",netbox_type="device",rack="r1",site="dc1",tags_prom_snmp="1",role="core-switch"} 1
 37 | netbox_meta{instance="sw2",netbox_type="device",rack="r2",site="dc1",tags_prom_snmp="1",role="core-switch"} 1
 38 | ```
 39 | 
 40 | # Installation
 41 | 
 42 | Normally you would install script this on your prometheus server, so that it
 43 | can write the targets files directly.
 44 | 
 45 | Copy the python source file to your prometheus server, e.g. as
 46 | `/usr/local/bin/netbox_prometheus.py`
 47 | 
 48 | ## Dependencies
 49 | 
 50 | ```
 51 | apt-get install python3-pip
 52 | pip3 install pynetbox
 53 | ```
 54 | 
 55 | ## Netbox Configuration
 56 | 
 57 | ### API token
 58 | 
 59 | In Netbox, create an API token with write disabled.
 60 | 
 61 | Inside the python source file, set API_URL and API_TOKEN to be able to
 62 | communicate with Netbox REST API.
 63 | 
 64 | ### Tags
 65 | 
 66 | In your Netbox instance:
 67 | 
 68 | * Add tag "prometheus" onto each of the site(s) where you have things to to poll (*)
 69 | * Add tag "prom_node" to each Linux device/VM that you want to poll
 70 | * Add tag "prom_windows" to each Windows device/VM that you want to poll
 71 | * Add tag "prom_snmp" to each network device that you want to poll
 72 | * Ensure that each device or VM that you want to poll has status "Active",
 73 |   and either has a primary IP address assigned, or its name is resolvable
 74 | 
 75 | Note: the script *requires* all those tags to exist, even if there are no
 76 | devices with them, because the Netbox API gives an error if you try to query
 77 | non-existent tags.
 78 | 
 79 | Therefore if you don't need `prom_windows` or `prom_snmp`, you still need to
 80 | create an unused tag in Netbox (prior to v2.9.0 you had to add it to a
 81 | device then remove it again), or else comment out the relevant lines in the
 82 | script.
 83 | 
 84 | (*) To scrape Virtual Machines, the *cluster* must be associated with a
 85 | site, and that site must have the label "prometheus".  Site Groups are
 86 | currently not tested, but you can adjust the filter yourself if you wish.
 87 | 
 88 | ### SNMP configuration
 89 | 
 90 | If you have any SNMP devices to poll, then you need to create a new custom
 91 | field as follows:
 92 | 
 93 | * Type: Selection (or Multiple Selection)
 94 | * Name: `snmp_module`
 95 | * Label: `SNMP Module`
 96 | * Content Types: `DCIM > device` and `Virtualization > virtual machine`
 97 | * Choices: list of SNMP modules as required, e.g. `if_mib,apcups,synology`
 98 |   (these refer to modules in your snmp_exporter `snmp.yml`)
 99 | 
100 | Then select one or more of these choices on each device or VM that you wish
101 | to poll, as well as setting the `prom_snmp` tag.
102 | 
103 | (The tag is required to minimise the data returned in the API query; Netbox
104 | does not yet have
105 | [custom field filters](https://github.com/netbox-community/netbox/issues/6615)
106 | such as `cf_snmp_module__empty=0`)
107 | 
108 | ## Script setup
109 | 
110 | ### Create the output directories
111 | 
112 | ```
113 | mkdir -p /etc/prometheus/targets.d
114 | mkdir -p /var/www/html/metrics
115 | ```
116 | 
117 | If you want the output to go somewhere else, then modify the
118 | relevant constants in the script.
119 | 
120 | ### Run the script
121 | 
122 | Run the script, check for no errors, and that it creates output files in the
123 | given directories.
124 | 
125 | ### Add cronjob
126 | 
127 | Create `/etc/cron.d/netbox_prometheus` to keep the files up-to-date:
128 | 
129 | ```
130 | */5 * * * * /usr/local/bin/netbox_prometheus.py
131 | ```
132 | 
133 | Prometheus `file_sd` automatically detects files which change, and doesn't
134 | need to be reloaded.
135 | 
136 | ## Prometheus scrape configuration
137 | 
138 | ### Targets
139 | 
140 | This script can output targets of the following forms:
141 | 
142 | ```
143 | - foo               # name only
144 | - x.x.x.x           # IPv4 address only
145 | - foo x.x.x.x       # name and IPv4 address
146 | - [dead:beef::]     # IPv6 address only
147 | - foo [dead:beef::] # name and IPv6 address
148 | ```
149 | 
150 | The IP addresses come from the "primary" IP address defined in Netbox, and
151 | the name from the device/VM name.  This approach allows you to have
152 | [meaningful instance labels](https://www.robustperception.io/controlling-the-instance-label)
153 | like `{instance="foo"}` whilst using IP addresses for targets, avoiding
154 | the need for DNS resolution.
155 | 
156 | To use these target files, you will need some relabelling configuration.
157 | 
158 | Node Exporter:
159 | 
160 | ```
161 |   - job_name: node
162 |     scrape_interval: 1m
163 |     file_sd_configs:
164 |       - files:
165 |         - /etc/prometheus/targets.d/node_targets.yml
166 |     metrics_path: /metrics
167 |     relabel_configs:
168 |       # When __address__ consists of just a name or IP address,
169 |       # copy it to the "instance" label.  Doing this explicitly
170 |       # keeps the port number out of the instance label.
171 |       - source_labels: [__address__]
172 |         regex: '([^ ]+)'
173 |         target_label: instance
174 | 
175 |       # When __address__ is of the form "name address", extract
176 |       # name to "instance" label and address to "__address__"
177 |       - source_labels: [__address__]
178 |         regex: '(.+) (.+)'
179 |         target_label: instance
180 |         replacement: '${1}'
181 |       - source_labels: [__address__]
182 |         regex: '(.+) (.+)'
183 |         target_label: __address__
184 |         replacement: '${2}'
185 | 
186 |       # Append port number to __address__ so that scrape gets
187 |       # sent to the right port
188 |       - source_labels: [__address__]
189 |         target_label: __address__
190 |         replacement: '${1}:9100'
191 | ```
192 | 
193 | Windows exporter is similar (just change the job_name, the filename, and the
194 | replacement port number to 9182).
195 | 
196 | SNMP exporter is slightly trickier because the target parameter
197 | cannot contain square brackets around IPv6 addresses.
198 | 
199 | ```
200 |   - job_name: snmp
201 |     scrape_interval: 1m
202 |     file_sd_configs:
203 |       - files:
204 |         - /etc/prometheus/targets.d/snmp_targets.yml
205 |     metrics_path: /snmp
206 |     relabel_configs:
207 |       # When __address__ consists of just a name or IP address,
208 |       # copy it to both the "instance" label (visible to user)
209 |       # and "__param_target" (where snmp_exporter sends SNMP)
210 |       - source_labels: [__address__]
211 |         regex: '([^ ]+)'
212 |         target_label: instance
213 |       - source_labels: [__address__]
214 |         regex: '([^ ]+)'
215 |         target_label: __param_target
216 | 
217 |       # When __address__ is of the form "name address", extract
218 |       # name to "instance" label and address to "__param_target"
219 |       - source_labels: [__address__]
220 |         regex: '(.+) (.+)'
221 |         target_label: instance
222 |         replacement: '${1}'
223 |       - source_labels: [__address__]
224 |         regex: '(.+) (.+)'
225 |         target_label: __param_target
226 |         replacement: '${2}'
227 | 
228 |       # If __param_target is enclosed by square brackets, remove them
229 |       - source_labels: [__param_target]
230 |         regex: '\[(.+)\]'
231 |         target_label: __param_target
232 |         replacement: '${1}'
233 | 
234 |       # Copy "module" label to "__param_module" so that snmp_exporter
235 |       # receives it as part of the scrape URL
236 |       - source_labels: [module]
237 |         target_label: __param_module
238 | 
239 |       # Send the actual scrape to SNMP exporter
240 |       - target_label: __address__
241 |         replacement: 127.0.0.1:9116
242 | ```
243 | 
244 | Reload prometheus config and check there are no errors:
245 | 
246 | ```
247 | killall -HUP prometheus
248 | journalctl -eu prometheus
249 | ```
250 | 
251 | See also:
252 | 
253 | * https://www.robustperception.io/controlling-the-instance-label
254 | * https://www.robustperception.io/target-labels-are-for-life-not-just-for-christmas/
255 | * https://www.robustperception.io/reloading-prometheus-configuration
256 | 
257 | ### Metadata
258 | 
259 | In order to use the metadata metrics, you'll need to expose them using http
260 | (`apt-get install apache2`) and add a scrape job:
261 | 
262 | ```
263 |   # Pick up netbox_meta metrics exported from netbox database
264 |   - job_name: netbox
265 |     metrics_path: /metrics/netbox
266 |     scrape_interval: 5m
267 |     honor_labels: true
268 |     static_configs:
269 |       - targets:
270 |         - 127.0.0.1:80
271 | ```
272 | 
273 | You can then use queries and alerting rules with extra labels from Netbox, e.g.
274 | 
275 | ```
276 | # Filter based on Netbox attributes
277 | (up == 1) * on (instance) group_left netbox_meta{role="core-switch"}
278 | 
279 | # Add extra labels from Netbox
280 | (up == 1) * on (instance) group_left(tenant,role,site,rack,cluster) netbox_meta
281 | ```
282 | 
283 | You can modify the python code to add extra labels, e.g. "platform".
284 | 
285 | See also:
286 | 
287 | * [How to have labels for machine roles](https://www.robustperception.io/how-to-have-labels-for-machine-roles)
288 | * [Exposing the software version to prometheus](https://www.robustperception.io/exposing-the-software-version-to-prometheus)
289 | * [Many-to-one and one-to-one vector matches](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches)
290 | 
291 | # Complex deployments
292 | 
293 | ## Multiple prometheus instances
294 | 
295 | You might have multiple prometheus instances.  Say prometheus1 should poll
296 | sites A, B and C, while prometheus2 polls sites A (for redundancy), D and E.
297 | 
298 | You can control this with the SITE_TAG setting.  On the two prometheus
299 | instances run the same script, but one configured with
300 | 
301 | ```
302 | SITE_TAG = "prometheus1"
303 | ```
304 | 
305 | and the other with
306 | 
307 | ```
308 | SITE_TAG = "prometheus2"
309 | ```
310 | 
311 | Then in Netbox, tag sites A, B and C with "prometheus1", and sites A, D and
312 | E with "prometheus2".  The correct targets will be generated for each
313 | prometheus instance.
314 | 


--------------------------------------------------------------------------------