├── network-telemetry-prometheus ├── grafana │ ├── sessions │ │ ├── 5 │ │ │ └── d │ │ │ │ └── 5da0845f4718049c │ │ ├── 9 │ │ │ └── d │ │ │ │ └── 9de316f20e31b9b1 │ │ └── b │ │ │ └── f │ │ │ └── bf4ba050c1c8fad7 │ └── grafana.db ├── monit │ ├── requirements.txt │ ├── inventory │ │ ├── groups.yaml │ │ └── hosts.yaml │ ├── Dockerfile │ ├── __pycache__ │ │ └── monit.cpython-36.pyc │ ├── config.yaml │ └── monit.py ├── diagram.png ├── network.png ├── grafana_1.png ├── grafana_2.png ├── prometheus.png ├── diagram.graffle │ ├── data.plist │ ├── image1.tiff │ ├── image4.tiff │ └── image5.png ├── prometheus │ └── prometheus.yml ├── network │ ├── prometheus_demo_leaf00.cfg │ ├── prometheus_demo_leaf01.cfg │ └── prometheus_demo_spine00.cfg ├── docker-compose.yml ├── Makefile └── README.md ├── README.md └── LICENSE /network-telemetry-prometheus/grafana/sessions/b/f/bf4ba050c1c8fad7: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | nornir>1.0.0 3 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/inventory/groups.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | defaults: {} 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # blogposts-demos 2 | 3 | Most of the content here is available at [www.dravetech.com](https://www.dravetech.com). 
4 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/diagram.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/network.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/grafana_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/grafana_1.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/grafana_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/grafana_2.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/prometheus.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/grafana/grafana.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/grafana/grafana.db -------------------------------------------------------------------------------- 
/network-telemetry-prometheus/diagram.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/diagram.graffle/data.plist -------------------------------------------------------------------------------- /network-telemetry-prometheus/diagram.graffle/image1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/diagram.graffle/image1.tiff -------------------------------------------------------------------------------- /network-telemetry-prometheus/diagram.graffle/image4.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/diagram.graffle/image4.tiff -------------------------------------------------------------------------------- /network-telemetry-prometheus/diagram.graffle/image5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/diagram.graffle/image5.png -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | RUN pip install nornir flask 4 | 5 | COPY . 
/monit 6 | 7 | ENTRYPOINT python /monit/monit.py 8 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/grafana/sessions/5/d/5da0845f4718049c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/grafana/sessions/5/d/5da0845f4718049c -------------------------------------------------------------------------------- /network-telemetry-prometheus/grafana/sessions/9/d/9de316f20e31b9b1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/grafana/sessions/9/d/9de316f20e31b9b1 -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/__pycache__/monit.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dravetech/blogposts-demos/HEAD/network-telemetry-prometheus/monit/__pycache__/monit.cpython-36.pyc -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | inventory: nornir.plugins.inventory.simple.SimpleInventory 3 | SimpleInventory: 4 | host_file: "/monit/inventory/hosts.yaml" 5 | group_file: "/monit/inventory/groups.yaml" 6 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | 5 | rule_files: 6 | # - "first.rules" 7 | # - "second.rules" 8 | 9 | scrape_configs: 10 | - job_name: prometheus 11 | static_configs: 12 | - targets: 13 | - 
'localhost:9090' 14 | - job_name: network_monitoring 15 | static_configs: 16 | - targets: 17 | - '10.200.200.102:5000' 18 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/inventory/hosts.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | spine00: 3 | nornir_host: 10.200.200.1 4 | nornir_username: myuser 5 | nornir_password: mypassword 6 | nornir_network_api_port: 12443 7 | nornir_nos: eos 8 | 9 | leaf00: 10 | nornir_host: 10.200.200.1 11 | nornir_username: myuser 12 | nornir_password: mypassword 13 | nornir_network_api_port: 12444 14 | nornir_nos: eos 15 | 16 | leaf01: 17 | nornir_host: 10.200.200.1 18 | nornir_username: myuser 19 | nornir_password: mypassword 20 | nornir_network_api_port: 12445 21 | nornir_nos: eos 22 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/network/prometheus_demo_leaf00.cfg: -------------------------------------------------------------------------------- 1 | ! Command: show running-config 2 | ! device: leaf00 (cEOSSim, EOS-4.20.5F) 3 | ! 4 | transceiver qsfp default-mode 4x10G 5 | ! 6 | hostname leaf00 7 | ! 8 | spanning-tree mode mstp 9 | ! 10 | no aaa root 11 | ! 12 | username myuser privilege 15 role network-admin secret sha512 $6$CEgINt/wvYTVCbNq$Wp6VRmU6u9rxPri/Fwg3cCJ9.lyT58yfNwNqnEWOnS3XswD0e65mDVlzi7lXqMNah.OR8mJLVv26DAtELyflt. 13 | ! 14 | interface Ethernet1 15 | no switchport 16 | ip address 10.200.201.201/24 17 | ! 18 | interface Ethernet2 19 | ! 20 | interface Loopback0 21 | ip address 10.1.1.2/32 22 | ! 23 | ip routing 24 | ! 25 | management api http-commands 26 | no shutdown 27 | ! 28 | router bgp 65001 29 | neighbor 10.200.201.200 remote-as 65000 30 | neighbor 10.200.201.200 maximum-routes 12000 31 | network 10.1.1.2/32 32 | ! 
33 | end 34 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/network/prometheus_demo_leaf01.cfg: -------------------------------------------------------------------------------- 1 | ! Command: show running-config 2 | ! device: leaf00 (cEOSSim, EOS-4.20.5F) 3 | ! 4 | transceiver qsfp default-mode 4x10G 5 | ! 6 | hostname leaf01 7 | ! 8 | spanning-tree mode mstp 9 | ! 10 | no aaa root 11 | ! 12 | username myuser privilege 15 role network-admin secret sha512 $6$CEgINt/wvYTVCbNq$Wp6VRmU6u9rxPri/Fwg3cCJ9.lyT58yfNwNqnEWOnS3XswD0e65mDVlzi7lXqMNah.OR8mJLVv26DAtELyflt. 13 | ! 14 | interface Ethernet1 15 | no switchport 16 | ip address 10.200.202.202/24 17 | ! 18 | interface Ethernet2 19 | ! 20 | interface Loopback0 21 | ip address 10.1.1.3/32 22 | ! 23 | ip routing 24 | ! 25 | management api http-commands 26 | no shutdown 27 | ! 28 | router bgp 65002 29 | neighbor 10.200.202.200 remote-as 65000 30 | neighbor 10.200.202.200 maximum-routes 12000 31 | network 10.1.1.3/32 32 | ! 
33 | end 34 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | 5 | prometheus: 6 | image: prom/prometheus 7 | volumes: 8 | - ./prometheus:/etc/prometheus 9 | ports: 10 | - 9090:9090 11 | networks: 12 | management: 13 | ipv4_address: 10.200.200.100 14 | 15 | grafana: 16 | image: grafana/grafana 17 | volumes: 18 | - ./grafana:/var/lib/grafana 19 | ports: 20 | - 3000:3000 21 | networks: 22 | management: 23 | ipv4_address: 10.200.200.101 24 | 25 | monit: 26 | image: prometheus-demo/monit 27 | build: 28 | context: monit 29 | ports: 30 | - 5000:5000 31 | networks: 32 | management: 33 | ipv4_address: 10.200.200.102 34 | 35 | 36 | networks: 37 | management: 38 | driver: bridge 39 | ipam: 40 | driver: default 41 | config: 42 | - subnet: 10.200.200.0/24 43 | gateway: 10.200.200.1 44 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/network/prometheus_demo_spine00.cfg: -------------------------------------------------------------------------------- 1 | ! Command: show running-config 2 | ! device: spine00 (cEOSSim, EOS-4.20.5F) 3 | ! 4 | transceiver qsfp default-mode 4x10G 5 | ! 6 | hostname spine00 7 | ! 8 | spanning-tree mode mstp 9 | ! 10 | no aaa root 11 | ! 12 | username myuser privilege 15 role network-admin secret sha512 $6$CEgINt/wvYTVCbNq$Wp6VRmU6u9rxPri/Fwg3cCJ9.lyT58yfNwNqnEWOnS3XswD0e65mDVlzi7lXqMNah.OR8mJLVv26DAtELyflt. 13 | ! 14 | interface Ethernet1 15 | no switchport 16 | ip address 10.200.201.200/24 17 | ! 18 | interface Ethernet2 19 | no switchport 20 | ip address 10.200.202.200/24 21 | ! 22 | interface Ethernet3 23 | ! 24 | interface Loopback0 25 | ip address 10.1.1.1/32 26 | ! 27 | ip routing 28 | ! 29 | management api http-commands 30 | no shutdown 31 | ! 
32 | router bgp 65000 33 | neighbor 10.200.201.201 remote-as 65001 34 | neighbor 10.200.201.201 maximum-routes 12000 35 | neighbor 10.200.202.202 remote-as 65002 36 | neighbor 10.200.202.202 maximum-routes 12000 37 | network 10.1.1.1/32 38 | ! 39 | end 40 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/Makefile: -------------------------------------------------------------------------------- 1 | SPINE00=prometheus_demo_spine00 2 | LEAF00=prometheus_demo_leaf00 3 | LEAF01=prometheus_demo_leaf01 4 | 5 | 6 | .PHONY: start 7 | start: stop build 8 | docker-compose up -d 9 | docker network create -d bridge --subnet 10.200.201.0/24 prometheus-nornir-snmp-replacement_link0 10 | docker network create -d bridge --subnet 10.200.202.0/24 prometheus-nornir-snmp-replacement_link1 11 | make start_ceos DEVICE=$(SPINE00) PORT=12443 IP=200 12 | make start_ceos DEVICE=$(LEAF00) PORT=12444 IP=201 13 | make start_ceos DEVICE=$(LEAF01) PORT=12445 IP=202 14 | 15 | 16 | .PHONY: build 17 | build: 18 | docker-compose build 19 | 20 | 21 | .PHONY: stop 22 | stop: 23 | docker rm -f $(SPINE00) || exit 0 24 | docker rm -f $(LEAF00) || exit 0 25 | docker rm -f $(LEAF01) || exit 0 26 | docker-compose down 27 | docker network rm prometheus-nornir-snmp-replacement_link0 || exit 0 28 | docker network rm prometheus-nornir-snmp-replacement_link1 || exit 0 29 | 30 | 31 | .PHONY: start_ceos 32 | start_ceos: 33 | docker create \ 34 | --name=$(DEVICE) \ 35 | --privileged \ 36 | -v $(PWD)/network/$(DEVICE).cfg:/mnt/flash/startup-config \ 37 | -p $(PORT):443 \ 38 | -e CEOS=1 \ 39 | -e container=docker \ 40 | -e EOS_PLATFORM=ceossim \ 41 | -e SKIP_ZEROTOUCH_BARRIER_IN_SYSDBINIT=1 \ 42 | -e ETBA=1 \ 43 | -e INTFTYPE=eth \ 44 | ceosimage:4.20.5F /sbin/init 45 | docker network connect prometheus-nornir-snmp-replacement_management --ip 10.200.200.$(IP) $(DEVICE) 46 | ifeq ($(DEVICE), $(SPINE00)) 47 | docker network connect 
prometheus-nornir-snmp-replacement_link0 --ip 10.200.201.$(IP) $(DEVICE) 48 | docker network connect prometheus-nornir-snmp-replacement_link1 --ip 10.200.202.$(IP) $(DEVICE) 49 | else ifeq ($(DEVICE), $(LEAF00)) 50 | docker network connect prometheus-nornir-snmp-replacement_link0 --ip 10.200.201.$(IP) $(DEVICE) 51 | else ifeq ($(DEVICE), $(LEAF01)) 52 | docker network connect prometheus-nornir-snmp-replacement_link1 --ip 10.200.202.$(IP) $(DEVICE) 53 | endif 54 | docker start $(DEVICE) 55 | 56 | 57 | .PHONY: stop_ceos 58 | stop_ceos: 59 | docker rm -f $(SPINE00) 60 | 61 | .PHONY: spine00 62 | spine00: 63 | docker exec -it prometheus_demo_spine00 Cli 64 | 65 | .PHONY: leaf00 66 | leaf00: 67 | docker exec -it prometheus_demo_leaf00 Cli 68 | 69 | .PHONY: leaf01 70 | leaf01: 71 | docker exec -it prometheus_demo_leaf01 Cli 72 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/monit/monit.py: -------------------------------------------------------------------------------- 1 | from nornir.core import InitNornir 2 | from nornir.plugins.tasks.networking import napalm_get 3 | 4 | from flask import Flask, Response 5 | 6 | app = Flask(__name__) 7 | 8 | # Nornir will be instantiated globally to persist the connection 9 | nr = InitNornir(config_file="/monit/config.yaml", num_workers=100) 10 | 11 | 12 | def _prometheus_metric(name, value, **kwargs): 13 | """ Emit a metric in prometheus format. """ 14 | labels = ", ".join(f'{k}="{v}"' for k, v in kwargs.items()) 15 | return f"{name} {{{labels}}} {value}\n" 16 | 17 | 18 | def _napalm_iface_counters_to_prometheus(device_name, interface_counters): 19 | """ 20 | Transform interface metrics gathered by napalm into a format 21 | suitable for prometheus. 
22 | """ 23 | metrics = "" 24 | for interface, counters in interface_counters.items(): 25 | for counter, value in counters.items(): 26 | c = counter.split("_") 27 | direction = c[0] 28 | metric = "_".join(c[1:]) 29 | metrics += _prometheus_metric( 30 | name="network_device_interface_counter", 31 | value=value, 32 | net_device=device_name, 33 | interface=interface, 34 | direction=direction, 35 | metric=metric, 36 | ) 37 | return metrics 38 | 39 | 40 | def _napalm_bgp_neighbors_to_prometheus(device_name, bgp_neighbors): 41 | """ 42 | Transform bgp metrics gathered by napalm into a format 43 | suitable for prometheus. 44 | """ 45 | metrics = "" 46 | for peer, peer_data in bgp_neighbors["global"]["peers"].items(): 47 | metrics += _prometheus_metric( 48 | name="bgp_session_up", 49 | value=int(peer_data["is_up"]), 50 | net_device=device_name, 51 | peer=peer, 52 | ) 53 | for counter, value in peer_data["address_family"]["ipv4"].items(): 54 | metrics += _prometheus_metric( 55 | name=f"bgp_prefixes", 56 | value=value, 57 | net_device=device_name, 58 | peer=peer, 59 | metric=counter, 60 | ) 61 | return metrics 62 | 63 | 64 | def _get_metrics(task): 65 | """ 66 | Nornir job to gather the metrics using `napalm_get` task 67 | and transform napalm metrics into prometheus metrics. 
68 | """ 69 | result = task.run( 70 | task=napalm_get, 71 | getters=["interfaces_counters", "bgp_neighbors"] 72 | ) 73 | metrics = _napalm_iface_counters_to_prometheus( 74 | task.host.name, result.result["interfaces_counters"] 75 | ) 76 | metrics += _napalm_bgp_neighbors_to_prometheus( 77 | task.host.name, result.result["bgp_neighbors"] 78 | ) 79 | return metrics 80 | 81 | 82 | @app.route("/metrics") 83 | def metrics(): 84 | """ 85 | /metrics endpoint 86 | 87 | Gather metrics from the network and presents it to prometheus 88 | """ 89 | results = nr.run( 90 | task=_get_metrics, # nornir task to run 91 | on_failed=True, # Run the job if previous executions failed 92 | ) 93 | metrics = "\n".join([r.result for r in results.values()]) 94 | return Response(metrics, mimetype="text/plain") 95 | 96 | 97 | if __name__ == "__main__": 98 | app.run(host="0.0.0.0", port=5000) 99 | -------------------------------------------------------------------------------- /network-telemetry-prometheus/README.md: -------------------------------------------------------------------------------- 1 | # Network telemetry: from SNMP to prometheus 2 | 3 | Whether you just don't like SNMP or you want to leverage the same tooling for monitoring and alerting as the rest of your organization, this "tutorial" has you covered. What we are going to do is see how we can monitor our network infrastructure with [prometheus](https://prometheus.io/) and [grafana](https://grafana.com/). 4 | 5 | In this blogpost we are going to see how to build a webapp using [flask](http://flask.pocoo.org/)+[nornir](https://github.com/nornir-automation/nornir) that gathers metrics from the network and presents them via a web application. Then we will scrape that web application with prometheus to store those metrics and finally we will see how we can present those metrics with grafana. 
In summary, you will learn how to replace your old-fashioned SNMP monitoring system with a next-generation-12-factor-app-compliant-telemetry-system. 6 | 7 | ## Components 8 | 9 | Let's start by introducing the components involved: 10 | 11 | * [grafana](https://grafana.com/) - According to their webpage grafana is "the open platform for beautiful analytics and monitoring". It can take many different sources as input and alert based on events, create dashboards to present the many metrics of your system, etc. 12 | * [prometheus](https://prometheus.io/) - From their documentation "Prometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. It stores all scraped samples locally and runs rules over this data to either aggregate and record new time series from existing data or generate alerts. Grafana or other API consumers can be used to visualize the collected data." 13 | * [nornir](https://github.com/nornir-automation/nornir) - Nornir is a pluggable multi-threaded framework with inventory management to help operate collections of devices. We are going to use it to gather the metrics we will want to scrape with `prometheus` 14 | * [flask](http://flask.pocoo.org/) - Flask is a python framework to build web applications. We will basically present to prometheus all the data gathered by `nornir` via a flask web application. Because both `nornir` and `flask` are python frameworks they will integrate seamlessly as a single application. 15 | 16 | Here is a diagram to try to make things clearer: 17 | 18 | ![diagram](diagram.png) 19 | 20 | The network itself looks like this: 21 | 22 | ![network](network.png) 23 | 24 | They are all running cEOS (containerized version of EOS) although we won't care much about that as `nornir` will be using [napalm](http://napalm.readthedocs.io/) under the hood so the OS will be abstracted away from us. 
25 | 26 | ## Requirements 27 | 28 | You can run this demo yourself, however, you will need a couple of things: 29 | 30 | * `docker` and `docker-compose`. 31 | * `ceosimage:4.20.5F` which can be downloaded for free [here](https://www.arista.com/en/support/software-download) (requires registration) 32 | 33 | ## Project structure 34 | 35 | This "tutorial" is not going to go super deep into the details, instead, it's going to focus on laying out the foundation and it will be up to the reader to go deeper into the parts they are most interested in. Because of that, we are going to start by describing the project structure: 36 | 37 | * [./grafana/](grafana) - Configuration for grafana (it's in binary so not much to look at) 38 | * [./monit/](monit) - Python application using nornir+flask to gather and present the metrics to prometheus 39 | * [./network/](network) - Starting configuration for the network 40 | * [./prometheus/](prometheus) - Configuration for prometheus 41 | * [./Makefile](Makefile) - We are going to express the `make` operations here. 42 | 43 | ## Starting the environment 44 | 45 | All the components will run on different containers so you don't have to worry about the environment. Just start everything by executing: 46 | 47 | make start 48 | 49 | Now wait a couple of minutes to make sure all the software gets properly initialized. You can connect to any of the devices available with `make {spine00,leaf00,leaf01}`. 50 | 51 | I'd suggest starting by connecting to `spine00` and checking if BGP is up and running. If BGP is not up you may have to connect to `spine00` and swap the IPs in `Et1` and `Et2` because docker sometimes doesn't respect the order of the interfaces. 52 | 53 | When you are done you can stop the environment executing the command: 54 | 55 | make stop 56 | 57 | # Gathering and presenting metrics 58 | 59 | Let's start by looking at the metrics being exported by clicking [here](http://127.0.0.1:5000/metrics). 
You should see something like this: 60 | 61 | network_device_interface_counter {net_device="spine00", interface="Ethernet2", direction="tx", metric="octets"} 5974 62 | network_device_interface_counter {net_device="spine00", interface="Ethernet2", direction="rx", metric="octets"} 34467 63 | network_device_interface_counter {net_device="spine00", interface="Ethernet2", direction="tx", metric="unicast_packets"} 29 64 | network_device_interface_counter {net_device="spine00", interface="Ethernet2", direction="rx", metric="unicast_packets"} 3 65 | network_device_interface_counter {net_device="spine00", interface="Ethernet2", direction="tx", metric="multicast_packets"} 20 66 | ... 67 | 68 | network_device_interface_counter {net_device="leaf00", interface="Ethernet1", direction="tx", metric="octets"} 6075 69 | network_device_interface_counter {net_device="leaf00", interface="Ethernet1", direction="rx", metric="octets"} 34946 70 | network_device_interface_counter {net_device="leaf00", interface="Ethernet1", direction="tx", metric="unicast_packets"} 31 71 | network_device_interface_counter {net_device="leaf00", interface="Ethernet1", direction="rx", metric="unicast_packets"} 1 72 | ... 73 | 74 | bgp_session_up {net_device="leaf00", peer="10.200.201.200"} 1 75 | bgp_prefixes {net_device="leaf00", peer="10.200.201.200", metric="sent_prefixes"} 1 76 | bgp_prefixes {net_device="leaf00", peer="10.200.201.200", metric="received_prefixes"} 2 77 | ... 78 | 79 | Those metrics are generated on the fly every time the page is loaded. Prometheus will be querying this endpoint every 15s and scraping the metrics capturing them from our network in near real time. 80 | 81 | If you want to take a look at the code needed to generate that page you can check the [monit.py](monit/monit.py) script. Code is commented and shouldn't be too scary if you know python. 
If you don't, suffice it to say the code looks more daunting than it actually is due to the data transformations needed to accommodate prometheus. The interesting bits are in the functions `metrics` and `_get_metrics` as those show how `flask` and `nornir` integrate seamlessly to build this web application. 82 | 83 | Note: If you see an intermittent error when querying the metrics yourself don't sweat. For simplicity, we are not using `gunicorn` or any other application server, we are just starting the flask application directly. This means only one query at a time is possible, so you probably crossed the streams with prometheus. In a production environment you'd just place an application server on top of the flask application and this problem would be gone. 84 | 85 | # Querying the metrics 86 | 87 | Now that we have seen the endpoint, we can connect to prometheus and start playing with the metrics [here](http://127.0.0.1:9090/graph). There you can query the data and see some graphs. For instance: 88 | 89 | ![prometheus](prometheus.png) 90 | 91 | Note: If you are trying to follow this "tutorial" yourself I'd suggest making sure it's been ~5-10 minutes since you started the environment to make sure the system has enough data to return something meaningful. If you can't see any data check the [targets](http://127.0.0.1:9090/targets) are all `UP`. If they are and the previous step looked good wait a few more minutes. If they are not `UP` there is something wrong. 92 | 93 | # Dashboards 94 | 95 | As we mentioned earlier, we are going to use grafana for visualization and alerting. You can connect to it [here](http://localhost:3000) with user and password `admin`/`admin`. 
Now, if you click on `Home` you will see under `General` there are a few dashboards already created: 96 | 97 | * Interface Counters 98 | 99 | ![grafana](grafana_1.png) 100 | 101 | * BGP 102 | 103 | Note that on the image below we are getting alerts because one of the BGP sessions is failing 104 | 105 | ![grafana](grafana_2.png) 106 | 107 | At this point, I'd suggest playing a bit with the network and try things like: 108 | 109 | 1. Adding/Removing prefixes from BGP 110 | 2. Shutting down an interface to see how fast grafana will start showing the alert as compared to other SNMP-based monitoring systems 111 | 3. Playing with the dashboards to see how flexible they are thanks to the prometheus query language you can use in the panels. 112 | 113 | Note that dashboards can be exported/imported in YAML format, which means that once you have created a "master" dashboard you like you should be able to use it as a template for future uses. 114 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------