├── .gitignore ├── Readme.md ├── deploy-monitoring.yml ├── examples └── systemd_timers │ ├── zfs-sync.service │ └── zfs-sync.timer ├── group_vars ├── exporters.yml.sample └── metrics.yml.sample ├── hosts.sample ├── init.sh ├── purge-monitoring.yml └── roles ├── container-engine ├── README.md ├── meta │ └── main.yml ├── tasks │ ├── main.yml │ └── pre_requisites │ │ ├── debian_prerequisites.yml │ │ └── prerequisites.yml └── vars │ ├── CentOS-8.yml │ ├── Debian.yml │ ├── RedHat-8.yml │ ├── RedHat.yml │ ├── Rocky-8.yml │ ├── Rocky-9.yml │ ├── Ubuntu-16.yml │ ├── Ubuntu-18.yml │ ├── Ubuntu-20.yml │ └── Ubuntu-22.yml ├── grafana ├── defaults │ └── main.yml ├── files │ ├── network-overview.json │ ├── system-overview.json │ ├── zfs-detailed-stats.json │ ├── zfs-overview.json │ └── zfs-replication-overview.json ├── meta │ └── main.yml ├── tasks │ ├── configure_grafana.yml │ ├── main.yml │ └── setup_container.yml └── templates │ ├── dashboards-system.yml.j2 │ ├── dashboards-zfs.yml.j2 │ ├── datasources-prometheus.yml.j2 │ ├── grafana-server.service.j2 │ └── grafana.ini.j2 ├── node-exporter ├── defaults │ └── main.yml ├── meta │ └── main.yml ├── tasks │ ├── main.yml │ └── setup_container.yml └── templates │ └── node_exporter.service.j2 ├── prometheus ├── defaults │ └── main.yml ├── files │ ├── default-alerts.yml │ ├── mdadm-alerts.yml │ └── zfs-alerts.yml ├── handlers │ └── main.yml ├── meta │ └── main.yml ├── tasks │ ├── main.yml │ └── setup_container.yml └── templates │ ├── alertmanager.service.j2 │ ├── alertmanager.yml.j2 │ ├── prometheus.service.j2 │ └── prometheus.yml.j2 ├── zfs-exporter ├── defaults │ └── main.yml ├── handlers │ └── main.yml ├── meta │ └── main.yml ├── tasks │ ├── main.yml │ └── setup_container.yml └── templates │ └── zfs_exporter.service.j2 └── znapzend-exporter ├── defaults └── main.yml ├── files ├── error_logger ├── post_send_cmd.sh ├── post_snap_cmd.sh ├── pre_send_cmd.sh ├── pre_snap_cmd.sh └── znapzend_monitor ├── meta └── main.yml ├── tasks ├── 
main.yml └── setup_container.yml └── templates └── znapzend_exporter.service.j2 /.gitignore: -------------------------------------------------------------------------------- 1 | group_vars/metrics.yml 2 | group_vars/exporters.yml 3 | hosts 4 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Monitoring Stack 2 | 3 | | Service Name | Description | 4 | |------------------- |----------------------------------------------------------------- | 5 | | Grafana | Display Statistics & Metrics from database | 6 | | Alertmanager | Query db of metrics and send alerts based on user defined rules | 7 | | Prometheus | Collect and store metrics scraped from exporters in database | 8 | | Node Exporter | Export hardware and OS metrics via http endpoint | 9 | | ZnapZend Exporter | Export state information on zfs snapshots and replication tasks | 10 | 11 | The services outlined above are deployed as containers using either podman or docker depending on Host OS. 12 | Containers are managed via systemd services and/or cockpit-podman module 13 | 14 | ## Supported OS 15 | * Rocky Linux 8.X 16 | * Rocky Linux 9.X 17 | * Ubuntu 20.04 18 | * Ubuntu 22.04 19 | 20 | # Installation 21 | 22 | * Clone git repo to "/usr/share" 23 | ```sh 24 | cd /usr/share/ 25 | git clone https://github.com/45drives/monitoring-stack.git 26 | ``` 27 | * Included inventory file "hosts" has two groups "metrics" and "exporters" 28 | * All hosts in the "metrics" group will have prometheus, alertmanager and grafana installed 29 | * All hosts in the "exporters" group will have node_exporter and znapzend_exporter installed 30 | * By default "metrics" and "exporters" is populated by localhost. This is sufficient for a single server deployment. 
31 | * To add multiple servers add new hosts in the "exporters" group 32 | * It is possible to have the metric stack not run on the same server as the exporter services. 33 | 34 | * Configure email send/receive settings for alertmanager in "group_vars/metrics.yml" 35 | 36 | * Default ports are defined in the table below, they can be changed in metrics.yml or exporters.yml 37 | 38 | | Default Setting | Value | 39 | |-------------------------- |------- | 40 | | Prometheus Port | 9091 | 41 | | Alertmanager Port | 9093 | 42 | | Grafana Port | 3000 | 43 | | Grafana Default User | admin | 44 | | Grafana Default Password | admin | 45 | | Node Exporter Port | 9100 | 46 | | Znapzend Port | 9101 | 47 | 48 | * Run metrics playbook 49 | ```sh 50 | cd /usr/share/monitoring-stack 51 | ansible-playbook -i hosts deploy-monitoring.yml 52 | ``` 53 | 54 | * To uninstall monitoring stack 55 | ```sh 56 | ansible-playbook -i hosts purge-monitoring.yml 57 | ``` 58 | 59 | # Verification 60 | 61 | To ensure monitoring stack is working as expected, simulate a failure condition and you will receive an email notification 62 | 63 | * Offline a disk in your zpool 64 | * Set disk as "Offline" in Houston UI, "ZFS + File Sharing" 65 | * Or in cli: zpool offline tank 1-1 66 | * After ~30 seconds you should see email with subject line "[FIRING:1] ZpoolDegradedState ($HOSTNAME node warning degraded $POOL_NAME)" 67 | 68 | -------------------------------------------------------------------------------- /deploy-monitoring.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: all 3 | become: true 4 | tasks: 5 | - block: 6 | - import_role: 7 | name: container-engine 8 | 9 | - hosts: exporters 10 | become: true 11 | tasks: 12 | - block: 13 | - import_role: 14 | name: node-exporter 15 | - import_role: 16 | name: zfs-exporter 17 | - import_role: 18 | name: znapzend-exporter 19 | 20 | 21 | - hosts: metrics 22 | become: true 23 | tasks: 24 | - block: 25 | -
import_role: 26 | name: prometheus 27 | - import_role: 28 | name: grafana -------------------------------------------------------------------------------- /examples/systemd_timers/zfs-sync.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Starts zfs replication jobs 3 | Wants=zfs-sync.timer 4 | 5 | [Service] 6 | Type=oneshot 7 | ExecStart=/usr/bin/znapzend --runonce --logto=syslog::daemon 8 | [Install] 9 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /examples/systemd_timers/zfs-sync.timer: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Starts zfs replication jobs 3 | Requires=zfs-sync.service 4 | 5 | [Timer] 6 | Unit=zfs-sync.service 7 | OnCalendar=*-*-* 19:00:00 8 | 9 | [Install] 10 | WantedBy=timers.target -------------------------------------------------------------------------------- /group_vars/exporters.yml.sample: -------------------------------------------------------------------------------- 1 | # Node Exporter 2 | node_exporter_container_image: prom/node-exporter:latest 3 | node_exporter_port: 9100 4 | # Znapzend Exporter 5 | znapzend_exporter_container_image: ccremer/znapzend-exporter:v0.3.1 6 | znapzend_exporter_port: 9101 7 | # Zfs Exporter 8 | zfs_exporter_container_image: docker.io/45drives/zfs_exporter:v2.2.5 9 | zfs_exporter_port: 9134 -------------------------------------------------------------------------------- /group_vars/metrics.yml.sample: -------------------------------------------------------------------------------- 1 | # PROMETHEUS 2 | prometheus_port: 9091 3 | prometheus_listen_address: '' # To listen on all interfaces leave this empty 4 | prometheus_data_dir: /var/lib/prometheus 5 | prometheus_storage_tsdb_retention_time: "30d" 6 | # ALERTMANAGER 7 | alertmanager_port: 9093 8 | alertmanager_listen_address: '' # To listen on all interfaces leave 
this empty 9 | alertmanager_smtp_host: localhost 10 | alertmanager_smtp_port: 25 11 | alertmanager_smtp_username: '' #To authenticate to local/public smtp server, when applicable 12 | alertmanager_smtp_password: '' 13 | alertmanager_send_email: 'localhost@localhost' 14 | alertmanager_receive_email: 15 | - 'localhost@localhost' 16 | - 'dummy@localhost' 17 | alertmanager_require_tls: 'false' 18 | # GRAFANA 19 | grafana_port: 3000 20 | grafana_admin_user: admin 21 | grafana_admin_password: admin 22 | grafana_default_theme: light 23 | -------------------------------------------------------------------------------- /hosts.sample: -------------------------------------------------------------------------------- 1 | [metrics] 2 | localhost 3 | 4 | [exporters] 5 | localhost -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function get_base_distro() { 4 | local distro=$(cat /etc/os-release | grep '^ID_LIKE=' | head -1 | sed 's/ID_LIKE=//' | sed 's/"//g' | awk '{print $1}') 5 | 6 | if [ -z "$distro" ]; then 7 | distro=$(cat /etc/os-release | grep '^ID=' | head -1 | sed 's/ID=//' | sed 's/"//g' | awk '{print $1}') 8 | fi 9 | 10 | echo $distro 11 | } 12 | 13 | function get_distro() { 14 | local distro=$(cat /etc/os-release | grep '^ID=' | head -1 | sed 's/ID=//' | sed 's/"//g' | awk '{print $1}') 15 | 16 | echo $distro 17 | } 18 | 19 | function get_version_id() { 20 | local version_id=$(cat /etc/os-release | grep '^VERSION_ID=' | head -1 | sed 's/VERSION_ID=//' | sed 's/"//g' | awk '{print $1}' | awk 'BEGIN {FS="."} {print $1}') 21 | 22 | echo $version_id 23 | } 24 | 25 | base_distro=$(get_base_distro) 26 | distro=$(get_distro) 27 | distro_version=$(get_version_id) 28 | 29 | # Install ansible from ansible ppa if ubuntu 22 30 | if [ "$distro" == "ubuntu" ] && [ $distro_version -eq 22 ];then 31 | echo "Installing ansible ppa..." 
32 | sudo apt update 33 | sudo apt install -y software-properties-common 34 | sudo add-apt-repository --yes --update ppa:ansible/ansible 35 | echo "Installing ansible..." 36 | sudo apt install ansible -y 37 | fi 38 | 39 | # Initialize ansible variables and inventory 40 | cp group_vars/metrics.yml.sample group_vars/metrics.yml 41 | cp group_vars/exporters.yml.sample group_vars/exporters.yml 42 | cp hosts.sample hosts 43 | -------------------------------------------------------------------------------- /purge-monitoring.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: purge exporters 3 | hosts: 4 | - all 5 | 6 | become: true 7 | 8 | tasks: 9 | - name: set docker_registry value if not set 10 | set_fact: 11 | docker_registry: "docker.io" 12 | when: docker_registry is not defined 13 | 14 | - name: disable node_exporter service 15 | service: 16 | name: node_exporter 17 | state: stopped 18 | enabled: no 19 | failed_when: false 20 | 21 | - name: remove node-exporter container 22 | docker_container: 23 | name: node_exporter 24 | state: absent 25 | failed_when: false 26 | 27 | - name: remove node_exporter service file 28 | file: 29 | name: /etc/systemd/system/node_exporter.service 30 | state: absent 31 | 32 | - name: remove node-exporter image 33 | docker_image: 34 | name: "{{ docker_registry }}/prom/node-exporter" 35 | state: absent 36 | force_absent: yes 37 | tags: 38 | - remove_img 39 | failed_when: false 40 | 41 | - name: disable zfs_exporter service 42 | service: 43 | name: zfs_exporter 44 | state: stopped 45 | enabled: no 46 | failed_when: false 47 | 48 | - name: remove zfs-exporter container 49 | docker_container: 50 | name: zfs_exporter 51 | state: absent 52 | failed_when: false 53 | 54 | - name: remove zfs_exporter service file 55 | file: 56 | name: /etc/systemd/system/zfs_exporter.service 57 | state: absent 58 | 59 | - name: remove zfs-exporter image 60 | docker_image: 61 | name: "{{ docker_registry
}}/45drives/zfs_exporter" 62 | state: absent 63 | force_absent: yes 64 | tags: 65 | - remove_img 66 | failed_when: false 67 | 68 | - name: disable znapzend_exporter service 69 | service: 70 | name: znapzend_exporter 71 | state: stopped 72 | enabled: no 73 | failed_when: false 74 | 75 | - name: remove znapzend-exporter container 76 | docker_container: 77 | name: znapzend_exporter 78 | state: absent 79 | failed_when: false 80 | 81 | - name: remove znapzend_exporter service file 82 | file: 83 | name: /etc/systemd/system/znapzend_exporter.service 84 | state: absent 85 | 86 | - name: remove znapzend-exporter image 87 | docker_image: 88 | name: "{{ docker_registry }}/ccremer/znapzend-exporter" 89 | state: absent 90 | force_absent: yes 91 | tags: 92 | - remove_img 93 | failed_when: false 94 | 95 | - name: purge ceph grafana-server 96 | hosts: metrics 97 | become: true 98 | vars: 99 | grafana_services: 100 | - grafana-server 101 | - prometheus 102 | - alertmanager 103 | 104 | tasks: 105 | - name: set docker_registry value if not set 106 | set_fact: 107 | docker_registry: "docker.io" 108 | when: docker_registry is not defined 109 | 110 | - name: stop services 111 | service: 112 | name: "{{ item }}" 113 | state: stopped 114 | enabled: no 115 | with_items: "{{ grafana_services }}" 116 | failed_when: false 117 | 118 | - name: remove containers 119 | docker_container: 120 | name: "{{ item }}" 121 | state: absent 122 | with_items: "{{ grafana_services }}" 123 | failed_when: false 124 | 125 | - name: remove service files 126 | file: 127 | name: "/etc/systemd/system/{{ item }}.service" 128 | state: absent 129 | with_items: "{{ grafana_services }}" 130 | failed_when: false 131 | 132 | - name: remove images 133 | docker_image: 134 | name: "{{ item }}" 135 | state: absent 136 | force_absent: yes 137 | with_items: 138 | - "{{ docker_registry }}/prom/prometheus" 139 | - "{{ docker_registry }}/grafana/grafana" 140 | - "{{ docker_registry }}/prom/alertmanager" 141 | failed_when: false
142 | 143 | - name: remove data 144 | file: 145 | name: "{{ item }}" 146 | state: absent 147 | with_items: 148 | - /etc/grafana/dashboards 149 | - /etc/grafana/grafana.ini 150 | - /etc/grafana/provisioning 151 | - /var/lib/grafana 152 | - /etc/alertmanager 153 | - /var/lib/alertmanager 154 | - /var/lib/prometheus 155 | - /etc/prometheus 156 | failed_when: false 157 | 158 | -------------------------------------------------------------------------------- /roles/container-engine/README.md: -------------------------------------------------------------------------------- 1 | # Ansible role: ceph-container-engine 2 | 3 | Documentation is available at http://docs.ceph.com/ceph-ansible/. 4 | -------------------------------------------------------------------------------- /roles/container-engine/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | company: Red Hat 4 | author: Guillaume Abrioux 5 | description: Handles container installation prerequisites 6 | license: Apache 7 | min_ansible_version: 2.7 8 | platforms: 9 | - name: Ubuntu 10 | versions: 11 | - xenial 12 | - name: EL 13 | versions: 14 | - 7 15 | galaxy_tags: 16 | - system 17 | dependencies: [] 18 | -------------------------------------------------------------------------------- /roles/container-engine/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: include pre_requisites/prerequisites.yml 3 | include_tasks: pre_requisites/prerequisites.yml 4 | -------------------------------------------------------------------------------- /roles/container-engine/tasks/pre_requisites/debian_prerequisites.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: uninstall old docker versions 3 | package: 4 | name: ['docker', 'docker-engine', 'docker.io', 'containerd', 'runc'] 5 | state: absent 6 | when: container_package_name == 'docker-ce' 7 
| 8 | - name: allow apt to use a repository over https (debian) 9 | package: 10 | name: ['apt-transport-https', 'ca-certificates', 'gnupg', 'software-properties-common'] 11 | update_cache: yes 12 | register: result 13 | until: result is succeeded 14 | 15 | - name: add docker's gpg key 16 | apt_key: 17 | url: 'https://download.docker.com/linux/{{ ansible_distribution | lower }}/gpg' 18 | register: result 19 | until: result is succeeded 20 | when: container_package_name == 'docker-ce' 21 | 22 | - name: add docker repository 23 | apt_repository: 24 | repo: "deb https://download.docker.com/linux/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable" 25 | when: container_package_name == 'docker-ce' 26 | 27 | - name: add podman kubic gpg key 28 | apt_key: 29 | url: 'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/Release.key' 30 | state: present 31 | register: result 32 | until: result is succeeded 33 | when: 34 | - container_package_name == 'podman' 35 | - ansible_distribution == 'Ubuntu' 36 | - ansible_distribution_major_version <= '20' 37 | 38 | - name: add podman kubic repository 39 | apt_repository: 40 | repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/ /" 41 | filename: kubic-podman 42 | state: present 43 | when: 44 | - container_package_name == 'podman' 45 | - ansible_distribution == 'Ubuntu' 46 | - ansible_distribution_major_version <= '20' 47 | -------------------------------------------------------------------------------- /roles/container-engine/tasks/pre_requisites/prerequisites.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: include specific variables 3 | include_vars: "{{ item }}" 4 | with_first_found: 5 | - "{{ ansible_distribution }}-{{ ansible_distribution_major_version }}.yml" 6 | - "{{ ansible_os_family }}.yml" 7 | 8 
| - name: debian based systems tasks 9 | include_tasks: debian_prerequisites.yml 10 | when: 11 | - ansible_os_family == 'Debian' 12 | tags: with_pkg 13 | 14 | # ensure extras enabled for docker 15 | #- name: enable extras on centos 16 | # yum_repository: 17 | # name: extras 18 | # state: present 19 | # enabled: yes 20 | # when: 21 | # - ansible_distribution == 'CentOS' 22 | # tags: 23 | # with_pkg 24 | 25 | - name: install container packages 26 | package: 27 | name: ['{{ container_package_name }}', '{{ container_binding_name }}'] 28 | update_cache: true 29 | register: result 30 | until: result is succeeded 31 | tags: with_pkg 32 | 33 | - name: start container service 34 | service: 35 | name: '{{ container_service_name }}' 36 | state: started 37 | enabled: yes 38 | tags: 39 | with_pkg 40 | when: container_service_name == 'docker' 41 | -------------------------------------------------------------------------------- /roles/container-engine/vars/CentOS-8.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/Debian.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: docker-ce 3 | container_service_name: docker 4 | container_binding_name: python-docker 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/RedHat-8.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/RedHat.yml: 
-------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: docker 3 | container_service_name: docker 4 | container_binding_name: python-docker-py 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/Rocky-8.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman -------------------------------------------------------------------------------- /roles/container-engine/vars/Rocky-9.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman -------------------------------------------------------------------------------- /roles/container-engine/vars/Ubuntu-16.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: docker.io 3 | container_service_name: docker 4 | container_binding_name: python-docker 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/Ubuntu-18.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: docker.io 3 | container_service_name: docker 4 | container_binding_name: python3-docker 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/Ubuntu-20.yml: -------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman 5 | -------------------------------------------------------------------------------- /roles/container-engine/vars/Ubuntu-22.yml: 
-------------------------------------------------------------------------------- 1 | --- 2 | container_package_name: podman 3 | container_service_name: podman 4 | container_binding_name: podman 5 | -------------------------------------------------------------------------------- /roles/grafana/defaults/main.yml: -------------------------------------------------------------------------------- 1 | container_binary: podman 2 | grafana_admin_user: admin 3 | grafana_admin_password: admin 4 | grafana_port: 3000 5 | grafana_default_theme: light 6 | grafana_anonymous_access: 'true' 7 | grafana_protocol: 'http' 8 | grafana_crt: '' 9 | grafana_key: '' 10 | grafana_container_image: "docker.io/grafana/grafana:6.7.4" 11 | grafana_container_cpu_period: 100000 12 | grafana_container_cpu_cores: 2 13 | grafana_container_memory: 4 # container_memory is in GB 14 | grafana_uid: "472" 15 | grafana_datasource: Dashboard 16 | grafana_dashboards_path: "/etc/grafana/dashboards" 17 | grafana_plugins: 18 | - vonage-status-panel 19 | - grafana-piechart-panel 20 | grafana_allow_embedding: True -------------------------------------------------------------------------------- /roles/grafana/files/network-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Dashboard to view multiple servers", 16 | "editable": true, 17 | "gnetId": 1856, 18 | "graphTooltip": 2, 19 | "id": 8, 20 | "iteration": 1575918860121, 21 | "links": [], 22 | "panels": [ 23 | { 24 | "aliasColors": {}, 25 | "bars": false, 26 | "dashLength": 10, 27 | "dashes": false, 28 | "datasource": "Dashboard", 29 | "editable": true, 30 | "error": false, 31 | "fill": 1, 32 | "fillGradient": 0, 33 | "grid": {}, 34 | 
"gridPos": { 35 | "h": 7, 36 | "w": 24, 37 | "x": 0, 38 | "y": 0 39 | }, 40 | "hiddenSeries": false, 41 | "id": 13, 42 | "legend": { 43 | "avg": false, 44 | "current": false, 45 | "max": false, 46 | "min": false, 47 | "show": true, 48 | "total": false, 49 | "values": false 50 | }, 51 | "lines": true, 52 | "linewidth": 2, 53 | "links": [], 54 | "nullPointMode": "connected", 55 | "options": { 56 | "dataLinks": [] 57 | }, 58 | "percentage": false, 59 | "pointradius": 5, 60 | "points": false, 61 | "renderer": "flot", 62 | "repeat": null, 63 | "seriesOverrides": [], 64 | "spaceLength": 10, 65 | "stack": false, 66 | "steppedLine": false, 67 | "targets": [ 68 | { 69 | "expr": "sum(node_load1{instance=~\"$node\"})", 70 | "interval": "", 71 | "intervalFactor": 2, 72 | "legendFormat": "load1", 73 | "metric": "node_load1", 74 | "refId": "A", 75 | "step": 40, 76 | "target": "" 77 | }, 78 | { 79 | "expr": "sum(node_load5{instance=~\"$node\"})", 80 | "interval": "", 81 | "intervalFactor": 2, 82 | "legendFormat": "load5", 83 | "metric": "node_load5", 84 | "refId": "B", 85 | "step": 40, 86 | "target": "" 87 | }, 88 | { 89 | "expr": "sum(node_load15{instance=~\"$node\"})", 90 | "interval": "", 91 | "intervalFactor": 2, 92 | "legendFormat": "load15", 93 | "metric": "node_load15", 94 | "refId": "C", 95 | "step": 40, 96 | "target": "" 97 | } 98 | ], 99 | "thresholds": [], 100 | "timeFrom": null, 101 | "timeRegions": [], 102 | "timeShift": null, 103 | "title": "Load", 104 | "tooltip": { 105 | "msResolution": false, 106 | "shared": true, 107 | "sort": 0, 108 | "value_type": "cumulative" 109 | }, 110 | "type": "graph", 111 | "xaxis": { 112 | "buckets": null, 113 | "mode": "time", 114 | "name": null, 115 | "show": true, 116 | "values": [] 117 | }, 118 | "yaxes": [ 119 | { 120 | "format": "short", 121 | "logBase": 1, 122 | "max": null, 123 | "min": null, 124 | "show": true 125 | }, 126 | { 127 | "format": "short", 128 | "logBase": 1, 129 | "max": null, 130 | "min": null, 131 | "show": true 
132 | } 133 | ], 134 | "yaxis": { 135 | "align": false, 136 | "alignLevel": null 137 | } 138 | }, 139 | { 140 | "aliasColors": {}, 141 | "bars": false, 142 | "dashLength": 10, 143 | "dashes": false, 144 | "datasource": "Dashboard", 145 | "editable": true, 146 | "error": false, 147 | "fill": 1, 148 | "fillGradient": 0, 149 | "grid": {}, 150 | "gridPos": { 151 | "h": 7, 152 | "w": 12, 153 | "x": 0, 154 | "y": 7 155 | }, 156 | "hiddenSeries": false, 157 | "id": 12, 158 | "legend": { 159 | "avg": true, 160 | "current": true, 161 | "max": true, 162 | "min": true, 163 | "show": true, 164 | "total": false, 165 | "values": true 166 | }, 167 | "lines": true, 168 | "linewidth": 2, 169 | "links": [], 170 | "nullPointMode": "connected", 171 | "options": { 172 | "dataLinks": [] 173 | }, 174 | "percentage": false, 175 | "pointradius": 5, 176 | "points": false, 177 | "renderer": "flot", 178 | "repeat": null, 179 | "seriesOverrides": [ 180 | { 181 | "alias": "/.*in/", 182 | "transform": "negative-Y" 183 | } 184 | ], 185 | "spaceLength": 10, 186 | "stack": false, 187 | "steppedLine": false, 188 | "targets": [ 189 | { 190 | "expr": "sum(irate(node_network_receive_bytes_total{device=~\"$device\",instance=~\"$node\"}[3m]))", 191 | "interval": "", 192 | "intervalFactor": 2, 193 | "legendFormat": "in", 194 | "metric": "", 195 | "refId": "A", 196 | "step": 60, 197 | "target": "" 198 | }, 199 | { 200 | "expr": "sum(irate(node_network_transmit_bytes_total{device=~\"$device\",instance=~\"$node\"}[3m]))", 201 | "interval": "", 202 | "intervalFactor": 2, 203 | "legendFormat": "out", 204 | "refId": "B", 205 | "step": 60 206 | } 207 | ], 208 | "thresholds": [], 209 | "timeFrom": null, 210 | "timeRegions": [], 211 | "timeShift": null, 212 | "title": "Network Traffic", 213 | "tooltip": { 214 | "msResolution": false, 215 | "shared": true, 216 | "sort": 0, 217 | "value_type": "cumulative" 218 | }, 219 | "type": "graph", 220 | "xaxis": { 221 | "buckets": null, 222 | "mode": "time", 223 | "name": 
null, 224 | "show": true, 225 | "values": [] 226 | }, 227 | "yaxes": [ 228 | { 229 | "format": "bytes", 230 | "label": "bits in (-) / bits out (+)", 231 | "logBase": 1, 232 | "max": null, 233 | "min": null, 234 | "show": true 235 | }, 236 | { 237 | "format": "short", 238 | "logBase": 1, 239 | "max": null, 240 | "min": null, 241 | "show": true 242 | } 243 | ], 244 | "yaxis": { 245 | "align": false, 246 | "alignLevel": null 247 | } 248 | }, 249 | { 250 | "aliasColors": {}, 251 | "bars": true, 252 | "dashLength": 10, 253 | "dashes": false, 254 | "datasource": "Dashboard", 255 | "editable": true, 256 | "error": false, 257 | "fill": 1, 258 | "fillGradient": 0, 259 | "grid": {}, 260 | "gridPos": { 261 | "h": 7, 262 | "w": 12, 263 | "x": 12, 264 | "y": 7 265 | }, 266 | "hiddenSeries": false, 267 | "id": 25, 268 | "legend": { 269 | "avg": true, 270 | "current": true, 271 | "max": true, 272 | "min": false, 273 | "show": true, 274 | "total": true, 275 | "values": true 276 | }, 277 | "lines": false, 278 | "linewidth": 2, 279 | "links": [], 280 | "nullPointMode": "connected", 281 | "options": { 282 | "dataLinks": [] 283 | }, 284 | "percentage": false, 285 | "pointradius": 1, 286 | "points": false, 287 | "renderer": "flot", 288 | "repeat": null, 289 | "seriesOverrides": [ 290 | { 291 | "alias": "/.*in/", 292 | "transform": "negative-Y" 293 | } 294 | ], 295 | "spaceLength": 10, 296 | "stack": false, 297 | "steppedLine": false, 298 | "targets": [ 299 | { 300 | "expr": "sum(increase(node_network_receive_bytes_total{device=~\"$device\",instance=~\"$node\"}[1m]))", 301 | "interval": "5m", 302 | "intervalFactor": 2, 303 | "legendFormat": "in", 304 | "metric": "", 305 | "refId": "A", 306 | "step": 600, 307 | "target": "" 308 | }, 309 | { 310 | "expr": "sum(increase(node_network_transmit_bytes_total{device=~\"$device\",instance=~\"$node\"}[1m]))", 311 | "hide": false, 312 | "interval": "5m", 313 | "intervalFactor": 2, 314 | "legendFormat": "out", 315 | "refId": "B", 316 | "step": 600 
317 | } 318 | ], 319 | "thresholds": [], 320 | "timeFrom": null, 321 | "timeRegions": [], 322 | "timeShift": null, 323 | "title": "Network Utillization", 324 | "tooltip": { 325 | "msResolution": false, 326 | "shared": true, 327 | "sort": 0, 328 | "value_type": "cumulative" 329 | }, 330 | "type": "graph", 331 | "xaxis": { 332 | "buckets": null, 333 | "mode": "time", 334 | "name": null, 335 | "show": true, 336 | "values": [] 337 | }, 338 | "yaxes": [ 339 | { 340 | "format": "bytes", 341 | "label": "bits in (-) / bits out (+)", 342 | "logBase": 1, 343 | "max": null, 344 | "min": null, 345 | "show": true 346 | }, 347 | { 348 | "format": "short", 349 | "logBase": 1, 350 | "max": null, 351 | "min": null, 352 | "show": true 353 | } 354 | ], 355 | "yaxis": { 356 | "align": false, 357 | "alignLevel": null 358 | } 359 | }, 360 | { 361 | "cacheTimeout": null, 362 | "colorBackground": false, 363 | "colorValue": false, 364 | "colors": [ 365 | "rgba(245, 54, 54, 0.9)", 366 | "rgba(237, 129, 40, 0.89)", 367 | "rgba(50, 172, 45, 0.97)" 368 | ], 369 | "datasource": "Dashboard", 370 | "decimals": null, 371 | "format": "decbytes", 372 | "gauge": { 373 | "maxValue": 100, 374 | "minValue": 0, 375 | "show": false, 376 | "thresholdLabels": false, 377 | "thresholdMarkers": true 378 | }, 379 | "gridPos": { 380 | "h": 5, 381 | "w": 12, 382 | "x": 0, 383 | "y": 14 384 | }, 385 | "height": "200px", 386 | "hideTimeOverride": false, 387 | "id": 26, 388 | "interval": null, 389 | "links": [], 390 | "mappingType": 1, 391 | "mappingTypes": [ 392 | { 393 | "name": "value to text", 394 | "value": 1 395 | }, 396 | { 397 | "name": "range to text", 398 | "value": 2 399 | } 400 | ], 401 | "maxDataPoints": 100, 402 | "nullPointMode": "connected", 403 | "nullText": null, 404 | "options": {}, 405 | "postfix": "", 406 | "postfixFontSize": "50%", 407 | "prefix": "", 408 | "prefixFontSize": "50%", 409 | "rangeMaps": [ 410 | { 411 | "from": "null", 412 | "text": "N/A", 413 | "to": "null" 414 | } 415 | ], 416 | 
"repeat": null, 417 | "sparkline": { 418 | "fillColor": "rgba(31, 118, 189, 0.18)", 419 | "full": false, 420 | "lineColor": "rgb(31, 120, 193)", 421 | "show": true 422 | }, 423 | "tableColumn": "", 424 | "targets": [ 425 | { 426 | "expr": "sum(increase(node_network_receive_bytes_total{device=~\"$device\",instance=~\"$node\"}[24h]))", 427 | "interval": "", 428 | "intervalFactor": 2, 429 | "refId": "A", 430 | "step": 600 431 | } 432 | ], 433 | "thresholds": "", 434 | "timeFrom": null, 435 | "timeShift": null, 436 | "title": "Traffic In", 437 | "type": "singlestat", 438 | "valueFontSize": "80%", 439 | "valueMaps": [ 440 | { 441 | "op": "=", 442 | "text": "N/A", 443 | "value": "null" 444 | } 445 | ], 446 | "valueName": "max" 447 | }, 448 | { 449 | "cacheTimeout": null, 450 | "colorBackground": false, 451 | "colorValue": false, 452 | "colors": [ 453 | "rgba(245, 54, 54, 0.9)", 454 | "rgba(237, 129, 40, 0.89)", 455 | "rgba(50, 172, 45, 0.97)" 456 | ], 457 | "datasource": "Dashboard", 458 | "format": "bytes", 459 | "gauge": { 460 | "maxValue": 100, 461 | "minValue": 0, 462 | "show": false, 463 | "thresholdLabels": false, 464 | "thresholdMarkers": true 465 | }, 466 | "gridPos": { 467 | "h": 5, 468 | "w": 12, 469 | "x": 12, 470 | "y": 14 471 | }, 472 | "height": "200px", 473 | "id": 27, 474 | "interval": null, 475 | "links": [], 476 | "mappingType": 1, 477 | "mappingTypes": [ 478 | { 479 | "name": "value to text", 480 | "value": 1 481 | }, 482 | { 483 | "name": "range to text", 484 | "value": 2 485 | } 486 | ], 487 | "maxDataPoints": 100, 488 | "nullPointMode": "connected", 489 | "nullText": null, 490 | "options": {}, 491 | "postfix": "", 492 | "postfixFontSize": "50%", 493 | "prefix": "", 494 | "prefixFontSize": "50%", 495 | "rangeMaps": [ 496 | { 497 | "from": "null", 498 | "text": "N/A", 499 | "to": "null" 500 | } 501 | ], 502 | "repeat": null, 503 | "sparkline": { 504 | "fillColor": "rgba(31, 118, 189, 0.18)", 505 | "full": false, 506 | "lineColor": "rgb(31, 120, 193)", 
507 | "show": true 508 | }, 509 | "tableColumn": "", 510 | "targets": [ 511 | { 512 | "expr": "sum(increase(node_network_transmit_bytes_total{device=~\"$device\",instance=~\"$node\"}[24h]))", 513 | "interval": "", 514 | "intervalFactor": 2, 515 | "legendFormat": "", 516 | "refId": "A", 517 | "step": 600 518 | } 519 | ], 520 | "thresholds": "", 521 | "timeFrom": null, 522 | "title": "Traffic Out", 523 | "type": "singlestat", 524 | "valueFontSize": "80%", 525 | "valueMaps": [ 526 | { 527 | "op": "=", 528 | "text": "N/A", 529 | "value": "null" 530 | } 531 | ], 532 | "valueName": "max" 533 | }, 534 | { 535 | "aliasColors": {}, 536 | "bars": false, 537 | "dashLength": 10, 538 | "dashes": false, 539 | "datasource": "Dashboard", 540 | "editable": true, 541 | "error": false, 542 | "fill": 1, 543 | "fillGradient": 0, 544 | "grid": {}, 545 | "gridPos": { 546 | "h": 7, 547 | "w": 24, 548 | "x": 0, 549 | "y": 19 550 | }, 551 | "hiddenSeries": false, 552 | "id": 21, 553 | "legend": { 554 | "avg": false, 555 | "current": false, 556 | "max": false, 557 | "min": false, 558 | "show": true, 559 | "total": false, 560 | "values": false 561 | }, 562 | "lines": true, 563 | "linewidth": 2, 564 | "links": [], 565 | "nullPointMode": "connected", 566 | "options": { 567 | "dataLinks": [] 568 | }, 569 | "percentage": false, 570 | "pointradius": 5, 571 | "points": false, 572 | "renderer": "flot", 573 | "repeat": null, 574 | "seriesOverrides": [], 575 | "spaceLength": 10, 576 | "stack": false, 577 | "steppedLine": false, 578 | "targets": [ 579 | { 580 | "expr": "sum(node_netstat_Tcp_CurrEstab{instance=~\"$node\"})", 581 | "intervalFactor": 2, 582 | "legendFormat": "established", 583 | "metric": "node_netstat_Tcp_CurrEstab", 584 | "refId": "A", 585 | "step": 40, 586 | "target": "" 587 | }, 588 | { 589 | "expr": "sum(node_netstat_Tcp_ActiveOpens{instance=~\"$node\"})", 590 | "interval": "", 591 | "intervalFactor": 2, 592 | "legendFormat": "activeOpens", 593 | "metric": 
"node_netstat_Tcp_ActiveOpens", 594 | "refId": "B", 595 | "step": 40 596 | }, 597 | { 598 | "expr": "sum(node_netstat_Tcp_PassiveOpens{instance=~\"$node\"})", 599 | "intervalFactor": 2, 600 | "legendFormat": "passiveOpens", 601 | "metric": "node_netstat_Tcp_PassiveOpens", 602 | "refId": "C", 603 | "step": 40 604 | }, 605 | { 606 | "expr": "sum(node_netstat_Tcp_EstabResets{instance=~\"$node\"})", 607 | "interval": "", 608 | "intervalFactor": 2, 609 | "legendFormat": "estabResets", 610 | "metric": "node_netstat_Tcp_EstabResets", 611 | "refId": "D", 612 | "step": 40 613 | }, 614 | { 615 | "expr": "sum(node_netstat_Tcp_AttemptFails{instance=~\"$node\"})", 616 | "intervalFactor": 2, 617 | "legendFormat": "attemptfails", 618 | "metric": "node_netstat_Tcp_AttemptFails", 619 | "refId": "E", 620 | "step": 40 621 | }, 622 | { 623 | "expr": "sum(node_netstat_Tcp_RetransSegs{instance=~\"$node\"})", 624 | "intervalFactor": 2, 625 | "legendFormat": "retransSegs", 626 | "metric": "node_netstat_Tcp_RetransSegs", 627 | "refId": "F", 628 | "step": 40 629 | } 630 | ], 631 | "thresholds": [], 632 | "timeFrom": null, 633 | "timeRegions": [], 634 | "timeShift": null, 635 | "title": "Netstat: TCP", 636 | "tooltip": { 637 | "msResolution": false, 638 | "shared": true, 639 | "sort": 0, 640 | "value_type": "cumulative" 641 | }, 642 | "type": "graph", 643 | "xaxis": { 644 | "buckets": null, 645 | "mode": "time", 646 | "name": null, 647 | "show": true, 648 | "values": [] 649 | }, 650 | "yaxes": [ 651 | { 652 | "format": "short", 653 | "logBase": 1, 654 | "max": null, 655 | "min": null, 656 | "show": true 657 | }, 658 | { 659 | "format": "short", 660 | "logBase": 1, 661 | "max": null, 662 | "min": null, 663 | "show": true 664 | } 665 | ], 666 | "yaxis": { 667 | "align": false, 668 | "alignLevel": null 669 | } 670 | }, 671 | { 672 | "aliasColors": {}, 673 | "bars": false, 674 | "dashLength": 10, 675 | "dashes": false, 676 | "datasource": "Dashboard", 677 | "editable": true, 678 | "error": 
false, 679 | "fill": 1, 680 | "fillGradient": 0, 681 | "grid": {}, 682 | "gridPos": { 683 | "h": 7, 684 | "w": 24, 685 | "x": 0, 686 | "y": 26 687 | }, 688 | "hiddenSeries": false, 689 | "id": 23, 690 | "legend": { 691 | "avg": false, 692 | "current": false, 693 | "max": false, 694 | "min": false, 695 | "show": true, 696 | "total": false, 697 | "values": false 698 | }, 699 | "lines": true, 700 | "linewidth": 2, 701 | "links": [], 702 | "nullPointMode": "connected", 703 | "options": { 704 | "dataLinks": [] 705 | }, 706 | "percentage": false, 707 | "pointradius": 5, 708 | "points": false, 709 | "renderer": "flot", 710 | "repeat": null, 711 | "seriesOverrides": [ 712 | { 713 | "alias": "/.*Out.*/", 714 | "transform": "negative-Y" 715 | }, 716 | { 717 | "alias": "Udp_NoPorts", 718 | "yaxis": 2 719 | } 720 | ], 721 | "spaceLength": 10, 722 | "stack": false, 723 | "steppedLine": false, 724 | "targets": [ 725 | { 726 | "expr": "sum(irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m]))", 727 | "intervalFactor": 2, 728 | "legendFormat": "Udp_InDatagrams", 729 | "metric": "node_netstat_Udp_InDatagrams", 730 | "refId": "A", 731 | "step": 40, 732 | "target": "" 733 | }, 734 | { 735 | "expr": "sum(irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m]))", 736 | "intervalFactor": 2, 737 | "legendFormat": "Udp_InErrors", 738 | "metric": "node_netstat_Udp_InErrors", 739 | "refId": "B", 740 | "step": 40 741 | }, 742 | { 743 | "expr": "sum(irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m]))", 744 | "interval": "", 745 | "intervalFactor": 2, 746 | "legendFormat": "Udp_OutDatagrams", 747 | "metric": "node_netstat_Udp_OutDatagrams", 748 | "refId": "C", 749 | "step": 40 750 | }, 751 | { 752 | "expr": "sum(irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m]))", 753 | "intervalFactor": 2, 754 | "legendFormat": "Udp_NoPorts", 755 | "metric": "node_netstat_Udp_NoPorts", 756 | "refId": "D", 757 | "step": 40 758 | }, 759 | { 760 | "expr": 
"sum(irate(node_netstat_Udp_InCsumErrors{instance=~\"$node\"}[5m]))", 761 | "interval": "", 762 | "intervalFactor": 2, 763 | "legendFormat": "Udp_InCsumErrors", 764 | "metric": "node_netstat_Udp_InCsumErrors", 765 | "refId": "E", 766 | "step": 40 767 | }, 768 | { 769 | "expr": "sum(irate(node_netstat_Udp_RcvbufErrors{instance=~\"$node\"}[5m]))", 770 | "intervalFactor": 2, 771 | "legendFormat": "Udp_RcvbufErrors", 772 | "metric": "node_netstat_Udp_RcvbufErrors", 773 | "refId": "F", 774 | "step": 40 775 | }, 776 | { 777 | "expr": "sum(irate(node_netstat_Udp_SndbufErrors{instance=~\"$node\"}[5m]))", 778 | "intervalFactor": 2, 779 | "legendFormat": "Udp_SndbufErrors", 780 | "metric": "node_netstat_Udp_SndbufErrors", 781 | "refId": "G", 782 | "step": 40 783 | } 784 | ], 785 | "thresholds": [], 786 | "timeFrom": null, 787 | "timeRegions": [], 788 | "timeShift": null, 789 | "title": "Netstat: UDP", 790 | "tooltip": { 791 | "msResolution": false, 792 | "shared": true, 793 | "sort": 0, 794 | "value_type": "cumulative" 795 | }, 796 | "type": "graph", 797 | "xaxis": { 798 | "buckets": null, 799 | "mode": "time", 800 | "name": null, 801 | "show": true, 802 | "values": [] 803 | }, 804 | "yaxes": [ 805 | { 806 | "format": "short", 807 | "logBase": 1, 808 | "max": null, 809 | "min": null, 810 | "show": true 811 | }, 812 | { 813 | "format": "short", 814 | "logBase": 1, 815 | "max": null, 816 | "min": null, 817 | "show": true 818 | } 819 | ], 820 | "yaxis": { 821 | "align": false, 822 | "alignLevel": null 823 | } 824 | }, 825 | { 826 | "aliasColors": {}, 827 | "bars": false, 828 | "dashLength": 10, 829 | "dashes": false, 830 | "datasource": "Dashboard", 831 | "editable": true, 832 | "error": false, 833 | "fill": 1, 834 | "fillGradient": 0, 835 | "grid": {}, 836 | "gridPos": { 837 | "h": 7, 838 | "w": 24, 839 | "x": 0, 840 | "y": 33 841 | }, 842 | "hiddenSeries": false, 843 | "id": 24, 844 | "legend": { 845 | "avg": false, 846 | "current": false, 847 | "max": false, 848 | "min": 
false, 849 | "show": true, 850 | "total": false, 851 | "values": false 852 | }, 853 | "lines": true, 854 | "linewidth": 2, 855 | "links": [], 856 | "nullPointMode": "connected", 857 | "options": { 858 | "dataLinks": [] 859 | }, 860 | "percentage": false, 861 | "pointradius": 5, 862 | "points": false, 863 | "renderer": "flot", 864 | "repeat": null, 865 | "seriesOverrides": [], 866 | "spaceLength": 10, 867 | "stack": false, 868 | "steppedLine": false, 869 | "targets": [ 870 | { 871 | "expr": "sum(node_nf_conntrack_entries_limit{instance=~\"$node\"}) - sum(node_nf_conntrack_entries{instance=~\"$node\"})", 872 | "intervalFactor": 2, 873 | "legendFormat": "free", 874 | "metric": "node_nf_conntrack_entries_limit", 875 | "refId": "A", 876 | "step": 40, 877 | "target": "" 878 | }, 879 | { 880 | "expr": "sum(node_nf_conntrack_entries_limit{instance=~\"$node\"})", 881 | "intervalFactor": 2, 882 | "legendFormat": "limit", 883 | "metric": "node_nf_conntrack_entries_limit", 884 | "refId": "B", 885 | "step": 40 886 | } 887 | ], 888 | "thresholds": [], 889 | "timeFrom": null, 890 | "timeRegions": [], 891 | "timeShift": null, 892 | "title": "Conntrack", 893 | "tooltip": { 894 | "msResolution": false, 895 | "shared": true, 896 | "sort": 0, 897 | "value_type": "cumulative" 898 | }, 899 | "type": "graph", 900 | "xaxis": { 901 | "buckets": null, 902 | "mode": "time", 903 | "name": null, 904 | "show": true, 905 | "values": [] 906 | }, 907 | "yaxes": [ 908 | { 909 | "format": "short", 910 | "logBase": 1, 911 | "max": null, 912 | "min": null, 913 | "show": true 914 | }, 915 | { 916 | "format": "short", 917 | "logBase": 1, 918 | "max": null, 919 | "min": null, 920 | "show": true 921 | } 922 | ], 923 | "yaxis": { 924 | "align": false, 925 | "alignLevel": null 926 | } 927 | } 928 | ], 929 | "refresh": false, 930 | "schemaVersion": 21, 931 | "style": "dark", 932 | "tags": [ 933 | "node", 934 | "network" 935 | ], 936 | "templating": { 937 | "list": [ 938 | { 939 | "allValue": null, 940 | 
"current": { 941 | "selected": false, 942 | "text": "All", 943 | "value": "$__all" 944 | }, 945 | "datasource": "Dashboard", 946 | "definition": "", 947 | "hide": 0, 948 | "includeAll": true, 949 | "label": null, 950 | "multi": false, 951 | "name": "node", 952 | "options": [], 953 | "query": "label_values(node_boot_time_seconds, instance)", 954 | "refresh": 1, 955 | "regex": "", 956 | "skipUrlSync": false, 957 | "sort": 0, 958 | "tagValuesQuery": "", 959 | "tags": [], 960 | "tagsQuery": "", 961 | "type": "query", 962 | "useTags": false 963 | }, 964 | { 965 | "allValue": null, 966 | "current": { 967 | "selected": false, 968 | "text": "All", 969 | "value": "$__all" 970 | }, 971 | "datasource": "Dashboard", 972 | "definition": "", 973 | "hide": 0, 974 | "includeAll": true, 975 | "label": null, 976 | "multi": false, 977 | "name": "device", 978 | "options": [], 979 | "query": "label_values(node_network_receive_bytes_total, device)", 980 | "refresh": 1, 981 | "regex": "", 982 | "skipUrlSync": false, 983 | "sort": 1, 984 | "tagValuesQuery": "", 985 | "tags": [], 986 | "tagsQuery": "", 987 | "type": "query", 988 | "useTags": false 989 | } 990 | ] 991 | }, 992 | "time": { 993 | "from": "now/d", 994 | "to": "now" 995 | }, 996 | "timepicker": { 997 | "now": true, 998 | "refresh_intervals": [ 999 | "5s", 1000 | "10s", 1001 | "30s", 1002 | "1m", 1003 | "5m", 1004 | "15m", 1005 | "30m", 1006 | "1h", 1007 | "2h", 1008 | "1d" 1009 | ], 1010 | "time_options": [ 1011 | "5m", 1012 | "15m", 1013 | "1h", 1014 | "6h", 1015 | "12h", 1016 | "24h", 1017 | "2d", 1018 | "7d", 1019 | "30d" 1020 | ] 1021 | }, 1022 | "timezone": "browser", 1023 | "title": "Network Overview", 1024 | "uid": "MBRTkBRZk", 1025 | "version": 3 1026 | } -------------------------------------------------------------------------------- /roles/grafana/files/system-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 
6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "limit": 100, 11 | "name": "Annotations & Alerts", 12 | "showIn": 0, 13 | "type": "dashboard" 14 | } 15 | ] 16 | }, 17 | "description": "Includes: CPU, memory, disk IO, network, temperature and other monitoring metrics.", 18 | "editable": true, 19 | "gnetId": 11074, 20 | "graphTooltip": 0, 21 | "id": 6, 22 | "iteration": 1575918887319, 23 | "links": [], 24 | "panels": [ 25 | { 26 | "cacheTimeout": null, 27 | "colorBackground": false, 28 | "colorPostfix": false, 29 | "colorPrefix": false, 30 | "colorValue": true, 31 | "colors": [ 32 | "rgba(245, 54, 54, 0.9)", 33 | "rgba(237, 129, 40, 0.89)", 34 | "rgba(50, 172, 45, 0.97)" 35 | ], 36 | "datasource": "Dashboard", 37 | "decimals": 1, 38 | "description": "", 39 | "format": "s", 40 | "gauge": { 41 | "maxValue": 100, 42 | "minValue": 0, 43 | "show": false, 44 | "thresholdLabels": false, 45 | "thresholdMarkers": true 46 | }, 47 | "gridPos": { 48 | "h": 3, 49 | "w": 2, 50 | "x": 0, 51 | "y": 0 52 | }, 53 | "hideTimeOverride": true, 54 | "id": 15, 55 | "interval": null, 56 | "links": [], 57 | "mappingType": 1, 58 | "mappingTypes": [ 59 | { 60 | "name": "value to text", 61 | "value": 1 62 | }, 63 | { 64 | "name": "range to text", 65 | "value": 2 66 | } 67 | ], 68 | "maxDataPoints": 100, 69 | "nullPointMode": "null", 70 | "nullText": null, 71 | "options": {}, 72 | "pluginVersion": "6.4.2", 73 | "postfix": "", 74 | "postfixFontSize": "50%", 75 | "prefix": "", 76 | "prefixFontSize": "50%", 77 | "rangeMaps": [ 78 | { 79 | "from": "null", 80 | "text": "N/A", 81 | "to": "null" 82 | } 83 | ], 84 | "sparkline": { 85 | "fillColor": "rgba(31, 118, 189, 0.18)", 86 | "full": false, 87 | "lineColor": "rgb(31, 120, 193)", 88 | "show": false 89 | }, 90 | "tableColumn": "", 91 | "targets": [ 92 | { 93 | "expr": "sum(time() - node_boot_time_seconds{instance=~\"$node\"})", 94 | "format": "time_series", 95 | "hide": false, 96 
| "instant": true, 97 | "intervalFactor": 1, 98 | "refId": "A", 99 | "step": 40 100 | } 101 | ], 102 | "thresholds": "1,2", 103 | "title": "System Uptime", 104 | "type": "singlestat", 105 | "valueFontSize": "100%", 106 | "valueMaps": [ 107 | { 108 | "op": "=", 109 | "text": "N/A", 110 | "value": "null" 111 | } 112 | ], 113 | "valueName": "current" 114 | }, 115 | { 116 | "cacheTimeout": null, 117 | "colorBackground": false, 118 | "colorValue": true, 119 | "colors": [ 120 | "rgba(245, 54, 54, 0.9)", 121 | "rgba(237, 129, 40, 0.89)", 122 | "rgba(50, 172, 45, 0.97)" 123 | ], 124 | "datasource": "Dashboard", 125 | "decimals": 2, 126 | "description": "", 127 | "format": "bytes", 128 | "gauge": { 129 | "maxValue": 100, 130 | "minValue": 0, 131 | "show": false, 132 | "thresholdLabels": false, 133 | "thresholdMarkers": true 134 | }, 135 | "gridPos": { 136 | "h": 3, 137 | "w": 2, 138 | "x": 2, 139 | "y": 0 140 | }, 141 | "id": 75, 142 | "interval": null, 143 | "links": [], 144 | "mappingType": 1, 145 | "mappingTypes": [ 146 | { 147 | "name": "value to text", 148 | "value": 1 149 | }, 150 | { 151 | "name": "range to text", 152 | "value": 2 153 | } 154 | ], 155 | "maxDataPoints": 100, 156 | "maxPerRow": 6, 157 | "nullPointMode": "null", 158 | "nullText": null, 159 | "options": {}, 160 | "postfix": "", 161 | "postfixFontSize": "70%", 162 | "prefix": "", 163 | "prefixFontSize": "50%", 164 | "rangeMaps": [ 165 | { 166 | "from": "null", 167 | "text": "N/A", 168 | "to": "null" 169 | } 170 | ], 171 | "sparkline": { 172 | "fillColor": "rgba(31, 118, 189, 0.18)", 173 | "full": false, 174 | "lineColor": "rgb(31, 120, 193)", 175 | "show": false 176 | }, 177 | "tableColumn": "", 178 | "targets": [ 179 | { 180 | "expr": "sum(node_memory_MemTotal_bytes{instance=~\"$node\"})", 181 | "format": "time_series", 182 | "instant": true, 183 | "intervalFactor": 1, 184 | "legendFormat": "{{instance}}", 185 | "refId": "A", 186 | "step": 20 187 | } 188 | ], 189 | "thresholds": "2,3", 190 | "title": 
"Total RAM", 191 | "type": "singlestat", 192 | "valueFontSize": "80%", 193 | "valueMaps": [ 194 | { 195 | "op": "=", 196 | "text": "N/A", 197 | "value": "null" 198 | } 199 | ], 200 | "valueName": "current" 201 | }, 202 | { 203 | "datasource": "Dashboard", 204 | "gridPos": { 205 | "h": 6, 206 | "w": 4, 207 | "x": 4, 208 | "y": 0 209 | }, 210 | "id": 177, 211 | "options": { 212 | "displayMode": "lcd", 213 | "fieldOptions": { 214 | "calcs": [ 215 | "last" 216 | ], 217 | "defaults": { 218 | "mappings": [], 219 | "max": 100, 220 | "min": 0, 221 | "thresholds": [ 222 | { 223 | "color": "green", 224 | "value": null 225 | }, 226 | { 227 | "color": "#EAB839", 228 | "value": 60 229 | }, 230 | { 231 | "color": "red", 232 | "value": 80 233 | } 234 | ], 235 | "title": "", 236 | "unit": "percent" 237 | }, 238 | "override": {}, 239 | "values": false 240 | }, 241 | "orientation": "horizontal" 242 | }, 243 | "pluginVersion": "6.5.1", 244 | "targets": [ 245 | { 246 | "expr": "100 - (avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[30m])) * 100)", 247 | "instant": true, 248 | "legendFormat": "CPU Busy", 249 | "refId": "A" 250 | }, 251 | { 252 | "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[30m])) * 100", 253 | "hide": true, 254 | "instant": true, 255 | "legendFormat": "Busy Iowait", 256 | "refId": "C" 257 | }, 258 | { 259 | "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$node\"} / (node_memory_MemTotal_bytes{instance=~\"$node\"})))* 100", 260 | "instant": true, 261 | "legendFormat": "Used RAM Memory", 262 | "refId": "B" 263 | }, 264 | { 265 | "expr": "100 - ((node_filesystem_avail_bytes{instance=~\"$node\",mountpoint=\"$maxmount\",fstype=~\"ext4|xfs\"} * 100) / node_filesystem_size_bytes {instance=~\"$node\",mountpoint=\"$maxmount\",fstype=~\"ext4|xfs\"})", 266 | "hide": false, 267 | "instant": true, 268 | "legendFormat": "Used Max Mount($maxmount)", 269 | "refId": "D" 270 | }, 271 | { 272 | "expr": "(1 - 
(node_memory_SwapFree_bytes{instance=~\"$node\"} / node_memory_SwapTotal_bytes{instance=~\"$node\"})) * 100", 273 | "instant": true, 274 | "legendFormat": "Used SWAP", 275 | "refId": "E" 276 | } 277 | ], 278 | "timeFrom": null, 279 | "timeShift": null, 280 | "title": "", 281 | "type": "bargauge" 282 | }, 283 | { 284 | "columns": [], 285 | "datasource": "Dashboard", 286 | "fontSize": "110%", 287 | "gridPos": { 288 | "h": 6, 289 | "w": 10, 290 | "x": 8, 291 | "y": 0 292 | }, 293 | "id": 164, 294 | "links": [], 295 | "options": {}, 296 | "pageSize": null, 297 | "scroll": true, 298 | "showHeader": true, 299 | "sort": { 300 | "col": 6, 301 | "desc": false 302 | }, 303 | "styles": [ 304 | { 305 | "alias": "Mounted on", 306 | "colorMode": null, 307 | "colors": [ 308 | "rgba(50, 172, 45, 0.97)", 309 | "rgba(237, 129, 40, 0.89)", 310 | "rgba(245, 54, 54, 0.9)" 311 | ], 312 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 313 | "decimals": 2, 314 | "mappingType": 1, 315 | "pattern": "mountpoint", 316 | "thresholds": [ 317 | "" 318 | ], 319 | "type": "string", 320 | "unit": "bytes" 321 | }, 322 | { 323 | "alias": "Avail", 324 | "colorMode": "value", 325 | "colors": [ 326 | "rgba(245, 54, 54, 0.9)", 327 | "rgba(237, 129, 40, 0.89)", 328 | "rgba(50, 172, 45, 0.97)" 329 | ], 330 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 331 | "decimals": 2, 332 | "mappingType": 1, 333 | "pattern": "Value #A", 334 | "thresholds": [ 335 | "10000000000", 336 | "20000000000" 337 | ], 338 | "type": "number", 339 | "unit": "bytes" 340 | }, 341 | { 342 | "alias": "Used", 343 | "colorMode": "cell", 344 | "colors": [ 345 | "rgba(50, 172, 45, 0.97)", 346 | "rgba(237, 129, 40, 0.89)", 347 | "rgba(245, 54, 54, 0.9)" 348 | ], 349 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 350 | "decimals": 2, 351 | "mappingType": 1, 352 | "pattern": "Value #B", 353 | "thresholds": [ 354 | "0.6", 355 | "0.8" 356 | ], 357 | "type": "number", 358 | "unit": "percentunit" 359 | }, 360 | { 361 | "alias": "Size", 362 | "colorMode": null, 363 | 
"colors": [ 364 | "rgba(245, 54, 54, 0.9)", 365 | "rgba(237, 129, 40, 0.89)", 366 | "rgba(50, 172, 45, 0.97)" 367 | ], 368 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 369 | "decimals": 1, 370 | "link": false, 371 | "mappingType": 1, 372 | "pattern": "Value #C", 373 | "thresholds": [], 374 | "type": "number", 375 | "unit": "bytes" 376 | }, 377 | { 378 | "alias": "Filesystem", 379 | "colorMode": null, 380 | "colors": [ 381 | "rgba(245, 54, 54, 0.9)", 382 | "rgba(237, 129, 40, 0.89)", 383 | "rgba(50, 172, 45, 0.97)" 384 | ], 385 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 386 | "decimals": 2, 387 | "link": false, 388 | "mappingType": 1, 389 | "pattern": "fstype", 390 | "thresholds": [], 391 | "type": "string", 392 | "unit": "short" 393 | }, 394 | { 395 | "alias": "IP", 396 | "colorMode": null, 397 | "colors": [ 398 | "rgba(245, 54, 54, 0.9)", 399 | "rgba(237, 129, 40, 0.89)", 400 | "rgba(50, 172, 45, 0.97)" 401 | ], 402 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 403 | "decimals": 2, 404 | "link": false, 405 | "mappingType": 1, 406 | "pattern": "instance", 407 | "preserveFormat": false, 408 | "sanitize": false, 409 | "thresholds": [], 410 | "type": "string", 411 | "unit": "short" 412 | }, 413 | { 414 | "alias": "", 415 | "colorMode": null, 416 | "colors": [ 417 | "rgba(245, 54, 54, 0.9)", 418 | "rgba(237, 129, 40, 0.89)", 419 | "rgba(50, 172, 45, 0.97)" 420 | ], 421 | "decimals": 2, 422 | "pattern": "/.*/", 423 | "preserveFormat": true, 424 | "sanitize": false, 425 | "thresholds": [], 426 | "type": "hidden", 427 | "unit": "short" 428 | } 429 | ], 430 | "targets": [ 431 | { 432 | "expr": "node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext4|xfs\"}-0", 433 | "format": "table", 434 | "hide": false, 435 | "instant": true, 436 | "intervalFactor": 1, 437 | "legendFormat": "", 438 | "refId": "C" 439 | }, 440 | { 441 | "expr": "node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext4|xfs\"}-0", 442 | "format": "table", 443 | "hide": false, 444 | "instant": true, 445 | 
"interval": "10s", 446 | "intervalFactor": 1, 447 | "legendFormat": "", 448 | "refId": "A" 449 | }, 450 | { 451 | "expr": "1-(node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext4|xfs\"} / node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext4|xfs\"})", 452 | "format": "table", 453 | "hide": false, 454 | "instant": true, 455 | "intervalFactor": 1, 456 | "legendFormat": "", 457 | "refId": "B" 458 | } 459 | ], 460 | "title": "Disk Space Used Basic(EXT4/XFS)", 461 | "transform": "table", 462 | "type": "table" 463 | }, 464 | { 465 | "aliasColors": { 466 | "filefd_192.168.200.241:9100": "super-light-green", 467 | "switches_192.168.200.241:9100": "semi-dark-red" 468 | }, 469 | "bars": false, 470 | "cacheTimeout": null, 471 | "dashLength": 10, 472 | "dashes": false, 473 | "datasource": "Dashboard", 474 | "description": "", 475 | "fill": 0, 476 | "fillGradient": 1, 477 | "gridPos": { 478 | "h": 6, 479 | "w": 6, 480 | "x": 18, 481 | "y": 0 482 | }, 483 | "hiddenSeries": false, 484 | "hideTimeOverride": false, 485 | "id": 16, 486 | "legend": { 487 | "alignAsTable": false, 488 | "avg": false, 489 | "current": true, 490 | "max": true, 491 | "min": false, 492 | "rightSide": false, 493 | "show": true, 494 | "total": false, 495 | "values": true 496 | }, 497 | "lines": true, 498 | "linewidth": 2, 499 | "links": [], 500 | "nullPointMode": "null", 501 | "options": { 502 | "dataLinks": [] 503 | }, 504 | "percentage": false, 505 | "pluginVersion": "6.4.2", 506 | "pointradius": 1, 507 | "points": false, 508 | "renderer": "flot", 509 | "seriesOverrides": [ 510 | { 511 | "alias": "/filefd_.*/", 512 | "lines": false, 513 | "pointradius": 1, 514 | "points": true 515 | }, 516 | { 517 | "alias": "/switches_.*/", 518 | "color": "#F2495C", 519 | "yaxis": 2 520 | } 521 | ], 522 | "spaceLength": 10, 523 | "stack": false, 524 | "steppedLine": false, 525 | "targets": [ 526 | { 527 | "expr": "node_filefd_allocated{instance=~\"$node\"}", 528 | "format": "time_series", 529 | "instant": 
false, 530 | "interval": "", 531 | "intervalFactor": 5, 532 | "legendFormat": "filefd_{{instance}}", 533 | "refId": "B" 534 | }, 535 | { 536 | "expr": "irate(node_context_switches_total{instance=~\"$node\"}[30m])", 537 | "intervalFactor": 5, 538 | "legendFormat": "switches_{{instance}}", 539 | "refId": "A" 540 | }, 541 | { 542 | "expr": "node_filefd_maximum{instance=~\"$node\"}", 543 | "hide": true, 544 | "refId": "C" 545 | } 546 | ], 547 | "thresholds": [], 548 | "timeFrom": null, 549 | "timeRegions": [], 550 | "timeShift": null, 551 | "title": "Open File Descriptor(left)/Context switches(right)", 552 | "tooltip": { 553 | "shared": true, 554 | "sort": 2, 555 | "value_type": "individual" 556 | }, 557 | "type": "graph", 558 | "xaxis": { 559 | "buckets": null, 560 | "mode": "time", 561 | "name": null, 562 | "show": true, 563 | "values": [] 564 | }, 565 | "yaxes": [ 566 | { 567 | "format": "short", 568 | "label": "", 569 | "logBase": 1, 570 | "max": null, 571 | "min": null, 572 | "show": true 573 | }, 574 | { 575 | "format": "short", 576 | "label": "context_switches", 577 | "logBase": 1, 578 | "max": null, 579 | "min": null, 580 | "show": true 581 | } 582 | ], 583 | "yaxis": { 584 | "align": false, 585 | "alignLevel": null 586 | } 587 | }, 588 | { 589 | "cacheTimeout": null, 590 | "colorBackground": false, 591 | "colorPostfix": false, 592 | "colorValue": true, 593 | "colors": [ 594 | "rgba(245, 54, 54, 0.9)", 595 | "rgba(237, 129, 40, 0.89)", 596 | "rgba(50, 172, 45, 0.97)" 597 | ], 598 | "datasource": "Dashboard", 599 | "description": "", 600 | "format": "short", 601 | "gauge": { 602 | "maxValue": 100, 603 | "minValue": 0, 604 | "show": false, 605 | "thresholdLabels": false, 606 | "thresholdMarkers": true 607 | }, 608 | "gridPos": { 609 | "h": 3, 610 | "w": 2, 611 | "x": 0, 612 | "y": 3 613 | }, 614 | "id": 14, 615 | "interval": null, 616 | "links": [], 617 | "mappingType": 1, 618 | "mappingTypes": [ 619 | { 620 | "name": "value to text", 621 | "value": 1 622 | }, 
623 | { 624 | "name": "range to text", 625 | "value": 2 626 | } 627 | ], 628 | "maxDataPoints": 100, 629 | "maxPerRow": 6, 630 | "nullPointMode": "null", 631 | "nullText": null, 632 | "options": {}, 633 | "postfix": "", 634 | "postfixFontSize": "50%", 635 | "prefix": "", 636 | "prefixFontSize": "50%", 637 | "rangeMaps": [ 638 | { 639 | "from": "null", 640 | "text": "N/A", 641 | "to": "null" 642 | } 643 | ], 644 | "sparkline": { 645 | "fillColor": "rgba(31, 118, 189, 0.18)", 646 | "full": false, 647 | "lineColor": "rgb(31, 120, 193)", 648 | "show": false 649 | }, 650 | "tableColumn": "", 651 | "targets": [ 652 | { 653 | "expr": "sum(count(node_cpu_seconds_total{instance=~\"$node\", mode='system'}) by (cpu))", 654 | "format": "time_series", 655 | "instant": true, 656 | "intervalFactor": 1, 657 | "legendFormat": "", 658 | "refId": "A", 659 | "step": 20 660 | } 661 | ], 662 | "thresholds": "1,2", 663 | "title": "CPU Cores", 664 | "type": "singlestat", 665 | "valueFontSize": "100%", 666 | "valueMaps": [ 667 | { 668 | "op": "=", 669 | "text": "N/A", 670 | "value": "null" 671 | } 672 | ], 673 | "valueName": "current" 674 | }, 675 | { 676 | "cacheTimeout": null, 677 | "colorBackground": false, 678 | "colorValue": true, 679 | "colors": [ 680 | "#299c46", 681 | "rgba(237, 129, 40, 0.89)", 682 | "#d44a3a" 683 | ], 684 | "datasource": "Dashboard", 685 | "decimals": 2, 686 | "description": "", 687 | "format": "percent", 688 | "gauge": { 689 | "maxValue": 100, 690 | "minValue": 0, 691 | "show": false, 692 | "thresholdLabels": false, 693 | "thresholdMarkers": true 694 | }, 695 | "gridPos": { 696 | "h": 3, 697 | "w": 2, 698 | "x": 2, 699 | "y": 3 700 | }, 701 | "id": 20, 702 | "interval": null, 703 | "links": [], 704 | "mappingType": 1, 705 | "mappingTypes": [ 706 | { 707 | "name": "value to text", 708 | "value": 1 709 | }, 710 | { 711 | "name": "range to text", 712 | "value": 2 713 | } 714 | ], 715 | "maxDataPoints": 100, 716 | "nullPointMode": "connected", 717 | "nullText": 
null, 718 | "options": {}, 719 | "pluginVersion": "6.4.2", 720 | "postfix": "", 721 | "postfixFontSize": "50%", 722 | "prefix": "", 723 | "prefixFontSize": "50%", 724 | "rangeMaps": [ 725 | { 726 | "from": "null", 727 | "text": "N/A", 728 | "to": "null" 729 | } 730 | ], 731 | "sparkline": { 732 | "fillColor": "rgba(31, 118, 189, 0.18)", 733 | "full": false, 734 | "lineColor": "#3274D9", 735 | "show": true, 736 | "ymax": null, 737 | "ymin": null 738 | }, 739 | "tableColumn": "", 740 | "targets": [ 741 | { 742 | "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[30m])) * 100", 743 | "format": "time_series", 744 | "hide": false, 745 | "instant": false, 746 | "interval": "", 747 | "intervalFactor": 1, 748 | "legendFormat": "", 749 | "refId": "A", 750 | "step": 20 751 | } 752 | ], 753 | "thresholds": "20,50", 754 | "timeFrom": null, 755 | "timeShift": null, 756 | "title": "CPU IOwait", 757 | "type": "singlestat", 758 | "valueFontSize": "100%", 759 | "valueMaps": [ 760 | { 761 | "op": "=", 762 | "text": "N/A", 763 | "value": "null" 764 | } 765 | ], 766 | "valueName": "avg" 767 | }, 768 | { 769 | "aliasColors": { 770 | "15分钟": "#6ED0E0", 771 | "1分钟": "#BF1B00", 772 | "5分钟": "#CCA300" 773 | }, 774 | "bars": false, 775 | "dashLength": 10, 776 | "dashes": false, 777 | "datasource": "Dashboard", 778 | "editable": true, 779 | "error": false, 780 | "fill": 1, 781 | "fillGradient": 1, 782 | "grid": {}, 783 | "gridPos": { 784 | "h": 8, 785 | "w": 8, 786 | "x": 0, 787 | "y": 6 788 | }, 789 | "height": "300", 790 | "hiddenSeries": false, 791 | "id": 13, 792 | "legend": { 793 | "alignAsTable": true, 794 | "avg": true, 795 | "current": true, 796 | "max": true, 797 | "min": false, 798 | "rightSide": false, 799 | "show": true, 800 | "total": false, 801 | "values": true 802 | }, 803 | "lines": true, 804 | "linewidth": 2, 805 | "links": [], 806 | "maxPerRow": 6, 807 | "nullPointMode": "null as zero", 808 | "options": { 809 | "dataLinks": [] 810 | }, 811 | 
"percentage": false, 812 | "pointradius": 5, 813 | "points": false, 814 | "renderer": "flot", 815 | "repeat": null, 816 | "seriesOverrides": [], 817 | "spaceLength": 10, 818 | "stack": false, 819 | "steppedLine": false, 820 | "targets": [ 821 | { 822 | "expr": "node_load1{instance=~\"$node\"}", 823 | "format": "time_series", 824 | "instant": false, 825 | "interval": "", 826 | "intervalFactor": 1, 827 | "legendFormat": "{{instance}}_1m", 828 | "metric": "", 829 | "refId": "A", 830 | "step": 20, 831 | "target": "" 832 | }, 833 | { 834 | "expr": "node_load5{instance=~\"$node\"}", 835 | "format": "time_series", 836 | "instant": false, 837 | "interval": "", 838 | "intervalFactor": 1, 839 | "legendFormat": "{{instance}}_5m", 840 | "refId": "B", 841 | "step": 20 842 | }, 843 | { 844 | "expr": "node_load15{instance=~\"$node\"}", 845 | "format": "time_series", 846 | "instant": false, 847 | "interval": "", 848 | "intervalFactor": 1, 849 | "legendFormat": "{{instance}}_15m", 850 | "refId": "C", 851 | "step": 20 852 | } 853 | ], 854 | "thresholds": [], 855 | "timeFrom": null, 856 | "timeRegions": [], 857 | "timeShift": null, 858 | "title": "System Load", 859 | "tooltip": { 860 | "msResolution": false, 861 | "shared": true, 862 | "sort": 2, 863 | "value_type": "cumulative" 864 | }, 865 | "type": "graph", 866 | "xaxis": { 867 | "buckets": null, 868 | "mode": "time", 869 | "name": null, 870 | "show": true, 871 | "values": [] 872 | }, 873 | "yaxes": [ 874 | { 875 | "format": "short", 876 | "logBase": 1, 877 | "max": null, 878 | "min": null, 879 | "show": true 880 | }, 881 | { 882 | "format": "short", 883 | "logBase": 1, 884 | "max": null, 885 | "min": null, 886 | "show": true 887 | } 888 | ], 889 | "yaxis": { 890 | "align": false, 891 | "alignLevel": null 892 | } 893 | }, 894 | { 895 | "aliasColors": { 896 | "192.168.200.241:9100_Total": "dark-red", 897 | "Idle - Waiting for something to happen": "#052B51", 898 | "guest": "#9AC48A", 899 | "idle": "#052B51", 900 | "iowait": 
"#EAB839", 901 | "irq": "#BF1B00", 902 | "nice": "#C15C17", 903 | "sdb_每秒I/O操作%": "#d683ce", 904 | "softirq": "#E24D42", 905 | "steal": "#FCE2DE", 906 | "system": "#508642", 907 | "user": "#5195CE", 908 | "磁盘花费在I/O操作占比": "#ba43a9" 909 | }, 910 | "bars": false, 911 | "dashLength": 10, 912 | "dashes": false, 913 | "datasource": "Dashboard", 914 | "decimals": 2, 915 | "description": "", 916 | "fill": 1, 917 | "fillGradient": 0, 918 | "gridPos": { 919 | "h": 8, 920 | "w": 8, 921 | "x": 8, 922 | "y": 6 923 | }, 924 | "hiddenSeries": false, 925 | "id": 7, 926 | "legend": { 927 | "alignAsTable": true, 928 | "avg": true, 929 | "current": true, 930 | "hideEmpty": true, 931 | "hideZero": true, 932 | "max": true, 933 | "min": false, 934 | "rightSide": false, 935 | "show": true, 936 | "sideWidth": null, 937 | "sort": "current", 938 | "sortDesc": true, 939 | "total": false, 940 | "values": true 941 | }, 942 | "lines": true, 943 | "linewidth": 2, 944 | "links": [], 945 | "maxPerRow": 6, 946 | "nullPointMode": "null", 947 | "options": { 948 | "dataLinks": [] 949 | }, 950 | "percentage": false, 951 | "pointradius": 5, 952 | "points": false, 953 | "renderer": "flot", 954 | "repeat": null, 955 | "seriesOverrides": [ 956 | { 957 | "alias": "/.*_Total/", 958 | "color": "#C4162A", 959 | "fill": 0 960 | } 961 | ], 962 | "spaceLength": 10, 963 | "stack": false, 964 | "steppedLine": false, 965 | "targets": [ 966 | { 967 | "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"system\"}[30m])) by (instance)", 968 | "format": "time_series", 969 | "hide": false, 970 | "instant": false, 971 | "interval": "", 972 | "intervalFactor": 1, 973 | "legendFormat": "{{instance}}_System", 974 | "refId": "A", 975 | "step": 20 976 | }, 977 | { 978 | "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"user\"}[30m])) by (instance)", 979 | "format": "time_series", 980 | "hide": false, 981 | "intervalFactor": 1, 982 | "legendFormat": "{{instance}}_User", 983 | "refId": "B", 984 
| "step": 240 985 | }, 986 | { 987 | "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[30m])) by (instance)", 988 | "format": "time_series", 989 | "hide": false, 990 | "instant": false, 991 | "intervalFactor": 1, 992 | "legendFormat": "{{instance}}_Iowait", 993 | "refId": "D", 994 | "step": 240 995 | }, 996 | { 997 | "expr": "1 - avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[30m])) by (instance)", 998 | "format": "time_series", 999 | "hide": false, 1000 | "intervalFactor": 1, 1001 | "legendFormat": "{{instance}}_Total", 1002 | "refId": "F", 1003 | "step": 240 1004 | } 1005 | ], 1006 | "thresholds": [], 1007 | "timeFrom": null, 1008 | "timeRegions": [], 1009 | "timeShift": null, 1010 | "title": "CPU Basic", 1011 | "tooltip": { 1012 | "shared": true, 1013 | "sort": 2, 1014 | "value_type": "individual" 1015 | }, 1016 | "type": "graph", 1017 | "xaxis": { 1018 | "buckets": null, 1019 | "mode": "time", 1020 | "name": null, 1021 | "show": true, 1022 | "values": [] 1023 | }, 1024 | "yaxes": [ 1025 | { 1026 | "decimals": 2, 1027 | "format": "percentunit", 1028 | "label": "", 1029 | "logBase": 1, 1030 | "max": null, 1031 | "min": null, 1032 | "show": true 1033 | }, 1034 | { 1035 | "format": "short", 1036 | "label": null, 1037 | "logBase": 1, 1038 | "max": null, 1039 | "min": null, 1040 | "show": false 1041 | } 1042 | ], 1043 | "yaxis": { 1044 | "align": false, 1045 | "alignLevel": null 1046 | } 1047 | }, 1048 | { 1049 | "aliasColors": { 1050 | "192.168.10.227:9100_em1_in下载": "super-light-green", 1051 | "192.168.10.227:9100_em1_out上传": "dark-blue" 1052 | }, 1053 | "bars": false, 1054 | "dashLength": 10, 1055 | "dashes": false, 1056 | "datasource": "Dashboard", 1057 | "fill": 1, 1058 | "fillGradient": 3, 1059 | "gridPos": { 1060 | "h": 8, 1061 | "w": 8, 1062 | "x": 16, 1063 | "y": 6 1064 | }, 1065 | "height": "300", 1066 | "hiddenSeries": false, 1067 | "id": 157, 1068 | "legend": { 1069 | "alignAsTable": true, 1070 | "avg": 
false, 1071 | "current": true, 1072 | "hideEmpty": true, 1073 | "hideZero": true, 1074 | "max": true, 1075 | "min": false, 1076 | "rightSide": false, 1077 | "show": true, 1078 | "sort": "current", 1079 | "sortDesc": true, 1080 | "total": false, 1081 | "values": true 1082 | }, 1083 | "lines": true, 1084 | "linewidth": 2, 1085 | "links": [], 1086 | "nullPointMode": "null", 1087 | "options": { 1088 | "dataLinks": [] 1089 | }, 1090 | "percentage": false, 1091 | "pointradius": 2, 1092 | "points": false, 1093 | "renderer": "flot", 1094 | "seriesOverrides": [ 1095 | { 1096 | "alias": "/.*_transmit$/", 1097 | "transform": "negative-Y" 1098 | } 1099 | ], 1100 | "spaceLength": 10, 1101 | "stack": false, 1102 | "steppedLine": false, 1103 | "targets": [ 1104 | { 1105 | "expr": "irate(node_network_receive_bytes_total{instance=~'$node',device!~'tap.*|veth.*|br.*|docker.*|virbr*|lo*'}[30m])*8", 1106 | "format": "time_series", 1107 | "intervalFactor": 1, 1108 | "legendFormat": "{{instance}}_{{device}}_receive", 1109 | "refId": "A", 1110 | "step": 4 1111 | }, 1112 | { 1113 | "expr": "irate(node_network_transmit_bytes_total{instance=~'$node',device!~'tap.*|veth.*|br.*|docker.*|virbr*|lo*'}[30m])*8", 1114 | "format": "time_series", 1115 | "intervalFactor": 1, 1116 | "legendFormat": "{{instance}}_{{device}}_transmit", 1117 | "refId": "B", 1118 | "step": 4 1119 | } 1120 | ], 1121 | "thresholds": [], 1122 | "timeFrom": null, 1123 | "timeRegions": [], 1124 | "timeShift": null, 1125 | "title": "Network Traffic Basic", 1126 | "tooltip": { 1127 | "shared": true, 1128 | "sort": 2, 1129 | "value_type": "individual" 1130 | }, 1131 | "type": "graph", 1132 | "xaxis": { 1133 | "buckets": null, 1134 | "mode": "time", 1135 | "name": null, 1136 | "show": true, 1137 | "values": [] 1138 | }, 1139 | "yaxes": [ 1140 | { 1141 | "format": "bps", 1142 | "label": "transmit(-)/receive(+)", 1143 | "logBase": 1, 1144 | "max": null, 1145 | "min": null, 1146 | "show": true 1147 | }, 1148 | { 1149 | "format": 
"short", 1150 | "label": null, 1151 | "logBase": 1, 1152 | "max": null, 1153 | "min": null, 1154 | "show": false 1155 | } 1156 | ], 1157 | "yaxis": { 1158 | "align": false, 1159 | "alignLevel": null 1160 | } 1161 | }, 1162 | { 1163 | "aliasColors": {}, 1164 | "bars": false, 1165 | "dashLength": 10, 1166 | "dashes": false, 1167 | "datasource": "Dashboard", 1168 | "fill": 1, 1169 | "fillGradient": 3, 1170 | "gridPos": { 1171 | "h": 8, 1172 | "w": 8, 1173 | "x": 0, 1174 | "y": 14 1175 | }, 1176 | "hiddenSeries": false, 1177 | "id": 174, 1178 | "legend": { 1179 | "alignAsTable": true, 1180 | "avg": false, 1181 | "current": true, 1182 | "hideEmpty": false, 1183 | "hideZero": false, 1184 | "max": false, 1185 | "min": false, 1186 | "rightSide": false, 1187 | "show": true, 1188 | "sideWidth": null, 1189 | "total": false, 1190 | "values": true 1191 | }, 1192 | "lines": true, 1193 | "linewidth": 2, 1194 | "links": [], 1195 | "nullPointMode": "null", 1196 | "options": { 1197 | "dataLinks": [] 1198 | }, 1199 | "percentage": false, 1200 | "pointradius": 5, 1201 | "points": false, 1202 | "renderer": "flot", 1203 | "seriesOverrides": [ 1204 | { 1205 | "alias": "/Inodes.*/", 1206 | "yaxis": 2 1207 | } 1208 | ], 1209 | "spaceLength": 10, 1210 | "stack": false, 1211 | "steppedLine": false, 1212 | "targets": [ 1213 | { 1214 | "expr": "1-(node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext4|xfs\"} / node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext4|xfs\"})", 1215 | "format": "time_series", 1216 | "instant": false, 1217 | "intervalFactor": 1, 1218 | "legendFormat": "{{instance}}:{{mountpoint}}", 1219 | "refId": "A" 1220 | }, 1221 | { 1222 | "expr": "node_filesystem_files_free{instance=~'$node',fstype=~\"ext4|xfs\"} / node_filesystem_files{instance=~'$node',fstype=~\"ext4|xfs\"}", 1223 | "hide": true, 1224 | "legendFormat": "Inodes:{{instance}}:{{mountpoint}}", 1225 | "refId": "B" 1226 | } 1227 | ], 1228 | "thresholds": [], 1229 | "timeFrom": null, 1230 | "timeRegions": 
[], 1231 | "timeShift": null, 1232 | "title": "Disk Space Used Basic", 1233 | "tooltip": { 1234 | "shared": true, 1235 | "sort": 2, 1236 | "value_type": "individual" 1237 | }, 1238 | "type": "graph", 1239 | "xaxis": { 1240 | "buckets": null, 1241 | "mode": "time", 1242 | "name": null, 1243 | "show": true, 1244 | "values": [] 1245 | }, 1246 | "yaxes": [ 1247 | { 1248 | "decimals": 2, 1249 | "format": "percentunit", 1250 | "label": "", 1251 | "logBase": 1, 1252 | "max": null, 1253 | "min": null, 1254 | "show": true 1255 | }, 1256 | { 1257 | "decimals": 2, 1258 | "format": "percentunit", 1259 | "label": null, 1260 | "logBase": 1, 1261 | "max": "1", 1262 | "min": null, 1263 | "show": true 1264 | } 1265 | ], 1266 | "yaxis": { 1267 | "align": false, 1268 | "alignLevel": null 1269 | } 1270 | }, 1271 | { 1272 | "aliasColors": { 1273 | "192.168.200.241:9100_总内存": "dark-red", 1274 | "内存_Avaliable": "#6ED0E0", 1275 | "内存_Cached": "#EF843C", 1276 | "内存_Free": "#629E51", 1277 | "内存_Total": "#6d1f62", 1278 | "内存_Used": "#eab839", 1279 | "可用": "#9ac48a", 1280 | "总内存": "#bf1b00" 1281 | }, 1282 | "bars": false, 1283 | "dashLength": 10, 1284 | "dashes": false, 1285 | "datasource": "Dashboard", 1286 | "decimals": 2, 1287 | "fill": 1, 1288 | "fillGradient": 0, 1289 | "gridPos": { 1290 | "h": 8, 1291 | "w": 8, 1292 | "x": 8, 1293 | "y": 14 1294 | }, 1295 | "height": "300", 1296 | "hiddenSeries": false, 1297 | "id": 156, 1298 | "legend": { 1299 | "alignAsTable": true, 1300 | "avg": false, 1301 | "current": true, 1302 | "max": false, 1303 | "min": false, 1304 | "rightSide": false, 1305 | "show": true, 1306 | "sort": "current", 1307 | "sortDesc": true, 1308 | "total": false, 1309 | "values": true 1310 | }, 1311 | "lines": true, 1312 | "linewidth": 2, 1313 | "links": [], 1314 | "nullPointMode": "null", 1315 | "options": { 1316 | "dataLinks": [] 1317 | }, 1318 | "percentage": false, 1319 | "pointradius": 5, 1320 | "points": false, 1321 | "renderer": "flot", 1322 | "seriesOverrides": [ 1323 
| { 1324 | "alias": "/.*_Total/", 1325 | "color": "#C4162A", 1326 | "fill": 0 1327 | } 1328 | ], 1329 | "spaceLength": 10, 1330 | "stack": false, 1331 | "steppedLine": false, 1332 | "targets": [ 1333 | { 1334 | "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"}", 1335 | "format": "time_series", 1336 | "hide": false, 1337 | "instant": false, 1338 | "intervalFactor": 1, 1339 | "legendFormat": "{{instance}}_Total", 1340 | "refId": "A", 1341 | "step": 4 1342 | }, 1343 | { 1344 | "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"} - node_memory_MemAvailable_bytes{instance=~\"$node\"}", 1345 | "format": "time_series", 1346 | "hide": false, 1347 | "intervalFactor": 1, 1348 | "legendFormat": "{{instance}}_Used", 1349 | "refId": "B", 1350 | "step": 4 1351 | }, 1352 | { 1353 | "expr": "node_memory_MemAvailable_bytes{instance=~\"$node\"}", 1354 | "format": "time_series", 1355 | "hide": false, 1356 | "interval": "", 1357 | "intervalFactor": 1, 1358 | "legendFormat": "{{instance}}_Avaliable", 1359 | "refId": "F", 1360 | "step": 4 1361 | } 1362 | ], 1363 | "thresholds": [], 1364 | "timeFrom": null, 1365 | "timeRegions": [], 1366 | "timeShift": null, 1367 | "title": "Memory Basic", 1368 | "tooltip": { 1369 | "shared": true, 1370 | "sort": 2, 1371 | "value_type": "individual" 1372 | }, 1373 | "type": "graph", 1374 | "xaxis": { 1375 | "buckets": null, 1376 | "mode": "time", 1377 | "name": null, 1378 | "show": true, 1379 | "values": [] 1380 | }, 1381 | "yaxes": [ 1382 | { 1383 | "format": "bytes", 1384 | "label": null, 1385 | "logBase": 1, 1386 | "max": null, 1387 | "min": "0", 1388 | "show": true 1389 | }, 1390 | { 1391 | "format": "short", 1392 | "label": null, 1393 | "logBase": 1, 1394 | "max": null, 1395 | "min": null, 1396 | "show": true 1397 | } 1398 | ], 1399 | "yaxis": { 1400 | "align": false, 1401 | "alignLevel": null 1402 | } 1403 | }, 1404 | { 1405 | "aliasColors": { 1406 | "Idle - Waiting for something to happen": "#052B51", 1407 | "guest": "#9AC48A", 1408 | 
"idle": "#052B51", 1409 | "iowait": "#EAB839", 1410 | "irq": "#BF1B00", 1411 | "nice": "#C15C17", 1412 | "sdb_每秒I/O操作%": "#d683ce", 1413 | "softirq": "#E24D42", 1414 | "steal": "#FCE2DE", 1415 | "system": "#508642", 1416 | "user": "#5195CE", 1417 | "磁盘花费在I/O操作占比": "#ba43a9" 1418 | }, 1419 | "bars": false, 1420 | "dashLength": 10, 1421 | "dashes": false, 1422 | "datasource": "Dashboard", 1423 | "decimals": null, 1424 | "description": "The time spent on I/O in the natural time of each second.(wall-clock time)", 1425 | "fill": 1, 1426 | "fillGradient": 5, 1427 | "gridPos": { 1428 | "h": 8, 1429 | "w": 8, 1430 | "x": 16, 1431 | "y": 14 1432 | }, 1433 | "hiddenSeries": false, 1434 | "id": 175, 1435 | "legend": { 1436 | "alignAsTable": true, 1437 | "avg": true, 1438 | "current": true, 1439 | "hideEmpty": true, 1440 | "hideZero": true, 1441 | "max": true, 1442 | "min": false, 1443 | "rightSide": false, 1444 | "show": true, 1445 | "sideWidth": null, 1446 | "sort": null, 1447 | "sortDesc": null, 1448 | "total": false, 1449 | "values": true 1450 | }, 1451 | "lines": true, 1452 | "linewidth": 2, 1453 | "links": [], 1454 | "maxPerRow": 6, 1455 | "nullPointMode": "null", 1456 | "options": { 1457 | "dataLinks": [] 1458 | }, 1459 | "percentage": false, 1460 | "pointradius": 5, 1461 | "points": false, 1462 | "renderer": "flot", 1463 | "seriesOverrides": [], 1464 | "spaceLength": 10, 1465 | "stack": false, 1466 | "steppedLine": false, 1467 | "targets": [ 1468 | { 1469 | "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$node\"}[30m])", 1470 | "format": "time_series", 1471 | "intervalFactor": 1, 1472 | "legendFormat": "{{instance}}_{{device}}_ IO time", 1473 | "refId": "C" 1474 | } 1475 | ], 1476 | "thresholds": [], 1477 | "timeFrom": null, 1478 | "timeRegions": [], 1479 | "timeShift": null, 1480 | "title": "Time Spent Doing I/Os", 1481 | "tooltip": { 1482 | "shared": true, 1483 | "sort": 2, 1484 | "value_type": "individual" 1485 | }, 1486 | "type": "graph", 1487 | "xaxis": 
{ 1488 | "buckets": null, 1489 | "mode": "time", 1490 | "name": null, 1491 | "show": true, 1492 | "values": [] 1493 | }, 1494 | "yaxes": [ 1495 | { 1496 | "decimals": null, 1497 | "format": "s", 1498 | "label": "", 1499 | "logBase": 1, 1500 | "max": null, 1501 | "min": null, 1502 | "show": true 1503 | }, 1504 | { 1505 | "format": "short", 1506 | "label": null, 1507 | "logBase": 1, 1508 | "max": null, 1509 | "min": null, 1510 | "show": false 1511 | } 1512 | ], 1513 | "yaxis": { 1514 | "align": false, 1515 | "alignLevel": null 1516 | } 1517 | }, 1518 | { 1519 | "aliasColors": { 1520 | "vda_write": "#6ED0E0" 1521 | }, 1522 | "bars": false, 1523 | "dashLength": 10, 1524 | "dashes": false, 1525 | "datasource": "Dashboard", 1526 | "description": "Read/write completions per second", 1527 | "fill": 1, 1528 | "fillGradient": 1, 1529 | "gridPos": { 1530 | "h": 9, 1531 | "w": 8, 1532 | "x": 0, 1533 | "y": 22 1534 | }, 1535 | "height": "300", 1536 | "hiddenSeries": false, 1537 | "id": 161, 1538 | "legend": { 1539 | "alignAsTable": true, 1540 | "avg": true, 1541 | "current": true, 1542 | "hideEmpty": true, 1543 | "hideZero": true, 1544 | "max": true, 1545 | "min": false, 1546 | "show": true, 1547 | "sort": "current", 1548 | "sortDesc": true, 1549 | "total": false, 1550 | "values": true 1551 | }, 1552 | "lines": true, 1553 | "linewidth": 2, 1554 | "links": [], 1555 | "nullPointMode": "null", 1556 | "options": { 1557 | "dataLinks": [] 1558 | }, 1559 | "percentage": false, 1560 | "pointradius": 5, 1561 | "points": false, 1562 | "renderer": "flot", 1563 | "seriesOverrides": [ 1564 | { 1565 | "alias": "/.*_Reads completed$/", 1566 | "transform": "negative-Y" 1567 | } 1568 | ], 1569 | "spaceLength": 10, 1570 | "stack": false, 1571 | "steppedLine": false, 1572 | "targets": [ 1573 | { 1574 | "expr": "irate(node_disk_reads_completed_total{instance=~\"$node\"}[30m])", 1575 | "format": "time_series", 1576 | "hide": false, 1577 | "interval": "", 1578 | "intervalFactor": 1, 1579 | 
"legendFormat": "{{instance}}_{{device}}_Reads completed", 1580 | "refId": "A", 1581 | "step": 10 1582 | }, 1583 | { 1584 | "expr": "irate(node_disk_writes_completed_total{instance=~\"$node\"}[30m])", 1585 | "format": "time_series", 1586 | "hide": false, 1587 | "intervalFactor": 1, 1588 | "legendFormat": "{{instance}}_{{device}}_Writes completed", 1589 | "refId": "B", 1590 | "step": 10 1591 | } 1592 | ], 1593 | "thresholds": [], 1594 | "timeFrom": null, 1595 | "timeRegions": [], 1596 | "timeShift": null, 1597 | "title": "Disk IOps Completed", 1598 | "tooltip": { 1599 | "shared": true, 1600 | "sort": 2, 1601 | "value_type": "individual" 1602 | }, 1603 | "type": "graph", 1604 | "xaxis": { 1605 | "buckets": null, 1606 | "mode": "time", 1607 | "name": null, 1608 | "show": true, 1609 | "values": [] 1610 | }, 1611 | "yaxes": [ 1612 | { 1613 | "decimals": null, 1614 | "format": "iops", 1615 | "label": "IO read (-) / write (+)", 1616 | "logBase": 1, 1617 | "max": null, 1618 | "min": null, 1619 | "show": true 1620 | }, 1621 | { 1622 | "format": "short", 1623 | "label": null, 1624 | "logBase": 1, 1625 | "max": null, 1626 | "min": null, 1627 | "show": true 1628 | } 1629 | ], 1630 | "yaxis": { 1631 | "align": false, 1632 | "alignLevel": null 1633 | } 1634 | }, 1635 | { 1636 | "aliasColors": { 1637 | "vda_write": "#6ED0E0" 1638 | }, 1639 | "bars": false, 1640 | "dashLength": 10, 1641 | "dashes": false, 1642 | "datasource": "Dashboard", 1643 | "description": "Per second read / write bytes ", 1644 | "fill": 1, 1645 | "fillGradient": 1, 1646 | "gridPos": { 1647 | "h": 9, 1648 | "w": 8, 1649 | "x": 8, 1650 | "y": 22 1651 | }, 1652 | "height": "300", 1653 | "hiddenSeries": false, 1654 | "id": 168, 1655 | "legend": { 1656 | "alignAsTable": true, 1657 | "avg": true, 1658 | "current": true, 1659 | "hideEmpty": true, 1660 | "hideZero": true, 1661 | "max": true, 1662 | "min": false, 1663 | "show": true, 1664 | "sort": "current", 1665 | "sortDesc": true, 1666 | "total": false, 1667 | 
"values": true 1668 | }, 1669 | "lines": true, 1670 | "linewidth": 2, 1671 | "links": [], 1672 | "nullPointMode": "null", 1673 | "options": { 1674 | "dataLinks": [] 1675 | }, 1676 | "percentage": false, 1677 | "pointradius": 5, 1678 | "points": false, 1679 | "renderer": "flot", 1680 | "seriesOverrides": [ 1681 | { 1682 | "alias": "/.*_Read bytes$/", 1683 | "transform": "negative-Y" 1684 | } 1685 | ], 1686 | "spaceLength": 10, 1687 | "stack": false, 1688 | "steppedLine": false, 1689 | "targets": [ 1690 | { 1691 | "expr": "irate(node_disk_read_bytes_total{instance=~\"$node\"}[30m])", 1692 | "format": "time_series", 1693 | "interval": "", 1694 | "intervalFactor": 1, 1695 | "legendFormat": "{{instance}}_{{device}}_Read bytes", 1696 | "refId": "A", 1697 | "step": 10 1698 | }, 1699 | { 1700 | "expr": "irate(node_disk_written_bytes_total{instance=~\"$node\"}[30m])", 1701 | "format": "time_series", 1702 | "hide": false, 1703 | "intervalFactor": 1, 1704 | "legendFormat": "{{instance}}_{{device}}_Written bytes", 1705 | "refId": "B", 1706 | "step": 10 1707 | } 1708 | ], 1709 | "thresholds": [], 1710 | "timeFrom": null, 1711 | "timeRegions": [], 1712 | "timeShift": null, 1713 | "title": "Disk R/W Data", 1714 | "tooltip": { 1715 | "shared": true, 1716 | "sort": 2, 1717 | "value_type": "individual" 1718 | }, 1719 | "type": "graph", 1720 | "xaxis": { 1721 | "buckets": null, 1722 | "mode": "time", 1723 | "name": null, 1724 | "show": true, 1725 | "values": [] 1726 | }, 1727 | "yaxes": [ 1728 | { 1729 | "decimals": null, 1730 | "format": "Bps", 1731 | "label": "Bytes read (-) / write (+)", 1732 | "logBase": 1, 1733 | "max": null, 1734 | "min": null, 1735 | "show": true 1736 | }, 1737 | { 1738 | "format": "short", 1739 | "label": null, 1740 | "logBase": 1, 1741 | "max": null, 1742 | "min": null, 1743 | "show": false 1744 | } 1745 | ], 1746 | "yaxis": { 1747 | "align": false, 1748 | "alignLevel": null 1749 | } 1750 | }, 1751 | { 1752 | "aliasColors": { 1753 | "vda": "#6ED0E0" 1754 | 
}, 1755 | "bars": false, 1756 | "dashLength": 10, 1757 | "dashes": false, 1758 | "datasource": "Dashboard", 1759 | "description": "Time spent on each read/write operation", 1760 | "fill": 1, 1761 | "fillGradient": 1, 1762 | "gridPos": { 1763 | "h": 9, 1764 | "w": 8, 1765 | "x": 16, 1766 | "y": 22 1767 | }, 1768 | "height": "300", 1769 | "hiddenSeries": false, 1770 | "id": 160, 1771 | "legend": { 1772 | "alignAsTable": true, 1773 | "avg": true, 1774 | "current": true, 1775 | "hideEmpty": true, 1776 | "hideZero": true, 1777 | "max": true, 1778 | "min": false, 1779 | "show": true, 1780 | "sort": "current", 1781 | "sortDesc": true, 1782 | "total": false, 1783 | "values": true 1784 | }, 1785 | "lines": true, 1786 | "linewidth": 2, 1787 | "links": [], 1788 | "nullPointMode": "null as zero", 1789 | "options": { 1790 | "dataLinks": [] 1791 | }, 1792 | "percentage": false, 1793 | "pointradius": 5, 1794 | "points": false, 1795 | "renderer": "flot", 1796 | "seriesOverrides": [ 1797 | { 1798 | "alias": "/,*_Read time$/", 1799 | "transform": "negative-Y" 1800 | } 1801 | ], 1802 | "spaceLength": 10, 1803 | "stack": false, 1804 | "steppedLine": false, 1805 | "targets": [ 1806 | { 1807 | "expr": "irate(node_disk_read_time_seconds_total{instance=~\"$node\"}[30m]) / irate(node_disk_reads_completed_total{instance=~\"$node\"}[30m])", 1808 | "format": "time_series", 1809 | "hide": false, 1810 | "instant": false, 1811 | "interval": "", 1812 | "intervalFactor": 1, 1813 | "legendFormat": "{{instance}}_{{device}}_Read time", 1814 | "refId": "B" 1815 | }, 1816 | { 1817 | "expr": "irate(node_disk_write_time_seconds_total{instance=~\"$node\"}[30m]) / irate(node_disk_writes_completed_total{instance=~\"$node\"}[30m])", 1818 | "format": "time_series", 1819 | "hide": false, 1820 | "instant": false, 1821 | "intervalFactor": 1, 1822 | "legendFormat": "{{instance}}_{{device}}_Write time", 1823 | "refId": "C" 1824 | } 1825 | ], 1826 | "thresholds": [], 1827 | "timeFrom": null, 1828 | "timeRegions": 
[], 1829 | "timeShift": null, 1830 | "title": "Disk R/W Time(Reference: less than 100ms)(beta)", 1831 | "tooltip": { 1832 | "shared": true, 1833 | "sort": 2, 1834 | "value_type": "individual" 1835 | }, 1836 | "type": "graph", 1837 | "xaxis": { 1838 | "buckets": null, 1839 | "mode": "time", 1840 | "name": null, 1841 | "show": true, 1842 | "values": [] 1843 | }, 1844 | "yaxes": [ 1845 | { 1846 | "format": "s", 1847 | "label": "Time. read (-) / write (+)", 1848 | "logBase": 1, 1849 | "max": null, 1850 | "min": null, 1851 | "show": true 1852 | }, 1853 | { 1854 | "format": "short", 1855 | "label": null, 1856 | "logBase": 1, 1857 | "max": null, 1858 | "min": null, 1859 | "show": false 1860 | } 1861 | ], 1862 | "yaxis": { 1863 | "align": false, 1864 | "alignLevel": null 1865 | } 1866 | }, 1867 | { 1868 | "aliasColors": { 1869 | "TCP": "#6ED0E0" 1870 | }, 1871 | "bars": false, 1872 | "dashLength": 10, 1873 | "dashes": false, 1874 | "datasource": "Dashboard", 1875 | "description": "TCP_alloc - Allocated sockets\n\nCurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT\n\nTCP_tw - Sockets waiting to close\n\nUDP_inuse - UDP sockets currently in use\n\nSockets_used - Sockets currently in use", 1876 | "fill": 1, 1877 | "fillGradient": 0, 1878 | "gridPos": { 1879 | "h": 12, 1880 | "w": 12, 1881 | "x": 0, 1882 | "y": 31 1883 | }, 1884 | "height": "300", 1885 | "hiddenSeries": false, 1886 | "id": 158, 1887 | "interval": "", 1888 | "legend": { 1889 | "alignAsTable": true, 1890 | "avg": true, 1891 | "current": true, 1892 | "hideEmpty": true, 1893 | "hideZero": true, 1894 | "max": true, 1895 | "min": false, 1896 | "rightSide": false, 1897 | "show": true, 1898 | "sort": "current", 1899 | "sortDesc": true, 1900 | "total": false, 1901 | "values": true 1902 | }, 1903 | "lines": true, 1904 | "linewidth": 2, 1905 | "links": [], 1906 | "nullPointMode": "null", 1907 | "options": { 1908 | "dataLinks": [] 1909 | }, 1910 | "percentage": false, 1911 |
"pointradius": 5, 1912 | "points": false, 1913 | "renderer": "flot", 1914 | "seriesOverrides": [ 1915 | { 1916 | "alias": "/.*_Sockets_used/", 1917 | "color": "#C4162A", 1918 | "fill": 0 1919 | } 1920 | ], 1921 | "spaceLength": 10, 1922 | "stack": false, 1923 | "steppedLine": false, 1924 | "targets": [ 1925 | { 1926 | "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", 1927 | "format": "time_series", 1928 | "hide": false, 1929 | "instant": false, 1930 | "interval": "", 1931 | "intervalFactor": 1, 1932 | "legendFormat": "{{instance}}_CurrEstab", 1933 | "refId": "A", 1934 | "step": 20 1935 | }, 1936 | { 1937 | "expr": "node_sockstat_TCP_tw{instance=~'$node'}", 1938 | "format": "time_series", 1939 | "intervalFactor": 1, 1940 | "legendFormat": "{{instance}}_TCP_tw", 1941 | "refId": "D" 1942 | }, 1943 | { 1944 | "expr": "node_sockstat_sockets_used{instance=~'$node'}", 1945 | "legendFormat": "{{instance}}_Sockets_used", 1946 | "refId": "B" 1947 | }, 1948 | { 1949 | "expr": "node_sockstat_UDP_inuse{instance=~'$node'}", 1950 | "legendFormat": "{{instance}}_UDP_inuse", 1951 | "refId": "C" 1952 | }, 1953 | { 1954 | "expr": "node_sockstat_TCP_alloc{instance=~'$node'}", 1955 | "legendFormat": "{{instance}}_TCP_alloc", 1956 | "refId": "E" 1957 | } 1958 | ], 1959 | "thresholds": [], 1960 | "timeFrom": null, 1961 | "timeRegions": [], 1962 | "timeShift": null, 1963 | "title": "Network Sockstat", 1964 | "tooltip": { 1965 | "shared": true, 1966 | "sort": 2, 1967 | "value_type": "individual" 1968 | }, 1969 | "type": "graph", 1970 | "xaxis": { 1971 | "buckets": null, 1972 | "mode": "time", 1973 | "name": null, 1974 | "show": true, 1975 | "values": [] 1976 | }, 1977 | "yaxes": [ 1978 | { 1979 | "format": "short", 1980 | "label": null, 1981 | "logBase": 1, 1982 | "max": null, 1983 | "min": null, 1984 | "show": true 1985 | }, 1986 | { 1987 | "format": "short", 1988 | "label": null, 1989 | "logBase": 1, 1990 | "max": null, 1991 | "min": null, 1992 | "show": true 1993 | } 1994 | ], 
1995 | "yaxis": { 1996 | "align": false, 1997 | "alignLevel": null 1998 | } 1999 | }, 2000 | { 2001 | "aliasColors": {}, 2002 | "bars": false, 2003 | "dashLength": 10, 2004 | "dashes": false, 2005 | "datasource": "Dashboard", 2006 | "description": "", 2007 | "fill": 0, 2008 | "fillGradient": 1, 2009 | "gridPos": { 2010 | "h": 12, 2011 | "w": 12, 2012 | "x": 12, 2013 | "y": 31 2014 | }, 2015 | "hiddenSeries": false, 2016 | "id": 169, 2017 | "legend": { 2018 | "alignAsTable": true, 2019 | "avg": true, 2020 | "current": true, 2021 | "hideEmpty": true, 2022 | "hideZero": true, 2023 | "max": true, 2024 | "min": false, 2025 | "rightSide": false, 2026 | "show": true, 2027 | "sideWidth": null, 2028 | "sort": "current", 2029 | "sortDesc": true, 2030 | "total": false, 2031 | "values": true 2032 | }, 2033 | "lines": true, 2034 | "linewidth": 2, 2035 | "links": [], 2036 | "nullPointMode": "null", 2037 | "options": { 2038 | "dataLinks": [] 2039 | }, 2040 | "percentage": false, 2041 | "pointradius": 0.5, 2042 | "points": false, 2043 | "renderer": "flot", 2044 | "seriesOverrides": [], 2045 | "spaceLength": 10, 2046 | "stack": false, 2047 | "steppedLine": false, 2048 | "targets": [ 2049 | { 2050 | "expr": "node_hwmon_temp_celsius{instance=~'$node'}", 2051 | "format": "time_series", 2052 | "intervalFactor": 10, 2053 | "legendFormat": "{{instance}}_{{chip}}_{{sensor}}", 2054 | "refId": "A" 2055 | } 2056 | ], 2057 | "thresholds": [], 2058 | "timeFrom": null, 2059 | "timeRegions": [], 2060 | "timeShift": null, 2061 | "title": "Hardware Temperature(VM may not display the metrics)", 2062 | "tooltip": { 2063 | "shared": true, 2064 | "sort": 2, 2065 | "value_type": "individual" 2066 | }, 2067 | "type": "graph", 2068 | "xaxis": { 2069 | "buckets": null, 2070 | "mode": "time", 2071 | "name": null, 2072 | "show": true, 2073 | "values": [] 2074 | }, 2075 | "yaxes": [ 2076 | { 2077 | "format": "celsius", 2078 | "label": null, 2079 | "logBase": 1, 2080 | "max": null, 2081 | "min": null, 2082 | 
"show": true 2083 | }, 2084 | { 2085 | "format": "short", 2086 | "label": null, 2087 | "logBase": 1, 2088 | "max": null, 2089 | "min": null, 2090 | "show": true 2091 | } 2092 | ], 2093 | "yaxis": { 2094 | "align": false, 2095 | "alignLevel": null 2096 | } 2097 | } 2098 | ], 2099 | "refresh": false, 2100 | "schemaVersion": 21, 2101 | "style": "dark", 2102 | "tags": [ 2103 | "Prometheus", 2104 | "node_exporter" 2105 | ], 2106 | "templating": { 2107 | "list": [ 2108 | { 2109 | "allValue": null, 2110 | "current": { 2111 | "text": "node", 2112 | "value": "node" 2113 | }, 2114 | "datasource": "Dashboard", 2115 | "definition": "label_values(node_uname_info, job)", 2116 | "hide": 0, 2117 | "includeAll": false, 2118 | "label": "JOB", 2119 | "multi": false, 2120 | "name": "job", 2121 | "options": [], 2122 | "query": "label_values(node_uname_info, job)", 2123 | "refresh": 1, 2124 | "regex": "", 2125 | "skipUrlSync": false, 2126 | "sort": 1, 2127 | "tagValuesQuery": "", 2128 | "tags": [], 2129 | "tagsQuery": "", 2130 | "type": "query", 2131 | "useTags": false 2132 | }, 2133 | { 2134 | "allValue": null, 2135 | "current": { 2136 | "selected": false, 2137 | "text": "All", 2138 | "value": "$__all" 2139 | }, 2140 | "datasource": "Dashboard", 2141 | "definition": "label_values(node_uname_info{job=~\"$job\"}, nodename)", 2142 | "hide": 0, 2143 | "includeAll": true, 2144 | "label": "Host", 2145 | "multi": true, 2146 | "name": "hostname", 2147 | "options": [], 2148 | "query": "label_values(node_uname_info{job=~\"$job\"}, nodename)", 2149 | "refresh": 1, 2150 | "regex": "", 2151 | "skipUrlSync": false, 2152 | "sort": 0, 2153 | "tagValuesQuery": "", 2154 | "tags": [], 2155 | "tagsQuery": "", 2156 | "type": "query", 2157 | "useTags": false 2158 | }, 2159 | { 2160 | "allFormat": "glob", 2161 | "allValue": null, 2162 | "current": { 2163 | "selected": false, 2164 | "text": "All", 2165 | "value": "$__all" 2166 | }, 2167 | "datasource": "Dashboard", 2168 | "definition": 
"label_values(node_uname_info{nodename=~\"$hostname\"},instance)", 2169 | "hide": 0, 2170 | "includeAll": true, 2171 | "label": "IP", 2172 | "multi": false, 2173 | "multiFormat": "regex values", 2174 | "name": "node", 2175 | "options": [], 2176 | "query": "label_values(node_uname_info{nodename=~\"$hostname\"},instance)", 2177 | "refresh": 2, 2178 | "regex": "", 2179 | "skipUrlSync": false, 2180 | "sort": 1, 2181 | "tagValuesQuery": "", 2182 | "tags": [], 2183 | "tagsQuery": "", 2184 | "type": "query", 2185 | "useTags": false 2186 | }, 2187 | { 2188 | "allValue": null, 2189 | "current": { 2190 | "text": "/home", 2191 | "value": "/home" 2192 | }, 2193 | "datasource": "Dashboard", 2194 | "definition": "", 2195 | "hide": 2, 2196 | "includeAll": false, 2197 | "label": "", 2198 | "multi": false, 2199 | "name": "maxmount", 2200 | "options": [], 2201 | "query": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext4|xfs\"}) by (mountpoint))))", 2202 | "refresh": 2, 2203 | "regex": "/.*\\\"(.*)\\\".*/", 2204 | "skipUrlSync": false, 2205 | "sort": 0, 2206 | "tagValuesQuery": "", 2207 | "tags": [], 2208 | "tagsQuery": "", 2209 | "type": "query", 2210 | "useTags": false 2211 | }, 2212 | { 2213 | "allFormat": "glob", 2214 | "allValue": null, 2215 | "current": { 2216 | "isNone": true, 2217 | "selected": false, 2218 | "text": "None", 2219 | "value": "" 2220 | }, 2221 | "datasource": "Dashboard", 2222 | "definition": "", 2223 | "hide": 2, 2224 | "includeAll": false, 2225 | "label": "环境", 2226 | "multi": false, 2227 | "multiFormat": "regex values", 2228 | "name": "env", 2229 | "options": [], 2230 | "query": "label_values(node_exporter_build_info,env)", 2231 | "refresh": 2, 2232 | "regex": "", 2233 | "skipUrlSync": false, 2234 | "sort": 1, 2235 | "tagValuesQuery": "", 2236 | "tags": [], 2237 | "tagsQuery": "", 2238 | "type": "query", 2239 | "useTags": false 2240 | }, 2241 | { 2242 | "allFormat": "glob", 2243 | "allValue": "", 2244 | "current": 
{ 2245 | "isNone": true, 2246 | "selected": false, 2247 | "text": "None", 2248 | "value": "" 2249 | }, 2250 | "datasource": "Dashboard", 2251 | "definition": "label_values(node_exporter_build_info{env=~'$env'},name)", 2252 | "hide": 2, 2253 | "includeAll": false, 2254 | "label": "名称", 2255 | "multi": true, 2256 | "multiFormat": "regex values", 2257 | "name": "name", 2258 | "options": [], 2259 | "query": "label_values(node_exporter_build_info{env=~'$env'},name)", 2260 | "refresh": 2, 2261 | "regex": "", 2262 | "skipUrlSync": false, 2263 | "sort": 1, 2264 | "tagValuesQuery": "/.*/", 2265 | "tags": [], 2266 | "tagsQuery": "", 2267 | "type": "query", 2268 | "useTags": false 2269 | } 2270 | ] 2271 | }, 2272 | "time": { 2273 | "from": "now-2d", 2274 | "to": "now" 2275 | }, 2276 | "timepicker": { 2277 | "now": true, 2278 | "refresh_intervals": [ 2279 | "5s", 2280 | "10s", 2281 | "30s", 2282 | "1m", 2283 | "5m", 2284 | "15m", 2285 | "30m", 2286 | "1h", 2287 | "2h", 2288 | "1d" 2289 | ], 2290 | "time_options": [ 2291 | "5m", 2292 | "15m", 2293 | "1h", 2294 | "6h", 2295 | "12h", 2296 | "24h", 2297 | "2d", 2298 | "7d", 2299 | "30d" 2300 | ] 2301 | }, 2302 | "timezone": "browser", 2303 | "title": "System Overview", 2304 | "uid": "hb7fSE0Zz", 2305 | "version": 6 2306 | } -------------------------------------------------------------------------------- /roles/grafana/files/zfs-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "Dashboard", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Graphs ZFS ARC and ARC L2 Hit %, Hits, Misses, Size, and Zpool", 16 | "editable": true, 17 | "gnetId": 7845, 18 | "graphTooltip": 0, 19 | "id": 1, 20 | "iteration": 1575913881668, 21 | "links": [], 22 | "panels": [ 23 | { 24 | 
"collapsed": false, 25 | "datasource": "Dashboard", 26 | "gridPos": { 27 | "h": 1, 28 | "w": 24, 29 | "x": 0, 30 | "y": 0 31 | }, 32 | "id": 26, 33 | "panels": [], 34 | "title": "Pools", 35 | "type": "row" 36 | }, 37 | { 38 | "aliasColors": {}, 39 | "bars": false, 40 | "dashLength": 10, 41 | "dashes": false, 42 | "datasource": "Dashboard", 43 | "decimals": 0, 44 | "fill": 1, 45 | "fillGradient": 0, 46 | "gridPos": { 47 | "h": 7, 48 | "w": 12, 49 | "x": 0, 50 | "y": 1 51 | }, 52 | "hiddenSeries": false, 53 | "id": 19, 54 | "legend": { 55 | "alignAsTable": true, 56 | "avg": true, 57 | "current": true, 58 | "max": true, 59 | "min": true, 60 | "rightSide": false, 61 | "show": true, 62 | "sideWidth": 350, 63 | "total": false, 64 | "values": true 65 | }, 66 | "lines": true, 67 | "linewidth": 1, 68 | "links": [], 69 | "nullPointMode": "null", 70 | "options": { 71 | "dataLinks": [] 72 | }, 73 | "percentage": false, 74 | "pointradius": 5, 75 | "points": false, 76 | "renderer": "flot", 77 | "seriesOverrides": [], 78 | "spaceLength": 10, 79 | "stack": false, 80 | "steppedLine": false, 81 | "targets": [ 82 | { 83 | "expr": "irate(node_zfs_zpool_rtime{job=\"node\",instance=\"$node\"}[5m])", 84 | "format": "time_series", 85 | "intervalFactor": 2, 86 | "legendFormat": "Read", 87 | "refId": "A", 88 | "step": 2, 89 | "target": "" 90 | }, 91 | { 92 | "expr": "irate(node_zfs_zpool_wtime{job=\"node\",instance=\"$node\"}[5m])", 93 | "format": "time_series", 94 | "hide": false, 95 | "intervalFactor": 2, 96 | "legendFormat": "Write", 97 | "refId": "B", 98 | "step": 2, 99 | "target": "" 100 | } 101 | ], 102 | "thresholds": [], 103 | "timeFrom": null, 104 | "timeRegions": [], 105 | "timeShift": null, 106 | "title": "Time", 107 | "tooltip": { 108 | "shared": true, 109 | "sort": 2, 110 | "value_type": "individual" 111 | }, 112 | "type": "graph", 113 | "xaxis": { 114 | "buckets": null, 115 | "mode": "time", 116 | "name": null, 117 | "show": true, 118 | "values": [] 119 | }, 120 | "yaxes": [ 
121 | { 122 | "format": "ns", 123 | "label": "", 124 | "logBase": 1, 125 | "max": null, 126 | "min": "0", 127 | "show": true 128 | }, 129 | { 130 | "format": "short", 131 | "label": null, 132 | "logBase": 1, 133 | "max": null, 134 | "min": null, 135 | "show": true 136 | } 137 | ], 138 | "yaxis": { 139 | "align": false, 140 | "alignLevel": null 141 | } 142 | }, 143 | { 144 | "aliasColors": {}, 145 | "bars": false, 146 | "dashLength": 10, 147 | "dashes": false, 148 | "datasource": "Dashboard", 149 | "decimals": 0, 150 | "fill": 1, 151 | "fillGradient": 0, 152 | "gridPos": { 153 | "h": 7, 154 | "w": 12, 155 | "x": 12, 156 | "y": 1 157 | }, 158 | "hiddenSeries": false, 159 | "id": 20, 160 | "legend": { 161 | "alignAsTable": true, 162 | "avg": true, 163 | "current": false, 164 | "max": true, 165 | "min": true, 166 | "rightSide": false, 167 | "show": true, 168 | "sideWidth": 350, 169 | "total": true, 170 | "values": true 171 | }, 172 | "lines": true, 173 | "linewidth": 1, 174 | "links": [], 175 | "nullPointMode": "null", 176 | "options": { 177 | "dataLinks": [] 178 | }, 179 | "percentage": false, 180 | "pointradius": 5, 181 | "points": false, 182 | "renderer": "flot", 183 | "seriesOverrides": [], 184 | "spaceLength": 10, 185 | "stack": false, 186 | "steppedLine": false, 187 | "targets": [ 188 | { 189 | "expr": "irate(node_zfs_zpool_reads{job=\"node\",instance=\"$node\"}[5m])", 190 | "format": "time_series", 191 | "intervalFactor": 2, 192 | "legendFormat": "Reads", 193 | "refId": "A", 194 | "step": 2, 195 | "target": "" 196 | }, 197 | { 198 | "expr": "irate(node_zfs_zpool_writes{job=\"node\",instance=\"$node\"}[5m])", 199 | "format": "time_series", 200 | "intervalFactor": 2, 201 | "legendFormat": "Writes", 202 | "refId": "B", 203 | "step": 2, 204 | "target": "" 205 | } 206 | ], 207 | "thresholds": [], 208 | "timeFrom": null, 209 | "timeRegions": [], 210 | "timeShift": null, 211 | "title": "Ops", 212 | "tooltip": { 213 | "shared": true, 214 | "sort": 2, 215 | "value_type": 
"individual" 216 | }, 217 | "type": "graph", 218 | "xaxis": { 219 | "buckets": null, 220 | "mode": "time", 221 | "name": null, 222 | "show": true, 223 | "values": [] 224 | }, 225 | "yaxes": [ 226 | { 227 | "format": "none", 228 | "label": "", 229 | "logBase": 1, 230 | "max": null, 231 | "min": "0", 232 | "show": true 233 | }, 234 | { 235 | "format": "short", 236 | "label": null, 237 | "logBase": 1, 238 | "max": null, 239 | "min": null, 240 | "show": true 241 | } 242 | ], 243 | "yaxis": { 244 | "align": false, 245 | "alignLevel": null 246 | } 247 | }, 248 | { 249 | "collapsed": false, 250 | "datasource": "Dashboard", 251 | "gridPos": { 252 | "h": 1, 253 | "w": 24, 254 | "x": 0, 255 | "y": 8 256 | }, 257 | "id": 24, 258 | "panels": [], 259 | "title": "ARC", 260 | "type": "row" 261 | }, 262 | { 263 | "aliasColors": {}, 264 | "bars": false, 265 | "dashLength": 10, 266 | "dashes": false, 267 | "datasource": "Dashboard", 268 | "decimals": 0, 269 | "fill": 1, 270 | "fillGradient": 0, 271 | "gridPos": { 272 | "h": 6, 273 | "w": 12, 274 | "x": 0, 275 | "y": 9 276 | }, 277 | "hiddenSeries": false, 278 | "id": 14, 279 | "legend": { 280 | "alignAsTable": false, 281 | "avg": true, 282 | "current": false, 283 | "hideEmpty": false, 284 | "hideZero": false, 285 | "max": false, 286 | "min": false, 287 | "rightSide": false, 288 | "show": true, 289 | "sideWidth": 350, 290 | "total": false, 291 | "values": true 292 | }, 293 | "lines": true, 294 | "linewidth": 1, 295 | "links": [], 296 | "nullPointMode": "null", 297 | "options": { 298 | "dataLinks": [] 299 | }, 300 | "percentage": false, 301 | "pointradius": 5, 302 | "points": false, 303 | "renderer": "flot", 304 | "seriesOverrides": [], 305 | "spaceLength": 10, 306 | "stack": false, 307 | "steppedLine": false, 308 | "targets": [ 309 | { 310 | "expr": "irate(node_zfs_arc_demand_data_hits{job=\"node\",instance=\"$node\"}[5m]) / (irate(node_zfs_arc_demand_data_hits{job=\"node\",instance=\"$node\"}[5m]) + 
irate(node_zfs_arc_demand_data_misses{job=\"node\",instance=\"$node\"}[5m])) * 100", 311 | "format": "time_series", 312 | "intervalFactor": 2, 313 | "legendFormat": "data", 314 | "refId": "A", 315 | "step": 2, 316 | "target": "" 317 | }, 318 | { 319 | "expr": "irate(node_zfs_arc_demand_metadata_hits{job=\"node\",instance=\"$node\"}[5m]) / (irate(node_zfs_arc_demand_metadata_hits{job=\"node\",instance=\"$node\"}[5m]) + irate(node_zfs_arc_demand_metadata_misses{job=\"node\",instance=\"$node\"}[5m])) * 100", 320 | "format": "time_series", 321 | "intervalFactor": 2, 322 | "legendFormat": "metadata", 323 | "refId": "B", 324 | "step": 2, 325 | "target": "" 326 | } 327 | ], 328 | "thresholds": [], 329 | "timeFrom": null, 330 | "timeRegions": [], 331 | "timeShift": null, 332 | "title": "ARC - Hit %", 333 | "tooltip": { 334 | "shared": true, 335 | "sort": 2, 336 | "value_type": "individual" 337 | }, 338 | "type": "graph", 339 | "xaxis": { 340 | "buckets": null, 341 | "mode": "time", 342 | "name": null, 343 | "show": true, 344 | "values": [] 345 | }, 346 | "yaxes": [ 347 | { 348 | "format": "percent", 349 | "label": "", 350 | "logBase": 1, 351 | "max": null, 352 | "min": null, 353 | "show": true 354 | }, 355 | { 356 | "format": "short", 357 | "label": null, 358 | "logBase": 1, 359 | "max": null, 360 | "min": null, 361 | "show": true 362 | } 363 | ], 364 | "yaxis": { 365 | "align": false, 366 | "alignLevel": null 367 | } 368 | }, 369 | { 370 | "aliasColors": {}, 371 | "bars": false, 372 | "dashLength": 10, 373 | "dashes": false, 374 | "datasource": "Dashboard", 375 | "decimals": 0, 376 | "fill": 1, 377 | "fillGradient": 0, 378 | "gridPos": { 379 | "h": 6, 380 | "w": 12, 381 | "x": 12, 382 | "y": 9 383 | }, 384 | "hiddenSeries": false, 385 | "id": 13, 386 | "legend": { 387 | "alignAsTable": false, 388 | "avg": false, 389 | "current": false, 390 | "max": false, 391 | "min": false, 392 | "rightSide": false, 393 | "show": true, 394 | "sideWidth": 350, 395 | "total": true, 396 | 
"values": true 397 | }, 398 | "lines": true, 399 | "linewidth": 1, 400 | "links": [], 401 | "nullPointMode": "null", 402 | "options": { 403 | "dataLinks": [] 404 | }, 405 | "percentage": false, 406 | "pointradius": 5, 407 | "points": false, 408 | "renderer": "flot", 409 | "seriesOverrides": [], 410 | "spaceLength": 10, 411 | "stack": false, 412 | "steppedLine": false, 413 | "targets": [ 414 | { 415 | "expr": "irate(node_zfs_arc_demand_data_hits{job=\"node\",instance=\"$node\"}[5m])", 416 | "format": "time_series", 417 | "intervalFactor": 2, 418 | "legendFormat": "data_hits", 419 | "refId": "A", 420 | "step": 2, 421 | "target": "" 422 | }, 423 | { 424 | "expr": "irate(node_zfs_arc_demand_metadata_hits{job=\"node\",instance=\"$node\"}[5m])", 425 | "format": "time_series", 426 | "intervalFactor": 2, 427 | "legendFormat": "metadata_hits", 428 | "refId": "B", 429 | "step": 2, 430 | "target": "" 431 | }, 432 | { 433 | "expr": "irate(node_zfs_arc_demand_data_misses{job=\"node\",instance=\"$node\"}[5m])", 434 | "format": "time_series", 435 | "intervalFactor": 2, 436 | "legendFormat": "data_misses", 437 | "refId": "C", 438 | "step": 2, 439 | "target": "" 440 | }, 441 | { 442 | "expr": "irate(node_zfs_arc_demand_metadata_misses{job=\"node\",instance=\"$node\"}[5m])", 443 | "format": "time_series", 444 | "intervalFactor": 2, 445 | "legendFormat": "metadata_misses", 446 | "refId": "D", 447 | "step": 2, 448 | "target": "" 449 | } 450 | ], 451 | "thresholds": [], 452 | "timeFrom": null, 453 | "timeRegions": [], 454 | "timeShift": null, 455 | "title": "ARC - Hits, Misses", 456 | "tooltip": { 457 | "shared": true, 458 | "sort": 2, 459 | "value_type": "individual" 460 | }, 461 | "type": "graph", 462 | "xaxis": { 463 | "buckets": null, 464 | "mode": "time", 465 | "name": null, 466 | "show": true, 467 | "values": [] 468 | }, 469 | "yaxes": [ 470 | { 471 | "format": "none", 472 | "label": "", 473 | "logBase": 1, 474 | "max": null, 475 | "min": null, 476 | "show": true 477 | }, 478 | { 
479 | "format": "short", 480 | "label": null, 481 | "logBase": 1, 482 | "max": null, 483 | "min": null, 484 | "show": true 485 | } 486 | ], 487 | "yaxis": { 488 | "align": false, 489 | "alignLevel": null 490 | } 491 | }, 492 | { 493 | "aliasColors": {}, 494 | "bars": false, 495 | "dashLength": 10, 496 | "dashes": false, 497 | "datasource": "Dashboard", 498 | "fill": 1, 499 | "fillGradient": 0, 500 | "gridPos": { 501 | "h": 8, 502 | "w": 24, 503 | "x": 0, 504 | "y": 15 505 | }, 506 | "hiddenSeries": false, 507 | "id": 15, 508 | "legend": { 509 | "alignAsTable": true, 510 | "avg": true, 511 | "current": true, 512 | "max": true, 513 | "min": true, 514 | "rightSide": false, 515 | "show": true, 516 | "sideWidth": 350, 517 | "total": false, 518 | "values": true 519 | }, 520 | "lines": true, 521 | "linewidth": 1, 522 | "links": [], 523 | "nullPointMode": "null", 524 | "options": { 525 | "dataLinks": [] 526 | }, 527 | "percentage": false, 528 | "pointradius": 5, 529 | "points": false, 530 | "renderer": "flot", 531 | "seriesOverrides": [], 532 | "spaceLength": 10, 533 | "stack": true, 534 | "steppedLine": false, 535 | "targets": [ 536 | { 537 | "expr": "node_zfs_arc_data_size{job=\"node\",instance=\"$node\"}", 538 | "format": "time_series", 539 | "hide": false, 540 | "interval": "", 541 | "intervalFactor": 2, 542 | "legendFormat": "data", 543 | "refId": "I", 544 | "step": 2, 545 | "target": "" 546 | }, 547 | { 548 | "expr": "node_zfs_arc_metadata_size{job=\"node\",instance=\"$node\"}", 549 | "format": "time_series", 550 | "intervalFactor": 2, 551 | "legendFormat": "metadata", 552 | "refId": "D", 553 | "step": 2, 554 | "target": "" 555 | }, 556 | { 557 | "expr": "node_zfs_arc_anon_size{job=\"node\",instance=\"$node\"}", 558 | "format": "time_series", 559 | "hide": false, 560 | "interval": "", 561 | "intervalFactor": 2, 562 | "legendFormat": "anon", 563 | "refId": "B", 564 | "step": 2, 565 | "target": "" 566 | }, 567 | { 568 | "expr": 
"node_zfs_arc_hdr_size{job=\"node\",instance=\"$node\"}", 569 | "format": "time_series", 570 | "intervalFactor": 2, 571 | "legendFormat": "hdr", 572 | "refId": "C", 573 | "step": 2, 574 | "target": "" 575 | }, 576 | { 577 | "expr": "node_zfs_arc_other_size{job=\"node\",instance=\"$node\"}", 578 | "format": "time_series", 579 | "intervalFactor": 2, 580 | "legendFormat": "other", 581 | "refId": "J", 582 | "step": 2, 583 | "target": "" 584 | } 585 | ], 586 | "thresholds": [], 587 | "timeFrom": null, 588 | "timeRegions": [], 589 | "timeShift": null, 590 | "title": "ARC - Size", 591 | "tooltip": { 592 | "shared": true, 593 | "sort": 2, 594 | "value_type": "individual" 595 | }, 596 | "type": "graph", 597 | "xaxis": { 598 | "buckets": null, 599 | "mode": "time", 600 | "name": null, 601 | "show": true, 602 | "values": [] 603 | }, 604 | "yaxes": [ 605 | { 606 | "format": "bytes", 607 | "label": "", 608 | "logBase": 1, 609 | "max": null, 610 | "min": null, 611 | "show": true 612 | }, 613 | { 614 | "format": "short", 615 | "label": null, 616 | "logBase": 1, 617 | "max": null, 618 | "min": null, 619 | "show": true 620 | } 621 | ], 622 | "yaxis": { 623 | "align": false, 624 | "alignLevel": null 625 | } 626 | }, 627 | { 628 | "collapsed": false, 629 | "datasource": "Dashboard", 630 | "gridPos": { 631 | "h": 1, 632 | "w": 24, 633 | "x": 0, 634 | "y": 23 635 | }, 636 | "id": 22, 637 | "panels": [], 638 | "title": "L2ARC", 639 | "type": "row" 640 | }, 641 | { 642 | "aliasColors": {}, 643 | "bars": false, 644 | "dashLength": 10, 645 | "dashes": false, 646 | "datasource": "Dashboard", 647 | "decimals": 0, 648 | "fill": 1, 649 | "fillGradient": 0, 650 | "gridPos": { 651 | "h": 7, 652 | "w": 12, 653 | "x": 0, 654 | "y": 24 655 | }, 656 | "hiddenSeries": false, 657 | "id": 16, 658 | "legend": { 659 | "alignAsTable": true, 660 | "avg": true, 661 | "current": false, 662 | "hideEmpty": false, 663 | "hideZero": false, 664 | "max": false, 665 | "min": false, 666 | "rightSide": false, 667 | 
"show": true, 668 | "sideWidth": 350, 669 | "total": false, 670 | "values": true 671 | }, 672 | "lines": true, 673 | "linewidth": 1, 674 | "links": [], 675 | "nullPointMode": "null", 676 | "options": { 677 | "dataLinks": [] 678 | }, 679 | "percentage": false, 680 | "pointradius": 5, 681 | "points": false, 682 | "renderer": "flot", 683 | "seriesOverrides": [], 684 | "spaceLength": 10, 685 | "stack": false, 686 | "steppedLine": false, 687 | "targets": [ 688 | { 689 | "expr": "irate(node_zfs_arc_l2_hits{job=\"node\",instance=\"$node\"}[5m]) / (irate(node_zfs_arc_l2_hits{job=\"node\",instance=\"$node\"}[5m]) + irate(node_zfs_arc_l2_misses{job=\"node\",instance=\"$node\"}[5m])) * 100", 690 | "format": "time_series", 691 | "intervalFactor": 2, 692 | "legendFormat": "L2", 693 | "refId": "A", 694 | "step": 2, 695 | "target": "" 696 | } 697 | ], 698 | "thresholds": [], 699 | "timeFrom": null, 700 | "timeRegions": [], 701 | "timeShift": null, 702 | "title": "L2ARC - Hit %", 703 | "tooltip": { 704 | "shared": true, 705 | "sort": 2, 706 | "value_type": "individual" 707 | }, 708 | "type": "graph", 709 | "xaxis": { 710 | "buckets": null, 711 | "mode": "time", 712 | "name": null, 713 | "show": true, 714 | "values": [] 715 | }, 716 | "yaxes": [ 717 | { 718 | "format": "percent", 719 | "label": "", 720 | "logBase": 1, 721 | "max": null, 722 | "min": "0", 723 | "show": true 724 | }, 725 | { 726 | "format": "short", 727 | "label": null, 728 | "logBase": 1, 729 | "max": null, 730 | "min": null, 731 | "show": true 732 | } 733 | ], 734 | "yaxis": { 735 | "align": false, 736 | "alignLevel": null 737 | } 738 | }, 739 | { 740 | "aliasColors": {}, 741 | "bars": false, 742 | "dashLength": 10, 743 | "dashes": false, 744 | "datasource": "Dashboard", 745 | "decimals": 0, 746 | "fill": 1, 747 | "fillGradient": 0, 748 | "gridPos": { 749 | "h": 7, 750 | "w": 12, 751 | "x": 12, 752 | "y": 24 753 | }, 754 | "hiddenSeries": false, 755 | "id": 17, 756 | "legend": { 757 | "alignAsTable": true, 758 | 
"avg": false, 759 | "current": false, 760 | "max": false, 761 | "min": false, 762 | "rightSide": false, 763 | "show": true, 764 | "sideWidth": 350, 765 | "total": true, 766 | "values": true 767 | }, 768 | "lines": true, 769 | "linewidth": 1, 770 | "links": [], 771 | "nullPointMode": "null", 772 | "options": { 773 | "dataLinks": [] 774 | }, 775 | "percentage": false, 776 | "pointradius": 5, 777 | "points": false, 778 | "renderer": "flot", 779 | "seriesOverrides": [], 780 | "spaceLength": 10, 781 | "stack": false, 782 | "steppedLine": false, 783 | "targets": [ 784 | { 785 | "expr": "irate(node_zfs_arc_l2_hits{job=\"node\",instance=\"$node\"}[5m])", 786 | "format": "time_series", 787 | "intervalFactor": 2, 788 | "legendFormat": "hits", 789 | "refId": "A", 790 | "step": 2, 791 | "target": "" 792 | }, 793 | { 794 | "expr": "irate(node_zfs_arc_l2_misses{job=\"node\",instance=\"$node\"}[5m])", 795 | "format": "time_series", 796 | "intervalFactor": 2, 797 | "legendFormat": "misses", 798 | "refId": "B", 799 | "step": 2, 800 | "target": "" 801 | } 802 | ], 803 | "thresholds": [], 804 | "timeFrom": null, 805 | "timeRegions": [], 806 | "timeShift": null, 807 | "title": "L2ARC - Hits, Misses", 808 | "tooltip": { 809 | "shared": true, 810 | "sort": 2, 811 | "value_type": "individual" 812 | }, 813 | "type": "graph", 814 | "xaxis": { 815 | "buckets": null, 816 | "mode": "time", 817 | "name": null, 818 | "show": true, 819 | "values": [] 820 | }, 821 | "yaxes": [ 822 | { 823 | "format": "none", 824 | "label": "", 825 | "logBase": 1, 826 | "max": null, 827 | "min": "0", 828 | "show": true 829 | }, 830 | { 831 | "format": "short", 832 | "label": null, 833 | "logBase": 1, 834 | "max": null, 835 | "min": null, 836 | "show": true 837 | } 838 | ], 839 | "yaxis": { 840 | "align": false, 841 | "alignLevel": null 842 | } 843 | }, 844 | { 845 | "aliasColors": {}, 846 | "bars": false, 847 | "dashLength": 10, 848 | "dashes": false, 849 | "datasource": "Dashboard", 850 | "fill": 1, 851 | 
"fillGradient": 0, 852 | "gridPos": { 853 | "h": 7, 854 | "w": 24, 855 | "x": 0, 856 | "y": 31 857 | }, 858 | "hiddenSeries": false, 859 | "id": 18, 860 | "legend": { 861 | "alignAsTable": true, 862 | "avg": true, 863 | "current": false, 864 | "max": true, 865 | "min": true, 866 | "rightSide": false, 867 | "show": true, 868 | "sideWidth": 350, 869 | "total": false, 870 | "values": true 871 | }, 872 | "lines": true, 873 | "linewidth": 1, 874 | "links": [], 875 | "nullPointMode": "null", 876 | "options": { 877 | "dataLinks": [] 878 | }, 879 | "percentage": false, 880 | "pointradius": 5, 881 | "points": false, 882 | "renderer": "flot", 883 | "seriesOverrides": [], 884 | "spaceLength": 10, 885 | "stack": true, 886 | "steppedLine": false, 887 | "targets": [ 888 | { 889 | "expr": "node_zfs_arc_l2_asize{job=\"node\",instance=\"$node\"}", 890 | "format": "time_series", 891 | "hide": false, 892 | "interval": "", 893 | "intervalFactor": 2, 894 | "legendFormat": "asize", 895 | "refId": "I", 896 | "step": 2, 897 | "target": "" 898 | }, 899 | { 900 | "expr": "node_zfs_arc_l2_hdr_size{job=\"node\",instance=\"$node\"}", 901 | "format": "time_series", 902 | "intervalFactor": 2, 903 | "legendFormat": "metadata", 904 | "refId": "D", 905 | "step": 2, 906 | "target": "" 907 | }, 908 | { 909 | "expr": "node_zfs_arc_l2_size{job=\"node\",instance=\"$node\"}", 910 | "format": "time_series", 911 | "hide": false, 912 | "interval": "", 913 | "intervalFactor": 2, 914 | "legendFormat": "size", 915 | "refId": "B", 916 | "step": 2, 917 | "target": "" 918 | } 919 | ], 920 | "thresholds": [], 921 | "timeFrom": null, 922 | "timeRegions": [], 923 | "timeShift": null, 924 | "title": "L2ARC - Size", 925 | "tooltip": { 926 | "shared": true, 927 | "sort": 2, 928 | "value_type": "individual" 929 | }, 930 | "type": "graph", 931 | "xaxis": { 932 | "buckets": null, 933 | "mode": "time", 934 | "name": null, 935 | "show": true, 936 | "values": [] 937 | }, 938 | "yaxes": [ 939 | { 940 | "format": "bytes", 941 
| "label": "", 942 | "logBase": 1, 943 | "max": null, 944 | "min": "0", 945 | "show": true 946 | }, 947 | { 948 | "format": "short", 949 | "label": null, 950 | "logBase": 1, 951 | "max": null, 952 | "min": null, 953 | "show": true 954 | } 955 | ], 956 | "yaxis": { 957 | "align": false, 958 | "alignLevel": null 959 | } 960 | } 961 | ], 962 | "refresh": "10s", 963 | "schemaVersion": 21, 964 | "style": "dark", 965 | "tags": [], 966 | "templating": { 967 | "list": [ 968 | { 969 | "allValue": null, 970 | "current": { 971 | "text": "ansible-45d.bk.local", 972 | "value": "ansible-45d.bk.local" 973 | }, 974 | "datasource": "Dashboard", 975 | "definition": "label_values(node_exporter_build_info{job=~\"node\"}, instance)", 976 | "hide": 0, 977 | "includeAll": false, 978 | "label": null, 979 | "multi": false, 980 | "name": "node", 981 | "options": [], 982 | "query": "label_values(node_exporter_build_info{job=~\"node\"}, instance)", 983 | "refresh": 1, 984 | "regex": "", 985 | "skipUrlSync": false, 986 | "sort": 1, 987 | "tagValuesQuery": "", 988 | "tags": [], 989 | "tagsQuery": "", 990 | "type": "query", 991 | "useTags": false 992 | } 993 | ] 994 | }, 995 | "time": { 996 | "from": "now-30m", 997 | "to": "now" 998 | }, 999 | "timepicker": { 1000 | "refresh_intervals": [ 1001 | "5s", 1002 | "10s", 1003 | "30s", 1004 | "1m", 1005 | "5m", 1006 | "15m", 1007 | "30m", 1008 | "1h", 1009 | "2h", 1010 | "1d" 1011 | ], 1012 | "time_options": [ 1013 | "5m", 1014 | "15m", 1015 | "1h", 1016 | "6h", 1017 | "12h", 1018 | "24h", 1019 | "2d", 1020 | "7d", 1021 | "30d" 1022 | ] 1023 | }, 1024 | "timezone": "browser", 1025 | "title": "ZFS Stats Overview", 1026 | "uid": "YLum8s2ik", 1027 | "version": 8 1028 | } -------------------------------------------------------------------------------- /roles/grafana/files/zfs-replication-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | 
"datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 0, 27 | "links": [], 28 | "liveNow": false, 29 | "panels": [ 30 | { 31 | "collapsed": false, 32 | "gridPos": { 33 | "h": 1, 34 | "w": 24, 35 | "x": 0, 36 | "y": 0 37 | }, 38 | "id": 10, 39 | "panels": [], 40 | "title": "Row title", 41 | "type": "row" 42 | }, 43 | { 44 | "datasource": { 45 | "type": "prometheus", 46 | "uid": "P67B696468610B879" 47 | }, 48 | "fieldConfig": { 49 | "defaults": { 50 | "color": { 51 | "mode": "thresholds" 52 | }, 53 | "custom": { 54 | "align": "auto", 55 | "displayMode": "auto", 56 | "filterable": true, 57 | "inspect": false 58 | }, 59 | "mappings": [], 60 | "thresholds": { 61 | "mode": "absolute", 62 | "steps": [ 63 | { 64 | "color": "green", 65 | "value": null 66 | }, 67 | { 68 | "color": "red", 69 | "value": 80 70 | } 71 | ] 72 | }, 73 | "unit": "bytes" 74 | }, 75 | "overrides": [] 76 | }, 77 | "gridPos": { 78 | "h": 7, 79 | "w": 12, 80 | "x": 0, 81 | "y": 1 82 | }, 83 | "id": 2, 84 | "options": { 85 | "footer": { 86 | "enablePagination": false, 87 | "fields": [], 88 | "reducer": [ 89 | "sum" 90 | ], 91 | "show": false 92 | }, 93 | "showHeader": true, 94 | "sortBy": [ 95 | { 96 | "desc": true, 97 | "displayName": "Dataset" 98 | } 99 | ] 100 | }, 101 | "pluginVersion": "9.2.6", 102 | "targets": [ 103 | { 104 | "datasource": { 105 | "type": "prometheus", 106 | "uid": "P67B696468610B879" 107 | }, 108 | "editorMode": "builder", 109 | "exemplar": false, 110 | "expr": "zfs_dataset_used_bytes", 111 | "format": "table", 112 | "instant": true, 113 | "legendFormat": "__auto", 114 | "range": false, 115 | "refId": "A" 116 | }, 
117 | { 118 | "datasource": { 119 | "type": "prometheus", 120 | "uid": "P67B696468610B879" 121 | }, 122 | "editorMode": "builder", 123 | "exemplar": false, 124 | "expr": "zfs_dataset_used_by_snapshot_bytes", 125 | "format": "table", 126 | "hide": false, 127 | "instant": true, 128 | "legendFormat": "__auto", 129 | "range": false, 130 | "refId": "B" 131 | }, 132 | { 133 | "datasource": { 134 | "type": "prometheus", 135 | "uid": "P67B696468610B879" 136 | }, 137 | "editorMode": "builder", 138 | "exemplar": false, 139 | "expr": "zfs_dataset_referenced_bytes", 140 | "format": "table", 141 | "hide": false, 142 | "instant": true, 143 | "legendFormat": "__auto", 144 | "range": false, 145 | "refId": "C" 146 | } 147 | ], 148 | "title": "Space Used by Dataset (Source)", 149 | "transformations": [ 150 | { 151 | "id": "joinByField", 152 | "options": { 153 | "byField": "name", 154 | "mode": "outer" 155 | } 156 | }, 157 | { 158 | "id": "organize", 159 | "options": { 160 | "excludeByName": { 161 | "Time": true, 162 | "__name__": true, 163 | "instance 2": true, 164 | "instance 3": true, 165 | "job": true, 166 | "pool": true, 167 | "type": true 168 | }, 169 | "indexByName": { 170 | "Time 1": 2, 171 | "Time 2": 8, 172 | "Value #A": 7, 173 | "Value #B": 14, 174 | "__name__ 1": 3, 175 | "__name__ 2": 9, 176 | "instance 1": 0, 177 | "instance 2": 10, 178 | "job 1": 4, 179 | "job 2": 11, 180 | "name": 1, 181 | "pool 1": 5, 182 | "pool 2": 12, 183 | "type 1": 6, 184 | "type 2": 13 185 | }, 186 | "renameByName": { 187 | "Value": "Value", 188 | "Value #A": "Used Space", 189 | "Value #B": "Used Space by Snapshots", 190 | "Value #C": "Referenced", 191 | "instance": "Hostname", 192 | "instance 1": "Hostname", 193 | "name": "Dataset" 194 | } 195 | } 196 | }, 197 | { 198 | "id": "filterByValue", 199 | "options": { 200 | "filters": [ 201 | { 202 | "config": { 203 | "id": "regex", 204 | "options": { 205 | "value": "\\/" 206 | } 207 | }, 208 | "fieldName": "Dataset" 209 | } 210 | ], 211 | "match": 
"any", 212 | "type": "include" 213 | } 214 | }, 215 | { 216 | "id": "filterByValue", 217 | "options": { 218 | "filters": [ 219 | { 220 | "config": { 221 | "id": "regex", 222 | "options": { 223 | "value": "@" 224 | } 225 | }, 226 | "fieldName": "Dataset" 227 | } 228 | ], 229 | "match": "any", 230 | "type": "exclude" 231 | } 232 | } 233 | ], 234 | "type": "table" 235 | }, 236 | { 237 | "datasource": { 238 | "type": "prometheus", 239 | "uid": "P67B696468610B879" 240 | }, 241 | "fieldConfig": { 242 | "defaults": { 243 | "color": { 244 | "mode": "thresholds" 245 | }, 246 | "custom": { 247 | "align": "auto", 248 | "displayMode": "auto", 249 | "filterable": true, 250 | "inspect": false 251 | }, 252 | "mappings": [], 253 | "thresholds": { 254 | "mode": "absolute", 255 | "steps": [ 256 | { 257 | "color": "green", 258 | "value": null 259 | }, 260 | { 261 | "color": "red", 262 | "value": 80 263 | } 264 | ] 265 | }, 266 | "unit": "bytes" 267 | }, 268 | "overrides": [] 269 | }, 270 | "gridPos": { 271 | "h": 7, 272 | "w": 12, 273 | "x": 12, 274 | "y": 1 275 | }, 276 | "id": 17, 277 | "options": { 278 | "footer": { 279 | "enablePagination": false, 280 | "fields": [], 281 | "reducer": [ 282 | "sum" 283 | ], 284 | "show": false 285 | }, 286 | "showHeader": true, 287 | "sortBy": [ 288 | { 289 | "desc": false, 290 | "displayName": "Hostname" 291 | } 292 | ] 293 | }, 294 | "pluginVersion": "9.2.6", 295 | "targets": [ 296 | { 297 | "datasource": { 298 | "type": "prometheus", 299 | "uid": "P67B696468610B879" 300 | }, 301 | "editorMode": "builder", 302 | "exemplar": false, 303 | "expr": "zfs_dataset_used_bytes", 304 | "format": "table", 305 | "instant": true, 306 | "legendFormat": "__auto", 307 | "range": false, 308 | "refId": "A" 309 | }, 310 | { 311 | "datasource": { 312 | "type": "prometheus", 313 | "uid": "P67B696468610B879" 314 | }, 315 | "editorMode": "builder", 316 | "exemplar": false, 317 | "expr": "zfs_dataset_used_by_snapshot_bytes", 318 | "format": "table", 319 | "hide": 
false, 320 | "instant": true, 321 | "legendFormat": "__auto", 322 | "range": false, 323 | "refId": "B" 324 | }, 325 | { 326 | "datasource": { 327 | "type": "prometheus", 328 | "uid": "P67B696468610B879" 329 | }, 330 | "editorMode": "builder", 331 | "exemplar": false, 332 | "expr": "zfs_dataset_referenced_bytes", 333 | "format": "table", 334 | "hide": false, 335 | "instant": true, 336 | "legendFormat": "__auto", 337 | "range": false, 338 | "refId": "C" 339 | } 340 | ], 341 | "title": "Space Used by Dataset (Destination)", 342 | "transformations": [ 343 | { 344 | "id": "joinByField", 345 | "options": { 346 | "byField": "name", 347 | "mode": "outer" 348 | } 349 | }, 350 | { 351 | "id": "organize", 352 | "options": { 353 | "excludeByName": { 354 | "Time": true, 355 | "__name__": true, 356 | "instance 2": true, 357 | "instance 3": true, 358 | "job": true, 359 | "pool": true, 360 | "type": true 361 | }, 362 | "indexByName": { 363 | "Time 1": 2, 364 | "Time 2": 8, 365 | "Value #A": 7, 366 | "Value #B": 14, 367 | "__name__ 1": 3, 368 | "__name__ 2": 9, 369 | "instance 1": 0, 370 | "instance 2": 10, 371 | "job 1": 4, 372 | "job 2": 11, 373 | "name": 1, 374 | "pool 1": 5, 375 | "pool 2": 12, 376 | "type 1": 6, 377 | "type 2": 13 378 | }, 379 | "renameByName": { 380 | "Value": "Value", 381 | "Value #A": "Used Space", 382 | "Value #B": "Used Space by Snapshots", 383 | "Value #C": "Referenced", 384 | "instance": "Hostname", 385 | "instance 1": "Hostname", 386 | "name": "Dataset" 387 | } 388 | } 389 | }, 390 | { 391 | "id": "filterByValue", 392 | "options": { 393 | "filters": [ 394 | { 395 | "config": { 396 | "id": "regex", 397 | "options": { 398 | "value": "\\/" 399 | } 400 | }, 401 | "fieldName": "Dataset" 402 | } 403 | ], 404 | "match": "any", 405 | "type": "include" 406 | } 407 | }, 408 | { 409 | "id": "filterByValue", 410 | "options": { 411 | "filters": [ 412 | { 413 | "config": { 414 | "id": "regex", 415 | "options": { 416 | "value": "@" 417 | } 418 | }, 419 | "fieldName": 
"Dataset" 420 | } 421 | ], 422 | "match": "any", 423 | "type": "exclude" 424 | } 425 | } 426 | ], 427 | "type": "table" 428 | }, 429 | { 430 | "datasource": { 431 | "type": "prometheus", 432 | "uid": "P67B696468610B879" 433 | }, 434 | "fieldConfig": { 435 | "defaults": { 436 | "color": { 437 | "mode": "palette-classic" 438 | }, 439 | "custom": { 440 | "axisCenteredZero": false, 441 | "axisColorMode": "text", 442 | "axisLabel": "", 443 | "axisPlacement": "auto", 444 | "barAlignment": 0, 445 | "drawStyle": "line", 446 | "fillOpacity": 0, 447 | "gradientMode": "none", 448 | "hideFrom": { 449 | "legend": false, 450 | "tooltip": false, 451 | "viz": false 452 | }, 453 | "lineInterpolation": "stepBefore", 454 | "lineWidth": 1, 455 | "pointSize": 1, 456 | "scaleDistribution": { 457 | "type": "linear" 458 | }, 459 | "showPoints": "auto", 460 | "spanNulls": false, 461 | "stacking": { 462 | "group": "A", 463 | "mode": "none" 464 | }, 465 | "thresholdsStyle": { 466 | "mode": "off" 467 | } 468 | }, 469 | "mappings": [], 470 | "thresholds": { 471 | "mode": "absolute", 472 | "steps": [ 473 | { 474 | "color": "green", 475 | "value": null 476 | }, 477 | { 478 | "color": "red", 479 | "value": 80 480 | } 481 | ] 482 | }, 483 | "unit": "bool_on_off" 484 | }, 485 | "overrides": [] 486 | }, 487 | "gridPos": { 488 | "h": 7, 489 | "w": 24, 490 | "x": 0, 491 | "y": 8 492 | }, 493 | "id": 12, 494 | "options": { 495 | "legend": { 496 | "calcs": [ 497 | "last" 498 | ], 499 | "displayMode": "table", 500 | "placement": "right", 501 | "showLegend": true 502 | }, 503 | "tooltip": { 504 | "mode": "single", 505 | "sort": "none" 506 | } 507 | }, 508 | "targets": [ 509 | { 510 | "datasource": { 511 | "type": "prometheus", 512 | "uid": "P67B696468610B879" 513 | }, 514 | "editorMode": "code", 515 | "expr": "znapzend_presend_command_started", 516 | "legendFormat": "{{exported_job}} Active", 517 | "range": true, 518 | "refId": "A" 519 | } 520 | ], 521 | "title": "Replication Task State", 522 | "type": 
"timeseries" 523 | }, 524 | { 525 | "datasource": { 526 | "type": "prometheus", 527 | "uid": "P67B696468610B879" 528 | }, 529 | "fieldConfig": { 530 | "defaults": { 531 | "color": { 532 | "mode": "palette-classic" 533 | }, 534 | "custom": { 535 | "axisCenteredZero": false, 536 | "axisColorMode": "text", 537 | "axisLabel": "", 538 | "axisPlacement": "auto", 539 | "barAlignment": 0, 540 | "drawStyle": "line", 541 | "fillOpacity": 0, 542 | "gradientMode": "none", 543 | "hideFrom": { 544 | "legend": false, 545 | "tooltip": false, 546 | "viz": false 547 | }, 548 | "lineInterpolation": "stepBefore", 549 | "lineWidth": 1, 550 | "pointSize": 1, 551 | "scaleDistribution": { 552 | "type": "linear" 553 | }, 554 | "showPoints": "auto", 555 | "spanNulls": false, 556 | "stacking": { 557 | "group": "A", 558 | "mode": "none" 559 | }, 560 | "thresholdsStyle": { 561 | "mode": "off" 562 | } 563 | }, 564 | "mappings": [], 565 | "noValue": "0", 566 | "thresholds": { 567 | "mode": "absolute", 568 | "steps": [ 569 | { 570 | "color": "green", 571 | "value": null 572 | }, 573 | { 574 | "color": "red", 575 | "value": 80 576 | } 577 | ] 578 | }, 579 | "unit": "bool" 580 | }, 581 | "overrides": [ 582 | { 583 | "__systemRef": "hideSeriesFrom", 584 | "matcher": { 585 | "id": "byNames", 586 | "options": { 587 | "mode": "exclude", 588 | "names": [ 589 | "storage/Lidar Failed" 590 | ], 591 | "prefix": "All except:", 592 | "readOnly": true 593 | } 594 | }, 595 | "properties": [ 596 | { 597 | "id": "custom.hideFrom", 598 | "value": { 599 | "legend": false, 600 | "tooltip": false, 601 | "viz": true 602 | } 603 | } 604 | ] 605 | } 606 | ] 607 | }, 608 | "gridPos": { 609 | "h": 7, 610 | "w": 24, 611 | "x": 0, 612 | "y": 15 613 | }, 614 | "id": 18, 615 | "options": { 616 | "legend": { 617 | "calcs": [], 618 | "displayMode": "table", 619 | "placement": "right", 620 | "showLegend": true, 621 | "sortBy": "Last", 622 | "sortDesc": false 623 | }, 624 | "tooltip": { 625 | "mode": "single", 626 | "sort": 
"none" 627 | } 628 | }, 629 | "targets": [ 630 | { 631 | "datasource": { 632 | "type": "prometheus", 633 | "uid": "P67B696468610B879" 634 | }, 635 | "editorMode": "builder", 636 | "expr": "znapzend_job_failed", 637 | "legendFormat": "{{source_dataset}} Failed", 638 | "range": true, 639 | "refId": "A" 640 | } 641 | ], 642 | "title": "Replication Failure State", 643 | "type": "timeseries" 644 | }, 645 | { 646 | "datasource": { 647 | "type": "prometheus", 648 | "uid": "P67B696468610B879" 649 | }, 650 | "fieldConfig": { 651 | "defaults": { 652 | "color": { 653 | "mode": "palette-classic" 654 | }, 655 | "custom": { 656 | "axisCenteredZero": false, 657 | "axisColorMode": "text", 658 | "axisLabel": "", 659 | "axisPlacement": "auto", 660 | "barAlignment": 0, 661 | "drawStyle": "line", 662 | "fillOpacity": 0, 663 | "gradientMode": "none", 664 | "hideFrom": { 665 | "legend": false, 666 | "tooltip": false, 667 | "viz": false 668 | }, 669 | "lineInterpolation": "linear", 670 | "lineWidth": 1, 671 | "pointSize": 1, 672 | "scaleDistribution": { 673 | "type": "linear" 674 | }, 675 | "showPoints": "auto", 676 | "spanNulls": false, 677 | "stacking": { 678 | "group": "A", 679 | "mode": "none" 680 | }, 681 | "thresholdsStyle": { 682 | "mode": "off" 683 | } 684 | }, 685 | "mappings": [], 686 | "min": 0, 687 | "thresholds": { 688 | "mode": "absolute", 689 | "steps": [ 690 | { 691 | "color": "green", 692 | "value": null 693 | }, 694 | { 695 | "color": "red", 696 | "value": 80 697 | } 698 | ] 699 | }, 700 | "unit": "bytes" 701 | }, 702 | "overrides": [] 703 | }, 704 | "gridPos": { 705 | "h": 8, 706 | "w": 24, 707 | "x": 0, 708 | "y": 22 709 | }, 710 | "id": 14, 711 | "options": { 712 | "legend": { 713 | "calcs": [ 714 | "last" 715 | ], 716 | "displayMode": "table", 717 | "placement": "right", 718 | "showLegend": true 719 | }, 720 | "tooltip": { 721 | "mode": "single", 722 | "sort": "none" 723 | } 724 | }, 725 | "targets": [ 726 | { 727 | "datasource": { 728 | "type": "prometheus", 729 | 
"uid": "P67B696468610B879" 730 | }, 731 | "editorMode": "builder", 732 | "expr": "zfs_dataset_written_bytes{instance=\"$node\"}", 733 | "legendFormat": "{{name}}", 734 | "range": true, 735 | "refId": "A" 736 | } 737 | ], 738 | "title": "Data Written Since Last Snapshot", 739 | "transformations": [ 740 | { 741 | "id": "filterFieldsByName", 742 | "options": { 743 | "include": { 744 | "pattern": "/.*/" 745 | } 746 | } 747 | } 748 | ], 749 | "type": "timeseries" 750 | }, 751 | { 752 | "collapsed": false, 753 | "gridPos": { 754 | "h": 1, 755 | "w": 24, 756 | "x": 0, 757 | "y": 30 758 | }, 759 | "id": 16, 760 | "panels": [], 761 | "title": "Row title", 762 | "type": "row" 763 | }, 764 | { 765 | "datasource": { 766 | "type": "prometheus", 767 | "uid": "P67B696468610B879" 768 | }, 769 | "fieldConfig": { 770 | "defaults": { 771 | "color": { 772 | "mode": "palette-classic" 773 | }, 774 | "custom": { 775 | "axisCenteredZero": false, 776 | "axisColorMode": "text", 777 | "axisLabel": "", 778 | "axisPlacement": "auto", 779 | "barAlignment": 0, 780 | "drawStyle": "line", 781 | "fillOpacity": 0, 782 | "gradientMode": "none", 783 | "hideFrom": { 784 | "legend": false, 785 | "tooltip": false, 786 | "viz": false 787 | }, 788 | "lineInterpolation": "linear", 789 | "lineWidth": 1, 790 | "pointSize": 1, 791 | "scaleDistribution": { 792 | "type": "linear" 793 | }, 794 | "showPoints": "auto", 795 | "spanNulls": false, 796 | "stacking": { 797 | "group": "A", 798 | "mode": "none" 799 | }, 800 | "thresholdsStyle": { 801 | "mode": "off" 802 | } 803 | }, 804 | "mappings": [], 805 | "thresholds": { 806 | "mode": "absolute", 807 | "steps": [ 808 | { 809 | "color": "green", 810 | "value": null 811 | }, 812 | { 813 | "color": "red", 814 | "value": 80 815 | } 816 | ] 817 | }, 818 | "unit": "bytes" 819 | }, 820 | "overrides": [ 821 | { 822 | "matcher": { 823 | "id": "byRegexp", 824 | "options": "/.*tx/" 825 | }, 826 | "properties": [ 827 | { 828 | "id": "custom.transform", 829 | "value": 
"negative-Y" 830 | } 831 | ] 832 | } 833 | ] 834 | }, 835 | "gridPos": { 836 | "h": 8, 837 | "w": 24, 838 | "x": 0, 839 | "y": 31 840 | }, 841 | "id": 8, 842 | "options": { 843 | "legend": { 844 | "calcs": [ 845 | "min", 846 | "max", 847 | "mean" 848 | ], 849 | "displayMode": "table", 850 | "placement": "right", 851 | "showLegend": true, 852 | "sortBy": "Mean", 853 | "sortDesc": true 854 | }, 855 | "tooltip": { 856 | "mode": "single", 857 | "sort": "none" 858 | } 859 | }, 860 | "targets": [ 861 | { 862 | "datasource": { 863 | "type": "prometheus", 864 | "uid": "P67B696468610B879" 865 | }, 866 | "editorMode": "code", 867 | "expr": "sum by (device) (\n irate(node_network_receive_bytes{device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{device!=\"lo\"}[1m])\n)", 868 | "legendFormat": "{{device}}.rx", 869 | "range": true, 870 | "refId": "A" 871 | }, 872 | { 873 | "datasource": { 874 | "type": "prometheus", 875 | "uid": "P67B696468610B879" 876 | }, 877 | "editorMode": "code", 878 | "expr": "sum by (device) (\n irate(node_network_transmit_bytes{device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{device!=\"lo\"}[1m])\n)", 879 | "hide": false, 880 | "legendFormat": "{{device}}.tx", 881 | "range": true, 882 | "refId": "B" 883 | } 884 | ], 885 | "title": "Network Throughput", 886 | "type": "timeseries" 887 | }, 888 | { 889 | "datasource": { 890 | "type": "prometheus", 891 | "uid": "P67B696468610B879" 892 | }, 893 | "fieldConfig": { 894 | "defaults": { 895 | "color": { 896 | "mode": "palette-classic" 897 | }, 898 | "custom": { 899 | "axisCenteredZero": false, 900 | "axisColorMode": "text", 901 | "axisLabel": "", 902 | "axisPlacement": "auto", 903 | "barAlignment": 0, 904 | "drawStyle": "line", 905 | "fillOpacity": 0, 906 | "gradientMode": "none", 907 | "hideFrom": { 908 | "legend": false, 909 | "tooltip": false, 910 | "viz": false 911 | }, 912 | "lineInterpolation": "linear", 913 | "lineWidth": 1, 914 | "pointSize": 1, 915 | "scaleDistribution": { 
916 | "type": "linear" 917 | }, 918 | "showPoints": "auto", 919 | "spanNulls": false, 920 | "stacking": { 921 | "group": "A", 922 | "mode": "none" 923 | }, 924 | "thresholdsStyle": { 925 | "mode": "off" 926 | } 927 | }, 928 | "mappings": [], 929 | "thresholds": { 930 | "mode": "absolute", 931 | "steps": [ 932 | { 933 | "color": "green", 934 | "value": null 935 | }, 936 | { 937 | "color": "red", 938 | "value": 80 939 | } 940 | ] 941 | }, 942 | "unit": "bytes" 943 | }, 944 | "overrides": [ 945 | { 946 | "matcher": { 947 | "id": "byName", 948 | "options": "Total Read" 949 | }, 950 | "properties": [ 951 | { 952 | "id": "custom.transform", 953 | "value": "negative-Y" 954 | } 955 | ] 956 | } 957 | ] 958 | }, 959 | "gridPos": { 960 | "h": 8, 961 | "w": 24, 962 | "x": 0, 963 | "y": 39 964 | }, 965 | "id": 7, 966 | "options": { 967 | "legend": { 968 | "calcs": [ 969 | "min", 970 | "max", 971 | "mean" 972 | ], 973 | "displayMode": "table", 974 | "placement": "right", 975 | "showLegend": true 976 | }, 977 | "tooltip": { 978 | "mode": "single", 979 | "sort": "none" 980 | } 981 | }, 982 | "targets": [ 983 | { 984 | "datasource": { 985 | "type": "prometheus", 986 | "uid": "P67B696468610B879" 987 | }, 988 | "editorMode": "code", 989 | "expr": "sum(irate(node_disk_written_bytes_total[1m]))", 990 | "legendFormat": "Total Write", 991 | "range": true, 992 | "refId": "A" 993 | }, 994 | { 995 | "datasource": { 996 | "type": "prometheus", 997 | "uid": "P67B696468610B879" 998 | }, 999 | "editorMode": "code", 1000 | "expr": "sum(irate(node_disk_read_bytes_total[1m]))", 1001 | "hide": false, 1002 | "legendFormat": "Total Read", 1003 | "range": true, 1004 | "refId": "B" 1005 | } 1006 | ], 1007 | "title": "Disk Throughput", 1008 | "type": "timeseries" 1009 | } 1010 | ], 1011 | "refresh": "5s", 1012 | "schemaVersion": 37, 1013 | "style": "dark", 1014 | "tags": [], 1015 | "templating": { 1016 | "list": [ 1017 | { 1018 | "current": { 1019 | "selected": false, 1020 | "text": "zfssrc", 1021 | 
"value": "zfssrc" 1022 | }, 1023 | "datasource": { 1024 | "type": "prometheus", 1025 | "uid": "P67B696468610B879" 1026 | }, 1027 | "definition": "label_values(node_boot_time_seconds, instance)", 1028 | "hide": 0, 1029 | "includeAll": false, 1030 | "multi": false, 1031 | "name": "node", 1032 | "options": [], 1033 | "query": { 1034 | "query": "label_values(node_boot_time_seconds, instance)", 1035 | "refId": "StandardVariableQuery" 1036 | }, 1037 | "refresh": 1, 1038 | "regex": "", 1039 | "skipUrlSync": false, 1040 | "sort": 0, 1041 | "type": "query" 1042 | } 1043 | ] 1044 | }, 1045 | "time": { 1046 | "from": "now-1h", 1047 | "to": "now" 1048 | }, 1049 | "timepicker": {}, 1050 | "timezone": "", 1051 | "title": "ZFS Replication Overview", 1052 | "uid": "Wo0VxCOVz", 1053 | "version": 1, 1054 | "weekStart": "" 1055 | } -------------------------------------------------------------------------------- /roles/grafana/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | company: Red Hat 4 | author: Boris Ranto 5 | description: Configures Grafana for Ceph Dashboard 6 | license: Apache 7 | min_ansible_version: 2.4 8 | platforms: 9 | - name: EL 10 | versions: 11 | - 7 12 | galaxy_tags: 13 | - system 14 | dependencies: [] 15 | -------------------------------------------------------------------------------- /roles/grafana/tasks/configure_grafana.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: make sure grafana is down 3 | service: 4 | name: grafana-server 5 | state: stopped 6 | 7 | - name: wait for grafana to be stopped 8 | wait_for: 9 | port: '{{ grafana_port }}' 10 | state: stopped 11 | 12 | - name: make sure grafana configuration directories exist 13 | file: 14 | path: "{{ item }}" 15 | state: directory 16 | recurse: yes 17 | with_items: 18 | - "/etc/grafana/provisioning/datasources" 19 | - "/etc/grafana/provisioning/dashboards" 20 | 21 | - name: 
write grafana.ini 22 | template: 23 | src: grafana.ini.j2 24 | dest: /etc/grafana/grafana.ini 25 | mode: 0640 26 | 27 | - name: write datasources provisioning config file 28 | template: 29 | src: datasources-prometheus.yml.j2 30 | dest: /etc/grafana/provisioning/datasources/prometheus.yml 31 | mode: 0640 32 | 33 | - name: Write dashboards provisioning config files 34 | template: 35 | src: dashboards-{{ item }}.yml.j2 36 | dest: /etc/grafana/provisioning/dashboards/{{ item }}.yml 37 | mode: 0640 38 | with_items: 39 | - zfs 40 | - system 41 | 42 | - name: Write zfs dashboard files 43 | copy: 44 | src: "{{ item }}" 45 | dest: "{{ grafana_dashboards_path }}/zfs/" 46 | owner: root 47 | group: root 48 | mode: 0644 49 | with_items: 50 | - zfs-overview.json 51 | - zfs-detailed-stats.json 52 | - zfs-replication-overview.json 53 | 54 | - name: Write System dashboard files 55 | copy: 56 | src: "{{ item }}" 57 | dest: "{{ grafana_dashboards_path }}/system/" 58 | owner: root 59 | group: root 60 | mode: 0644 61 | with_items: 62 | - system-overview.json 63 | - network-overview.json 64 | 65 | - name: copy grafana SSL certificate file 66 | copy: 67 | src: "{{ grafana_crt }}" 68 | dest: "/etc/grafana/ceph-dashboard.crt" 69 | mode: 0640 70 | when: # grafana_crt is a file path, so test for non-empty rather than casting to bool 71 | - grafana_crt | default('', true) | length > 0 72 | - grafana_protocol == "https" 73 | 74 | - name: copy grafana SSL certificate key 75 | copy: 76 | src: "{{ grafana_key }}" 77 | dest: "/etc/grafana/ceph-dashboard.key" 78 | mode: 0440 79 | when: # grafana_key is a file path, so test for non-empty rather than casting to bool 80 | - grafana_key | default('', true) | length > 0 81 | - grafana_protocol == "https" 82 | 83 | - name: generate a Self Signed OpenSSL certificate for dashboard # output paths must match the destinations used by the two copy tasks above 84 | shell: | 85 | test -f /etc/grafana/ceph-dashboard.key -a -f /etc/grafana/ceph-dashboard.crt || \ 86 | openssl req -new -nodes -x509 -subj '/O=IT/CN=grafana' -days 3650 -keyout /etc/grafana/ceph-dashboard.key -out /etc/grafana/ceph-dashboard.crt -extensions v3_ca 87 | when: # only self-sign when the user did not supply both a key and a certificate 88 | - grafana_protocol == "https" 89 | - (grafana_key | default('', true) | length == 0) or (grafana_crt | default('', true) | length == 0) 90 | 91 | - name: set owner/group on
/etc/grafana 92 | file: 93 | path: /etc/grafana 94 | state: directory 95 | # This is the UID used by the grafana container 96 | owner: "{{ grafana_uid }}" 97 | # This group is used by the grafana rpm 98 | group: "grafana" 99 | recurse: true 100 | 101 | - name: open grafana firewall port 102 | firewalld: 103 | port: "{{ grafana_port }}/tcp" 104 | zone: "public" 105 | permanent: true 106 | immediate: true 107 | state: enabled 108 | 109 | - name: enable and start grafana 110 | service: 111 | name: grafana-server 112 | state: restarted 113 | enabled: true 114 | 115 | - name: wait for grafana to start 116 | wait_for: 117 | port: "{{ grafana_port }}" 118 | -------------------------------------------------------------------------------- /roles/grafana/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: include setup_container.yml 3 | include_tasks: setup_container.yml 4 | 5 | - name: include configure_grafana.yml 6 | include_tasks: configure_grafana.yml 7 | -------------------------------------------------------------------------------- /roles/grafana/tasks/setup_container.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set fact container_binary to docker centos 7 3 | set_fact: 4 | container_binary: "docker" 5 | when: 6 | - ansible_distribution == 'CentOS' 7 | - ansible_distribution_major_version == '7' 8 | 9 | - name: set fact container_binary to docker debian 10 | set_fact: 11 | container_binary: "docker" 12 | when: 13 | - ansible_distribution == 'Debian' 14 | 15 | - name: create grafana user 16 | user: 17 | name: grafana 18 | shell: '/bin/false' 19 | createhome: false 20 | system: true 21 | 22 | - name: create /etc/grafana and /var/lib/grafana 23 | file: 24 | path: "{{ item }}" 25 | state: directory 26 | owner: "{{ grafana_uid }}" 27 | group: "{{ grafana_uid }}" 28 | recurse: true 29 | with_items: 30 | - /etc/grafana 31 | - /var/lib/grafana 32 | 33 
| - name: ship systemd service 34 | template: 35 | src: grafana-server.service.j2 36 | dest: "/etc/systemd/system/grafana-server.service" 37 | owner: root 38 | group: root 39 | mode: 0644 40 | 41 | - name: start the grafana-server service 42 | systemd: 43 | name: grafana-server 44 | state: started 45 | enabled: yes 46 | daemon_reload: yes 47 | failed_when: false 48 | -------------------------------------------------------------------------------- /roles/grafana/templates/dashboards-system.yml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'System Dashboard' 5 | orgId: 1 6 | folder: 'System-Dashboard' 7 | type: file 8 | disableDeletion: false 9 | updateIntervalSeconds: 3 10 | editable: false 11 | options: 12 | path: '{{ grafana_dashboards_path }}/system' 13 | -------------------------------------------------------------------------------- /roles/grafana/templates/dashboards-zfs.yml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'ZFS Dashboard' 5 | orgId: 1 6 | folder: 'ZFS-Dashboard' 7 | type: file 8 | disableDeletion: false 9 | updateIntervalSeconds: 3 10 | editable: True 11 | options: 12 | path: '{{ grafana_dashboards_path }}/zfs' -------------------------------------------------------------------------------- /roles/grafana/templates/datasources-prometheus.yml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | # list of datasources that should be deleted from the database 4 | deleteDatasources: 5 | - name: '{{ grafana_datasource }}' 6 | orgId: 1 7 | 8 | # list of datasources to insert/update depending 9 | # what's available in the database 10 | datasources: 11 | # name of the datasource. Required 12 | - name: '{{ grafana_datasource }}' 13 | # datasource type. Required 14 | type: 'prometheus' 15 | # access mode. 
proxy or direct (Server or Browser in the UI). Required 16 | access: 'proxy' 17 | # org id. will default to orgId 1 if not specified 18 | orgId: 1 19 | # url 20 | url: 'http://localhost:{{ prometheus_port }}' 21 | # enable/disable basic auth 22 | basicAuth: false 23 | # mark as default datasource. Max one per org 24 | isDefault: true 25 | # allow users to edit datasources from the UI. 26 | editable: false 27 | -------------------------------------------------------------------------------- /roles/grafana/templates/grafana-server.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten. 3 | [Unit] 4 | Description=grafana-server 5 | {% if container_binary == 'docker' %} 6 | After=docker.service 7 | {% else %} 8 | After=network.target 9 | {% endif %} 10 | 11 | [Service] 12 | EnvironmentFile=-/etc/environment 13 | ExecStartPre=-/usr/bin/{{ container_binary }} stop grafana-server 14 | ExecStartPre=-/usr/bin/{{ container_binary }} rm grafana-server 15 | ExecStart=/usr/bin/{{ container_binary }} run --rm --name=grafana-server \ 16 | -v /etc/grafana:/etc/grafana:Z \ 17 | -v /var/lib/grafana:/var/lib/grafana:Z \ 18 | --net=host \ 19 | --cpu-period={{ grafana_container_cpu_period }} \ 20 | --cpu-quota={{ grafana_container_cpu_period * grafana_container_cpu_cores }} \ 21 | --memory={{ grafana_container_memory }}GB \ 22 | --memory-swap={{ grafana_container_memory * 2 }}GB \ 23 | -e GF_INSTALL_PLUGINS={{ grafana_plugins|join(',') }} \ 24 | {{ grafana_container_image }} 25 | ExecStop=-/usr/bin/{{ container_binary }} stop grafana-server 26 | KillMode=none 27 | Restart=always 28 | RestartSec=10s 29 | TimeoutStartSec=120 30 | TimeoutStopSec=15 31 | 32 | [Install] 33 | WantedBy=multi-user.target 34 | -------------------------------------------------------------------------------- /roles/grafana/templates/grafana.ini.j2: 
-------------------------------------------------------------------------------- 1 | # [server] 2 | # root_url = %(protocol)s://%(domain)s:%(http_port)s/api/grafana/proxy 3 | 4 | [users] 5 | default_theme = {{ grafana_default_theme }} 6 | 7 | #################################### Anonymous Auth ########################## 8 | {% if grafana_anonymous_access %} 9 | [auth.anonymous] 10 | # enable anonymous access 11 | enabled = True 12 | 13 | # specify organization name that should be used for unauthenticated users 14 | org_name = Main Org. 15 | 16 | # specify role for unauthenticated users 17 | org_role = Viewer 18 | {% endif %} 19 | 20 | [server] 21 | cert_file = /etc/grafana/ceph-dashboard.crt 22 | cert_key = /etc/grafana/ceph-dashboard.key 23 | domain = {{ ansible_fqdn }} 24 | protocol = {{ grafana_protocol }} 25 | http_port = {{ grafana_port }} 26 | 27 | [security] 28 | admin_user = {{ grafana_admin_user }} 29 | admin_password = {{ grafana_admin_password }} 30 | allow_embedding = {{ grafana_allow_embedding }} -------------------------------------------------------------------------------- /roles/node-exporter/defaults/main.yml: -------------------------------------------------------------------------------- 1 | container_binary: podman 2 | node_exporter_container_image: "docker.io/prom/node-exporter:v1.4.0" 3 | node_exporter_port: 9100 4 | node_exporter_collector_dir: /var/lib/node_exporter -------------------------------------------------------------------------------- /roles/node-exporter/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | company: Red Hat 4 | author: Boris Ranto 5 | description: Configures Prometheus Node Exporter 6 | license: Apache 7 | min_ansible_version: 2.4 8 | platforms: 9 | - name: EL 10 | versions: 11 | - 7 12 | galaxy_tags: 13 | - system 14 | dependencies: [] 15 | -------------------------------------------------------------------------------- 
/roles/node-exporter/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: open node_exporter firewall port 3 | firewalld: 4 | port: "{{ node_exporter_port }}/tcp" 5 | zone: "public" 6 | permanent: true 7 | immediate: true 8 | state: enabled 9 | - name: make collector directory 10 | file: 11 | path: "{{ node_exporter_collector_dir }}" 12 | state: directory 13 | - name: include setup_container.yml 14 | include_tasks: setup_container.yml 15 | -------------------------------------------------------------------------------- /roles/node-exporter/tasks/setup_container.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set fact container_binary to docker centos 7 3 | set_fact: 4 | container_binary: "docker" 5 | when: 6 | - ansible_distribution == 'CentOS' 7 | - ansible_distribution_major_version == '7' 8 | 9 | - name: set fact container_binary to docker debian 10 | set_fact: 11 | container_binary: "docker" 12 | when: 13 | - ansible_distribution == 'Debian' 14 | 15 | - name: ship systemd service 16 | template: 17 | src: node_exporter.service.j2 18 | dest: "/etc/systemd/system/node_exporter.service" 19 | owner: root 20 | group: root 21 | mode: 0644 22 | 23 | - name: start the node_exporter service 24 | systemd: 25 | name: node_exporter 26 | state: started 27 | enabled: yes 28 | daemon_reload: yes 29 | failed_when: false 30 | -------------------------------------------------------------------------------- /roles/node-exporter/templates/node_exporter.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten. 
3 | [Unit] 4 | Description=Node Exporter 5 | {% if container_binary == 'docker' %} 6 | After=docker.service 7 | {% else %} 8 | After=network.target 9 | {% endif %} 10 | 11 | [Service] 12 | EnvironmentFile=-/etc/environment 13 | ExecStartPre=-/usr/bin/{{ container_binary }} rm -f node-exporter 14 | ExecStart=/usr/bin/{{ container_binary }} run --rm --name=node-exporter \ 15 | --privileged \ 16 | -v /proc:/host/proc:ro -v /sys:/host/sys:ro -v {{ node_exporter_collector_dir }}:{{ node_exporter_collector_dir }} \ 17 | --net=host \ 18 | {{ node_exporter_container_image }} \ 19 | --path.procfs=/host/proc \ 20 | --collector.textfile.directory={{ node_exporter_collector_dir }} \ 21 | --path.sysfs=/host/sys \ 22 | --collector.systemd \ 23 | --no-collector.timex \ 24 | --web.listen-address=:{{ node_exporter_port }} 25 | ExecStop=-/usr/bin/{{ container_binary }} stop node-exporter 26 | KillMode=none 27 | Restart=always 28 | RestartSec=10s 29 | TimeoutStartSec=120 30 | TimeoutStopSec=15 31 | 32 | [Install] 33 | WantedBy=multi-user.target 34 | -------------------------------------------------------------------------------- /roles/prometheus/defaults/main.yml: -------------------------------------------------------------------------------- 1 | container_binary: podman 2 | prometheus_listen_address: '' #To listen on all interfaces leave this empty 3 | prometheus_port: 9091 4 | prometheus_container_image: "docker.io/prom/prometheus:v2.7.2" 5 | prometheus_container_cpu_period: 100000 6 | prometheus_container_cpu_cores: 2 7 | prometheus_container_memory: 4 # container_memory is in GB 8 | prometheus_data_dir: /var/lib/prometheus 9 | prometheus_storage_tsdb_retention_time: "30d" 10 | prometheus_conf_dir: /etc/prometheus 11 | prometheus_user_id: '65534' # This is the UID used by the prom/prometheus container image 12 | alertmanager_listen_address: '' #To listen on all interfaces leave this empty 13 | alertmanager_port: 9093 14 | alertmanager_cluster_port: 9094 15 | 
alertmanager_container_image: "docker.io/prom/alertmanager:v0.16.2" 16 | alertmanager_container_cpu_period: 100000 17 | alertmanager_container_cpu_cores: 2 18 | alertmanager_container_memory: 4 # container_memory is in GB 19 | alertmanager_data_dir: /var/lib/alertmanager 20 | alertmanager_conf_dir: /etc/alertmanager 21 | alertmanager_smtp_host: localhost 22 | alertmanager_smtp_port: 25 23 | alertmanager_send_email: 'localhost@localhost' 24 | alertmanager_receive_email: 25 | - 'localhost@localhost' 26 | - 'dummy@localhost' 27 | -------------------------------------------------------------------------------- /roles/prometheus/files/default-alerts.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: default 3 | rules: 4 | - alert: Node Exporter Down 5 | expr: up{job="node"} == 0 6 | for: 5m 7 | labels: 8 | severity: page 9 | annotations: 10 | summary: "Node Exporter Down" 11 | description: "No metrics are being collected" 12 | -------------------------------------------------------------------------------- /roles/prometheus/files/mdadm-alerts.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: mdadm 3 | rules: 4 | - alert: Host RAID array is inactive 5 | expr: node_md_state{state="inactive"} > 0 6 | labels: 7 | severity: critical 8 | annotations: 9 | summary: "Attention Required: {{ $labels.instance }} : mdadm RAID {{ $labels.device }} is in a {{ $labels.state }} state" 10 | description: "mdadm RAID array {{ $labels.device }} is in degraded state due to one or more disks failures. 
Number of spare drives is insufficient to fix issue automatically" 11 | - alert: Host RAID disk failure 12 | expr: node_md_disks{state="failed"} > 0 13 | labels: 14 | severity: warning 15 | annotations: 16 | summary: "Attention Required: {{ $labels.instance }} : mdadm RAID {{ $labels.device }} is in a {{ $labels.state }} state" 17 | description: "At least one device in mdadm RAID array on {{ $labels.instance }} failed. Array {{ $labels.device }} needs attention and possibly a disk replacement" -------------------------------------------------------------------------------- /roles/prometheus/files/zfs-alerts.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: zfs 3 | rules: 4 | - alert: ZpoolDegradedState 5 | expr: node_zfs_zpool_state{state="degraded"} == 1 6 | labels: 7 | severity: warning 8 | annotations: 9 | summary: "Attention Required: {{ $labels.instance }} : zpool {{ $labels.zpool }} is in a {{ $labels.state }} state." 10 | description: "Zpool:{{ $labels.zpool }} has experienced a failure but is still functioning. The fault tolerance of the pool might be compromised, as a subsequent fault in another device might be unrecoverable" 11 | - alert: ZpoolFaultedState 12 | expr: node_zfs_zpool_state{state="faulted"} == 1 13 | labels: 14 | severity: critical 15 | annotations: 16 | summary: "Attention Required: {{ $labels.instance }} : zpool {{ $labels.zpool }} is in a {{ $labels.state }} state." 17 | description: "Zpool:{{ $labels.zpool }} is in FAULTED state and is completely inaccessible.
No data can be recovered until the necessary devices are attached or repaired" 18 | - alert: ZFS Send Task Started 19 | expr: deriv(znapzend_presend_command_started[1m]) > 0 20 | labels: 21 | severity: info 22 | annotations: 23 | summary: "ZFS Snapshot Send Started : Dataset {{ $labels.exported_job }} has started a replication task" 24 | description: "Dataset:{{ $labels.exported_job }} has started a replication task from host {{ $labels.instance }} to remote destination {{ $labels.target_host }} " 25 | - alert: ZFS Send Task Finished 26 | expr: deriv(znapzend_presend_command_started[1m]) < 0 27 | labels: 28 | severity: info 29 | annotations: 30 | summary: "ZFS Snapshot Send finished: Dataset {{ $labels.exported_job }} has finished a replication task" 31 | description: "Dataset:{{ $labels.exported_job }} has finished a replication task from host {{ $labels.instance }} to remote destination {{ $labels.target_host }} " 32 | - alert: ZFS Send Task Failed 33 | expr: znapzend_job_failed == 1 34 | labels: 35 | severity: warn 36 | annotations: 37 | summary: "ZFS Snapshot Send failed: Dataset {{ $labels.source_dataset }} has failed to send" 38 | description: "Dataset:{{ $labels.source_dataset }} has failed its replication task from host {{ $labels.instance }} to remote destination {{ $labels.target_host }}. 
Please consult 'journalctl -u znapzend' on host {{ $labels.instance }} for reason" 39 | - alert: Znapzend Service Failed 40 | expr: node_systemd_unit_state{name="znapzend.service",state="active"} == 0 41 | for: 1m 42 | labels: 43 | severity: critical 44 | annotations: 45 | summary: "Znapzend systemd service failed or is not running" 46 | description: "Znapzend service is not running, check systemctl status or journalctl for more information" 47 | -------------------------------------------------------------------------------- /roles/prometheus/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: service handler 3 | # We use the systemd module here so we can use the daemon_reload feature, 4 | # since we're shipping the .service file ourselves 5 | systemd: 6 | name: "{{ item }}" 7 | daemon_reload: true 8 | enabled: true 9 | state: restarted 10 | with_items: 11 | - 'alertmanager' 12 | - 'prometheus' 13 | -------------------------------------------------------------------------------- /roles/prometheus/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | company: Red Hat 4 | author: Boris Ranto 5 | description: Configures Prometheus for Ceph Dashboard 6 | license: Apache 7 | min_ansible_version: 2.4 8 | platforms: 9 | - name: EL 10 | versions: 11 | - 7 12 | galaxy_tags: 13 | - system 14 | dependencies: [] 15 | -------------------------------------------------------------------------------- /roles/prometheus/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: create prometheus directories 3 | file: 4 | path: "{{ item }}" 5 | state: directory 6 | owner: "{{ prometheus_user_id }}" 7 | with_items: 8 | - "{{ prometheus_conf_dir }}" 9 | - "{{ prometheus_data_dir }}" 10 | 11 | - name: write prometheus config file 12 | template: 13 | src: prometheus.yml.j2 14 | dest: "{{ 
prometheus_conf_dir }}/prometheus.yml" 15 | owner: "{{ prometheus_user_id }}" 16 | notify: service handler 17 | 18 | - name: make sure the alerting rules directory exists 19 | file: # rules live under prometheus_conf_dir, the directory bind-mounted to /etc/prometheus in the container 20 | path: "{{ prometheus_conf_dir }}/alerting/" 21 | state: directory 22 | recurse: yes 23 | 24 | - name: copy {{ item }} alerting rules file 25 | copy: 26 | src: "{{ item }}-alerts.yml" 27 | dest: "{{ prometheus_conf_dir }}/alerting/{{ item }}-alerts.yml" 28 | owner: root 29 | group: root 30 | mode: 0644 31 | with_items: 32 | - default 33 | - zfs 34 | - mdadm 35 | 36 | - name: create alertmanager directories 37 | file: 38 | path: "{{ item }}" 39 | state: directory 40 | owner: "root" 41 | with_items: 42 | - "{{ alertmanager_conf_dir }}" 43 | - "{{ alertmanager_data_dir }}" 44 | 45 | - name: write alertmanager config file 46 | template: 47 | src: alertmanager.yml.j2 48 | dest: "{{ alertmanager_conf_dir }}/alertmanager.yml" 49 | owner: "root" 50 | notify: service handler 51 | 52 | - name: open firewall ports 53 | firewalld: 54 | port: "{{ item }}/tcp" 55 | zone: "public" 56 | permanent: true 57 | immediate: true 58 | state: enabled 59 | with_items: 60 | - "{{ prometheus_port }}" 61 | - "{{ alertmanager_port }}" 62 | - "{{ alertmanager_cluster_port }}" 63 | 64 | - name: include setup_container.yml 65 | include_tasks: setup_container.yml 66 | -------------------------------------------------------------------------------- /roles/prometheus/tasks/setup_container.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set fact container_binary to docker centos 7 3 | set_fact: 4 | container_binary: "docker" 5 | when: 6 | - ansible_distribution == 'CentOS' 7 | - ansible_distribution_major_version == '7' 8 | 9 | - name: set fact container_binary to docker debian 10 | set_fact: 11 | container_binary: "docker" 12 | when: 13 | - ansible_distribution == 'Debian' 14 | 15 | - name: ship systemd services 16 | template: 17 | src: "{{ item }}.j2" 18 | dest:
"/etc/systemd/system/{{ item }}" 19 | owner: root 20 | group: root 21 | mode: 0644 22 | with_items: 23 | - 'alertmanager.service' 24 | - 'prometheus.service' 25 | notify: service handler 26 | 27 | - name: start prometheus services 28 | systemd: 29 | name: "{{ item }}" 30 | daemon_reload: true 31 | enabled: true 32 | state: started 33 | with_items: 34 | - prometheus 35 | - alertmanager 36 | -------------------------------------------------------------------------------- /roles/prometheus/templates/alertmanager.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten. 3 | [Unit] 4 | Description=alertmanager 5 | {% if container_binary == 'docker' %} 6 | After=docker.service 7 | {% else %} 8 | After=network.target 9 | {% endif %} 10 | 11 | [Service] 12 | WorkingDirectory={{ alertmanager_data_dir }} 13 | EnvironmentFile=-/etc/environment 14 | ExecStartPre=-/usr/bin/{{ container_binary }} rm -f alertmanager 15 | ExecStart=/usr/bin/{{ container_binary }} run --rm --name=alertmanager \ 16 | -v "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z" \ 17 | -v "{{ alertmanager_data_dir }}:/alertmanager:Z" \ 18 | --net=host \ 19 | --cpu-period={{ alertmanager_container_cpu_period }} \ 20 | --cpu-quota={{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }} \ 21 | --memory={{ alertmanager_container_memory }}GB \ 22 | --memory-swap={{ alertmanager_container_memory * 2 }}GB \ 23 | {{ alertmanager_container_image }} \ 24 | --config.file=/etc/alertmanager/alertmanager.yml \ 25 | --cluster.listen-address={{ alertmanager_listen_address }}:{{ alertmanager_cluster_port }} \ 26 | --storage.path=/alertmanager \ 27 | --web.external-url=http://{{ ansible_fqdn }}:{{ alertmanager_port }}/ \ 28 | --web.listen-address={{ alertmanager_listen_address }}:{{ alertmanager_port }} 29 | ExecStop=/usr/bin/{{ container_binary }} stop alertmanager 30 | 
KillMode=none 31 | Restart=always 32 | RestartSec=10s 33 | TimeoutStartSec=120 34 | TimeoutStopSec=15 35 | 36 | [Install] 37 | WantedBy=multi-user.target 38 | -------------------------------------------------------------------------------- /roles/prometheus/templates/alertmanager.yml.j2: -------------------------------------------------------------------------------- 1 | global: 2 | smtp_smarthost: {{ alertmanager_smtp_host }}:{{ alertmanager_smtp_port }} 3 | smtp_from: {{ alertmanager_send_email }} 4 | smtp_require_tls: {{ alertmanager_require_tls }} 5 | resolve_timeout: 5m 6 | {% if alertmanager_smtp_username is defined %} 7 | smtp_auth_username: {{ alertmanager_smtp_username | to_json }} 8 | smtp_auth_password: {{ alertmanager_smtp_password | to_json }} # credentials are rendered through to_json so characters such as '#' or ':' stay inside a quoted scalar 9 | {% endif %} 10 | route: 11 | group_by: ['alertname'] 12 | group_wait: 10s 13 | group_interval: 10s 14 | repeat_interval: 1h 15 | receiver: email 16 | 17 | receivers: 18 | - name: email 19 | email_configs: 20 | {% for email in alertmanager_receive_email %} 21 | - to: {{ email }} 22 | {% endfor %} 23 | -------------------------------------------------------------------------------- /roles/prometheus/templates/prometheus.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten.
3 | [Unit] 4 | Description=prometheus 5 | {% if container_binary == 'docker' %} 6 | After=docker.service 7 | {% else %} 8 | After=network.target 9 | {% endif %} 10 | # Prometheus runs as a container: the host directories prometheus_conf_dir and prometheus_data_dir are bind-mounted to /etc/prometheus and /prometheus. 11 | [Service] 12 | EnvironmentFile=-/etc/environment 13 | ExecStartPre=-/usr/bin/{{ container_binary }} rm -f prometheus 14 | ExecStart=/usr/bin/{{ container_binary }} run --rm --name=prometheus \ 15 | -v "{{ prometheus_conf_dir }}:/etc/prometheus:Z" \ 16 | -v "{{ prometheus_data_dir }}:/prometheus:Z" \ 17 | --net=host \ 18 | --user={{ prometheus_user_id }} \ 19 | --cpu-period={{ prometheus_container_cpu_period }} \ 20 | --cpu-quota={{ prometheus_container_cpu_period * prometheus_container_cpu_cores }} \ 21 | --memory={{ prometheus_container_memory }}GB \ 22 | --memory-swap={{ prometheus_container_memory * 2 }}GB \ 23 | {{ prometheus_container_image }} \ 24 | --config.file=/etc/prometheus/prometheus.yml \ 25 | --storage.tsdb.path=/prometheus \ 26 | {% if prometheus_storage_tsdb_retention_time is defined %} 27 | --storage.tsdb.retention.time={{ prometheus_storage_tsdb_retention_time }} \ 28 | {% endif %} 29 | --web.external-url=http://{{ ansible_fqdn }}:{{ prometheus_port }}/ \ 30 | --web.listen-address={{ prometheus_listen_address }}:{{ prometheus_port }} 31 | ExecStop=/usr/bin/{{ container_binary }} stop prometheus 32 | KillMode=none 33 | Restart=always 34 | RestartSec=10s 35 | TimeoutStartSec=120 36 | TimeoutStopSec=15 37 | 38 | [Install] 39 | WantedBy=multi-user.target 40 | -------------------------------------------------------------------------------- /roles/prometheus/templates/prometheus.yml.j2: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | # Every file in the alerting directory of the mounted config dir is loaded as a rule file. 5 | rule_files: 6 | - '/etc/prometheus/alerting/*' 7 | # The prometheus and zfsreplication jobs scrape localhost; the node and zfs jobs emit one target per host in the inventory group 'all'. 8 | scrape_configs: 9 | - job_name: 'prometheus' 10 | static_configs: 11 | - targets: ['localhost:{{ prometheus_port }}'] 12 | - job_name: 'zfsreplication' 13 | static_configs: 14 | - targets:
['localhost:{{ znapzend_exporter_port }}'] 15 | - job_name: 'node' # NOTE(review): port 9100 is hard-coded below while the zfs job uses zfs_exporter_port - confirm the node exporter port is never overridden 16 | static_configs: 17 | {% for host in groups['all'] %} 18 | - targets: ['{{ host }}:9100'] 19 | labels: 20 | instance: "{{ hostvars[host]['ansible_nodename'] }}" 21 | {% endfor %} 22 | - job_name: 'zfs' 23 | static_configs: 24 | {% for host in groups['all'] %} 25 | - targets: ['{{ host }}:{{ zfs_exporter_port }}'] 26 | labels: 27 | instance: "{{ hostvars[host]['ansible_nodename'] }}" 28 | {% endfor %} 29 | alerting: 30 | alertmanagers: 31 | - scheme: http 32 | static_configs: 33 | - targets: ['localhost:{{ alertmanager_port }}'] 34 | -------------------------------------------------------------------------------- /roles/zfs-exporter/defaults/main.yml: -------------------------------------------------------------------------------- 1 | container_binary: podman 2 | zfs_exporter_container_image: docker.io/45drives/zfs_exporter:v2.2.5 3 | zfs_exporter_port: 9134 4 | zfs_exporter_enable_snapshot_collection: false -------------------------------------------------------------------------------- /roles/zfs-exporter/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | -------------------------------------------------------------------------------- /roles/zfs-exporter/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | -------------------------------------------------------------------------------- /roles/zfs-exporter/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: open zfs_exporter firewall port 3 | firewalld: 4 | port: "{{ zfs_exporter_port }}/tcp" 5 | zone: "public" 6 | permanent: true 7 | immediate: true 8 | state: enabled 9 | # Unit file installation and service start are delegated to setup_container.yml. 10 | - name: include setup_container.yml 11 | include_tasks: setup_container.yml 12 | --------------------------------------------------------------------------------
/roles/zfs-exporter/tasks/setup_container.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: ship systemd service 3 | template: 4 | src: zfs_exporter.service.j2 5 | dest: "/etc/systemd/system/zfs_exporter.service" 6 | owner: root 7 | group: root 8 | mode: 0644 9 | 10 | - name: start the zfs_exporter service 11 | systemd: 12 | name: zfs_exporter 13 | state: started 14 | enabled: yes 15 | daemon_reload: yes 16 | failed_when: false -------------------------------------------------------------------------------- /roles/zfs-exporter/templates/zfs_exporter.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten. 3 | [Unit] 4 | Description=zfs_exporter 5 | 6 | [Service] 7 | EnvironmentFile=-/etc/environment 8 | ExecStartPre=-/usr/bin/podman rm -f zfs_exporter 9 | ExecStart=/usr/bin/podman run --rm --name=zfs_exporter \ 10 | --privileged \ 11 | --net=host \ 12 | {{ zfs_exporter_container_image }} \ 13 | {% if zfs_exporter_enable_snapshot_collection %} 14 | --collector.dataset-snapshot \ 15 | {% endif %} 16 | --web.listen-address=":{{ zfs_exporter_port }}" \ 17 | --properties.dataset-filesystem="available,logicalused,quota,referenced,used,usedbydataset,usedbysnapshots,written" 18 | # ExecStop must name the container exactly as --name does above (zfs_exporter); with KillMode=none nothing else stops it. 19 | ExecStop=-/usr/bin/podman stop zfs_exporter 20 | KillMode=none 21 | Restart=always 22 | RestartSec=10s 23 | TimeoutStartSec=120 24 | TimeoutStopSec=15 25 | 26 | [Install] 27 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/znapzend-exporter/defaults/main.yml: -------------------------------------------------------------------------------- 1 | container_binary: podman 2 | znapzend_exporter_container_image: docker.io/ccremer/znapzend-exporter:v0.3.1 3 | znapzend_exporter_port: 9101 4 |
-------------------------------------------------------------------------------- /roles/znapzend-exporter/files/error_logger: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Scans the znapzend journal for replication errors from the last OFFSET seconds (first argument, default 60) and writes znapzend_job_failed metrics to a .prom file under /var/lib/node_exporter. 3 | reset_exporter(){ 4 | /opt/znapzend/post_send_cmd.sh "$1" "$2" 5 | } 6 | 7 | reset_log(){ 8 | echo "" > /opt/znapzend/error.log 9 | } 10 | 11 | OFFSET=$1 12 | 13 | if [ $# -eq 0 ]; then 14 | OFFSET="60" 15 | fi 16 | 17 | # Grab Current Time 18 | NOW_EPOCH=$(date +%s) 19 | PAST_EPOCH=$(( NOW_EPOCH - OFFSET )) 20 | PAST=$(date -d @$PAST_EPOCH +"%Y-%m-%d %H:%M:%S") 21 | 22 | reset_log 23 | 24 | #sleep 5 # sleep for 5 seconds before we grab the log 25 | # grab journalctl of znapzend --since time above --no-pager 26 | LOG=$(journalctl -u znapzend --since "$PAST" -o cat --no-pager) 27 | 28 | ERROR_COUNT=0 29 | 30 | #sleep 5 # sleep for 5 seconds so we dont miss 31 | 32 | echo "# HELP znapzend_job_failed Current status of znapzend replication job (0 = not failed, 1 = failed) INIT" > /var/lib/node_exporter/znapzend_monitor.prom.tmp 33 | 34 | # loop through LOG line by line (read -r keeps backslashes in log lines literal) 35 | echo "$LOG" | while read -r line ; do 36 | if echo "$line" | grep -v "+-->" | grep -q "ERROR: cannot send snapshots" ; then # If ERROR found 37 | #ERROR: cannot send snapshots from storage/Projects to backup/Projects on root@smb-nfs-01 38 | REMOTE_DATASET=$(echo $line | awk '{print $8}') # Get dataset name 39 | REMOTE_HOST=$(echo $line | awk '{print $10}') # get destination host 40 | SOURCE_DATASET=$(echo $line | awk '{print $6}') 41 | echo "znapzend_job_failed{target_host=\"$REMOTE_HOST\",target_dataset=\"$REMOTE_DATASET\",source_dataset=\"$SOURCE_DATASET\"} 1" >> /var/lib/node_exporter/znapzend_monitor.prom.tmp 42 | reset_exporter "$SOURCE_DATASET" "$REMOTE_HOST" # Fire reset command for exporter 43 | let ERROR_COUNT=$ERROR_COUNT+1 44 | elif echo "$line" | grep -v "+-->" | grep -q "ERROR: snapshot(s) exist on destination" ; then 45 | #ERROR: snapshot(s) exist on destination,
but no common found on source: storage/Projects and destination: root@smb-nfs-01:backup/Projects clean up destination root@smb-nfs-01:backup/Projects (i.e. destroy existing snapshots) 46 | REMOTE=$(echo $line | awk '{print $15}') # root@smb-nfs-01:backup/Projects 47 | REMOTE_HOST=$(echo $REMOTE | cut -d: -f1) 48 | REMOTE_DATASET=$(echo $REMOTE | cut -d: -f2) 49 | SOURCE_DATASET=$(echo $line | awk '{print $12}') 50 | echo "znapzend_job_failed{target_host=\"$REMOTE_HOST\",target_dataset=\"$REMOTE_DATASET\",source_dataset=\"$SOURCE_DATASET\"} 1" >> /var/lib/node_exporter/znapzend_monitor.prom.tmp 51 | reset_exporter "$SOURCE_DATASET" "$REMOTE_HOST" 52 | let ERROR_COUNT=$ERROR_COUNT+1 53 | fi 54 | done 55 | 56 | # if [ "$ERROR_COUNT" -eq "0" ] ; then 57 | # # if any errors encountered reset all znapzend_fail values to 0 58 | # echo "# HELP znapzend_job_failed Current status of znapzend replication job (0 = not failed, 1 = failed) RESET" > /var/lib/node_exporter/znapzend_monitor.prom.tmp 59 | # cat /var/lib/node_exporter/znapzend_monitor.prom | while read -r line; do 60 | # if [ "$(echo "$line" | awk '{print $2}')" == "HELP" ] ; then 61 | # echo "# HELP znapzend_job_failed Current status of znapzend replication job (0 = not failed, 1 = failed) RESET" > /var/lib/node_exporter/znapzend_monitor.prom.tmp 62 | # elif [ "$(echo "$line" | awk '{print $2}')" -eq "1" ] ; then 63 | # echo "${line/%1/0}" >> /var/lib/node_exporter/znapzend_monitor.prom.tmp 64 | # else [ "$(echo "$line" | awk '{print $2}')" -eq "0" ] 65 | # echo "$line" >> /var/lib/node_exporter/znapzend_monitor.prom.tmp 66 | # fi 67 | # done 68 | # fi 69 | 70 | mv /var/lib/node_exporter/znapzend_monitor.prom.tmp /var/lib/node_exporter/znapzend_monitor.prom -------------------------------------------------------------------------------- /roles/znapzend-exporter/files/post_send_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | EXPORTER_HOST="localhost" 4
| EXPORTER_PORT="9101" 5 | DATASET_NAME="$1" 6 | REMOTE_HOSTNAME="$2" 7 | 8 | if [ $# -eq 0 ]; then 9 | echo "Dataset name required. ex) storage/dataset1" 10 | exit 1 11 | fi 12 | # Fall back to a placeholder target host when the caller did not pass one. 13 | if [ -z "$REMOTE_HOSTNAME" ];then 14 | REMOTE_HOSTNAME="remote-host" 15 | fi 16 | # The URL is quoted so the shell neither globs on '?' nor treats '&' as a control operator. 17 | /usr/bin/curl -sS "$EXPORTER_HOST:$EXPORTER_PORT/postsend/$DATASET_NAME?SelfResetAfter=5m&TargetHost=$REMOTE_HOSTNAME" -------------------------------------------------------------------------------- /roles/znapzend-exporter/files/post_snap_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | EXPORTER_HOST="localhost" 4 | EXPORTER_PORT="9101" 5 | DATASET_NAME="$1" 6 | 7 | if [ $# -eq 0 ]; then 8 | echo "Dataset name required. ex) storage/dataset1" 9 | exit 1 10 | fi 11 | 12 | # The URL is quoted so the dataset name always reaches curl as a single argument. 13 | /usr/bin/curl -sS "$EXPORTER_HOST:$EXPORTER_PORT/postsnap/$DATASET_NAME" 14 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/files/pre_send_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | EXPORTER_HOST="localhost" 4 | EXPORTER_PORT="9101" 5 | DATASET_NAME="$1" 6 | REMOTE_HOSTNAME="$2" 7 | 8 | if [ $# -eq 0 ]; then 9 | echo "Dataset name required. ex) storage/dataset1" 10 | exit 1 11 | fi 12 | # Fall back to a placeholder target host when the caller did not pass one. 13 | if [ -z "$REMOTE_HOSTNAME" ];then 14 | REMOTE_HOSTNAME="remote-host" 15 | fi 16 | # The URL is quoted so the shell does not glob on '?'. 17 | /usr/bin/curl -sS "$EXPORTER_HOST:$EXPORTER_PORT/presend/$DATASET_NAME?TargetHost=$REMOTE_HOSTNAME" 18 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/files/pre_snap_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | EXPORTER_HOST="localhost" 4 | EXPORTER_PORT="9101" 5 | DATASET_NAME="$1" 6 | 7 | 8 | if [ $# -eq 0 ]; then 9 | echo "Dataset name required.
ex) storage/dataset1" 10 | exit 1 11 | fi 12 | # The URL is quoted so the dataset name always reaches curl as a single argument. 13 | /usr/bin/curl -sS "$EXPORTER_HOST:$EXPORTER_PORT/presnap/$DATASET_NAME" 14 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/files/znapzend_monitor: -------------------------------------------------------------------------------- 1 | */1 * * * * root /bin/bash /opt/znapzend/error_logger 60 -------------------------------------------------------------------------------- /roles/znapzend-exporter/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | company: 45Drives 4 | author: Brett Kelly 5 | description: Configures ccremer Znapzend Exporter 6 | license: Apache 7 | min_ansible_version: 2.4 8 | platforms: 9 | - name: EL 10 | versions: 11 | - 7 12 | galaxy_tags: 13 | - system 14 | dependencies: [] 15 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: open znapzend_exporter firewall port 3 | firewalld: 4 | port: "{{ znapzend_exporter_port }}/tcp" 5 | zone: "public" 6 | permanent: true 7 | immediate: true 8 | state: enabled 9 | 10 | - name: include setup_container.yml 11 | include_tasks: setup_container.yml 12 | 13 | - name: install helper scripts 14 | block: 15 | - name: create znapzend dir 16 | ansible.builtin.file: 17 | path: /opt/znapzend 18 | state: directory 19 | mode: '0755' 20 | 21 | - name: Copy helper scripts 22 | ansible.builtin.copy: 23 | src: "{{ item }}" 24 | dest: /opt/znapzend/{{ item }} 25 | owner: root 26 | group: root 27 | mode: '0774' 28 | loop: 29 | - pre_send_cmd.sh 30 | - post_send_cmd.sh 31 | - pre_snap_cmd.sh 32 | - post_snap_cmd.sh 33 | - error_logger 34 | # The crontab is files/znapzend_monitor (error_logger is the script it runs, not a cron file). The destination name is kept so hosts deployed earlier are corrected in place; cron.d entries must not be group- or world-writable, hence 0644. 35 | - name: Copy znapzend cron file 36 | ansible.builtin.copy: 37 | src: "znapzend_monitor" 38 | dest: /etc/cron.d/error_logger 39 | owner: root 40
| group: root 41 | mode: '0644' 42 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/tasks/setup_container.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set fact container_binary to docker centos 7 3 | set_fact: 4 | container_binary: "docker" 5 | when: 6 | - ansible_distribution == 'CentOS' 7 | - ansible_distribution_major_version == '7' 8 | 9 | - name: set fact container_binary to docker debian 10 | set_fact: 11 | container_binary: "docker" 12 | when: 13 | - ansible_distribution == 'Debian' 14 | 15 | - name: ship systemd service 16 | template: 17 | src: znapzend_exporter.service.j2 18 | dest: "/etc/systemd/system/znapzend_exporter.service" 19 | owner: root 20 | group: root 21 | mode: 0644 22 | 23 | - name: start the znapzend_exporter service 24 | systemd: 25 | name: znapzend_exporter 26 | state: started 27 | enabled: yes 28 | daemon_reload: yes 29 | failed_when: false 30 | -------------------------------------------------------------------------------- /roles/znapzend-exporter/templates/znapzend_exporter.service.j2: -------------------------------------------------------------------------------- 1 | # This file is managed by ansible, don't make changes here - they will be 2 | # overwritten.
3 | [Unit] 4 | Description=znapzend_exporter 5 | {% if container_binary == 'docker' %} 6 | After=docker.service 7 | {% else %} 8 | After=network.target 9 | {% endif %} 10 | # The container commands below use the same engine (container_binary) that selected the After= ordering above. 11 | [Service] 12 | EnvironmentFile=-/etc/environment 13 | ExecStartPre=-/usr/bin/{{ container_binary }} rm -f znapzend-exporter 14 | ExecStart=/usr/bin/{{ container_binary }} run --rm --name=znapzend-exporter \ 15 | --net=host \ 16 | {{ znapzend_exporter_container_image }} \ 17 | --bindAddr=:{{ znapzend_exporter_port }} 18 | 19 | ExecStop=-/usr/bin/{{ container_binary }} stop znapzend-exporter 20 | KillMode=none 21 | Restart=always 22 | RestartSec=10s 23 | TimeoutStartSec=120 24 | TimeoutStopSec=15 25 | 26 | [Install] 27 | WantedBy=multi-user.target --------------------------------------------------------------------------------