├── _config.yml ├── go.mod ├── kustomization.yaml ├── docker-compose.yaml ├── resources ├── service-monitor.yaml ├── service.yaml └── daemonset.yaml ├── Dockerfile ├── README.md ├── LICENSE ├── main.go ├── go.sum └── grafana └── dashboard.json /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module nvme_exporter 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/prometheus/client_golang v1.11.0 7 | github.com/tidwall/gjson v1.8.1 8 | ) 9 | -------------------------------------------------------------------------------- /kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: default 4 | 5 | resources: 6 | - resources/daemonset.yaml 7 | - resources/service.yaml 8 | - resources/service-monitor.yaml 9 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '2.4' 2 | services: 3 | nvme_exporter: 4 | container_name: "nvme_exporter" 5 | build: 6 | context: ./ 7 | dockerfile: Dockerfile 8 | network_mode: host 9 | restart: unless-stopped 10 | privileged: true 11 | expose: 12 | - "9998" 13 | -------------------------------------------------------------------------------- /resources/service-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: nvme-exporter 5 | labels: 6 | app: nvme-exporter 7 | spec: 8 | selector: 9 | matchLabels: 10 | app: nvme-exporter 11 | endpoints: 12 | - port: metrics 13 | path: /metrics 14 | 
-------------------------------------------------------------------------------- /resources/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: nvme-exporter 7 | name: nvme-exporter 8 | spec: 9 | type: NodePort 10 | selector: 11 | app: nvme-exporter 12 | ports: 13 | - name: metrics 14 | protocol: TCP 15 | port: 9998 16 | targetPort: 9998 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.16 2 | LABEL maintainer="Frank R <12985912+fritchie@users.noreply.github.com>" 3 | 4 | # Update the package index and install in a single layer so a cached "apt-get update" layer is never reused with a newer install step; drop the index afterwards to keep the image small. 5 | RUN apt-get update && \ 6 | apt-get -y install nvme-cli && \ 7 | rm -rf /var/lib/apt/lists/* 8 | 9 | WORKDIR /go/src/nvme_exporter 10 | COPY . . 11 | 12 | RUN go get -d -v ./... 13 | RUN go install -v ./... 14 | 15 | EXPOSE 9998 16 | 17 | CMD [ "nvme_exporter" ] 18 | -------------------------------------------------------------------------------- /resources/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: nvme-exporter 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: nvme-exporter 9 | template: 10 | metadata: 11 | labels: 12 | app: nvme-exporter 13 | spec: 14 | tolerations: 15 | - key: node-role.kubernetes.io/master 16 | operator: Exists 17 | effect: NoSchedule 18 | containers: 19 | - name: nvme-exporter 20 | image: "fritchie/nvme_exporter" 21 | command: ["nvme_exporter"] 22 | # nvme-cli requires pod to be privileged 23 | securityContext: 24 | privileged: true 25 | ports: 26 | - containerPort: 9998 27 | protocol: TCP 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nvme_exporter 2 | Prometheus exporter for nvme smart-log metrics 3 
| 4 | ## Building and running 5 | 6 | ### Build 7 | 8 | ``` 9 | go build . 10 | ``` 11 | 12 | A sample Dockerfile and docker-compose.yaml are provided. 13 | 14 | ### Running 15 | 16 | Running the exporter requires the nvme-cli package to be installed on the host, and the exporter must be run as root. 17 | 18 | ``` 19 | ./nvme_exporter 20 | ``` 21 | 22 | #### Flags 23 | 24 | | Name | Description | 25 | |----|-------------------------------------------------| 26 | | port | Listen port number. Type: String. Default: 9998 | 27 | 28 | ### Sample Output 29 | 30 | Golang and process metrics have been removed from the sample. 31 | 32 | ``` 33 | # HELP nvme_avail_spare Normalized percentage of remaining spare capacity available 34 | # TYPE nvme_avail_spare gauge 35 | nvme_avail_spare{device="/dev/nvme0n1"} 100 36 | nvme_avail_spare{device="/dev/nvme1n1"} 100 37 | nvme_avail_spare{device="/dev/nvme2n1"} 100 38 | # HELP nvme_controller_busy_time Amount of time in minutes controller busy with IO commands 39 | # TYPE nvme_controller_busy_time counter 40 | nvme_controller_busy_time{device="/dev/nvme0n1"} 26476 41 | nvme_controller_busy_time{device="/dev/nvme1n1"} 2344 42 | nvme_controller_busy_time{device="/dev/nvme2n1"} 426 43 | # HELP nvme_critical_comp_time Amount of time in minutes temperature > critical threshold 44 | # TYPE nvme_critical_comp_time counter 45 | nvme_critical_comp_time{device="/dev/nvme0n1"} 0 46 | nvme_critical_comp_time{device="/dev/nvme1n1"} 0 47 | nvme_critical_comp_time{device="/dev/nvme2n1"} 0 48 | # HELP nvme_critical_warning Critical warnings for the state of the controller 49 | # TYPE nvme_critical_warning gauge 50 | nvme_critical_warning{device="/dev/nvme0n1"} 0 51 | nvme_critical_warning{device="/dev/nvme1n1"} 0 52 | nvme_critical_warning{device="/dev/nvme2n1"} 0 53 | # HELP nvme_data_units_read Number of 512 byte data units host has read 54 | # TYPE nvme_data_units_read counter 55 | nvme_data_units_read{device="/dev/nvme0n1"} 7.24388547e+08 56 | 
nvme_data_units_read{device="/dev/nvme1n1"} 2.171078e+06 57 | nvme_data_units_read{device="/dev/nvme2n1"} 4.370719e+06 58 | # HELP nvme_data_units_written Number of 512 byte data units the host has written 59 | # TYPE nvme_data_units_written counter 60 | nvme_data_units_written{device="/dev/nvme0n1"} 1.01395942e+08 61 | nvme_data_units_written{device="/dev/nvme1n1"} 3.0735598e+07 62 | nvme_data_units_written{device="/dev/nvme2n1"} 2.960926e+06 63 | # HELP nvme_endurance_grp_critical_warning_summary Critical warnings for the state of endurance groups 64 | # TYPE nvme_endurance_grp_critical_warning_summary gauge 65 | nvme_endurance_grp_critical_warning_summary{device="/dev/nvme0n1"} 0 66 | nvme_endurance_grp_critical_warning_summary{device="/dev/nvme1n1"} 0 67 | nvme_endurance_grp_critical_warning_summary{device="/dev/nvme2n1"} 0 68 | # HELP nvme_host_read_commands Number of read commands completed 69 | # TYPE nvme_host_read_commands counter 70 | nvme_host_read_commands{device="/dev/nvme0n1"} 5.028009993e+09 71 | nvme_host_read_commands{device="/dev/nvme1n1"} 1.34732619e+08 72 | nvme_host_read_commands{device="/dev/nvme2n1"} 2.78362886e+08 73 | # HELP nvme_host_write_commands Number of write commands completed 74 | # TYPE nvme_host_write_commands counter 75 | nvme_host_write_commands{device="/dev/nvme0n1"} 2.517983855e+09 76 | nvme_host_write_commands{device="/dev/nvme1n1"} 9.13277657e+08 77 | nvme_host_write_commands{device="/dev/nvme2n1"} 2.17255509e+08 78 | # HELP nvme_media_errors Number of unrecovered data integrity errors 79 | # TYPE nvme_media_errors counter 80 | nvme_media_errors{device="/dev/nvme0n1"} 0 81 | nvme_media_errors{device="/dev/nvme1n1"} 0 82 | nvme_media_errors{device="/dev/nvme2n1"} 0 83 | # HELP nvme_num_err_log_entries Lifetime number of error log entries 84 | # TYPE nvme_num_err_log_entries counter 85 | nvme_num_err_log_entries{device="/dev/nvme0n1"} 0 86 | nvme_num_err_log_entries{device="/dev/nvme1n1"} 94 87 | 
nvme_num_err_log_entries{device="/dev/nvme2n1"} 88 88 | # HELP nvme_percent_used Vendor specific estimate of the percentage of life used 89 | # TYPE nvme_percent_used gauge 90 | nvme_percent_used{device="/dev/nvme0n1"} 11 91 | nvme_percent_used{device="/dev/nvme1n1"} 0 92 | nvme_percent_used{device="/dev/nvme2n1"} 1 93 | # HELP nvme_power_cycles Number of power cycles 94 | # TYPE nvme_power_cycles counter 95 | nvme_power_cycles{device="/dev/nvme0n1"} 66 96 | nvme_power_cycles{device="/dev/nvme1n1"} 72 97 | nvme_power_cycles{device="/dev/nvme2n1"} 66 98 | # HELP nvme_power_on_hours Number of power on hours 99 | # TYPE nvme_power_on_hours counter 100 | nvme_power_on_hours{device="/dev/nvme0n1"} 16410 101 | nvme_power_on_hours{device="/dev/nvme1n1"} 3825 102 | nvme_power_on_hours{device="/dev/nvme2n1"} 16342 103 | # HELP nvme_spare_thresh Async event completion may occur when avail spare < threshold 104 | # TYPE nvme_spare_thresh gauge 105 | nvme_spare_thresh{device="/dev/nvme0n1"} 10 106 | nvme_spare_thresh{device="/dev/nvme1n1"} 10 107 | nvme_spare_thresh{device="/dev/nvme2n1"} 5 108 | # HELP nvme_temperature Temperature in degrees fahrenheit 109 | # TYPE nvme_temperature gauge 110 | nvme_temperature{device="/dev/nvme0n1"} 103.73000000000005 111 | nvme_temperature{device="/dev/nvme1n1"} 105.53000000000004 112 | nvme_temperature{device="/dev/nvme2n1"} 91.13000000000004 113 | # HELP nvme_thm_temp1_trans_count Number of times controller transitioned to lower power 114 | # TYPE nvme_thm_temp1_trans_count counter 115 | nvme_thm_temp1_trans_count{device="/dev/nvme0n1"} 0 116 | nvme_thm_temp1_trans_count{device="/dev/nvme1n1"} 0 117 | nvme_thm_temp1_trans_count{device="/dev/nvme2n1"} 0 118 | # HELP nvme_thm_temp1_trans_time Total number of seconds controller transitioned to lower power 119 | # TYPE nvme_thm_temp1_trans_time counter 120 | nvme_thm_temp1_trans_time{device="/dev/nvme0n1"} 0 121 | nvme_thm_temp1_trans_time{device="/dev/nvme1n1"} 0 122 | 
nvme_thm_temp1_trans_time{device="/dev/nvme2n1"} 0 123 | # HELP nvme_thm_temp2_trans_count Number of times controller transitioned to lower power 124 | # TYPE nvme_thm_temp2_trans_count counter 125 | nvme_thm_temp2_trans_count{device="/dev/nvme0n1"} 0 126 | nvme_thm_temp2_trans_count{device="/dev/nvme1n1"} 0 127 | nvme_thm_temp2_trans_count{device="/dev/nvme2n1"} 0 128 | # HELP nvme_thm_temp2_trans_time Total number of seconds controller transitioned to lower power 129 | # TYPE nvme_thm_temp2_trans_time counter 130 | nvme_thm_temp2_trans_time{device="/dev/nvme0n1"} 0 131 | nvme_thm_temp2_trans_time{device="/dev/nvme1n1"} 0 132 | nvme_thm_temp2_trans_time{device="/dev/nvme2n1"} 0 133 | # HELP nvme_unsafe_shutdowns Number of unsafe shutdowns 134 | # TYPE nvme_unsafe_shutdowns counter 135 | nvme_unsafe_shutdowns{device="/dev/nvme0n1"} 44 136 | nvme_unsafe_shutdowns{device="/dev/nvme1n1"} 49 137 | nvme_unsafe_shutdowns{device="/dev/nvme2n1"} 48 138 | # HELP nvme_warning_temp_time Amount of time in minutes temperature > warning threshold 139 | # TYPE nvme_warning_temp_time counter 140 | nvme_warning_temp_time{device="/dev/nvme0n1"} 0 141 | nvme_warning_temp_time{device="/dev/nvme1n1"} 0 142 | nvme_warning_temp_time{device="/dev/nvme2n1"} 2 143 | ``` 144 | 145 | ### Dashboard 146 | 147 | A sample Grafana dashboard is available: 148 | 149 | [https://grafana.com/grafana/dashboards/14706](https://grafana.com/grafana/dashboards/14706) 150 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Export nvme smart-log metrics in prometheus format 4 | 5 | import ( 6 | "flag" 7 | "log" 8 | "net/http" 9 | "os/exec" 10 | "os/user" 11 | 12 | "github.com/prometheus/client_golang/prometheus" 13 | "github.com/prometheus/client_golang/prometheus/promhttp" 14 | "github.com/tidwall/gjson" 15 | ) 16 | 17 | var labels = []string{"device"} 18 | 19 | type nvmeCollector struct { 20 | nvmeCriticalWarning *prometheus.Desc 21 | nvmeTemperature *prometheus.Desc 22 | nvmeAvailSpare *prometheus.Desc 23 | nvmeSpareThresh *prometheus.Desc 24 | nvmePercentUsed *prometheus.Desc 25 | nvmeEnduranceGrpCriticalWarningSummary *prometheus.Desc 26 | nvmeDataUnitsRead *prometheus.Desc 27 | nvmeDataUnitsWritten *prometheus.Desc 28 | nvmeHostReadCommands *prometheus.Desc 29 | nvmeHostWriteCommands *prometheus.Desc 30 | nvmeControllerBusyTime *prometheus.Desc 31 | nvmePowerCycles *prometheus.Desc 32 | nvmePowerOnHours *prometheus.Desc 33 | 
nvmeUnsafeShutdowns *prometheus.Desc 34 | nvmeMediaErrors *prometheus.Desc 35 | nvmeNumErrLogEntries *prometheus.Desc 36 | nvmeWarningTempTime *prometheus.Desc 37 | nvmeCriticalCompTime *prometheus.Desc 38 | nvmeThmTemp1TransCount *prometheus.Desc 39 | nvmeThmTemp2TransCount *prometheus.Desc 40 | nvmeThmTemp1TotalTime *prometheus.Desc 41 | nvmeThmTemp2TotalTime *prometheus.Desc 42 | } 43 | 44 | // nvme smart-log field descriptions can be found on page 180 of: 45 | // https://nvmexpress.org/wp-content/uploads/NVM-Express-Base-Specification-2_0-2021.06.02-Ratified-5.pdf 46 | 47 | func newNvmeCollector() prometheus.Collector { 48 | return &nvmeCollector{ 49 | nvmeCriticalWarning: prometheus.NewDesc( 50 | "nvme_critical_warning", 51 | "Critical warnings for the state of the controller", 52 | labels, 53 | nil, 54 | ), 55 | nvmeTemperature: prometheus.NewDesc( 56 | "nvme_temperature", 57 | "Temperature in degrees fahrenheit", 58 | labels, 59 | nil, 60 | ), 61 | nvmeAvailSpare: prometheus.NewDesc( 62 | "nvme_avail_spare", 63 | "Normalized percentage of remaining spare capacity available", 64 | labels, 65 | nil, 66 | ), 67 | nvmeSpareThresh: prometheus.NewDesc( 68 | "nvme_spare_thresh", 69 | "Async event completion may occur when avail spare < threshold", 70 | labels, 71 | nil, 72 | ), 73 | nvmePercentUsed: prometheus.NewDesc( 74 | "nvme_percent_used", 75 | "Vendor specific estimate of the percentage of life used", 76 | labels, 77 | nil, 78 | ), 79 | nvmeEnduranceGrpCriticalWarningSummary: prometheus.NewDesc( 80 | "nvme_endurance_grp_critical_warning_summary", 81 | "Critical warnings for the state of endurance groups", 82 | labels, 83 | nil, 84 | ), 85 | nvmeDataUnitsRead: prometheus.NewDesc( 86 | "nvme_data_units_read", 87 | "Number of 512 byte data units host has read", 88 | labels, 89 | nil, 90 | ), 91 | nvmeDataUnitsWritten: prometheus.NewDesc( 92 | "nvme_data_units_written", 93 | "Number of 512 byte data units the host has written", 94 | labels, 95 | nil, 96 | ), 97 | 
nvmeHostReadCommands: prometheus.NewDesc( 98 | "nvme_host_read_commands", 99 | "Number of read commands completed", 100 | labels, 101 | nil, 102 | ), 103 | nvmeHostWriteCommands: prometheus.NewDesc( 104 | "nvme_host_write_commands", 105 | "Number of write commands completed", 106 | labels, 107 | nil, 108 | ), 109 | nvmeControllerBusyTime: prometheus.NewDesc( 110 | "nvme_controller_busy_time", 111 | "Amount of time in minutes controller busy with IO commands", 112 | labels, 113 | nil, 114 | ), 115 | nvmePowerCycles: prometheus.NewDesc( 116 | "nvme_power_cycles", 117 | "Number of power cycles", 118 | labels, 119 | nil, 120 | ), 121 | nvmePowerOnHours: prometheus.NewDesc( 122 | "nvme_power_on_hours", 123 | "Number of power on hours", 124 | labels, 125 | nil, 126 | ), 127 | nvmeUnsafeShutdowns: prometheus.NewDesc( 128 | "nvme_unsafe_shutdowns", 129 | "Number of unsafe shutdowns", 130 | labels, 131 | nil, 132 | ), 133 | nvmeMediaErrors: prometheus.NewDesc( 134 | "nvme_media_errors", 135 | "Number of unrecovered data integrity errors", 136 | labels, 137 | nil, 138 | ), 139 | nvmeNumErrLogEntries: prometheus.NewDesc( 140 | "nvme_num_err_log_entries", 141 | "Lifetime number of error log entries", 142 | labels, 143 | nil, 144 | ), 145 | nvmeWarningTempTime: prometheus.NewDesc( 146 | "nvme_warning_temp_time", 147 | "Amount of time in minutes temperature > warning threshold", 148 | labels, 149 | nil, 150 | ), 151 | nvmeCriticalCompTime: prometheus.NewDesc( 152 | "nvme_critical_comp_time", 153 | "Amount of time in minutes temperature > critical threshold", 154 | labels, 155 | nil, 156 | ), 157 | nvmeThmTemp1TransCount: prometheus.NewDesc( 158 | "nvme_thm_temp1_trans_count", 159 | "Number of times controller transitioned to lower power", 160 | labels, 161 | nil, 162 | ), 163 | nvmeThmTemp2TransCount: prometheus.NewDesc( 164 | "nvme_thm_temp2_trans_count", 165 | "Number of times controller transitioned to lower power", 166 | labels, 167 | nil, 168 | ), 169 | 
nvmeThmTemp1TotalTime: prometheus.NewDesc( 170 | "nvme_thm_temp1_trans_time", 171 | "Total number of seconds controller transitioned to lower power", 172 | labels, 173 | nil, 174 | ), 175 | nvmeThmTemp2TotalTime: prometheus.NewDesc( 176 | "nvme_thm_temp2_trans_time", 177 | "Total number of seconds controller transitioned to lower power", 178 | labels, 179 | nil, 180 | ), 181 | } 182 | } 183 | 184 | func (c *nvmeCollector) Describe(ch chan<- *prometheus.Desc) { 185 | ch <- c.nvmeCriticalWarning 186 | ch <- c.nvmeTemperature 187 | ch <- c.nvmeAvailSpare 188 | ch <- c.nvmeSpareThresh 189 | ch <- c.nvmePercentUsed 190 | ch <- c.nvmeEnduranceGrpCriticalWarningSummary 191 | ch <- c.nvmeDataUnitsRead 192 | ch <- c.nvmeDataUnitsWritten 193 | ch <- c.nvmeHostReadCommands 194 | ch <- c.nvmeHostWriteCommands 195 | ch <- c.nvmeControllerBusyTime 196 | ch <- c.nvmePowerCycles 197 | ch <- c.nvmePowerOnHours 198 | ch <- c.nvmeUnsafeShutdowns 199 | ch <- c.nvmeMediaErrors 200 | ch <- c.nvmeNumErrLogEntries 201 | ch <- c.nvmeWarningTempTime 202 | ch <- c.nvmeCriticalCompTime 203 | ch <- c.nvmeThmTemp1TransCount 204 | ch <- c.nvmeThmTemp2TransCount 205 | ch <- c.nvmeThmTemp1TotalTime 206 | ch <- c.nvmeThmTemp2TotalTime 207 | } 208 | 209 | func (c *nvmeCollector) Collect(ch chan<- prometheus.Metric) { 210 | nvmeDeviceCmd, err := exec.Command("nvme", "list", "-o", "json").Output() 211 | if err != nil { 212 | log.Fatalf("Error running nvme command: %s\n", err) 213 | } 214 | if !gjson.Valid(string(nvmeDeviceCmd)) { 215 | log.Fatal("nvmeDeviceCmd json is not valid") 216 | } 217 | nvmeDeviceList := gjson.Get(string(nvmeDeviceCmd), "Devices.#.DevicePath") 218 | for _, nvmeDevice := range nvmeDeviceList.Array() { 219 | nvmeSmartLog, err := exec.Command("nvme", "smart-log", nvmeDevice.String(), "-o", "json").Output() 220 | if err != nil { 221 | log.Fatalf("Error running nvme smart-log command for device %s: %s\n", nvmeDevice.String(), err) 222 | } 223 | if !gjson.Valid(string(nvmeSmartLog)) 
{ 224 | log.Fatalf("nvmeSmartLog json is not valid for device: %s: %s\n", nvmeDevice.String(), err) 225 | } 226 | nvmeSmartLogMetrics := gjson.GetMany(string(nvmeSmartLog), 227 | "critical_warning", 228 | "temperature", 229 | "avail_spare", 230 | "spare_thresh", 231 | "percent_used", 232 | "endurance_grp_critical_warning_summary", 233 | "data_units_read", 234 | "data_units_written", 235 | "host_read_commands", 236 | "host_write_commands", 237 | "controller_busy_time", 238 | "power_cycles", 239 | "power_on_hours", 240 | "unsafe_shutdowns", 241 | "media_errors", 242 | "num_err_log_entries", 243 | "warning_temp_time", 244 | "critical_comp_time", 245 | "thm_temp1_trans_count", 246 | "thm_temp2_trans_count", 247 | "thm_temp1_total_time", 248 | "thm_temp2_total_time",) 249 | 250 | ch <- prometheus.MustNewConstMetric(c.nvmeCriticalWarning, prometheus.GaugeValue, nvmeSmartLogMetrics[0].Float(), nvmeDevice.String()) 251 | // convert kelvin to fahrenheit 252 | ch <- prometheus.MustNewConstMetric(c.nvmeTemperature, prometheus.GaugeValue, (nvmeSmartLogMetrics[1].Float() - 273.15) * 9/5 + 32, nvmeDevice.String()) 253 | ch <- prometheus.MustNewConstMetric(c.nvmeAvailSpare, prometheus.GaugeValue, nvmeSmartLogMetrics[2].Float(), nvmeDevice.String()) 254 | ch <- prometheus.MustNewConstMetric(c.nvmeSpareThresh, prometheus.GaugeValue, nvmeSmartLogMetrics[3].Float(), nvmeDevice.String()) 255 | ch <- prometheus.MustNewConstMetric(c.nvmePercentUsed, prometheus.GaugeValue, nvmeSmartLogMetrics[4].Float(), nvmeDevice.String()) 256 | ch <- prometheus.MustNewConstMetric(c.nvmeEnduranceGrpCriticalWarningSummary, prometheus.GaugeValue, nvmeSmartLogMetrics[5].Float(), nvmeDevice.String()) 257 | ch <- prometheus.MustNewConstMetric(c.nvmeDataUnitsRead, prometheus.CounterValue, nvmeSmartLogMetrics[6].Float(), nvmeDevice.String()) 258 | ch <- prometheus.MustNewConstMetric(c.nvmeDataUnitsWritten, prometheus.CounterValue, nvmeSmartLogMetrics[7].Float(), nvmeDevice.String()) 259 | ch <- 
prometheus.MustNewConstMetric(c.nvmeHostReadCommands, prometheus.CounterValue, nvmeSmartLogMetrics[8].Float(), nvmeDevice.String()) 260 | ch <- prometheus.MustNewConstMetric(c.nvmeHostWriteCommands, prometheus.CounterValue, nvmeSmartLogMetrics[9].Float(), nvmeDevice.String()) 261 | ch <- prometheus.MustNewConstMetric(c.nvmeControllerBusyTime, prometheus.CounterValue, nvmeSmartLogMetrics[10].Float(), nvmeDevice.String()) 262 | ch <- prometheus.MustNewConstMetric(c.nvmePowerCycles, prometheus.CounterValue, nvmeSmartLogMetrics[11].Float(), nvmeDevice.String()) 263 | ch <- prometheus.MustNewConstMetric(c.nvmePowerOnHours, prometheus.CounterValue, nvmeSmartLogMetrics[12].Float(), nvmeDevice.String()) 264 | ch <- prometheus.MustNewConstMetric(c.nvmeUnsafeShutdowns, prometheus.CounterValue, nvmeSmartLogMetrics[13].Float(), nvmeDevice.String()) 265 | ch <- prometheus.MustNewConstMetric(c.nvmeMediaErrors, prometheus.CounterValue, nvmeSmartLogMetrics[14].Float(), nvmeDevice.String()) 266 | ch <- prometheus.MustNewConstMetric(c.nvmeNumErrLogEntries, prometheus.CounterValue, nvmeSmartLogMetrics[15].Float(), nvmeDevice.String()) 267 | ch <- prometheus.MustNewConstMetric(c.nvmeWarningTempTime, prometheus.CounterValue, nvmeSmartLogMetrics[16].Float(), nvmeDevice.String()) 268 | ch <- prometheus.MustNewConstMetric(c.nvmeCriticalCompTime, prometheus.CounterValue, nvmeSmartLogMetrics[17].Float(), nvmeDevice.String()) 269 | ch <- prometheus.MustNewConstMetric(c.nvmeThmTemp1TransCount, prometheus.CounterValue, nvmeSmartLogMetrics[18].Float(), nvmeDevice.String()) 270 | ch <- prometheus.MustNewConstMetric(c.nvmeThmTemp2TransCount, prometheus.CounterValue, nvmeSmartLogMetrics[19].Float(), nvmeDevice.String()) 271 | ch <- prometheus.MustNewConstMetric(c.nvmeThmTemp1TotalTime, prometheus.CounterValue, nvmeSmartLogMetrics[20].Float(), nvmeDevice.String()) 272 | ch <- prometheus.MustNewConstMetric(c.nvmeThmTemp2TotalTime, prometheus.CounterValue, nvmeSmartLogMetrics[21].Float(), 
nvmeDevice.String()) 273 | } 274 | } 275 | 276 | func main() { 277 | port := flag.String("port", "9998", "port to listen on") 278 | flag.Parse() 279 | // check user 280 | currentUser, err := user.Current() 281 | if err != nil { 282 | log.Fatalf("Error getting current user %s\n", err) 283 | } 284 | if currentUser.Username != "root" { 285 | log.Fatalln("Error: you must be root to use nvme-cli") 286 | } 287 | // check for nvme-cli executable 288 | _, err = exec.LookPath("nvme") 289 | if err != nil { 290 | log.Fatalf("Cannot find nvme command in path: %s\n", err) 291 | } 292 | prometheus.MustRegister(newNvmeCollector()) 293 | http.Handle("/metrics", promhttp.Handler()) 294 | log.Fatal(http.ListenAndServe(":"+*port, nil)) 295 | } 296 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 3 | github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 4 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 5 | github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 6 | github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= 7 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 8 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 9 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 10 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 11 
| github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= 12 | github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 13 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= 16 | github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= 17 | github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= 18 | github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= 19 | github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= 20 | github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= 21 | github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= 22 | github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 23 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 24 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 25 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 26 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 27 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 28 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 29 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 30 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 31 | github.com/golang/protobuf v1.4.2/go.mod 
h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 32 | github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= 33 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 34 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 35 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 36 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 37 | github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 38 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 39 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 40 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 41 | github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 42 | github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 43 | github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 44 | github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 45 | github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 46 | github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= 47 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 48 | github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 49 | github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 50 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 51 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 52 | 
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 53 | github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= 54 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 55 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 56 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 57 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 58 | github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 59 | github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 60 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 61 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 62 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 63 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 64 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 65 | github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 66 | github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= 67 | github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= 68 | github.com/prometheus/client_golang v1.11.0 h1:HNkLOAEQMIDv/K+04rukrLx6ch7msSRwf3/SASFAGtQ= 69 | github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= 70 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod 
h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 71 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 72 | github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= 73 | github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 74 | github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= 75 | github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= 76 | github.com/prometheus/common v0.26.0 h1:iMAkS2TDoNWnKM+Kopnx/8tnEStIfpYA0ur0xQzzhMQ= 77 | github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= 78 | github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 79 | github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= 80 | github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= 81 | github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= 82 | github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= 83 | github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 84 | github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= 85 | github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= 86 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 87 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 88 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 89 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 90 | github.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 91 | github.com/tidwall/gjson v1.8.1 h1:8j5EE9Hrh3l9Od1OIEDAb7IpezNA20UdRngNAj5N0WU= 92 | github.com/tidwall/gjson v1.8.1/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= 93 | github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= 94 | github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= 95 | github.com/tidwall/pretty v1.1.0 h1:K3hMW5epkdAVwibsQEfR/7Zj0Qgt4DxtNumTq/VloO8= 96 | github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= 97 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 98 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 99 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 100 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 101 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 102 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 103 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 104 | golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 105 | golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 106 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 107 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 108 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 109 | golang.org/x/sync 
v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 110 | golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 111 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 112 | golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 113 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 114 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 115 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 116 | golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 117 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 118 | golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 119 | golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 120 | golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 121 | golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 h1:JWgyZ1qgdTaF3N3oxC+MdTV7qvEEgHo3otj+HB5CM7Q= 122 | golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 123 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 124 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 125 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 126 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 127 | golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 128 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 129 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 130 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 131 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 132 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 133 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 134 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 135 | google.golang.org/protobuf v1.26.0-rc.1 h1:7QnIQpGRHE5RnLKnESfDoxm2dTapTZua5a0kS0A+VXQ= 136 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 137 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 138 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 139 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 140 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 141 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 142 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 143 | gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 144 | gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 145 | -------------------------------------------------------------------------------- /grafana/dashboard.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "Prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "7.0.1" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "stat", 34 | "name": "Stat", 35 | "version": "" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [ 40 | { 41 | "builtIn": 1, 42 | "datasource": "-- Grafana --", 43 | "enable": true, 44 | "hide": true, 45 | "iconColor": "rgba(0, 211, 255, 1)", 46 | "name": "Annotations & Alerts", 47 | "type": "dashboard" 48 | } 49 | ] 50 | }, 51 | "editable": true, 52 | "gnetId": null, 53 | "graphTooltip": 0, 54 | "id": null, 55 | "links": [], 56 | "panels": [ 57 | { 58 | "datasource": "${DS_PROMETHEUS}", 59 | "description": "Number of current critical warnings", 60 | "fieldConfig": { 61 | "defaults": { 62 | "custom": {}, 63 | "mappings": [], 64 | "thresholds": { 65 | "mode": "absolute", 66 | "steps": [ 67 | { 68 | "color": "green", 69 | "value": null 70 | }, 71 | { 72 | "color": "red", 73 | "value": 1 74 | } 75 | ] 76 | } 77 | }, 78 | "overrides": [] 79 | }, 80 | "gridPos": { 81 | "h": 7, 82 | "w": 6, 83 | "x": 0, 84 | "y": 0 85 | }, 86 | "id": 10, 87 | "options": { 88 | "colorMode": "value", 89 | "graphMode": "area", 90 | "justifyMode": "auto", 91 | "orientation": "auto", 92 | "reduceOptions": { 93 | "calcs": [ 94 | "mean" 95 | ], 96 | "values": false 97 | } 98 | }, 99 | "pluginVersion": "7.0.1", 100 | "targets": [ 101 | { 102 | "expr": "sum(nvme_critical_warning)", 103 | "format": "time_series", 104 | "interval": 
"", 105 | "legendFormat": "", 106 | "refId": "A" 107 | } 108 | ], 109 | "timeFrom": null, 110 | "timeShift": null, 111 | "title": "NVMe Current Critical Warning", 112 | "type": "stat" 113 | }, 114 | { 115 | "datasource": "${DS_PROMETHEUS}", 116 | "description": "Number of devices with nvme_percent_used > 80", 117 | "fieldConfig": { 118 | "defaults": { 119 | "custom": {}, 120 | "mappings": [], 121 | "thresholds": { 122 | "mode": "absolute", 123 | "steps": [ 124 | { 125 | "color": "green", 126 | "value": null 127 | }, 128 | { 129 | "color": "red", 130 | "value": 1 131 | } 132 | ] 133 | } 134 | }, 135 | "overrides": [] 136 | }, 137 | "gridPos": { 138 | "h": 7, 139 | "w": 6, 140 | "x": 6, 141 | "y": 0 142 | }, 143 | "id": 13, 144 | "options": { 145 | "colorMode": "value", 146 | "graphMode": "area", 147 | "justifyMode": "auto", 148 | "orientation": "auto", 149 | "reduceOptions": { 150 | "calcs": [ 151 | "mean" 152 | ], 153 | "values": false 154 | } 155 | }, 156 | "pluginVersion": "7.0.1", 157 | "targets": [ 158 | { 159 | "expr": "count(nvme_percent_used > 80) or (1 - absent(nvme_percent_used > 80))", 160 | "format": "time_series", 161 | "interval": "", 162 | "legendFormat": "", 163 | "refId": "A" 164 | } 165 | ], 166 | "timeFrom": null, 167 | "timeShift": null, 168 | "title": "NVMe Percentage Used > 80", 169 | "type": "stat" 170 | }, 171 | { 172 | "datasource": "${DS_PROMETHEUS}", 173 | "description": "Number of critical temperature warnings over last 24 hours", 174 | "fieldConfig": { 175 | "defaults": { 176 | "custom": {}, 177 | "mappings": [], 178 | "thresholds": { 179 | "mode": "absolute", 180 | "steps": [ 181 | { 182 | "color": "green", 183 | "value": null 184 | }, 185 | { 186 | "color": "red", 187 | "value": 1 188 | } 189 | ] 190 | } 191 | }, 192 | "overrides": [] 193 | }, 194 | "gridPos": { 195 | "h": 7, 196 | "w": 6, 197 | "x": 12, 198 | "y": 0 199 | }, 200 | "id": 11, 201 | "options": { 202 | "colorMode": "value", 203 | "graphMode": "area", 204 | "justifyMode": 
"auto", 205 | "orientation": "auto", 206 | "reduceOptions": { 207 | "calcs": [ 208 | "mean" 209 | ], 210 | "values": false 211 | } 212 | }, 213 | "pluginVersion": "7.0.1", 214 | "targets": [ 215 | { 216 | "expr": "sum(rate(nvme_critical_comp_time[5m]))", 217 | "format": "time_series", 218 | "interval": "", 219 | "legendFormat": "", 220 | "refId": "A" 221 | } 222 | ], 223 | "timeFrom": null, 224 | "timeShift": null, 225 | "title": "NVMe Recent Critical Temp ", 226 | "type": "stat" 227 | }, 228 | { 229 | "datasource": "${DS_PROMETHEUS}", 230 | "description": "Number of media errors over last 24 hours", 231 | "fieldConfig": { 232 | "defaults": { 233 | "custom": {}, 234 | "mappings": [], 235 | "thresholds": { 236 | "mode": "absolute", 237 | "steps": [ 238 | { 239 | "color": "green", 240 | "value": null 241 | }, 242 | { 243 | "color": "red", 244 | "value": 1 245 | } 246 | ] 247 | } 248 | }, 249 | "overrides": [] 250 | }, 251 | "gridPos": { 252 | "h": 7, 253 | "w": 6, 254 | "x": 18, 255 | "y": 0 256 | }, 257 | "id": 12, 258 | "options": { 259 | "colorMode": "value", 260 | "graphMode": "area", 261 | "justifyMode": "auto", 262 | "orientation": "auto", 263 | "reduceOptions": { 264 | "calcs": [ 265 | "mean" 266 | ], 267 | "values": false 268 | } 269 | }, 270 | "pluginVersion": "7.0.1", 271 | "targets": [ 272 | { 273 | "expr": "sum(rate(nvme_media_errors[24h]))", 274 | "format": "time_series", 275 | "interval": "", 276 | "legendFormat": "", 277 | "refId": "A" 278 | } 279 | ], 280 | "timeFrom": null, 281 | "timeShift": null, 282 | "title": "NVMe Recent Media Error", 283 | "type": "stat" 284 | }, 285 | { 286 | "aliasColors": {}, 287 | "bars": false, 288 | "dashLength": 10, 289 | "dashes": false, 290 | "datasource": "${DS_PROMETHEUS}", 291 | "decimals": null, 292 | "description": "Vendor specific estimate of the percentage of life used", 293 | "fieldConfig": { 294 | "defaults": { 295 | "custom": {} 296 | }, 297 | "overrides": [] 298 | }, 299 | "fill": 0, 300 | "fillGradient": 0, 
301 | "gridPos": { 302 | "h": 9, 303 | "w": 12, 304 | "x": 0, 305 | "y": 7 306 | }, 307 | "hiddenSeries": false, 308 | "id": 7, 309 | "legend": { 310 | "avg": false, 311 | "current": false, 312 | "max": false, 313 | "min": false, 314 | "show": true, 315 | "total": false, 316 | "values": false 317 | }, 318 | "lines": true, 319 | "linewidth": 1, 320 | "nullPointMode": "null", 321 | "options": { 322 | "dataLinks": [] 323 | }, 324 | "percentage": false, 325 | "pointradius": 2, 326 | "points": false, 327 | "renderer": "flot", 328 | "seriesOverrides": [], 329 | "spaceLength": 10, 330 | "stack": false, 331 | "steppedLine": false, 332 | "targets": [ 333 | { 334 | "expr": "nvme_percent_used{job=\"nvme_exporter\"}", 335 | "interval": "", 336 | "legendFormat": "nvme_percent_used{device={{ device }}, instance={{ instance }}}", 337 | "refId": "A" 338 | } 339 | ], 340 | "thresholds": [], 341 | "timeFrom": null, 342 | "timeRegions": [], 343 | "timeShift": null, 344 | "title": "NVMe Percent Used", 345 | "tooltip": { 346 | "shared": true, 347 | "sort": 0, 348 | "value_type": "individual" 349 | }, 350 | "type": "graph", 351 | "xaxis": { 352 | "buckets": null, 353 | "mode": "time", 354 | "name": null, 355 | "show": true, 356 | "values": [] 357 | }, 358 | "yaxes": [ 359 | { 360 | "$$hashKey": "object:79", 361 | "decimals": 0, 362 | "format": "percent", 363 | "label": null, 364 | "logBase": 1, 365 | "max": null, 366 | "min": "0", 367 | "show": true 368 | }, 369 | { 370 | "$$hashKey": "object:80", 371 | "decimals": null, 372 | "format": "short", 373 | "label": null, 374 | "logBase": 1, 375 | "max": null, 376 | "min": null, 377 | "show": true 378 | } 379 | ], 380 | "yaxis": { 381 | "align": false, 382 | "alignLevel": null 383 | } 384 | }, 385 | { 386 | "aliasColors": {}, 387 | "bars": false, 388 | "dashLength": 10, 389 | "dashes": false, 390 | "datasource": "${DS_PROMETHEUS}", 391 | "decimals": null, 392 | "description": "Normalized percentage of remaining spare capacity available and 
warning threshold", 393 | "fieldConfig": { 394 | "defaults": { 395 | "custom": {} 396 | }, 397 | "overrides": [] 398 | }, 399 | "fill": 0, 400 | "fillGradient": 0, 401 | "gridPos": { 402 | "h": 9, 403 | "w": 12, 404 | "x": 12, 405 | "y": 7 406 | }, 407 | "hiddenSeries": false, 408 | "id": 4, 409 | "legend": { 410 | "avg": false, 411 | "current": false, 412 | "max": false, 413 | "min": false, 414 | "show": true, 415 | "total": false, 416 | "values": false 417 | }, 418 | "lines": true, 419 | "linewidth": 1, 420 | "nullPointMode": "null", 421 | "options": { 422 | "dataLinks": [] 423 | }, 424 | "percentage": false, 425 | "pointradius": 2, 426 | "points": false, 427 | "renderer": "flot", 428 | "seriesOverrides": [], 429 | "spaceLength": 10, 430 | "stack": false, 431 | "steppedLine": false, 432 | "targets": [ 433 | { 434 | "expr": "nvme_avail_spare{job=\"nvme_exporter\"}", 435 | "interval": "", 436 | "legendFormat": "nvme_avail_spare{device={{ device }}, instance={{ instance }}}", 437 | "refId": "A" 438 | }, 439 | { 440 | "expr": "nvme_spare_thresh{job=\"nvme_exporter\"}", 441 | "interval": "", 442 | "legendFormat": "nvme_spare_thresh{device={{ device }}, instance={{ instance }}}", 443 | "refId": "B" 444 | } 445 | ], 446 | "thresholds": [], 447 | "timeFrom": null, 448 | "timeRegions": [], 449 | "timeShift": null, 450 | "title": "NVMe Available Spare and Spare Threshold", 451 | "tooltip": { 452 | "shared": true, 453 | "sort": 0, 454 | "value_type": "individual" 455 | }, 456 | "type": "graph", 457 | "xaxis": { 458 | "buckets": null, 459 | "mode": "time", 460 | "name": null, 461 | "show": true, 462 | "values": [] 463 | }, 464 | "yaxes": [ 465 | { 466 | "$$hashKey": "object:79", 467 | "decimals": 0, 468 | "format": "percent", 469 | "label": null, 470 | "logBase": 1, 471 | "max": "100", 472 | "min": "0", 473 | "show": true 474 | }, 475 | { 476 | "$$hashKey": "object:80", 477 | "decimals": null, 478 | "format": "short", 479 | "label": null, 480 | "logBase": 1, 481 | "max": 
null, 482 | "min": null, 483 | "show": true 484 | } 485 | ], 486 | "yaxis": { 487 | "align": false, 488 | "alignLevel": null 489 | } 490 | }, 491 | { 492 | "aliasColors": {}, 493 | "bars": false, 494 | "dashLength": 10, 495 | "dashes": false, 496 | "datasource": "${DS_PROMETHEUS}", 497 | "decimals": null, 498 | "description": "Critical warnings for the state of the controller", 499 | "fieldConfig": { 500 | "defaults": { 501 | "custom": {} 502 | }, 503 | "overrides": [] 504 | }, 505 | "fill": 1, 506 | "fillGradient": 0, 507 | "gridPos": { 508 | "h": 9, 509 | "w": 12, 510 | "x": 0, 511 | "y": 16 512 | }, 513 | "hiddenSeries": false, 514 | "id": 2, 515 | "legend": { 516 | "avg": false, 517 | "current": false, 518 | "max": false, 519 | "min": false, 520 | "show": true, 521 | "total": false, 522 | "values": false 523 | }, 524 | "lines": true, 525 | "linewidth": 1, 526 | "nullPointMode": "null", 527 | "options": { 528 | "dataLinks": [] 529 | }, 530 | "percentage": false, 531 | "pointradius": 2, 532 | "points": false, 533 | "renderer": "flot", 534 | "seriesOverrides": [], 535 | "spaceLength": 10, 536 | "stack": false, 537 | "steppedLine": false, 538 | "targets": [ 539 | { 540 | "expr": "nvme_critical_warning{job=\"nvme_exporter\"}", 541 | "interval": "", 542 | "legendFormat": "nvme_critical_warning{device={{ device }}, instance={{ instance }}}", 543 | "refId": "A" 544 | } 545 | ], 546 | "thresholds": [], 547 | "timeFrom": null, 548 | "timeRegions": [], 549 | "timeShift": null, 550 | "title": "NVMe Critical Warnings", 551 | "tooltip": { 552 | "shared": true, 553 | "sort": 0, 554 | "value_type": "individual" 555 | }, 556 | "type": "graph", 557 | "xaxis": { 558 | "buckets": null, 559 | "mode": "time", 560 | "name": null, 561 | "show": true, 562 | "values": [] 563 | }, 564 | "yaxes": [ 565 | { 566 | "$$hashKey": "object:79", 567 | "decimals": 0, 568 | "format": "short", 569 | "label": null, 570 | "logBase": 1, 571 | "max": null, 572 | "min": "0", 573 | "show": true 574 | }, 
575 | { 576 | "$$hashKey": "object:80", 577 | "decimals": null, 578 | "format": "short", 579 | "label": null, 580 | "logBase": 1, 581 | "max": null, 582 | "min": null, 583 | "show": true 584 | } 585 | ], 586 | "yaxis": { 587 | "align": false, 588 | "alignLevel": null 589 | } 590 | }, 591 | { 592 | "aliasColors": {}, 593 | "bars": false, 594 | "dashLength": 10, 595 | "dashes": false, 596 | "datasource": "${DS_PROMETHEUS}", 597 | "decimals": null, 598 | "description": "Number of unrecovered data integrity errors", 599 | "fieldConfig": { 600 | "defaults": { 601 | "custom": {} 602 | }, 603 | "overrides": [] 604 | }, 605 | "fill": 0, 606 | "fillGradient": 0, 607 | "gridPos": { 608 | "h": 9, 609 | "w": 12, 610 | "x": 12, 611 | "y": 16 612 | }, 613 | "hiddenSeries": false, 614 | "id": 5, 615 | "legend": { 616 | "avg": false, 617 | "current": false, 618 | "max": false, 619 | "min": false, 620 | "show": true, 621 | "total": false, 622 | "values": false 623 | }, 624 | "lines": true, 625 | "linewidth": 1, 626 | "nullPointMode": "null", 627 | "options": { 628 | "dataLinks": [] 629 | }, 630 | "percentage": false, 631 | "pointradius": 2, 632 | "points": false, 633 | "renderer": "flot", 634 | "seriesOverrides": [], 635 | "spaceLength": 10, 636 | "stack": false, 637 | "steppedLine": false, 638 | "targets": [ 639 | { 640 | "expr": "nvme_media_errors{job=\"nvme_exporter\"}", 641 | "interval": "", 642 | "legendFormat": "nvme_media_errors{device={{ device }}, instance={{ instance }}}", 643 | "refId": "A" 644 | } 645 | ], 646 | "thresholds": [], 647 | "timeFrom": null, 648 | "timeRegions": [], 649 | "timeShift": null, 650 | "title": "NVMe Media Errors", 651 | "tooltip": { 652 | "shared": true, 653 | "sort": 0, 654 | "value_type": "individual" 655 | }, 656 | "type": "graph", 657 | "xaxis": { 658 | "buckets": null, 659 | "mode": "time", 660 | "name": null, 661 | "show": true, 662 | "values": [] 663 | }, 664 | "yaxes": [ 665 | { 666 | "$$hashKey": "object:79", 667 | "decimals": 0, 668 | 
"format": "short", 669 | "label": null, 670 | "logBase": 1, 671 | "max": null, 672 | "min": "0", 673 | "show": true 674 | }, 675 | { 676 | "$$hashKey": "object:80", 677 | "decimals": null, 678 | "format": "short", 679 | "label": null, 680 | "logBase": 1, 681 | "max": null, 682 | "min": null, 683 | "show": true 684 | } 685 | ], 686 | "yaxis": { 687 | "align": false, 688 | "alignLevel": null 689 | } 690 | }, 691 | { 692 | "aliasColors": {}, 693 | "bars": false, 694 | "dashLength": 10, 695 | "dashes": false, 696 | "datasource": "${DS_PROMETHEUS}", 697 | "decimals": null, 698 | "description": "Temperature in degrees Fahrenheit", 699 | "fieldConfig": { 700 | "defaults": { 701 | "custom": {} 702 | }, 703 | "overrides": [] 704 | }, 705 | "fill": 0, 706 | "fillGradient": 0, 707 | "gridPos": { 708 | "h": 9, 709 | "w": 12, 710 | "x": 0, 711 | "y": 25 712 | }, 713 | "hiddenSeries": false, 714 | "id": 14, 715 | "legend": { 716 | "avg": false, 717 | "current": false, 718 | "max": false, 719 | "min": false, 720 | "show": true, 721 | "total": false, 722 | "values": false 723 | }, 724 | "lines": true, 725 | "linewidth": 1, 726 | "nullPointMode": "null", 727 | "options": { 728 | "dataLinks": [] 729 | }, 730 | "percentage": false, 731 | "pointradius": 2, 732 | "points": false, 733 | "renderer": "flot", 734 | "seriesOverrides": [], 735 | "spaceLength": 10, 736 | "stack": false, 737 | "steppedLine": false, 738 | "targets": [ 739 | { 740 | "expr": "nvme_temperature{job=\"nvme_exporter\"}", 741 | "interval": "", 742 | "legendFormat": "nvme_temperature{device={{ device }}, instance={{ instance }}}", 743 | "refId": "A" 744 | } 745 | ], 746 | "thresholds": [], 747 | "timeFrom": null, 748 | "timeRegions": [], 749 | "timeShift": null, 750 | "title": "NVMe Temperature", 751 | "tooltip": { 752 | "shared": true, 753 | "sort": 0, 754 | "value_type": "individual" 755 | }, 756 | "type": "graph", 757 | "xaxis": { 758 | "buckets": null, 759 | "mode": "time", 760 | "name": null, 761 | "show": true, 
762 | "values": [] 763 | }, 764 | "yaxes": [ 765 | { 766 | "$$hashKey": "object:79", 767 | "decimals": 0, 768 | "format": "fahrenheit", 769 | "label": null, 770 | "logBase": 1, 771 | "max": null, 772 | "min": "0", 773 | "show": true 774 | }, 775 | { 776 | "$$hashKey": "object:80", 777 | "decimals": null, 778 | "format": "short", 779 | "label": null, 780 | "logBase": 1, 781 | "max": null, 782 | "min": null, 783 | "show": true 784 | } 785 | ], 786 | "yaxis": { 787 | "align": false, 788 | "alignLevel": null 789 | } 790 | }, 791 | { 792 | "aliasColors": {}, 793 | "bars": false, 794 | "dashLength": 10, 795 | "dashes": false, 796 | "datasource": "${DS_PROMETHEUS}", 797 | "decimals": null, 798 | "description": "Amount of time in minutes temperature > warning and critical threshold", 799 | "fieldConfig": { 800 | "defaults": { 801 | "custom": {} 802 | }, 803 | "overrides": [] 804 | }, 805 | "fill": 0, 806 | "fillGradient": 0, 807 | "gridPos": { 808 | "h": 9, 809 | "w": 12, 810 | "x": 12, 811 | "y": 25 812 | }, 813 | "hiddenSeries": false, 814 | "id": 6, 815 | "legend": { 816 | "avg": false, 817 | "current": false, 818 | "max": false, 819 | "min": false, 820 | "show": true, 821 | "total": false, 822 | "values": false 823 | }, 824 | "lines": true, 825 | "linewidth": 1, 826 | "nullPointMode": "null", 827 | "options": { 828 | "dataLinks": [] 829 | }, 830 | "percentage": false, 831 | "pointradius": 2, 832 | "points": false, 833 | "renderer": "flot", 834 | "seriesOverrides": [], 835 | "spaceLength": 10, 836 | "stack": false, 837 | "steppedLine": false, 838 | "targets": [ 839 | { 840 | "expr": "nvme_warning_temp_time{job=\"nvme_exporter\"}", 841 | "interval": "", 842 | "legendFormat": "nvme_warning_temp_time{device={{ device }}, instance={{ instance }}}", 843 | "refId": "A" 844 | }, 845 | { 846 | "expr": "nvme_critical_comp_time{job=\"nvme_exporter\"}", 847 | "interval": "", 848 | "legendFormat": "nvme_critical_comp_time{device={{ device }}, instance={{ instance }}}", 849 | 
"refId": "B" 850 | } 851 | ], 852 | "thresholds": [], 853 | "timeFrom": null, 854 | "timeRegions": [], 855 | "timeShift": null, 856 | "title": "NVMe Warning and Critical Temp Time", 857 | "tooltip": { 858 | "shared": true, 859 | "sort": 0, 860 | "value_type": "individual" 861 | }, 862 | "type": "graph", 863 | "xaxis": { 864 | "buckets": null, 865 | "mode": "time", 866 | "name": null, 867 | "show": true, 868 | "values": [] 869 | }, 870 | "yaxes": [ 871 | { 872 | "$$hashKey": "object:79", 873 | "decimals": 0, 874 | "format": "m", 875 | "label": null, 876 | "logBase": 1, 877 | "max": null, 878 | "min": "0", 879 | "show": true 880 | }, 881 | { 882 | "$$hashKey": "object:80", 883 | "decimals": null, 884 | "format": "short", 885 | "label": null, 886 | "logBase": 1, 887 | "max": null, 888 | "min": null, 889 | "show": true 890 | } 891 | ], 892 | "yaxis": { 893 | "align": false, 894 | "alignLevel": null 895 | } 896 | }, 897 | { 898 | "aliasColors": {}, 899 | "bars": false, 900 | "dashLength": 10, 901 | "dashes": false, 902 | "datasource": "${DS_PROMETHEUS}", 903 | "decimals": null, 904 | "description": "Lifetime number of error log entries", 905 | "fieldConfig": { 906 | "defaults": { 907 | "custom": {} 908 | }, 909 | "overrides": [] 910 | }, 911 | "fill": 0, 912 | "fillGradient": 0, 913 | "gridPos": { 914 | "h": 9, 915 | "w": 12, 916 | "x": 0, 917 | "y": 34 918 | }, 919 | "hiddenSeries": false, 920 | "id": 8, 921 | "legend": { 922 | "avg": false, 923 | "current": false, 924 | "max": false, 925 | "min": false, 926 | "show": true, 927 | "total": false, 928 | "values": false 929 | }, 930 | "lines": true, 931 | "linewidth": 1, 932 | "nullPointMode": "null", 933 | "options": { 934 | "dataLinks": [] 935 | }, 936 | "percentage": false, 937 | "pointradius": 2, 938 | "points": false, 939 | "renderer": "flot", 940 | "seriesOverrides": [], 941 | "spaceLength": 10, 942 | "stack": false, 943 | "steppedLine": false, 944 | "targets": [ 945 | { 946 | "expr": 
"nvme_num_err_log_entries{job=\"nvme_exporter\"}", 947 | "interval": "", 948 | "legendFormat": "nvme_num_err_log_entries{device={{ device }}, instance={{ instance }}}", 949 | "refId": "A" 950 | } 951 | ], 952 | "thresholds": [], 953 | "timeFrom": null, 954 | "timeRegions": [], 955 | "timeShift": null, 956 | "title": "NVMe Number of Error Log Entries", 957 | "tooltip": { 958 | "shared": true, 959 | "sort": 0, 960 | "value_type": "individual" 961 | }, 962 | "type": "graph", 963 | "xaxis": { 964 | "buckets": null, 965 | "mode": "time", 966 | "name": null, 967 | "show": true, 968 | "values": [] 969 | }, 970 | "yaxes": [ 971 | { 972 | "$$hashKey": "object:79", 973 | "decimals": 0, 974 | "format": "short", 975 | "label": null, 976 | "logBase": 1, 977 | "max": null, 978 | "min": "0", 979 | "show": true 980 | }, 981 | { 982 | "$$hashKey": "object:80", 983 | "decimals": null, 984 | "format": "short", 985 | "label": null, 986 | "logBase": 1, 987 | "max": null, 988 | "min": null, 989 | "show": true 990 | } 991 | ], 992 | "yaxis": { 993 | "align": false, 994 | "alignLevel": null 995 | } 996 | }, 997 | { 998 | "aliasColors": {}, 999 | "bars": false, 1000 | "dashLength": 10, 1001 | "dashes": false, 1002 | "datasource": "${DS_PROMETHEUS}", 1003 | "decimals": null, 1004 | "description": "Critical warnings for the state of endurance groups", 1005 | "fieldConfig": { 1006 | "defaults": { 1007 | "custom": {} 1008 | }, 1009 | "overrides": [] 1010 | }, 1011 | "fill": 0, 1012 | "fillGradient": 0, 1013 | "gridPos": { 1014 | "h": 9, 1015 | "w": 12, 1016 | "x": 12, 1017 | "y": 34 1018 | }, 1019 | "hiddenSeries": false, 1020 | "id": 15, 1021 | "legend": { 1022 | "avg": false, 1023 | "current": false, 1024 | "max": false, 1025 | "min": false, 1026 | "show": true, 1027 | "total": false, 1028 | "values": false 1029 | }, 1030 | "lines": true, 1031 | "linewidth": 1, 1032 | "nullPointMode": "null", 1033 | "options": { 1034 | "dataLinks": [] 1035 | }, 1036 | "percentage": false, 1037 | "pointradius": 2, 
1038 | "points": false, 1039 | "renderer": "flot", 1040 | "seriesOverrides": [], 1041 | "spaceLength": 10, 1042 | "stack": false, 1043 | "steppedLine": false, 1044 | "targets": [ 1045 | { 1046 | "expr": "nvme_endurance_grp_critical_warning_summary{job=\"nvme_exporter\"}", 1047 | "interval": "", 1048 | "legendFormat": "nvme_endurance_grp_critical_warning_summary{device={{ device }}, instance={{ instance }}}", 1049 | "refId": "A" 1050 | } 1051 | ], 1052 | "thresholds": [], 1053 | "timeFrom": null, 1054 | "timeRegions": [], 1055 | "timeShift": null, 1056 | "title": "NVMe Endurance Group Critical Warning Summary", 1057 | "tooltip": { 1058 | "shared": true, 1059 | "sort": 0, 1060 | "value_type": "individual" 1061 | }, 1062 | "type": "graph", 1063 | "xaxis": { 1064 | "buckets": null, 1065 | "mode": "time", 1066 | "name": null, 1067 | "show": true, 1068 | "values": [] 1069 | }, 1070 | "yaxes": [ 1071 | { 1072 | "$$hashKey": "object:79", 1073 | "decimals": 0, 1074 | "format": "short", 1075 | "label": null, 1076 | "logBase": 1, 1077 | "max": null, 1078 | "min": "0", 1079 | "show": true 1080 | }, 1081 | { 1082 | "$$hashKey": "object:80", 1083 | "decimals": null, 1084 | "format": "short", 1085 | "label": null, 1086 | "logBase": 1, 1087 | "max": null, 1088 | "min": null, 1089 | "show": true 1090 | } 1091 | ], 1092 | "yaxis": { 1093 | "align": false, 1094 | "alignLevel": null 1095 | } 1096 | } 1097 | ], 1098 | "schemaVersion": 25, 1099 | "style": "dark", 1100 | "tags": [], 1101 | "templating": { 1102 | "list": [] 1103 | }, 1104 | "time": { 1105 | "from": "now-6h", 1106 | "to": "now" 1107 | }, 1108 | "timepicker": {}, 1109 | "timezone": "", 1110 | "title": "NVMe Exporter", 1111 | "uid": "lBqX37i7z", 1112 | "version": 53 1113 | } --------------------------------------------------------------------------------