├── .github
│   └── workflows
│       └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── docker-compose.yml
├── grafana
│   ├── grafana_dashboard.json
│   ├── grafana_dashboard_1.png
│   └── grafana_dashboard_2.png
├── megaraid.py
├── requirements.txt
├── smartprom.py
└── smartprom.service
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on:
3 | push:
4 | tags:
5 | - '*'
6 | branches:
7 | - '*'
8 |
9 | jobs:
10 | build:
11 | name: Docker
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout
16 | uses: actions/checkout@v2
17 |
18 | - name: Downcase repo
19 | run: echo REPOSITORY=matusnovak/prometheus-smartctl >> $GITHUB_ENV
20 |
21 | - name: Docker metadata
22 | id: docker_metadata
23 | uses: docker/metadata-action@v3
24 | with:
25 | images: ${{ env.REPOSITORY }}
26 | # images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
27 | flavor: |
28 | latest=true
29 | prefix=
30 | suffix=
31 | tags: |
32 | type=sha
33 | type=ref,event=tag
34 |
35 | - name: Set up QEMU
36 | uses: docker/setup-qemu-action@v1.0.1
37 |
38 | - name: Set up Docker Buildx
39 | uses: docker/setup-buildx-action@v1
40 |
41 | - name: Login to DockerHub
42 | uses: docker/login-action@v1
43 | with:
44 | username: ${{ secrets.DOCKER_USERNAME }}
45 | password: ${{ secrets.DOCKER_PASSWORD }}
46 |
47 | # - name: Login to GitHub Container Registry
48 | # uses: docker/login-action@v1
49 | # with:
50 | # registry: ghcr.io
51 | # username: ${{ github.repository_owner }}
52 | # password: ${{ secrets.GH_PAT }}
53 |
54 | - name: Build and push Docker image
55 | uses: docker/build-push-action@v2
56 | with:
57 | context: .
58 | file: ./Dockerfile
59 | platforms: linux/386,linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64/v8,linux/ppc64le,linux/s390x
60 | push: ${{ github.event_name != 'pull_request' }}
61 | tags: ${{ steps.docker_metadata.outputs.tags }}
62 | labels: ${{ steps.docker_metadata.outputs.labels }}
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=vim,zsh,visualstudiocode,pycharm+all
3 |
4 | ### PyCharm+all ###
5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
7 |
8 | # User-specific stuff
9 | .idea/**/workspace.xml
10 | .idea/**/tasks.xml
11 | .idea/**/usage.statistics.xml
12 | .idea/**/dictionaries
13 | .idea/**/shelf
14 |
15 | # AWS User-specific
16 | .idea/**/aws.xml
17 |
18 | # Generated files
19 | .idea/**/contentModel.xml
20 |
21 | # Sensitive or high-churn files
22 | .idea/**/dataSources/
23 | .idea/**/dataSources.ids
24 | .idea/**/dataSources.local.xml
25 | .idea/**/sqlDataSources.xml
26 | .idea/**/dynamic.xml
27 | .idea/**/uiDesigner.xml
28 | .idea/**/dbnavigator.xml
29 |
30 | # Gradle
31 | .idea/**/gradle.xml
32 | .idea/**/libraries
33 |
34 | # Gradle and Maven with auto-import
35 | # When using Gradle or Maven with auto-import, you should exclude module files,
36 | # since they will be recreated, and may cause churn. Uncomment if using
37 | # auto-import.
38 | # .idea/artifacts
39 | # .idea/compiler.xml
40 | # .idea/jarRepositories.xml
41 | # .idea/modules.xml
42 | # .idea/*.iml
43 | # .idea/modules
44 | # *.iml
45 | # *.ipr
46 |
47 | # CMake
48 | cmake-build-*/
49 |
50 | # Mongo Explorer plugin
51 | .idea/**/mongoSettings.xml
52 |
53 | # File-based project format
54 | *.iws
55 |
56 | # IntelliJ
57 | out/
58 |
59 | # mpeltonen/sbt-idea plugin
60 | .idea_modules/
61 |
62 | # JIRA plugin
63 | atlassian-ide-plugin.xml
64 |
65 | # Cursive Clojure plugin
66 | .idea/replstate.xml
67 |
68 | # SonarLint plugin
69 | .idea/sonarlint/
70 |
71 | # Crashlytics plugin (for Android Studio and IntelliJ)
72 | com_crashlytics_export_strings.xml
73 | crashlytics.properties
74 | crashlytics-build.properties
75 | fabric.properties
76 |
77 | # Editor-based Rest Client
78 | .idea/httpRequests
79 |
80 | # Android studio 3.1+ serialized cache file
81 | .idea/caches/build_file_checksums.ser
82 |
83 | ### PyCharm+all Patch ###
84 | # Ignore everything but code style settings and run configurations
85 | # that are supposed to be shared within teams.
86 |
87 | .idea/*
88 |
89 | !.idea/codeStyles
90 | !.idea/runConfigurations
91 |
92 | ### Vim ###
93 | # Swap
94 | [._]*.s[a-v][a-z]
95 | !*.svg # comment out if you don't need vector files
96 | [._]*.sw[a-p]
97 | [._]s[a-rt-v][a-z]
98 | [._]ss[a-gi-z]
99 | [._]sw[a-p]
100 |
101 | # Session
102 | Session.vim
103 | Sessionx.vim
104 |
105 | # Temporary
106 | .netrwhist
107 | *~
108 | # Auto-generated tag files
109 | tags
110 | # Persistent undo
111 | [._]*.un~
112 |
113 | ### VisualStudioCode ###
114 | .vscode/*
115 | !.vscode/settings.json
116 | !.vscode/tasks.json
117 | !.vscode/launch.json
118 | !.vscode/extensions.json
119 | !.vscode/*.code-snippets
120 |
121 | # Local History for Visual Studio Code
122 | .history/
123 |
124 | # Built Visual Studio Code Extensions
125 | *.vsix
126 |
127 | ### VisualStudioCode Patch ###
128 | # Ignore all local history of files
129 | .history
130 | .ionide
131 |
132 | # Support for Project snippet scope
133 | .vscode/*.code-snippets
134 |
135 | # Ignore code-workspaces
136 | *.code-workspace
137 |
138 | ### Zsh ###
139 | # Zsh compiled script + zrecompile backup
140 | *.zwc
141 | *.zwc.old
142 |
143 | # Zsh completion-optimization dumpfile
144 | *zcompdump*
145 |
146 | # Zsh zcalc history
147 | .zcalc_history
148 |
149 | # A popular plugin manager's files
150 | ._zinit
151 | .zinit_lstupd
152 |
153 | # zdharma/zshelldoc tool's files
154 | zsdoc/data
155 |
156 | # robbyrussell/oh-my-zsh/plugins/per-directory-history plugin's files
157 | # (when set-up to store the history in the local directory)
158 | .directory_history
159 |
160 | # MichaelAquilina/zsh-autoswitch-virtualenv plugin's files
161 | # (for Zsh plugins using Python)
162 | .venv
163 |
164 | # Zunit tests' output
165 | /tests/_output/*
166 | !/tests/_output/.gitkeep
167 |
168 | # End of https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all
169 |
170 | # Python cache
171 | __pycache__
172 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## v2.3.0 (20/01/2024)
4 |
5 | * Add support for drives connected by MegaRAID
6 | * Add user_capacity label (disk size in bytes) for each device
7 | * Update prometheus-client to 0.19.0
8 | * Update Python to 3.12
9 | * Update base Docker image to Alpine 3.19
10 |
11 | ## v2.2.0 (20/09/2022)
12 |
13 | * Add support for USB bridged drives
14 |
15 | ## v2.1.1 (17/09/2022)
16 |
17 | * Handle smartctl exit code != 0 and add smartprom_exit_code metric
18 |
19 | ## v2.1.0 (21/08/2022)
20 |
21 | * Include new metric with SMART Health Status => smartprom_smart_passed
22 | * Add model_family, model_name, serial_number and type attributes for each device
23 | * The "drive" attribute now includes the full path. sda => /dev/sda
24 | * Add more detailed log traces about discovered devices
25 | * Update the Grafana dashboard
26 | * Update Readme to include example metrics
27 |
28 | ## v2.0.1 (29/07/2022)
29 |
30 | * Fix duplicated timeseries error. Resolves #36 (#37)
31 | * Add missing raw metrics for sat devices. Resolves #25 (#38)
32 | * Chore: Code cleanup
33 |
34 | ## v2.0.0 (28/07/2022)
35 |
36 | * Breaking change: Convert the metrics name into lower case (#13)
37 | * Update base Docker image and reduce image size. Resolves #17 (#31)
38 | * Publish Docker images for ARM architecture. Resolves #19 (#34)
39 | * Make refresh interval configurable. Resolves #24 (#29)
40 | * Make exporter port and address configurable via environment variable (#27)
41 | * Include zero value raw metrics (#15)
42 | * Return more information on smartctl error. Resolves #23 (#28)
43 | * Handle error when devices are not detected (#32)
44 | * Using SMART tool to get the devices instead of glob (#14)
45 | * Avoid Python stdout buffering (#33)
46 | * Add Grafana dashboard. Resolves #18 (#30)
47 | * Added gitignore (#12)
48 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-alpine3.19
2 |
3 | # Install smartmontools
4 | RUN apk add --no-cache smartmontools
5 |
6 | # Install Python dependencies
7 | COPY requirements.txt /
8 | RUN pip install -r /requirements.txt \
9 | # remove temporary files
10 | && rm -rf /root/.cache
11 |
12 | COPY ./smartprom.py /megaraid.py /
13 |
14 | EXPOSE 9902
15 | ENTRYPOINT ["/usr/local/bin/python", "-u", "/smartprom.py"]
16 |
17 | # HELP
18 | # docker build -t matusnovak/prometheus-smartctl:test .
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Prometheus S.M.A.R.T ctl metrics exporter
2 |
3 | 
4 |
5 | This is a simple exporter of [Prometheus metrics](https://prometheus.io/) using [smartctl](https://www.smartmontools.org/). The script `smartprom.py` also comes with `smartprom.service` so that you can run it in the background on your Linux OS via `systemctl`. The exporter listens on port `9902` by default; you can change it with the `SMARTCTL_EXPORTER_PORT` environment variable (see [Configuration](#configuration)). It exports all of the data available from smartctl.
6 |
7 | ## Install
8 |
9 | _Note: You don't have to do this if you use the Docker image._
10 |
11 | 1. Copy the `smartprom.service` file into `/etc/systemd/system` folder.
12 | 2. Copy the `smartprom.py` file anywhere into your system.
13 | 3. Modify `ExecStart=` in the `smartprom.service` so that it points to `smartprom.py` in your system.
14 | 4. Run `chmod +x smartprom.py`
15 | 5. Install Python dependencies for the root user, example: `sudo -H python3 -m pip install -r requirements.txt`
16 | 6. Run `systemctl enable smartprom` and `systemctl start smartprom`
17 | 7. Your metrics will now be available at `http://localhost:9902`
18 |
19 | ## Docker usage
20 |
21 | No extra configuration needed, should work out of the box. The `privileged: true` is required in order for `smartctl` to be able to access drives from the host.
22 |
23 | Docker image is here: <https://hub.docker.com/r/matusnovak/prometheus-smartctl>
24 |
25 | The architectures supported by this image are: linux/386, linux/amd64, linux/arm/v6, linux/arm/v7, linux/arm64/v8, linux/ppc64le, linux/s390x
26 |
27 | Example docker-compose.yml:
28 |
29 | ```yml
30 | version: '3'
31 | services:
32 | smartctl-exporter:
33 | image: matusnovak/prometheus-smartctl:latest
34 | container_name: smartctl-exporter
35 | privileged: true
36 | ports:
37 | - "9902:9902"
38 | restart: unless-stopped
39 | ```
40 |
41 | Example docker-compose.yml with node-exporter and file export:
42 |
43 |
44 | ```yml
45 | version: "3"
46 | services:
47 | node-exporter:
48 | image: quay.io/prometheus/node-exporter
49 | restart: always
50 | volumes:
51 | - '/:/host:ro,rslave'
52 | - './tmp/:/tmp/'
53 | network_mode: "host"
54 | pid: "host"
55 | command:
56 | - "--path.rootfs=/host"
57 | - "--collector.textfile.directory=/tmp/"
58 | smartctl-exporter:
59 | image: matusnovak/prometheus-smartctl:latest
60 | container_name: smartctl-exporter
61 | privileged: true
62 | environment:
63 | - "SMARTCTL_METRICS_FILE_ENABLE=True"
64 | volumes:
65 | - ./tmp/:/metrics/
66 | restart: unless-stopped
67 | ```
68 |
69 |
70 |
71 | Your metrics will be available at <http://localhost:9902>
72 |
73 | The exported metrics look like this:
74 |
75 | ```shell
76 | smartprom_smart_passed{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 1.0
77 | smartprom_exit_code{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 0.0
78 | smartprom_raw_read_error_rate{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 83.0
79 | smartprom_raw_read_error_rate_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 2.23179896e+08
80 | smartprom_power_on_hours{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 73.0
81 | smartprom_power_on_hours_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 24299.0
82 | smartprom_airflow_temperature_cel{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 60.0
83 | smartprom_airflow_temperature_cel_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 40.0
84 | ...
85 | ```
86 |
87 | If you are using a MegaRAID card to connect the drives, the exported metrics will look like this:
88 |
89 | ```shell
90 | smartprom_power_on_hours_raw{drive="megaraid,0",model_family="Western Digital Ultrastar He10/12",model_name="WDC WD80EMAZ-00M9AA0",serial_number="XXXXXXXX",type="sat",user_capacity="6001175126016"} 28522.0
91 | smartprom_power_on_time_hours{drive="megaraid,1",model_family="Unknown",model_name="HGST HUH728080AL5200",serial_number="XXXXXXXX",type="scsi",user_capacity="6001175126016"} 37341.0
92 | ```
93 |
94 | ## Configuration
95 |
96 | All configuration is done with environment variables.
97 |
98 | - `SMARTCTL_REFRESH_INTERVAL`: (Optional) The refresh interval of the metrics. A larger value reduces CPU usage. The default is `60` seconds.
99 | - `SMARTCTL_EXPORTER_PORT`: (Optional) The port the exporter should listen on. The default is `9902`.
100 | - `SMARTCTL_EXPORTER_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses.
101 | - `SMARTCTL_METRICS_FILE_ENABLE`: (Optional) Enables writing the metrics to a file. This is useful if you already run a node exporter, which can then read the file through its textfile collector. The default is `False`.
102 | - `SMARTCTL_METRICS_FILE_PATH`: (Optional) The directory the metrics file is written to. When using Docker, this path must also be mounted as a volume in the docker-compose file. The default is `/metrics/`.
103 |
104 | ## Grafana dashboard
105 |
106 | There is a reference Grafana dashboard in [grafana/grafana_dashboard.json](./grafana/grafana_dashboard.json).
107 |
108 | 
109 | 
110 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | smartctl-metrics:
4 | build: ./
5 | restart: unless-stopped
6 | privileged: true
7 | ports:
8 | - 9902:9902
9 |
--------------------------------------------------------------------------------
/grafana/grafana_dashboard.json:
--------------------------------------------------------------------------------
1 | {
2 | "__inputs": [
3 | {
4 | "name": "DS_PROMETHEUS",
5 | "label": "Prometheus",
6 | "description": "",
7 | "type": "datasource",
8 | "pluginId": "prometheus",
9 | "pluginName": "Prometheus"
10 | }
11 | ],
12 | "__elements": {},
13 | "__requires": [
14 | {
15 | "type": "grafana",
16 | "id": "grafana",
17 | "name": "Grafana",
18 | "version": "10.2.3"
19 | },
20 | {
21 | "type": "datasource",
22 | "id": "prometheus",
23 | "name": "Prometheus",
24 | "version": "1.0.0"
25 | },
26 | {
27 | "type": "panel",
28 | "id": "table",
29 | "name": "Table",
30 | "version": ""
31 | },
32 | {
33 | "type": "panel",
34 | "id": "timeseries",
35 | "name": "Time series",
36 | "version": ""
37 | }
38 | ],
39 | "annotations": {
40 | "list": [
41 | {
42 | "builtIn": 1,
43 | "datasource": {
44 | "type": "datasource",
45 | "uid": "grafana"
46 | },
47 | "enable": true,
48 | "hide": true,
49 | "iconColor": "rgba(0, 211, 255, 1)",
50 | "name": "Annotations & Alerts",
51 | "target": {
52 | "limit": 100,
53 | "matchAny": false,
54 | "tags": [],
55 | "type": "dashboard"
56 | },
57 | "type": "dashboard"
58 | }
59 | ]
60 | },
61 | "description": "",
62 | "editable": true,
63 | "fiscalYearStartMonth": 0,
64 | "gnetId": 9846,
65 | "graphTooltip": 0,
66 | "id": null,
67 | "links": [
68 | {
69 | "icon": "external link",
70 | "tags": [
71 | "system"
72 | ],
73 | "type": "dashboards"
74 | }
75 | ],
76 | "liveNow": false,
77 | "panels": [
78 | {
79 | "datasource": {
80 | "type": "prometheus",
81 | "uid": "${DS_PROMETHEUS}"
82 | },
83 | "description": "",
84 | "fieldConfig": {
85 | "defaults": {
86 | "color": {
87 | "mode": "thresholds"
88 | },
89 | "custom": {
90 | "align": "auto",
91 | "cellOptions": {
92 | "type": "auto"
93 | },
94 | "filterable": false,
95 | "inspect": false
96 | },
97 | "mappings": [
98 | {
99 | "options": {
100 | "0": {
101 | "index": 1,
102 | "text": "ERROR"
103 | },
104 | "1": {
105 | "index": 2,
106 | "text": "OK"
107 | },
108 | "-1": {
109 | "index": 0,
110 | "text": "UNKNOWN"
111 | }
112 | },
113 | "type": "value"
114 | }
115 | ],
116 | "thresholds": {
117 | "mode": "absolute",
118 | "steps": [
119 | {
120 | "color": "red",
121 | "value": null
122 | },
123 | {
124 | "color": "red",
125 | "value": 0
126 | },
127 | {
128 | "color": "green",
129 | "value": 1
130 | }
131 | ]
132 | },
133 | "unit": "none"
134 | },
135 | "overrides": [
136 | {
137 | "matcher": {
138 | "id": "byRegexp",
139 | "options": "health"
140 | },
141 | "properties": [
142 | {
143 | "id": "custom.cellOptions",
144 | "value": {
145 | "mode": "gradient",
146 | "type": "color-background"
147 | }
148 | }
149 | ]
150 | },
151 | {
152 | "matcher": {
153 | "id": "byName",
154 | "options": "user_capacity"
155 | },
156 | "properties": [
157 | {
158 | "id": "unit",
159 | "value": "decbytes"
160 | }
161 | ]
162 | }
163 | ]
164 | },
165 | "gridPos": {
166 | "h": 7,
167 | "w": 24,
168 | "x": 0,
169 | "y": 0
170 | },
171 | "id": 38,
172 | "links": [],
173 | "maxDataPoints": 100,
174 | "options": {
175 | "cellHeight": "sm",
176 | "footer": {
177 | "countRows": false,
178 | "fields": "",
179 | "reducer": [
180 | "sum"
181 | ],
182 | "show": false
183 | },
184 | "frameIndex": 1,
185 | "showHeader": true,
186 | "sortBy": [
187 | {
188 | "desc": false,
189 | "displayName": "instance"
190 | }
191 | ]
192 | },
193 | "pluginVersion": "10.2.3",
194 | "targets": [
195 | {
196 | "datasource": {
197 | "type": "prometheus",
198 | "uid": "${DS_PROMETHEUS}"
199 | },
200 | "exemplar": true,
201 | "expr": "smartprom_smart_passed",
202 | "format": "table",
203 | "hide": false,
204 | "instant": true,
205 | "interval": "",
206 | "intervalFactor": 1,
207 | "legendFormat": "",
208 | "refId": "A"
209 | }
210 | ],
211 | "title": "SMART Health",
212 | "transformations": [
213 | {
214 | "id": "groupBy",
215 | "options": {
216 | "fields": {
217 | "Value": {
218 | "aggregations": [
219 | "last"
220 | ],
221 | "operation": "aggregate"
222 | },
223 | "Value #A": {
224 | "aggregations": [
225 | "sum"
226 | ],
227 | "operation": "aggregate"
228 | },
229 | "Value #B": {
230 | "aggregations": [
231 | "sum"
232 | ],
233 | "operation": "aggregate"
234 | },
235 | "Value #C": {
236 | "aggregations": [
237 | "sum"
238 | ],
239 | "operation": "aggregate"
240 | },
241 | "Value #D": {
242 | "aggregations": [
243 | "sum"
244 | ],
245 | "operation": "aggregate"
246 | },
247 | "Value #E": {
248 | "aggregations": [
249 | "sum"
250 | ],
251 | "operation": "aggregate"
252 | },
253 | "drive": {
254 | "aggregations": [],
255 | "operation": "groupby"
256 | },
257 | "instance": {
258 | "aggregations": [],
259 | "operation": "groupby"
260 | },
261 | "model_family": {
262 | "aggregations": [],
263 | "operation": "groupby"
264 | },
265 | "model_name": {
266 | "aggregations": [],
267 | "operation": "groupby"
268 | },
269 | "serial_number": {
270 | "aggregations": [],
271 | "operation": "groupby"
272 | },
273 | "type": {
274 | "aggregations": [],
275 | "operation": "groupby"
276 | },
277 | "user_capacity": {
278 | "aggregations": [],
279 | "operation": "groupby"
280 | }
281 | }
282 | }
283 | },
284 | {
285 | "id": "organize",
286 | "options": {
287 | "excludeByName": {
288 | "Time": true,
289 | "__name__": false,
290 | "backup_id": true,
291 | "backup_type": true,
292 | "client_hostname": false,
293 | "client_id": true,
294 | "client_os_version": true,
295 | "client_username": true,
296 | "client_version": true,
297 | "instance": false,
298 | "job": true,
299 | "snapshot_id": true
300 | },
301 | "includeByName": {},
302 | "indexByName": {
303 | "Value (last)": 7,
304 | "drive": 1,
305 | "instance": 0,
306 | "model_family": 2,
307 | "model_name": 3,
308 | "serial_number": 4,
309 | "type": 6,
310 | "user_capacity": 5
311 | },
312 | "renameByName": {
313 | "Value": "",
314 | "Value #A": "reallocated_sector_ct",
315 | "Value #A (sum)": "reallocated_sector_ct",
316 | "Value #B": "reported_uncorrect",
317 | "Value #B (sum)": "reported_uncorrect",
318 | "Value #C (sum)": "command_timeout",
319 | "Value #D (sum)": "current_pending_sector",
320 | "Value #E (sum)": "offline_uncorrectable",
321 | "Value (last)": "health",
322 | "job": "",
323 | "serial_number": "",
324 | "user_capacity": ""
325 | }
326 | }
327 | }
328 | ],
329 | "type": "table"
330 | },
331 | {
332 | "datasource": {
333 | "type": "prometheus",
334 | "uid": "${DS_PROMETHEUS}"
335 | },
336 | "description": "",
337 | "fieldConfig": {
338 | "defaults": {
339 | "color": {
340 | "mode": "thresholds"
341 | },
342 | "custom": {
343 | "align": "auto",
344 | "cellOptions": {
345 | "type": "auto"
346 | },
347 | "filterable": false,
348 | "inspect": false
349 | },
350 | "mappings": [],
351 | "thresholds": {
352 | "mode": "absolute",
353 | "steps": [
354 | {
355 | "color": "green",
356 | "value": null
357 | },
358 | {
359 | "color": "#EAB839",
360 | "value": 1
361 | },
362 | {
363 | "color": "red",
364 | "value": 2
365 | }
366 | ]
367 | },
368 | "unit": "none"
369 | },
370 | "overrides": [
371 | {
372 | "matcher": {
373 | "id": "byRegexp",
374 | "options": ".*_.*"
375 | },
376 | "properties": [
377 | {
378 | "id": "custom.cellOptions",
379 | "value": {
380 | "mode": "gradient",
381 | "type": "color-background"
382 | }
383 | }
384 | ]
385 | }
386 | ]
387 | },
388 | "gridPos": {
389 | "h": 7,
390 | "w": 24,
391 | "x": 0,
392 | "y": 7
393 | },
394 | "id": 27,
395 | "links": [],
396 | "maxDataPoints": 100,
397 | "options": {
398 | "cellHeight": "sm",
399 | "footer": {
400 | "countRows": false,
401 | "fields": "",
402 | "reducer": [
403 | "sum"
404 | ],
405 | "show": false
406 | },
407 | "frameIndex": 1,
408 | "showHeader": true,
409 | "sortBy": [
410 | {
411 | "desc": false,
412 | "displayName": "instance"
413 | }
414 | ]
415 | },
416 | "pluginVersion": "10.2.3",
417 | "targets": [
418 | {
419 | "exemplar": true,
420 | "expr": "smartprom_reallocated_sector_ct_raw",
421 | "format": "table",
422 | "hide": false,
423 | "instant": true,
424 | "interval": "",
425 | "intervalFactor": 1,
426 | "legendFormat": "",
427 | "refId": "A",
428 | "datasource": {
429 | "type": "prometheus",
430 | "uid": "${DS_PROMETHEUS}"
431 | }
432 | },
433 | {
434 | "exemplar": true,
435 | "expr": "smartprom_reported_uncorrect_raw",
436 | "format": "table",
437 | "hide": false,
438 | "instant": true,
439 | "interval": "",
440 | "intervalFactor": 1,
441 | "legendFormat": "",
442 | "refId": "B",
443 | "datasource": {
444 | "type": "prometheus",
445 | "uid": "${DS_PROMETHEUS}"
446 | }
447 | },
448 | {
449 | "exemplar": true,
450 | "expr": "smartprom_command_timeout_raw",
451 | "format": "table",
452 | "hide": false,
453 | "instant": true,
454 | "interval": "",
455 | "intervalFactor": 1,
456 | "legendFormat": "",
457 | "refId": "C",
458 | "datasource": {
459 | "type": "prometheus",
460 | "uid": "${DS_PROMETHEUS}"
461 | }
462 | },
463 | {
464 | "exemplar": true,
465 | "expr": "smartprom_current_pending_sector_raw",
466 | "format": "table",
467 | "hide": false,
468 | "instant": true,
469 | "interval": "",
470 | "intervalFactor": 1,
471 | "legendFormat": "",
472 | "refId": "D",
473 | "datasource": {
474 | "type": "prometheus",
475 | "uid": "${DS_PROMETHEUS}"
476 | }
477 | },
478 | {
479 | "exemplar": true,
480 | "expr": "smartprom_offline_uncorrectable_raw",
481 | "format": "table",
482 | "hide": false,
483 | "instant": true,
484 | "interval": "",
485 | "intervalFactor": 1,
486 | "legendFormat": "",
487 | "refId": "E",
488 | "datasource": {
489 | "type": "prometheus",
490 | "uid": "${DS_PROMETHEUS}"
491 | }
492 | }
493 | ],
494 | "title": "Error Metrics",
495 | "transformations": [
496 | {
497 | "id": "merge",
498 | "options": {}
499 | },
500 | {
501 | "id": "groupBy",
502 | "options": {
503 | "fields": {
504 | "Value #A": {
505 | "aggregations": [
506 | "sum"
507 | ],
508 | "operation": "aggregate"
509 | },
510 | "Value #B": {
511 | "aggregations": [
512 | "sum"
513 | ],
514 | "operation": "aggregate"
515 | },
516 | "Value #C": {
517 | "aggregations": [
518 | "sum"
519 | ],
520 | "operation": "aggregate"
521 | },
522 | "Value #D": {
523 | "aggregations": [
524 | "sum"
525 | ],
526 | "operation": "aggregate"
527 | },
528 | "Value #E": {
529 | "aggregations": [
530 | "sum"
531 | ],
532 | "operation": "aggregate"
533 | },
534 | "drive": {
535 | "aggregations": [],
536 | "operation": "groupby"
537 | },
538 | "instance": {
539 | "aggregations": [],
540 | "operation": "groupby"
541 | }
542 | }
543 | }
544 | },
545 | {
546 | "id": "organize",
547 | "options": {
548 | "excludeByName": {
549 | "Time": true,
550 | "__name__": false,
551 | "backup_id": true,
552 | "backup_type": true,
553 | "client_hostname": false,
554 | "client_id": true,
555 | "client_os_version": true,
556 | "client_username": true,
557 | "client_version": true,
558 | "instance": false,
559 | "job": true,
560 | "snapshot_id": true
561 | },
562 | "indexByName": {
563 | "Time": 0,
564 | "Value #A": 5,
565 | "Value #B": 6,
566 | "__name__": 1,
567 | "drive": 3,
568 | "instance": 2,
569 | "job": 4
570 | },
571 | "renameByName": {
572 | "Value": "",
573 | "Value #A": "reallocated_sector_ct",
574 | "Value #A (sum)": "reallocated_sector_ct",
575 | "Value #B": "reported_uncorrect",
576 | "Value #B (sum)": "reported_uncorrect",
577 | "Value #C (sum)": "command_timeout",
578 | "Value #D (sum)": "current_pending_sector",
579 | "Value #E (sum)": "offline_uncorrectable",
580 | "job": ""
581 | }
582 | }
583 | }
584 | ],
585 | "type": "table"
586 | },
587 | {
588 | "datasource": {
589 | "type": "prometheus",
590 | "uid": "${DS_PROMETHEUS}"
591 | },
592 | "description": "",
593 | "fieldConfig": {
594 | "defaults": {
595 | "color": {
596 | "mode": "thresholds"
597 | },
598 | "custom": {
599 | "align": "auto",
600 | "cellOptions": {
601 | "type": "auto"
602 | },
603 | "filterable": false,
604 | "inspect": false
605 | },
606 | "mappings": [],
607 | "thresholds": {
608 | "mode": "absolute",
609 | "steps": [
610 | {
611 | "color": "green",
612 | "value": null
613 | },
614 | {
615 | "color": "#EAB839",
616 | "value": 50
617 | },
618 | {
619 | "color": "red",
620 | "value": 60
621 | }
622 | ]
623 | },
624 | "unit": "none"
625 | },
626 | "overrides": [
627 | {
628 | "matcher": {
629 | "id": "byName",
630 | "options": "temperature_celsius"
631 | },
632 | "properties": [
633 | {
634 | "id": "custom.cellOptions",
635 | "value": {
636 | "mode": "gradient",
637 | "type": "color-background"
638 | }
639 | }
640 | ]
641 | },
642 | {
643 | "matcher": {
644 | "id": "byName",
645 | "options": "power_on_hours"
646 | },
647 | "properties": [
648 | {
649 | "id": "unit",
650 | "value": "h"
651 | }
652 | ]
653 | }
654 | ]
655 | },
656 | "gridPos": {
657 | "h": 7,
658 | "w": 24,
659 | "x": 0,
660 | "y": 14
661 | },
662 | "id": 37,
663 | "links": [],
664 | "maxDataPoints": 100,
665 | "options": {
666 | "cellHeight": "sm",
667 | "footer": {
668 | "countRows": false,
669 | "fields": "",
670 | "reducer": [
671 | "sum"
672 | ],
673 | "show": false
674 | },
675 | "frameIndex": 1,
676 | "showHeader": true,
677 | "sortBy": [
678 | {
679 | "desc": false,
680 | "displayName": "instance"
681 | }
682 | ]
683 | },
684 | "pluginVersion": "10.2.3",
685 | "targets": [
686 | {
687 | "exemplar": true,
688 | "expr": "smartprom_temperature_celsius_raw",
689 | "format": "table",
690 | "hide": false,
691 | "instant": true,
692 | "interval": "",
693 | "intervalFactor": 1,
694 | "legendFormat": "",
695 | "refId": "A",
696 | "datasource": {
697 | "type": "prometheus",
698 | "uid": "${DS_PROMETHEUS}"
699 | }
700 | },
701 | {
702 | "exemplar": true,
703 | "expr": "smartprom_power_cycle_count_raw",
704 | "format": "table",
705 | "hide": false,
706 | "instant": true,
707 | "interval": "",
708 | "intervalFactor": 1,
709 | "legendFormat": "",
710 | "refId": "B",
711 | "datasource": {
712 | "type": "prometheus",
713 | "uid": "${DS_PROMETHEUS}"
714 | }
715 | },
716 | {
717 | "exemplar": true,
718 | "expr": "smartprom_power_on_hours_raw",
719 | "format": "table",
720 | "hide": false,
721 | "instant": true,
722 | "interval": "",
723 | "intervalFactor": 1,
724 | "legendFormat": "",
725 | "refId": "C",
726 | "datasource": {
727 | "type": "prometheus",
728 | "uid": "${DS_PROMETHEUS}"
729 | }
730 | }
731 | ],
732 | "title": "Info Metrics",
733 | "transformations": [
734 | {
735 | "id": "merge",
736 | "options": {}
737 | },
738 | {
739 | "id": "groupBy",
740 | "options": {
741 | "fields": {
742 | "Value #A": {
743 | "aggregations": [
744 | "sum"
745 | ],
746 | "operation": "aggregate"
747 | },
748 | "Value #B": {
749 | "aggregations": [
750 | "sum"
751 | ],
752 | "operation": "aggregate"
753 | },
754 | "Value #C": {
755 | "aggregations": [
756 | "sum"
757 | ],
758 | "operation": "aggregate"
759 | },
760 | "Value #D": {
761 | "aggregations": [
762 | "sum"
763 | ],
764 | "operation": "aggregate"
765 | },
766 | "Value #E": {
767 | "aggregations": [
768 | "sum"
769 | ],
770 | "operation": "aggregate"
771 | },
772 | "drive": {
773 | "aggregations": [],
774 | "operation": "groupby"
775 | },
776 | "instance": {
777 | "aggregations": [],
778 | "operation": "groupby"
779 | }
780 | }
781 | }
782 | },
783 | {
784 | "id": "organize",
785 | "options": {
786 | "excludeByName": {
787 | "Time": true,
788 | "__name__": false,
789 | "backup_id": true,
790 | "backup_type": true,
791 | "client_hostname": false,
792 | "client_id": true,
793 | "client_os_version": true,
794 | "client_username": true,
795 | "client_version": true,
796 | "instance": false,
797 | "job": true,
798 | "snapshot_id": true
799 | },
800 | "indexByName": {
801 | "Time": 0,
802 | "Value #A": 5,
803 | "Value #B": 6,
804 | "__name__": 1,
805 | "drive": 3,
806 | "instance": 2,
807 | "job": 4
808 | },
809 | "renameByName": {
810 | "Value": "",
811 | "Value #A": "reallocated_sector_ct",
812 | "Value #A (sum)": "temperature_celsius",
813 | "Value #B": "reported_uncorrect",
814 | "Value #B (sum)": "power_cycle_count",
815 | "Value #C (sum)": "power_on_hours",
816 | "Value #D (sum)": "current_pending_sector",
817 | "Value #E (sum)": "offline_uncorrectable",
818 | "job": ""
819 | }
820 | }
821 | }
822 | ],
823 | "type": "table"
824 | },
825 | {
826 | "datasource": {
827 | "type": "prometheus",
828 | "uid": "${DS_PROMETHEUS}"
829 | },
830 | "fieldConfig": {
831 | "defaults": {
832 | "color": {
833 | "mode": "palette-classic"
834 | },
835 | "custom": {
836 | "axisBorderShow": false,
837 | "axisCenteredZero": false,
838 | "axisColorMode": "text",
839 | "axisLabel": "",
840 | "axisPlacement": "auto",
841 | "barAlignment": 0,
842 | "drawStyle": "line",
843 | "fillOpacity": 0,
844 | "gradientMode": "none",
845 | "hideFrom": {
846 | "legend": false,
847 | "tooltip": false,
848 | "viz": false
849 | },
850 | "insertNulls": false,
851 | "lineInterpolation": "linear",
852 | "lineWidth": 1,
853 | "pointSize": 5,
854 | "scaleDistribution": {
855 | "type": "linear"
856 | },
857 | "showPoints": "never",
858 | "spanNulls": false,
859 | "stacking": {
860 | "group": "A",
861 | "mode": "none"
862 | },
863 | "thresholdsStyle": {
864 | "mode": "off"
865 | }
866 | },
867 | "links": [],
868 | "mappings": [],
869 | "thresholds": {
870 | "mode": "absolute",
871 | "steps": [
872 | {
873 | "color": "green",
874 | "value": null
875 | },
876 | {
877 | "color": "red",
878 | "value": 80
879 | }
880 | ]
881 | },
882 | "unit": "celsius"
883 | },
884 | "overrides": []
885 | },
886 | "gridPos": {
887 | "h": 11,
888 | "w": 24,
889 | "x": 0,
890 | "y": 21
891 | },
892 | "id": 36,
893 | "links": [],
894 | "options": {
895 | "legend": {
896 | "calcs": [
897 | "mean",
898 | "lastNotNull",
899 | "max",
900 | "min"
901 | ],
902 | "displayMode": "table",
903 | "placement": "bottom",
904 | "showLegend": true
905 | },
906 | "tooltip": {
907 | "mode": "multi",
908 | "sort": "none"
909 | }
910 | },
911 | "pluginVersion": "8.5.0",
912 | "targets": [
913 | {
914 | "datasource": {
915 | "type": "prometheus",
916 | "uid": "${DS_PROMETHEUS}"
917 | },
918 | "editorMode": "code",
919 | "exemplar": true,
920 | "expr": "smartprom_temperature_celsius_raw",
921 | "format": "time_series",
922 | "interval": "",
923 | "intervalFactor": 2,
924 | "legendFormat": "{{instance}} {{drive}}",
925 | "range": true,
926 | "refId": "A"
927 | }
928 | ],
929 | "title": "Temperature",
930 | "type": "timeseries"
931 | }
932 | ],
933 | "refresh": "",
934 | "schemaVersion": 39,
935 | "tags": [
936 | "system"
937 | ],
938 | "templating": {
939 | "list": []
940 | },
941 | "time": {
942 | "from": "now-30m",
943 | "to": "now"
944 | },
945 | "timepicker": {
946 | "refresh_intervals": [
947 | "5s",
948 | "10s",
949 | "30s",
950 | "1m",
951 | "5m",
952 | "15m",
953 | "30m",
954 | "1h",
955 | "2h",
956 | "1d"
957 | ],
958 | "time_options": [
959 | "5m",
960 | "15m",
961 | "1h",
962 | "6h",
963 | "12h",
964 | "24h",
965 | "2d",
966 | "7d",
967 | "30d"
968 | ]
969 | },
970 | "timezone": "",
971 | "title": "SMART Exporter",
972 | "uid": "hmXXiWPnk",
973 | "version": 12,
974 | "weekStart": ""
975 | }
--------------------------------------------------------------------------------
/grafana/grafana_dashboard_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_1.png
--------------------------------------------------------------------------------
/grafana/grafana_dashboard_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_2.png
--------------------------------------------------------------------------------
/megaraid.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | from typing import Optional
4 |
5 | import smartprom
6 |
# Matches smartctl device type strings of MegaRAID-attached drives, e.g.
# "megaraid,0" or "sat+megaraid,12". Group 1 is the optional "sat+" prefix,
# group 2 is the "megaraid,N" part that is passed to "smartctl -d".
MEGARAID_TYPE_PATTERN = r"(sat\+)?(megaraid,\d+)"
8 |
9 |
def get_megaraid_device_info(dev: str, typ: str) -> dict:
    """
    Query identity information of a drive attached through MegaRAID.

    dev is the bus device handed to smartctl and typ is the device type
    string reported by the scan (it must contain "megaraid,N").
    Returns a dict with the same keys as smartprom.get_device_info
    (model_family, model_name, serial_number, user_capacity), or an empty
    dict when typ does not contain a MegaRAID id.
    """
    controller_slot = get_megaraid_device_id(typ)
    if controller_slot is None:
        return {}

    command = ["smartctl", "-i", "--json=c", "-d", controller_slot, dev]
    raw_output, _ = smartprom.run_smartctl_cmd(command)
    info = json.loads(raw_output)

    serial_number = info.get("serial_number", "Unknown")
    model_family = info.get("model_family", "Unknown")

    # When using SAS drive and smartmontools r5286 and later,
    # scsi_ prefix is added to model_name field.
    # https://sourceforge.net/p/smartmontools/code/5286/
    fallback_name = info.get("model_name", "Unknown")
    model_name = info.get("scsi_model_name", fallback_name)

    # The capacity is exported as a string; "Unknown" when not reported.
    if "user_capacity" in info and "bytes" in info["user_capacity"]:
        user_capacity = str(info["user_capacity"]["bytes"])
    else:
        user_capacity = "Unknown"

    return dict(
        model_family=model_family,
        model_name=model_name,
        serial_number=serial_number,
        user_capacity=user_capacity,
    )
44 |
45 |
def get_megaraid_device_type(dev: str, typ: str) -> str:
    """
    Determine the protocol family of a drive attached through MegaRAID.

    Returns "sat" when smartctl reports the ATA protocol, "scsi" for any
    other reported protocol, and "unknown" when typ carries no MegaRAID id
    or smartctl does not report a protocol.
    """
    controller_slot = get_megaraid_device_id(typ)
    if controller_slot is None:
        return "unknown"

    command = ["smartctl", "-i", "--json=c", "-d", controller_slot, dev]
    raw_output, _ = smartprom.run_smartctl_cmd(command)
    info = json.loads(raw_output)

    if "device" in info and "protocol" in info["device"]:
        if info["device"]["protocol"] == "ATA":
            return "sat"
        return "scsi"
    return "unknown"
59 |
60 |
def get_megaraid_device_id(typ: str) -> Optional[str]:
    """
    Extracts the "megaraid,N" device ID from the typ string,
    or returns None when typ does not contain one
    """
    found = re.search(MEGARAID_TYPE_PATTERN, typ)
    # Group 2 is the "megaraid,N" part without the optional "sat+" prefix.
    return found.group(2) if found else None
70 |
71 |
def smart_megaraid(dev: str, megaraid_id: str) -> dict:
    """
    Reads SMART attributes and health of a drive attached through MegaRAID.

    ATA drives yield (code, value) tuples like smartprom.smart_sat, SCSI
    drives yield plain values like smartprom.smart_scsi. Any other
    protocol produces an empty dict.
    """
    command = ["smartctl", "-A", "-H", "-d", megaraid_id, "--json=c", dev]
    raw_output, exit_code = smartprom.run_smartctl_cmd(command)
    report = json.loads(raw_output)
    protocol = report["device"]["protocol"]

    if protocol == "ATA":
        # SATA device on MegaRAID
        table = report["ata_smart_attributes"]["table"]
        attributes = smartprom.table_to_attributes_sat(table)
        attributes["smart_passed"] = (0, smartprom.get_smart_status(report))
        attributes["exit_code"] = (0, exit_code)
    elif protocol == "SCSI":
        # SAS device on MegaRAID
        attributes = smartprom.results_to_attributes_scsi(report)
        attributes["smart_passed"] = smartprom.get_smart_status(report)
        attributes["exit_code"] = exit_code
    else:
        attributes = {}
    return attributes
96 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | prometheus-client==0.19.0
2 |
--------------------------------------------------------------------------------
/smartprom.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import json
3 | import os
4 | import subprocess
5 | import time
6 | import re
7 | from typing import Tuple
8 |
9 | import prometheus_client
10 |
11 | import megaraid
12 |
# Label names attached to every gauge created in collect().
LABELS = ["drive", "type", "model_family", "model_name", "serial_number", "user_capacity"]
# Discovered drives keyed by device name; assigned in main() from get_drives().
DRIVES = {}
# Gauges keyed by metric name; filled lazily by collect().
METRICS = {}

# https://www.smartmontools.org/wiki/USB
SAT_TYPES = [
    "sat",
    "usbjmicron",
    "usbprolific",
    "usbsunplus",
]
NVME_TYPES = [
    "nvme",
    "sntasmedia",
    "sntjmicron",
    "sntrealtek",
]
SCSI_TYPES = [
    "scsi",
]
28 |
29 |
def run_smartctl_cmd(args: list) -> Tuple[str, int]:
    """
    Executes the given command line and waits for it to finish.

    Returns the decoded output together with the exit code. stderr is
    merged into stdout, so the returned text contains both streams.
    """
    completed = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    captured_out = completed.stdout
    captured_err = completed.stderr
    status = completed.returncode

    # exit code can be != 0 even if the command returned valid data
    # see EXIT STATUS in
    # https://www.smartmontools.org/browser/trunk/smartmontools/smartctl.8.in
    if status != 0:
        stdout_msg = "" if captured_out is None else captured_out.decode("utf-8")
        stderr_msg = "" if captured_err is None else captured_err.decode("utf-8")
        print(
            f"WARNING: Command returned exit code {status}. "
            f"Stdout: '{stdout_msg}' Stderr: '{stderr_msg}'"
        )

    return captured_out.decode("utf-8"), status
49 |
50 |
def get_drives() -> dict:
    """
    Scans the system with smartctl and describes every usable device.

    Returns a dict keyed by device name (or by "megaraid,N" for drives
    behind a MegaRAID controller) whose values hold the device info
    labels plus its "type". The dict is empty when the scan reports no
    devices.
    """
    scan_output, _ = run_smartctl_cmd(["smartctl", "--scan-open", "--json=c"])
    scan = json.loads(scan_output)

    if "devices" not in scan:
        print("No devices found. Make sure you have enough privileges.")
        return {}

    # Ignore devices that fail on open, such as Virtual Drives created by MegaRAID.
    megaraid_open_error = (
        "DELL or MegaRaid controller, please try adding '-d megaraid,N'"
    )
    candidates = [
        entry
        for entry in scan["devices"]
        if entry.get("open_error", "") != megaraid_open_error
    ]

    disks = {}
    for entry in candidates:
        dev = entry["name"]
        dev_type = entry["type"]
        if re.match(megaraid.MEGARAID_TYPE_PATTERN, dev_type):
            # If drive is connected by MegaRAID, dev has a bus name like "/dev/bus/0".
            # The disk information is retrieved using the bus name, but the
            # drive is registered under its disk ID such as "megaraid,0".
            disk_attrs = megaraid.get_megaraid_device_info(dev, dev_type)
            disk_attrs["type"] = megaraid.get_megaraid_device_type(dev, dev_type)
            disk_attrs["bus_device"] = dev
            disk_attrs["megaraid_id"] = megaraid.get_megaraid_device_id(dev_type)
            key = disk_attrs["megaraid_id"]
        else:
            disk_attrs = get_device_info(dev)
            disk_attrs["type"] = dev_type
            key = dev
        disks[key] = disk_attrs
        print("Discovered device", key, "with attributes", disk_attrs)
    return disks
96 |
97 |
def get_device_info(dev: str) -> dict:
    """
    Returns a dictionary of device info

    Runs "smartctl -i" on dev and extracts model_family, model_name,
    serial_number and user_capacity (in bytes, as a string). Every field
    that smartctl does not report is set to "Unknown".
    """
    results, _ = run_smartctl_cmd(["smartctl", "-i", "--json=c", dev])
    results = json.loads(results)

    # When using SAS drive and smartmontools r5286 and later,
    # scsi_ prefix is added to model_name field, so prefer it when present
    # (same lookup as megaraid.get_megaraid_device_info).
    # https://sourceforge.net/p/smartmontools/code/5286/
    model_name = results.get(
        "scsi_model_name",
        results.get("model_name", "Unknown"),
    )

    user_capacity = "Unknown"
    if "user_capacity" in results and "bytes" in results["user_capacity"]:
        user_capacity = str(results["user_capacity"]["bytes"])

    return {
        "model_family": results.get("model_family", "Unknown"),
        "model_name": model_name,
        "serial_number": results.get("serial_number", "Unknown"),
        "user_capacity": user_capacity,
    }
113 |
114 |
def get_smart_status(results: dict) -> int:
    """
    Returns a 1, 0 or -1 depending on if result from
    smart status is True, False or unknown.

    The status is unknown (-1) when "smart_status" is missing, is not a
    mapping, or does not carry a boolean "passed" entry.
    """
    status = results.get("smart_status")
    if not isinstance(status, dict):
        return -1

    passed = status.get("passed")
    # A missing/non-boolean "passed" must not raise: report it as unknown.
    if not isinstance(passed, bool):
        return -1
    return int(passed)
122 |
123 |
def smart_sat(dev: str) -> dict:
    """
    Queries SMART attributes and health of an internal or external "sat"
    device and returns them as a dict of (code, value) tuples
    """
    command = ["smartctl", "-A", "-H", "-d", "sat", "--json=c", dev]
    raw_output, exit_code = run_smartctl_cmd(command)
    report = json.loads(raw_output)

    table = report["ata_smart_attributes"]["table"]
    attributes = table_to_attributes_sat(table)
    # Synthetic entries use code 0 to keep the (code, value) tuple shape.
    attributes.update(
        smart_passed=(0, get_smart_status(report)),
        exit_code=(0, exit_code),
    )
    return attributes
138 |
139 |
def table_to_attributes_sat(data: dict) -> dict:
    """
    Returns a results["ata_smart_attributes"]["table"]
    processed into an attributes dict

    data is iterated as a sequence of attribute entries, each providing
    "id", "name", "value" and "raw" ("string" and "value").
    For every entry the result holds name -> (id, value) and, when the raw
    string can be parsed into an integer, name_raw -> (id, raw value).
    Unparseable raw strings are reported on stdout and skipped.
    """
    attributes = {}
    for metric in data:
        code = int(metric["id"])
        name = metric["name"]
        attributes[name] = (code, metric["value"])

        # metric['raw']['value'] contains values difficult to understand for temperatures and time up
        # that's why we added some logic to parse the string value
        raw_string = metric["raw"]["string"]
        try:
            # example raw_string: "33" or "43 (Min/Max 39/46)"
            value_raw = int(raw_string.split()[0])
        except (ValueError, IndexError):
            # example raw_string: "20071h+27m+15.375s"
            hours, separator, _ = raw_string.partition("h+")
            try:
                value_raw = int(hours) if separator else None
            except ValueError:
                # "h+" is present but not preceded by a number
                value_raw = None

            if value_raw is None:
                print(
                    f"Raw value of sat metric '{name}' can't be parsed. raw_string: {raw_string} "
                    f"raw_int: {metric['raw'].get('value')}"
                )

        if value_raw is not None:
            attributes[f"{name}_raw"] = (code, value_raw)
    return attributes
172 |
173 |
def smart_nvme(dev: str) -> dict:
    """
    Queries the SMART health log of an internal or external "nvme" device
    and returns its entries as a flat dict of plain values
    """
    command = ["smartctl", "-A", "-H", "-d", "nvme", "--json=c", dev]
    raw_output, exit_code = run_smartctl_cmd(command)
    report = json.loads(raw_output)

    attributes = {"smart_passed": get_smart_status(report), "exit_code": exit_code}
    health_log = report["nvme_smart_health_information_log"]
    for field, reading in health_log.items():
        if field != "temperature_sensors":
            attributes[field] = reading
            continue
        # The sensor list is flattened into temperature_sensor1..N
        for index, sensor in enumerate(reading, start=1):
            attributes[f"temperature_sensor{index}"] = sensor
    return attributes
193 |
194 |
def smart_scsi(dev: str) -> dict:
    """
    Queries SMART attributes and health of a "scsi" device
    and returns them as a flat dict of plain values
    """
    command = ["smartctl", "-A", "-H", "-d", "scsi", "--json=c", dev]
    raw_output, exit_code = run_smartctl_cmd(command)
    report = json.loads(raw_output)

    attributes = results_to_attributes_scsi(report)
    attributes.update(smart_passed=get_smart_status(report), exit_code=exit_code)
    return attributes
209 |
210 |
def results_to_attributes_scsi(data: dict) -> dict:
    """
    Flattens a smartctl result of a SCSI device into an attributes dict.

    Top-level integers are kept under their own key, integers found one
    level deep in a nested dict are stored as "<key>_<nested key>".
    Everything else (strings, lists, booleans, deeper nesting) is dropped.
    """
    attributes = {}
    for key, value in data.items():
        # Exact type checks on purpose: bool is a subclass of int and must
        # not be exported.
        if type(value) is int:
            attributes[key] = value
        elif type(value) is dict:
            attributes.update(
                (f"{key}_{label}", nested)
                for label, nested in value.items()
                if type(nested) is int
            )
    return attributes
225 |
226 |
def collect():
    """
    Reads the SMART data of every known drive and publishes it as gauges.

    Gauges are created on first use and stored in METRICS. A failure while
    processing one drive is printed and does not stop the other drives.
    """
    # "-", " " and "/" become "_", "." is removed.
    sanitize = str.maketrans({"-": "_", " ": "_", ".": None, "/": "_"})

    for drive, drive_attrs in DRIVES.items():
        typ = drive_attrs["type"]
        # SAT style attributes are (code, value) tuples, all others plain values.
        is_sat = typ in SAT_TYPES
        try:
            if "megaraid_id" in drive_attrs:
                attrs = megaraid.smart_megaraid(
                    drive_attrs["bus_device"], drive_attrs["megaraid_id"]
                )
            elif is_sat:
                attrs = smart_sat(drive)
            elif typ in NVME_TYPES:
                attrs = smart_nvme(drive)
            elif typ in SCSI_TYPES:
                attrs = smart_scsi(drive)
            else:
                continue

            for key, values in attrs.items():
                # Metric name in lower case
                metric = "smartprom_" + key.translate(sanitize).lower()

                # Create metric if it does not exist
                if metric not in METRICS:
                    desc = key.replace("_", " ")
                    code = hex(values[0] if is_sat else values)
                    print(f"Adding new gauge {metric} ({code})")
                    METRICS[metric] = prometheus_client.Gauge(
                        metric, f"({code}) {desc}", LABELS
                    )

                # Update metric
                metric_val = values[1] if is_sat else values
                label_values = {
                    "drive": drive,
                    "type": typ,
                    "model_family": drive_attrs["model_family"],
                    "model_name": drive_attrs["model_name"],
                    "serial_number": drive_attrs["serial_number"],
                    "user_capacity": drive_attrs["user_capacity"],
                }
                METRICS[metric].labels(**label_values).set(metric_val)

        except Exception as e:
            print("Exception:", e)
284 |
285 |
def _env_flag(name: str, default: bool = False) -> bool:
    """
    Reads a boolean switch from the environment variable name.

    Returns default when the variable is unset. An empty value or one of
    "0", "false", "no", "off" (case-insensitive) disables the switch; any
    other value enables it.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() not in ("", "0", "false", "no", "off")


def main():
    """
    Starts a server and exposes the metrics

    Configuration is read from the environment:
    SMARTCTL_EXPORTER_ADDRESS (default "0.0.0.0"),
    SMARTCTL_EXPORTER_PORT (default 9902),
    SMARTCTL_REFRESH_INTERVAL in seconds (default 60),
    SMARTCTL_METRICS_FILE_ENABLE (default disabled) and
    SMARTCTL_METRICS_FILE_PATH, the directory receiving "smartctl.prom"
    (default "/metrics/").
    """
    global DRIVES

    # Validate configuration
    exporter_address = os.environ.get("SMARTCTL_EXPORTER_ADDRESS", "0.0.0.0")
    exporter_port = int(os.environ.get("SMARTCTL_EXPORTER_PORT", 9902))
    refresh_interval = int(os.environ.get("SMARTCTL_REFRESH_INTERVAL", 60))
    # A raw environment string is truthy even for "false"/"0", so parse it.
    metrics_file_enable = _env_flag("SMARTCTL_METRICS_FILE_ENABLE")
    metrics_file_path = os.environ.get("SMARTCTL_METRICS_FILE_PATH", "/metrics/")
    # join() works with and without a trailing slash on the directory.
    metrics_file = os.path.join(metrics_file_path, "smartctl.prom")

    # Get drives (test smartctl)
    DRIVES = get_drives()

    # Start Prometheus server
    prometheus_client.start_http_server(exporter_port, exporter_address)
    print(f"Server listening in http://{exporter_address}:{exporter_port}/metrics")

    while True:
        collect()
        if metrics_file_enable:
            prometheus_client.write_to_textfile(metrics_file, prometheus_client.REGISTRY)
        time.sleep(refresh_interval)


if __name__ == "__main__":
    main()
315 |
--------------------------------------------------------------------------------
/smartprom.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=SMART Prometheus metrics
3 |
4 | [Service]
5 | ExecStart=/var/lib/homelab/smartprom.py
6 | Restart=always
7 |
8 | [Install]
9 | WantedBy=multi-user.target
10 |
--------------------------------------------------------------------------------