├── .github └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yml ├── grafana ├── grafana_dashboard.json ├── grafana_dashboard_1.png └── grafana_dashboard_2.png ├── megaraid.py ├── requirements.txt ├── smartprom.py └── smartprom.service /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | tags: 5 | - '*' 6 | branches: 7 | - '*' 8 | 9 | jobs: 10 | build: 11 | name: Docker 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | 18 | - name: Downcase repo 19 | run: echo REPOSITORY=matusnovak/prometheus-smartctl >> $GITHUB_ENV 20 | 21 | - name: Docker metadata 22 | id: docker_metadata 23 | uses: docker/metadata-action@v3 24 | with: 25 | images: ${{ env.REPOSITORY }} 26 | # images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }} 27 | flavor: | 28 | latest=true 29 | prefix= 30 | suffix= 31 | tags: | 32 | type=sha 33 | type=ref,event=tag 34 | 35 | - name: Set up QEMU 36 | uses: docker/setup-qemu-action@v1.0.1 37 | 38 | - name: Set up Docker Buildx 39 | uses: docker/setup-buildx-action@v1 40 | 41 | - name: Login to DockerHub 42 | uses: docker/login-action@v1 43 | with: 44 | username: ${{ secrets.DOCKER_USERNAME }} 45 | password: ${{ secrets.DOCKER_PASSWORD }} 46 | 47 | # - name: Login to GitHub Container Registry 48 | # uses: docker/login-action@v1 49 | # with: 50 | # registry: ghcr.io 51 | # username: ${{ github.repository_owner }} 52 | # password: ${{ secrets.GH_PAT }} 53 | 54 | - name: Build and push Docker image 55 | uses: docker/build-push-action@v2 56 | with: 57 | context: . 58 | file: ./Dockerfile 59 | platforms: linux/386,linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64/v8,linux/ppc64le,linux/s390x 60 | push: ${{ github.event_name != 'pull_request' }} 61 | tags: ${{ steps.docker_metadata.outputs.tags }} 62 | labels: ${{ steps.docker_metadata.outputs.labels }} 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=vim,zsh,visualstudiocode,pycharm+all 3 | 4 | ### PyCharm+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # AWS User-specific 16 | .idea/**/aws.xml 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 38 | # .idea/artifacts 39 | # .idea/compiler.xml 40 | # .idea/jarRepositories.xml 41 | # .idea/modules.xml 42 | # .idea/*.iml 43 | # .idea/modules 44 | # *.iml 45 | # *.ipr 46 | 47 | # CMake 48 | cmake-build-*/ 49 | 50 | # Mongo Explorer plugin 51 | .idea/**/mongoSettings.xml 52 | 53 | # File-based project format 54 | *.iws 55 | 56 | # IntelliJ 57 | out/ 58 | 59 | # mpeltonen/sbt-idea plugin 60 | .idea_modules/ 61 | 62 | # JIRA plugin 63 | atlassian-ide-plugin.xml 64 | 65 | # Cursive Clojure plugin 66 | .idea/replstate.xml 67 | 68 | # SonarLint plugin 69 | .idea/sonarlint/ 70 | 71 | # Crashlytics plugin (for Android Studio and IntelliJ) 72 | com_crashlytics_export_strings.xml 73 | crashlytics.properties 74 | crashlytics-build.properties 75 | fabric.properties 76 | 77 | # Editor-based Rest Client 78 | .idea/httpRequests 79 | 80 | # Android studio 3.1+ serialized cache file 81 | .idea/caches/build_file_checksums.ser 82 | 83 | ### PyCharm+all Patch ### 84 | # Ignore everything but code style settings and run configurations 85 | # that are supposed to be shared within teams. 86 | 87 | .idea/* 88 | 89 | !.idea/codeStyles 90 | !.idea/runConfigurations 91 | 92 | ### Vim ### 93 | # Swap 94 | [._]*.s[a-v][a-z] 95 | !*.svg # comment out if you don't need vector files 96 | [._]*.sw[a-p] 97 | [._]s[a-rt-v][a-z] 98 | [._]ss[a-gi-z] 99 | [._]sw[a-p] 100 | 101 | # Session 102 | Session.vim 103 | Sessionx.vim 104 | 105 | # Temporary 106 | .netrwhist 107 | *~ 108 | # Auto-generated tag files 109 | tags 110 | # Persistent undo 111 | [._]*.un~ 112 | 113 | ### VisualStudioCode ### 114 | .vscode/* 115 | !.vscode/settings.json 116 | !.vscode/tasks.json 117 | !.vscode/launch.json 118 | !.vscode/extensions.json 119 | !.vscode/*.code-snippets 120 | 121 | # Local History for Visual Studio Code 122 | .history/ 123 | 124 | # Built Visual Studio Code Extensions 125 | *.vsix 126 | 127 | ### VisualStudioCode Patch ### 128 | # Ignore all local history of files 129 | .history 130 | .ionide 131 | 132 | # Support for Project snippet scope 133 | .vscode/*.code-snippets 134 | 135 | # Ignore code-workspaces 136 | *.code-workspace 137 | 138 | ### Zsh ### 139 | # Zsh compiled script + zrecompile backup 140 | *.zwc 141 | *.zwc.old 142 | 143 | # Zsh completion-optimization dumpfile 144 | *zcompdump* 145 | 146 | # Zsh zcalc history 147 | .zcalc_history 148 | 149 | # A popular plugin manager's files 150 | ._zinit 151 | .zinit_lstupd 152 | 153 | # zdharma/zshelldoc tool's files 154 | zsdoc/data 155 | 156 | # robbyrussell/oh-my-zsh/plugins/per-directory-history plugin's files 157 | # (when set-up to store the history in the local directory) 158 | .directory_history 159 | 160 | # MichaelAquilina/zsh-autoswitch-virtualenv plugin's files 161 | # (for Zsh plugins using Python) 162 | .venv 163 | 164 | # Zunit tests' output 165 | /tests/_output/* 166 | !/tests/_output/.gitkeep 167 | 168 | # End of https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all 169 | 170 | # Python cache 171 | __pycache__ 172 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v2.3.0 (20/01/2024) 4 | 5 | * Add support for drives connected by MegaRAID 6 | * Add user_capacity label (disk size in bytes) for each device 7 | * Update prometheus-client 0.19.0 8 | * Update Python 3.12 9 | * Update base Docker image to Alpine 3.19 10 | 11 | ## v2.2.0 (20/09/2022) 12 | 13 | * Add support for USB bridged drives 14 | 15 | ## v2.1.1 (17/09/2022) 16 | 17 | * Handle smartctl exit code != 0 and add smartprom_exit_code metric 18 | 19 | ## v2.1.0 (21/08/2022) 20 | 21 | * Include new metric with SMART Health Status => smartprom_smart_passed 22 | * Add model_family, model_name, serial_number and type attributes for each device 23 | * The "drive" attribute now includes the full path. sda => /dev/sda 24 | * Add more detailed log traces about discovered devices 25 | * Update the Grafana dashboard 26 | * Update Readme to include example metrics 27 | 28 | ## v2.0.1 (29/07/2022) 29 | 30 | * Fix duplicated timeseries error. Resolves #36 (#37) 31 | * Add missing raw metrics for sat devices. Resolves #25 (#38) 32 | * Chore: Code cleanup 33 | 34 | ## v2.0.0 (28/07/2022) 35 | 36 | * Breaking change: Convert the metrics name into lower case (#13) 37 | * Update base Docker image and reduce image size. Resolves #17 (#31) 38 | * Publish Docker images for ARM architecture. Resolves #19 (#34) 39 | * Make refresh interval configurable. Revolves #24 (#29) 40 | * Make exporter port and address configurable via environment variable (#27) 41 | * Include zero value raw metrics (#15) 42 | * Return more information on smartctl error. Resolves #23 (#28) 43 | * Handle error when devices are not detected (#32) 44 | * Using SMART tool to get the devices instead of glob (#14) 45 | * Avoid Python stdout buffering (#33) 46 | * Add Grafana dashboard. Resolves #18 (#30) 47 | * Added gitignore (#12) 48 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-alpine3.19 2 | 3 | # Install smartmontools 4 | RUN apk add --no-cache smartmontools 5 | 6 | # Install Python dependencies 7 | COPY requirements.txt / 8 | RUN pip install -r /requirements.txt \ 9 | # remove temporary files 10 | && rm -rf /root/.cache 11 | 12 | COPY ./smartprom.py /megaraid.py / 13 | 14 | EXPOSE 9902 15 | ENTRYPOINT ["/usr/local/bin/python", "-u", "/smartprom.py"] 16 | 17 | # HELP 18 | # docker build -t matusnovak/prometheus-smartctl:test . 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus S.M.A.R.T ctl metrics exporter 2 | 3 | ![build](https://github.com/matusnovak/prometheus-smartctl/workflows/build/badge.svg) 4 | 5 | This is a simple exporter for the [Prometheus metrics](https://prometheus.io/) using [smartctl](https://www.smartmontools.org/). The script `smartprom.py` also comes with `smartprom.service` so that you can run this script in the background on your Linux OS via `systemctl`. The script will use port `9902`, you can change it by changing it directly in the script. This script exports all of the data available from the smartctl. 6 | 7 | ## Install 8 | 9 | _Note: You don't have to do this if you use the Docker image._ 10 | 11 | 1. Copy the `smartprom.service` file into `/etc/systemd/system` folder. 12 | 2. Copy the `smartprom.py` file anywhere into your system. 13 | 3. Modify `ExecStart=` in the `smartprom.service` so that it points to `smartprom.py` in your system. 14 | 4. Run `chmod +x smartprom.py` 15 | 5. Install Python dependencies for the root user, example: `sudo -H python3 -m pip install -r requirements.txt` 16 | 6. Run `systemctl enable smartprom` and `systemctl start smartprom` 17 | 7. Your metrics will now be available at `http://localhost:9902` 18 | 19 | ## Docker usage 20 | 21 | No extra configuration needed, should work out of the box. The `privileged: true` is required in order for `smartctl` to be able to access drives from the host. 22 | 23 | Docker image is here: 24 | 25 | The architectures supported by this image are: linux/386, linux/amd64, linux/arm/v6, linux/arm/v7, linux/arm64/v8, linux/ppc64le, linux/s390x 26 | 27 | Example docker-compose.yml: 28 | 29 | ```yml 30 | version: '3' 31 | services: 32 | smartctl-exporter: 33 | image: matusnovak/prometheus-smartctl:latest 34 | container_name: smartctl-exporter 35 | privileged: true 36 | ports: 37 | - "9902:9902" 38 | restart: unless-stopped 39 | ``` 40 | 41 | Example docker-compose.yml with node-exporter and file export: 42 | 43 | 44 | ```yml 45 | version: "3" 46 | services: 47 | node-exporter: 48 | image: quay.io/prometheus/node-exporter 49 | restart: always 50 | volumes: 51 | - '/:/host:ro,rslave' 52 | - './tmp/:/tmp/' 53 | network_mode: "host" 54 | pid: "host" 55 | command: 56 | - "--path.rootfs=/host" 57 | - "--collector.textfile.directory=/tmp/" 58 | smartctl-exporter: 59 | image: matusnovak/prometheus-smartctl:latest 60 | container_name: smartctl-exporter 61 | privileged: true 62 | environment: 63 | - "SMARTCTL_METRICS_FILE_ENABLE=True" 64 | volumes: 65 | - ./tmp/:/metrics/ 66 | restart: unless-stopped 67 | ``` 68 | 69 | 70 | 71 | Your metrics will be available at 72 | 73 | The exported metrics looks like these: 74 | 75 | ```shell 76 | smartprom_smart_passed{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 1.0 77 | smartprom_exit_code{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 0.0 78 | smartprom_raw_read_error_rate{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 83.0 79 | smartprom_raw_read_error_rate_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 2.23179896e+08 80 | smartprom_power_on_hours{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 73.0 81 | smartprom_power_on_hours_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 24299.0 82 | smartprom_airflow_temperature_cel{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 60.0 83 | smartprom_airflow_temperature_cel_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 40.0 84 | ... 85 | ``` 86 | 87 | If you are using a MegaRAID card to connect the drives, the metrics will export look like these: 88 | 89 | ```shell 90 | smartprom_power_on_hours_raw{drive="megaraid,0",model_family="Western Digital Ultrastar He10/12",model_name="WDC WD80EMAZ-00M9AA0",serial_number="XXXXXXXX",type="sat",user_capacity="6001175126016"} 28522.0 91 | smartprom_power_on_time_hours{drive="megaraid,1",model_family="Unknown",model_name="HGST HUH728080AL5200",serial_number="XXXXXXXX",type="scsi",user_capacity="6001175126016"} 37341.0 92 | ``` 93 | 94 | ## Configuration 95 | 96 | All configuration is done with environment variables. 97 | 98 | - `SMARTCTL_REFRESH_INTERVAL`: (Optional) The refresh interval of the metrics. A larger value reduces CPU usage. The default is `60` seconds. 99 | - `SMARTCTL_EXPORTER_PORT`: (Optional) The address the exporter should listen on. The default is `9902`. 100 | - `SMARTCTL_EXPORTER_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses. 101 | - `SMARTCTL_METRICS_FILE_ENABLE`: (Optional) To enable metrics file, if you have a node exporter running anyway, you can simply read out this file . The default is `False`. 102 | - `SMARTCTL_METRICS_FILE_PATH`: (Optional) the path, this must then also be specified in the docker-compose as volume. The default is `/metrics/`. 103 | 104 | ## Grafana dashboard 105 | 106 | There is a reference Grafana dashboard in [grafana/grafana_dashboard.json](./grafana/grafana_dashboard.json). 107 | 108 | ![](./grafana/grafana_dashboard_1.png) 109 | ![](./grafana/grafana_dashboard_2.png) 110 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | smartctl-metrics: 4 | build: ./ 5 | restart: unless-stopped 6 | privileged: true 7 | ports: 8 | - 9902:9902 9 | -------------------------------------------------------------------------------- /grafana/grafana_dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "Prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__elements": {}, 13 | "__requires": [ 14 | { 15 | "type": "grafana", 16 | "id": "grafana", 17 | "name": "Grafana", 18 | "version": "10.2.3" 19 | }, 20 | { 21 | "type": "datasource", 22 | "id": "prometheus", 23 | "name": "Prometheus", 24 | "version": "1.0.0" 25 | }, 26 | { 27 | "type": "panel", 28 | "id": "table", 29 | "name": "Table", 30 | "version": "" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "timeseries", 35 | "name": "Time series", 36 | "version": "" 37 | } 38 | ], 39 | "annotations": { 40 | "list": [ 41 | { 42 | "builtIn": 1, 43 | "datasource": { 44 | "type": "datasource", 45 | "uid": "grafana" 46 | }, 47 | "enable": true, 48 | "hide": true, 49 | "iconColor": "rgba(0, 211, 255, 1)", 50 | "name": "Annotations & Alerts", 51 | "target": { 52 | "limit": 100, 53 | "matchAny": false, 54 | "tags": [], 55 | "type": "dashboard" 56 | }, 57 | "type": "dashboard" 58 | } 59 | ] 60 | }, 61 | "description": "", 62 | "editable": true, 63 | "fiscalYearStartMonth": 0, 64 | "gnetId": 9846, 65 | "graphTooltip": 0, 66 | "id": null, 67 | "links": [ 68 | { 69 | "icon": "external link", 70 | "tags": [ 71 | "system" 72 | ], 73 | "type": "dashboards" 74 | } 75 | ], 76 | "liveNow": false, 77 | "panels": [ 78 | { 79 | "datasource": { 80 | "type": "prometheus", 81 | "uid": "${DS_PROMETHEUS}" 82 | }, 83 | "description": "", 84 | "fieldConfig": { 85 | "defaults": { 86 | "color": { 87 | "mode": "thresholds" 88 | }, 89 | "custom": { 90 | "align": "auto", 91 | "cellOptions": { 92 | "type": "auto" 93 | }, 94 | "filterable": false, 95 | "inspect": false 96 | }, 97 | "mappings": [ 98 | { 99 | "options": { 100 | "0": { 101 | "index": 1, 102 | "text": "ERROR" 103 | }, 104 | "1": { 105 | "index": 2, 106 | "text": "OK" 107 | }, 108 | "-1": { 109 | "index": 0, 110 | "text": "UNKNOWN" 111 | } 112 | }, 113 | "type": "value" 114 | } 115 | ], 116 | "thresholds": { 117 | "mode": "absolute", 118 | "steps": [ 119 | { 120 | "color": "red", 121 | "value": null 122 | }, 123 | { 124 | "color": "red", 125 | "value": 0 126 | }, 127 | { 128 | "color": "green", 129 | "value": 1 130 | } 131 | ] 132 | }, 133 | "unit": "none" 134 | }, 135 | "overrides": [ 136 | { 137 | "matcher": { 138 | "id": "byRegexp", 139 | "options": "health" 140 | }, 141 | "properties": [ 142 | { 143 | "id": "custom.cellOptions", 144 | "value": { 145 | "mode": "gradient", 146 | "type": "color-background" 147 | } 148 | } 149 | ] 150 | }, 151 | { 152 | "matcher": { 153 | "id": "byName", 154 | "options": "user_capacity" 155 | }, 156 | "properties": [ 157 | { 158 | "id": "unit", 159 | "value": "decbytes" 160 | } 161 | ] 162 | } 163 | ] 164 | }, 165 | "gridPos": { 166 | "h": 7, 167 | "w": 24, 168 | "x": 0, 169 | "y": 0 170 | }, 171 | "id": 38, 172 | "links": [], 173 | "maxDataPoints": 100, 174 | "options": { 175 | "cellHeight": "sm", 176 | "footer": { 177 | "countRows": false, 178 | "fields": "", 179 | "reducer": [ 180 | "sum" 181 | ], 182 | "show": false 183 | }, 184 | "frameIndex": 1, 185 | "showHeader": true, 186 | "sortBy": [ 187 | { 188 | "desc": false, 189 | "displayName": "instance" 190 | } 191 | ] 192 | }, 193 | "pluginVersion": "10.2.3", 194 | "targets": [ 195 | { 196 | "datasource": { 197 | "type": "prometheus", 198 | "uid": "${DS_PROMETHEUS}" 199 | }, 200 | "exemplar": true, 201 | "expr": "smartprom_smart_passed", 202 | "format": "table", 203 | "hide": false, 204 | "instant": true, 205 | "interval": "", 206 | "intervalFactor": 1, 207 | "legendFormat": "", 208 | "refId": "A" 209 | } 210 | ], 211 | "title": "SMART Health", 212 | "transformations": [ 213 | { 214 | "id": "groupBy", 215 | "options": { 216 | "fields": { 217 | "Value": { 218 | "aggregations": [ 219 | "last" 220 | ], 221 | "operation": "aggregate" 222 | }, 223 | "Value #A": { 224 | "aggregations": [ 225 | "sum" 226 | ], 227 | "operation": "aggregate" 228 | }, 229 | "Value #B": { 230 | "aggregations": [ 231 | "sum" 232 | ], 233 | "operation": "aggregate" 234 | }, 235 | "Value #C": { 236 | "aggregations": [ 237 | "sum" 238 | ], 239 | "operation": "aggregate" 240 | }, 241 | "Value #D": { 242 | "aggregations": [ 243 | "sum" 244 | ], 245 | "operation": "aggregate" 246 | }, 247 | "Value #E": { 248 | "aggregations": [ 249 | "sum" 250 | ], 251 | "operation": "aggregate" 252 | }, 253 | "drive": { 254 | "aggregations": [], 255 | "operation": "groupby" 256 | }, 257 | "instance": { 258 | "aggregations": [], 259 | "operation": "groupby" 260 | }, 261 | "model_family": { 262 | "aggregations": [], 263 | "operation": "groupby" 264 | }, 265 | "model_name": { 266 | "aggregations": [], 267 | "operation": "groupby" 268 | }, 269 | "serial_number": { 270 | "aggregations": [], 271 | "operation": "groupby" 272 | }, 273 | "type": { 274 | "aggregations": [], 275 | "operation": "groupby" 276 | }, 277 | "user_capacity": { 278 | "aggregations": [], 279 | "operation": "groupby" 280 | } 281 | } 282 | } 283 | }, 284 | { 285 | "id": "organize", 286 | "options": { 287 | "excludeByName": { 288 | "Time": true, 289 | "__name__": false, 290 | "backup_id": true, 291 | "backup_type": true, 292 | "client_hostname": false, 293 | "client_id": true, 294 | "client_os_version": true, 295 | "client_username": true, 296 | "client_version": true, 297 | "instance": false, 298 | "job": true, 299 | "snapshot_id": true 300 | }, 301 | "includeByName": {}, 302 | "indexByName": { 303 | "Value (last)": 7, 304 | "drive": 1, 305 | "instance": 0, 306 | "model_family": 2, 307 | "model_name": 3, 308 | "serial_number": 4, 309 | "type": 6, 310 | "user_capacity": 5 311 | }, 312 | "renameByName": { 313 | "Value": "", 314 | "Value #A": "reallocated_sector_ct", 315 | "Value #A (sum)": "reallocated_sector_ct", 316 | "Value #B": "reported_uncorrect", 317 | "Value #B (sum)": "reported_uncorrect", 318 | "Value #C (sum)": "command_timeout", 319 | "Value #D (sum)": "current_pending_sector", 320 | "Value #E (sum)": "offline_uncorrectable", 321 | "Value (last)": "health", 322 | "job": "", 323 | "serial_number": "", 324 | "user_capacity": "" 325 | } 326 | } 327 | } 328 | ], 329 | "type": "table" 330 | }, 331 | { 332 | "datasource": { 333 | "type": "prometheus", 334 | "uid": "${DS_PROMETHEUS}" 335 | }, 336 | "description": "", 337 | "fieldConfig": { 338 | "defaults": { 339 | "color": { 340 | "mode": "thresholds" 341 | }, 342 | "custom": { 343 | "align": "auto", 344 | "cellOptions": { 345 | "type": "auto" 346 | }, 347 | "filterable": false, 348 | "inspect": false 349 | }, 350 | "mappings": [], 351 | "thresholds": { 352 | "mode": "absolute", 353 | "steps": [ 354 | { 355 | "color": "green", 356 | "value": null 357 | }, 358 | { 359 | "color": "#EAB839", 360 | "value": 1 361 | }, 362 | { 363 | "color": "red", 364 | "value": 2 365 | } 366 | ] 367 | }, 368 | "unit": "none" 369 | }, 370 | "overrides": [ 371 | { 372 | "matcher": { 373 | "id": "byRegexp", 374 | "options": ".*_.*" 375 | }, 376 | "properties": [ 377 | { 378 | "id": "custom.cellOptions", 379 | "value": { 380 | "mode": "gradient", 381 | "type": "color-background" 382 | } 383 | } 384 | ] 385 | } 386 | ] 387 | }, 388 | "gridPos": { 389 | "h": 7, 390 | "w": 24, 391 | "x": 0, 392 | "y": 7 393 | }, 394 | "id": 27, 395 | "links": [], 396 | "maxDataPoints": 100, 397 | "options": { 398 | "cellHeight": "sm", 399 | "footer": { 400 | "countRows": false, 401 | "fields": "", 402 | "reducer": [ 403 | "sum" 404 | ], 405 | "show": false 406 | }, 407 | "frameIndex": 1, 408 | "showHeader": true, 409 | "sortBy": [ 410 | { 411 | "desc": false, 412 | "displayName": "instance" 413 | } 414 | ] 415 | }, 416 | "pluginVersion": "10.2.3", 417 | "targets": [ 418 | { 419 | "exemplar": true, 420 | "expr": "smartprom_reallocated_sector_ct_raw", 421 | "format": "table", 422 | "hide": false, 423 | "instant": true, 424 | "interval": "", 425 | "intervalFactor": 1, 426 | "legendFormat": "", 427 | "refId": "A", 428 | "datasource": { 429 | "type": "prometheus", 430 | "uid": "${DS_PROMETHEUS}" 431 | } 432 | }, 433 | { 434 | "exemplar": true, 435 | "expr": "smartprom_reported_uncorrect_raw", 436 | "format": "table", 437 | "hide": false, 438 | "instant": true, 439 | "interval": "", 440 | "intervalFactor": 1, 441 | "legendFormat": "", 442 | "refId": "B", 443 | "datasource": { 444 | "type": "prometheus", 445 | "uid": "${DS_PROMETHEUS}" 446 | } 447 | }, 448 | { 449 | "exemplar": true, 450 | "expr": "smartprom_command_timeout_raw", 451 | "format": "table", 452 | "hide": false, 453 | "instant": true, 454 | "interval": "", 455 | "intervalFactor": 1, 456 | "legendFormat": "", 457 | "refId": "C", 458 | "datasource": { 459 | "type": "prometheus", 460 | "uid": "${DS_PROMETHEUS}" 461 | } 462 | }, 463 | { 464 | "exemplar": true, 465 | "expr": "smartprom_current_pending_sector_raw", 466 | "format": "table", 467 | "hide": false, 468 | "instant": true, 469 | "interval": "", 470 | "intervalFactor": 1, 471 | "legendFormat": "", 472 | "refId": "D", 473 | "datasource": { 474 | "type": "prometheus", 475 | "uid": "${DS_PROMETHEUS}" 476 | } 477 | }, 478 | { 479 | "exemplar": true, 480 | "expr": "smartprom_offline_uncorrectable_raw", 481 | "format": "table", 482 | "hide": false, 483 | "instant": true, 484 | "interval": "", 485 | "intervalFactor": 1, 486 | "legendFormat": "", 487 | "refId": "E", 488 | "datasource": { 489 | "type": "prometheus", 490 | "uid": "${DS_PROMETHEUS}" 491 | } 492 | } 493 | ], 494 | "title": "Error Metrics", 495 | "transformations": [ 496 | { 497 | "id": "merge", 498 | "options": {} 499 | }, 500 | { 501 | "id": "groupBy", 502 | "options": { 503 | "fields": { 504 | "Value #A": { 505 | "aggregations": [ 506 | "sum" 507 | ], 508 | "operation": "aggregate" 509 | }, 510 | "Value #B": { 511 | "aggregations": [ 512 | "sum" 513 | ], 514 | "operation": "aggregate" 515 | }, 516 | "Value #C": { 517 | "aggregations": [ 518 | "sum" 519 | ], 520 | "operation": "aggregate" 521 | }, 522 | "Value #D": { 523 | "aggregations": [ 524 | "sum" 525 | ], 526 | "operation": "aggregate" 527 | }, 528 | "Value #E": { 529 | "aggregations": [ 530 | "sum" 531 | ], 532 | "operation": "aggregate" 533 | }, 534 | "drive": { 535 | "aggregations": [], 536 | "operation": "groupby" 537 | }, 538 | "instance": { 539 | "aggregations": [], 540 | "operation": "groupby" 541 | } 542 | } 543 | } 544 | }, 545 | { 546 | "id": "organize", 547 | "options": { 548 | "excludeByName": { 549 | "Time": true, 550 | "__name__": false, 551 | "backup_id": true, 552 | "backup_type": true, 553 | "client_hostname": false, 554 | "client_id": true, 555 | "client_os_version": true, 556 | "client_username": true, 557 | "client_version": true, 558 | "instance": false, 559 | "job": true, 560 | "snapshot_id": true 561 | }, 562 | "indexByName": { 563 | "Time": 0, 564 | "Value #A": 5, 565 | "Value #B": 6, 566 | "__name__": 1, 567 | "drive": 3, 568 | "instance": 2, 569 | "job": 4 570 | }, 571 | "renameByName": { 572 | "Value": "", 573 | "Value #A": "reallocated_sector_ct", 574 | "Value #A (sum)": "reallocated_sector_ct", 575 | "Value #B": "reported_uncorrect", 576 | "Value #B (sum)": "reported_uncorrect", 577 | "Value #C (sum)": "command_timeout", 578 | "Value #D (sum)": "current_pending_sector", 579 | "Value #E (sum)": "offline_uncorrectable", 580 | "job": "" 581 | } 582 | } 583 | } 584 | ], 585 | "type": "table" 586 | }, 587 | { 588 | "datasource": { 589 | "type": "prometheus", 590 | "uid": "${DS_PROMETHEUS}" 591 | }, 592 | "description": "", 593 | "fieldConfig": { 594 | "defaults": { 595 | "color": { 596 | "mode": "thresholds" 597 | }, 598 | "custom": { 599 | "align": "auto", 600 | "cellOptions": { 601 | "type": "auto" 602 | }, 603 | "filterable": false, 604 | "inspect": false 605 | }, 606 | "mappings": [], 607 | "thresholds": { 608 | "mode": "absolute", 609 | "steps": [ 610 | { 611 | "color": "green", 612 | "value": null 613 | }, 614 | { 615 | "color": "#EAB839", 616 | "value": 50 617 | }, 618 | { 619 | "color": "red", 620 | "value": 60 621 | } 622 | ] 623 | }, 624 | "unit": "none" 625 | }, 626 | "overrides": [ 627 | { 628 | "matcher": { 629 | "id": "byName", 630 | "options": "temperature_celsius" 631 | }, 632 | "properties": [ 633 | { 634 | "id": "custom.cellOptions", 635 | "value": { 636 | "mode": "gradient", 637 | "type": "color-background" 638 | } 639 | } 640 | ] 641 | }, 642 | { 643 | "matcher": { 644 | "id": "byName", 645 | "options": "power_on_hours" 646 | }, 647 | "properties": [ 648 | { 649 | "id": "unit", 650 | "value": "h" 651 | } 652 | ] 653 | } 654 | ] 655 | }, 656 | "gridPos": { 657 | "h": 7, 658 | "w": 24, 659 | "x": 0, 660 | "y": 14 661 | }, 662 | "id": 37, 663 | "links": [], 664 | "maxDataPoints": 100, 665 | "options": { 666 | "cellHeight": "sm", 667 | "footer": { 668 | "countRows": false, 669 | "fields": "", 670 | "reducer": [ 671 | "sum" 672 | ], 673 | "show": false 674 | }, 675 | "frameIndex": 1, 676 | "showHeader": true, 677 | "sortBy": [ 678 | { 679 | "desc": false, 680 | "displayName": "instance" 681 | } 682 | ] 683 | }, 684 | "pluginVersion": "10.2.3", 685 | "targets": [ 686 | { 687 | "exemplar": true, 688 | "expr": "smartprom_temperature_celsius_raw", 689 | "format": "table", 690 | "hide": false, 691 | "instant": true, 692 | "interval": "", 693 | "intervalFactor": 1, 694 | "legendFormat": "", 695 | "refId": "A", 696 | "datasource": { 697 | "type": "prometheus", 698 | "uid": "${DS_PROMETHEUS}" 699 | } 700 | }, 701 | { 702 | "exemplar": true, 703 | "expr": "smartprom_power_cycle_count_raw", 704 | "format": "table", 705 | "hide": false, 706 | "instant": true, 707 | "interval": "", 708 | "intervalFactor": 1, 709 | "legendFormat": "", 710 | "refId": "B", 711 | "datasource": { 712 | "type": "prometheus", 713 | "uid": "${DS_PROMETHEUS}" 714 | } 715 | }, 716 | { 717 | "exemplar": true, 718 | "expr": "smartprom_power_on_hours_raw", 719 | "format": "table", 720 | "hide": false, 721 | "instant": true, 722 | "interval": "", 723 | "intervalFactor": 1, 724 | "legendFormat": "", 725 | "refId": "C", 726 | "datasource": { 727 | "type": "prometheus", 728 | "uid": "${DS_PROMETHEUS}" 729 | } 730 | } 731 | ], 732 | "title": "Info Metrics", 733 | "transformations": [ 734 | { 735 | "id": "merge", 736 | "options": {} 737 | }, 738 | { 739 | "id": "groupBy", 740 | "options": { 741 | "fields": { 742 | "Value #A": { 743 | "aggregations": [ 744 | "sum" 745 | ], 746 | "operation": "aggregate" 747 | }, 748 | "Value #B": { 749 | "aggregations": [ 750 | "sum" 751 | ], 752 | "operation": "aggregate" 753 | }, 754 | "Value #C": { 755 | "aggregations": [ 756 | "sum" 757 | ], 758 | "operation": "aggregate" 759 | }, 760 | "Value #D": { 761 | "aggregations": [ 762 | "sum" 763 | ], 764 | "operation": "aggregate" 765 | }, 766 | "Value #E": { 767 | "aggregations": [ 768 | "sum" 769 | ], 770 | "operation": "aggregate" 771 | }, 772 | "drive": { 773 | "aggregations": [], 774 | "operation": "groupby" 775 | }, 776 | "instance": { 777 | "aggregations": [], 778 | "operation": "groupby" 779 | } 780 | } 781 | } 782 | }, 783 | { 784 | "id": "organize", 785 | "options": { 786 | "excludeByName": { 787 | "Time": true, 788 | "__name__": false, 789 | "backup_id": true, 790 | "backup_type": true, 791 | "client_hostname": false, 792 | "client_id": true, 793 | "client_os_version": true, 794 | "client_username": true, 795 | "client_version": true, 796 | "instance": false, 797 | "job": true, 798 | "snapshot_id": true 799 | }, 800 | "indexByName": { 801 | "Time": 0, 802 | "Value #A": 5, 803 | "Value #B": 6, 804 | "__name__": 1, 805 | "drive": 3, 806 | "instance": 2, 807 | "job": 4 808 | }, 809 | "renameByName": { 810 | "Value": "", 811 | "Value #A": "reallocated_sector_ct", 812 | "Value #A (sum)": "temperature_celsius", 813 | "Value #B": "reported_uncorrect", 814 | "Value #B (sum)": "power_cycle_count", 815 | "Value #C (sum)": "power_on_hours", 816 | "Value #D (sum)": "current_pending_sector", 817 | "Value #E (sum)": "offline_uncorrectable", 818 | "job": "" 819 | } 820 | } 821 | } 822 | ], 823 | "type": "table" 824 | }, 825 | { 826 | "datasource": { 827 | "type": "prometheus", 828 | "uid": "${DS_PROMETHEUS}" 829 | }, 830 | "fieldConfig": { 831 | "defaults": { 832 | "color": { 833 | "mode": "palette-classic" 834 | }, 835 | "custom": { 836 | "axisBorderShow": false, 837 | "axisCenteredZero": false, 838 | "axisColorMode": "text", 839 | "axisLabel": "", 840 | "axisPlacement": "auto", 841 | "barAlignment": 0, 842 | "drawStyle": "line", 843 | "fillOpacity": 0, 844 | "gradientMode": "none", 845 | "hideFrom": { 846 | "legend": false, 847 | "tooltip": false, 848 | "viz": false 849 | }, 850 | "insertNulls": false, 851 | "lineInterpolation": "linear", 852 | "lineWidth": 1, 853 | "pointSize": 5, 854 | "scaleDistribution": { 855 | "type": "linear" 856 | }, 857 | "showPoints": "never", 858 | "spanNulls": false, 859 | "stacking": { 860 | "group": "A", 861 | "mode": "none" 862 | }, 863 | "thresholdsStyle": { 864 | "mode": "off" 865 | } 866 | }, 867 | "links": [], 868 | "mappings": [], 869 | "thresholds": { 870 | "mode": "absolute", 871 | "steps": [ 872 | { 873 | "color": "green", 874 | "value": null 875 | }, 876 | { 877 | "color": "red", 878 | "value": 80 879 | } 880 | ] 881 | }, 882 | "unit": "celsius" 883 | }, 884 | "overrides": [] 885 | }, 886 | "gridPos": { 887 | "h": 11, 888 | "w": 24, 889 | "x": 0, 890 | "y": 21 891 | }, 892 | "id": 36, 893 | "links": [], 894 | "options": { 895 | "legend": { 896 | "calcs": [ 897 | "mean", 898 | "lastNotNull", 899 | "max", 900 | "min" 901 | ], 902 | "displayMode": "table", 903 | "placement": "bottom", 904 | "showLegend": true 905 | }, 906 | "tooltip": { 907 | "mode": "multi", 908 | "sort": "none" 909 | } 910 | }, 911 | "pluginVersion": "8.5.0", 912 | "targets": [ 913 | { 914 | "datasource": { 915 | "type": "prometheus", 916 | "uid": "${DS_PROMETHEUS}" 917 | }, 918 | "editorMode": "code", 919 | "exemplar": true, 920 | "expr": "smartprom_temperature_celsius_raw", 921 | "format": "time_series", 922 | "interval": "", 923 | "intervalFactor": 2, 924 | "legendFormat": "{{instance}} {{drive}}", 925 | "range": true, 926 | "refId": "A" 927 | } 928 | ], 929 | "title": "Temperature", 930 | "type": "timeseries" 931 | } 932 | ], 933 | "refresh": "", 934 | "schemaVersion": 39, 935 | "tags": [ 936 | "system" 937 | ], 938 | "templating": { 939 | "list": [] 940 | }, 941 | "time": { 942 | "from": "now-30m", 943 | "to": "now" 944 | }, 945 | "timepicker": { 946 | "refresh_intervals": [ 947 | "5s", 948 | "10s", 949 | "30s", 950 | "1m", 951 | "5m", 952 | "15m", 953 | "30m", 954 | "1h", 955 | "2h", 956 | "1d" 957 | ], 958 | "time_options": [ 959 | "5m", 960 | "15m", 961 | "1h", 962 | "6h", 963 | "12h", 964 | "24h", 965 | "2d", 966 | "7d", 967 | "30d" 968 | ] 969 | }, 970 | "timezone": "", 971 | "title": "SMART Exporter", 972 | "uid": "hmXXiWPnk", 973 | "version": 12, 974 | "weekStart": "" 975 | } -------------------------------------------------------------------------------- /grafana/grafana_dashboard_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_1.png -------------------------------------------------------------------------------- /grafana/grafana_dashboard_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_2.png -------------------------------------------------------------------------------- /megaraid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from typing import Optional 4 | 5 | import smartprom 6 | 7 | MEGARAID_TYPE_PATTERN = r"(sat\+)?(megaraid,\d+)" 8 | 9 | 10 | def get_megaraid_device_info(dev: str, typ: str) -> dict: 11 | """ 12 | Get device information connected with MegaRAID, 13 | and process the information into get_device_info compatible format. 14 | """ 15 | megaraid_id = get_megaraid_device_id(typ) 16 | if megaraid_id is None: 17 | return {} 18 | 19 | results, _ = smartprom.run_smartctl_cmd( 20 | ["smartctl", "-i", "--json=c", "-d", megaraid_id, dev] 21 | ) 22 | results = json.loads(results) 23 | serial_number = results.get("serial_number", "Unknown") 24 | model_family = results.get("model_family", "Unknown") 25 | 26 | # When using SAS drive and smartmontools r5286 and later, 27 | # scsi_ prefix is added to model_name field. 28 | # https://sourceforge.net/p/smartmontools/code/5286/ 29 | model_name = results.get( 30 | "scsi_model_name", 31 | results.get("model_name", "Unknown"), 32 | ) 33 | 34 | user_capacity = "Unknown" 35 | if "user_capacity" in results and "bytes" in results["user_capacity"]: 36 | user_capacity = str(results["user_capacity"]["bytes"]) 37 | 38 | return { 39 | "model_family": model_family, 40 | "model_name": model_name, 41 | "serial_number": serial_number, 42 | "user_capacity": user_capacity, 43 | } 44 | 45 | 46 | def get_megaraid_device_type(dev: str, typ: str) -> str: 47 | megaraid_id = get_megaraid_device_id(typ) 48 | if megaraid_id is None: 49 | return "unknown" 50 | 51 | results, _ = smartprom.run_smartctl_cmd( 52 | ["smartctl", "-i", "--json=c", "-d", megaraid_id, dev] 53 | ) 54 | results = json.loads(results) 55 | 56 | if "device" not in results or "protocol" not in results["device"]: 57 | return "unknown" 58 | return "sat" if results["device"]["protocol"] == "ATA" else "scsi" 59 | 60 | 61 | def get_megaraid_device_id(typ: str) -> Optional[str]: 62 | """ 63 | Returns the device ID on the MegaRAID from the typ string 64 | """ 65 | megaraid_match = re.search(MEGARAID_TYPE_PATTERN, typ) 66 | if not megaraid_match: 67 | return None 68 | 69 | return megaraid_match.group(2) 70 | 71 | 72 | def smart_megaraid(dev: str, megaraid_id: str) -> dict: 73 | """ 74 | Runs the smartctl command on device connected by MegaRAID 75 | and processes its attributes 76 | """ 77 | results, exit_code = smartprom.run_smartctl_cmd( 78 | ["smartctl", "-A", "-H", "-d", megaraid_id, "--json=c", dev] 79 | ) 80 | results = json.loads(results) 81 | 82 | if results["device"]["protocol"] == "ATA": 83 | # SATA device on MegaRAID 84 | data = results["ata_smart_attributes"]["table"] 85 | attributes = smartprom.table_to_attributes_sat(data) 86 | attributes["smart_passed"] = (0, smartprom.get_smart_status(results)) 87 | attributes["exit_code"] = (0, exit_code) 88 | return attributes 89 | elif results["device"]["protocol"] == "SCSI": 90 | # SAS device on MegaRAID 91 | attributes = smartprom.results_to_attributes_scsi(results) 92 | attributes["smart_passed"] = smartprom.get_smart_status(results) 93 | attributes["exit_code"] = exit_code 94 | return attributes 95 | return {} 96 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | prometheus-client==0.19.0 2 | -------------------------------------------------------------------------------- /smartprom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | import time 6 | import re 7 | from typing import Tuple 8 | 9 | import prometheus_client 10 | 11 | import megaraid 12 | 13 | LABELS = [ 14 | "drive", 15 | "type", 16 | "model_family", 17 | "model_name", 18 | "serial_number", 19 | "user_capacity", 20 | ] 21 | DRIVES = {} 22 | METRICS = {} 23 | 24 | # https://www.smartmontools.org/wiki/USB 25 | SAT_TYPES = ["sat", "usbjmicron", "usbprolific", "usbsunplus"] 26 | NVME_TYPES = ["nvme", "sntasmedia", "sntjmicron", "sntrealtek"] 27 | SCSI_TYPES = ["scsi"] 28 | 29 | 30 | def run_smartctl_cmd(args: list) -> Tuple[str, int]: 31 | """ 32 | Runs the smartctl command on the system 33 | """ 34 | out = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 35 | stdout, stderr = out.communicate() 36 | 37 | # exit code can be != 0 even if the command returned valid data 38 | # see EXIT STATUS in 39 | # https://www.smartmontools.org/browser/trunk/smartmontools/smartctl.8.in 40 | if out.returncode != 0: 41 | stdout_msg = stdout.decode("utf-8") if stdout is not None else "" 42 | stderr_msg = stderr.decode("utf-8") if stderr is not None else "" 43 | print( 44 | f"WARNING: Command returned exit code {out.returncode}. " 45 | f"Stdout: '{stdout_msg}' Stderr: '{stderr_msg}'" 46 | ) 47 | 48 | return stdout.decode("utf-8"), out.returncode 49 | 50 | 51 | def get_drives() -> dict: 52 | """ 53 | Returns a dictionary of devices and its types 54 | """ 55 | disks = {} 56 | result, _ = run_smartctl_cmd(["smartctl", "--scan-open", "--json=c"]) 57 | result_json = json.loads(result) 58 | 59 | if "devices" in result_json: 60 | devices = result_json["devices"] 61 | 62 | # Ignore devices that fail on open, such as Virtual Drives created by MegaRAID. 63 | devices = list( 64 | filter( 65 | lambda x: ( 66 | x.get("open_error", "") 67 | != "DELL or MegaRaid controller, please try adding '-d megaraid,N'" 68 | ), 69 | devices, 70 | ) 71 | ) 72 | 73 | for device in devices: 74 | dev = device["name"] 75 | if re.match(megaraid.MEGARAID_TYPE_PATTERN, device["type"]): 76 | # If drive is connected by MegaRAID, dev has a bus name like "/dev/bus/0". 77 | # After retrieving the disk information using the bus name, 78 | # replace dev with a disk ID such as "megaraid,0". 79 | disk_attrs = megaraid.get_megaraid_device_info(dev, device["type"]) 80 | disk_attrs["type"] = megaraid.get_megaraid_device_type( 81 | dev, device["type"] 82 | ) 83 | disk_attrs["bus_device"] = dev 84 | disk_attrs["megaraid_id"] = megaraid.get_megaraid_device_id( 85 | device["type"] 86 | ) 87 | dev = disk_attrs["megaraid_id"] 88 | else: 89 | disk_attrs = get_device_info(dev) 90 | disk_attrs["type"] = device["type"] 91 | disks[dev] = disk_attrs 92 | print("Discovered device", dev, "with attributes", disk_attrs) 93 | else: 94 | print("No devices found. Make sure you have enough privileges.") 95 | return disks 96 | 97 | 98 | def get_device_info(dev: str) -> dict: 99 | """ 100 | Returns a dictionary of device info 101 | """ 102 | results, _ = run_smartctl_cmd(["smartctl", "-i", "--json=c", dev]) 103 | results = json.loads(results) 104 | user_capacity = "Unknown" 105 | if "user_capacity" in results and "bytes" in results["user_capacity"]: 106 | user_capacity = str(results["user_capacity"]["bytes"]) 107 | return { 108 | "model_family": results.get("model_family", "Unknown"), 109 | "model_name": results.get("model_name", "Unknown"), 110 | "serial_number": results.get("serial_number", "Unknown"), 111 | "user_capacity": user_capacity, 112 | } 113 | 114 | 115 | def get_smart_status(results: dict) -> int: 116 | """ 117 | Returns a 1, 0 or -1 depending on if result from 118 | smart status is True, False or unknown. 119 | """ 120 | status = results.get("smart_status") 121 | return +(status.get("passed")) if status is not None else -1 122 | 123 | 124 | def smart_sat(dev: str) -> dict: 125 | """ 126 | Runs the smartctl command on a internal or external "sat" device 127 | and processes its attributes 128 | """ 129 | results, exit_code = run_smartctl_cmd( 130 | ["smartctl", "-A", "-H", "-d", "sat", "--json=c", dev] 131 | ) 132 | results = json.loads(results) 133 | 134 | attributes = table_to_attributes_sat(results["ata_smart_attributes"]["table"]) 135 | attributes["smart_passed"] = (0, get_smart_status(results)) 136 | attributes["exit_code"] = (0, exit_code) 137 | return attributes 138 | 139 | 140 | def table_to_attributes_sat(data: dict) -> dict: 141 | """ 142 | Returns a results["ata_smart_attributes"]["table"] 143 | processed into an attributes dict 144 | """ 145 | attributes = {} 146 | for metric in data: 147 | code = metric["id"] 148 | name = metric["name"] 149 | value = metric["value"] 150 | 151 | # metric['raw']['value'] contains values difficult to understand for temperatures and time up 152 | # that's why we added some logic to parse the string value 153 | value_raw = metric["raw"]["string"] 154 | try: 155 | # example value_raw: "33" or "43 (Min/Max 39/46)" 156 | value_raw = int(value_raw.split()[0]) 157 | except: 158 | # example value_raw: "20071h+27m+15.375s" 159 | if "h+" in value_raw: 160 | value_raw = int(value_raw.split("h+")[0]) 161 | else: 162 | print( 163 | f"Raw value of sat metric '{name}' can't be parsed. raw_string: {value_raw} " 164 | f"raw_int: {metric['raw']['value']}" 165 | ) 166 | value_raw = None 167 | 168 | attributes[name] = (int(code), value) 169 | if value_raw is not None: 170 | attributes[f"{name}_raw"] = (int(code), value_raw) 171 | return attributes 172 | 173 | 174 | def smart_nvme(dev: str) -> dict: 175 | """ 176 | Runs the smartctl command on a internal or external "nvme" device 177 | and processes its attributes 178 | """ 179 | results, exit_code = run_smartctl_cmd( 180 | ["smartctl", "-A", "-H", "-d", "nvme", "--json=c", dev] 181 | ) 182 | results = json.loads(results) 183 | 184 | attributes = {"smart_passed": get_smart_status(results), "exit_code": exit_code} 185 | data = results["nvme_smart_health_information_log"] 186 | for key, value in data.items(): 187 | if key == "temperature_sensors": 188 | for i, _value in enumerate(value, start=1): 189 | attributes[f"temperature_sensor{i}"] = _value 190 | else: 191 | attributes[key] = value 192 | return attributes 193 | 194 | 195 | def smart_scsi(dev: str) -> dict: 196 | """ 197 | Runs the smartctl command on a "scsi" device 198 | and processes its attributes 199 | """ 200 | results, exit_code = run_smartctl_cmd( 201 | ["smartctl", "-A", "-H", "-d", "scsi", "--json=c", dev] 202 | ) 203 | results = json.loads(results) 204 | 205 | attributes = results_to_attributes_scsi(results) 206 | attributes["smart_passed"] = get_smart_status(results) 207 | attributes["exit_code"] = exit_code 208 | return attributes 209 | 210 | 211 | def results_to_attributes_scsi(data: dict) -> dict: 212 | """ 213 | Returns the result of smartctl -i on the SCSI device 214 | processed into an attributes dict 215 | """ 216 | attributes = {} 217 | for key, value in data.items(): 218 | if type(value) == dict: 219 | for _label, _value in value.items(): 220 | if type(_value) == int: 221 | attributes[f"{key}_{_label}"] = _value 222 | elif type(value) == int: 223 | attributes[key] = value 224 | return attributes 225 | 226 | 227 | def collect(): 228 | """ 229 | Collect all drive metrics and save them as Gauge type 230 | """ 231 | global LABELS, DRIVES, METRICS, SAT_TYPES, NVME_TYPES, SCSI_TYPES 232 | 233 | for drive, drive_attrs in DRIVES.items(): 234 | typ = drive_attrs["type"] 235 | try: 236 | if "megaraid_id" in drive_attrs: 237 | attrs = megaraid.smart_megaraid( 238 | drive_attrs["bus_device"], drive_attrs["megaraid_id"] 239 | ) 240 | elif typ in SAT_TYPES: 241 | attrs = smart_sat(drive) 242 | elif typ in NVME_TYPES: 243 | attrs = smart_nvme(drive) 244 | elif typ in SCSI_TYPES: 245 | attrs = smart_scsi(drive) 246 | else: 247 | continue 248 | 249 | for key, values in attrs.items(): 250 | # Metric name in lower case 251 | metric = ( 252 | "smartprom_" 253 | + key.replace("-", "_") 254 | .replace(" ", "_") 255 | .replace(".", "") 256 | .replace("/", "_") 257 | .lower() 258 | ) 259 | 260 | # Create metric if it does not exist 261 | if metric not in METRICS: 262 | desc = key.replace("_", " ") 263 | code = hex(values[0]) if typ in SAT_TYPES else hex(values) 264 | print(f"Adding new gauge {metric} ({code})") 265 | METRICS[metric] = prometheus_client.Gauge( 266 | metric, f"({code}) {desc}", LABELS 267 | ) 268 | 269 | # Update metric 270 | metric_val = values[1] if typ in SAT_TYPES else values 271 | 272 | METRICS[metric].labels( 273 | drive=drive, 274 | type=typ, 275 | model_family=drive_attrs["model_family"], 276 | model_name=drive_attrs["model_name"], 277 | serial_number=drive_attrs["serial_number"], 278 | user_capacity=drive_attrs["user_capacity"], 279 | ).set(metric_val) 280 | 281 | except Exception as e: 282 | print("Exception:", e) 283 | pass 284 | 285 | 286 | def main(): 287 | """ 288 | Starts a server and exposes the metrics 289 | """ 290 | global DRIVES 291 | 292 | # Validate configuration 293 | exporter_address = os.environ.get("SMARTCTL_EXPORTER_ADDRESS", "0.0.0.0") 294 | exporter_port = int(os.environ.get("SMARTCTL_EXPORTER_PORT", 9902)) 295 | refresh_interval = int(os.environ.get("SMARTCTL_REFRESH_INTERVAL", 60)) 296 | metrics_file_enable = os.environ.get("SMARTCTL_METRICS_FILE_ENABLE", False) 297 | metrics_file_path = os.environ.get("SMARTCTL_METRICS_FILE_PATH", "/metrics/") 298 | 299 | # Get drives (test smartctl) 300 | DRIVES = get_drives() 301 | 302 | # Start Prometheus server 303 | prometheus_client.start_http_server(exporter_port, exporter_address) 304 | print(f"Server listening in http://{exporter_address}:{exporter_port}/metrics") 305 | 306 | while True: 307 | collect() 308 | if metrics_file_enable: 309 | prometheus_client.write_to_textfile(metrics_file_path+"smartctl.prom", prometheus_client.REGISTRY) 310 | time.sleep(refresh_interval) 311 | 312 | 313 | if __name__ == "__main__": 314 | main() 315 | -------------------------------------------------------------------------------- /smartprom.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=SMART Prometheus metrics 3 | 4 | [Service] 5 | ExecStart=/var/lib/homelab/smartprom.py 6 | Restart=always 7 | 8 | [Install] 9 | WantedBy=multi-user.target 10 | --------------------------------------------------------------------------------