├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── docker-compose.yml
├── grafana
    ├── grafana_dashboard.json
    ├── grafana_dashboard_1.png
    └── grafana_dashboard_2.png
├── megaraid.py
├── requirements.txt
├── smartprom.py
└── smartprom.service


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - '*'
 6 |     branches:
 7 |       - '*'
 8 | 
 9 | jobs:
10 |   build:
11 |     name: Docker
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v2
17 | 
18 |       - name: Downcase repo
19 |         run: echo REPOSITORY=matusnovak/prometheus-smartctl >> $GITHUB_ENV
20 | 
21 |       - name: Docker metadata
22 |         id: docker_metadata
23 |         uses: docker/metadata-action@v3
24 |         with:
25 |           images: ${{ env.REPOSITORY }}
26 |           # images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
27 |           flavor: |
28 |             latest=true
29 |             prefix=
30 |             suffix=
31 |           tags: |
32 |             type=sha
33 |             type=ref,event=tag
34 | 
35 |       - name: Set up QEMU
36 |         uses: docker/setup-qemu-action@v1.0.1
37 | 
38 |       - name: Set up Docker Buildx
39 |         uses: docker/setup-buildx-action@v1
40 | 
41 |       - name: Login to DockerHub
42 |         uses: docker/login-action@v1
43 |         with:
44 |           username: ${{ secrets.DOCKER_USERNAME }}
45 |           password: ${{ secrets.DOCKER_PASSWORD }}
46 | 
47 | #      - name: Login to GitHub Container Registry
48 | #        uses: docker/login-action@v1
49 | #        with:
50 | #          registry: ghcr.io
51 | #          username: ${{ github.repository_owner }}
52 | #          password: ${{ secrets.GH_PAT }}
53 | 
54 |       - name: Build and push Docker image
55 |         uses: docker/build-push-action@v2
56 |         with:
57 |           context: .
58 |           file: ./Dockerfile
59 |           platforms: linux/386,linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64/v8,linux/ppc64le,linux/s390x
60 |           push: ${{ github.event_name != 'pull_request' }}
61 |           tags: ${{ steps.docker_metadata.outputs.tags }}
62 |           labels: ${{ steps.docker_metadata.outputs.labels }}
63 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=vim,zsh,visualstudiocode,pycharm+all
  3 | 
  4 | ### PyCharm+all ###
  5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
  6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  7 | 
  8 | # User-specific stuff
  9 | .idea/**/workspace.xml
 10 | .idea/**/tasks.xml
 11 | .idea/**/usage.statistics.xml
 12 | .idea/**/dictionaries
 13 | .idea/**/shelf
 14 | 
 15 | # AWS User-specific
 16 | .idea/**/aws.xml
 17 | 
 18 | # Generated files
 19 | .idea/**/contentModel.xml
 20 | 
 21 | # Sensitive or high-churn files
 22 | .idea/**/dataSources/
 23 | .idea/**/dataSources.ids
 24 | .idea/**/dataSources.local.xml
 25 | .idea/**/sqlDataSources.xml
 26 | .idea/**/dynamic.xml
 27 | .idea/**/uiDesigner.xml
 28 | .idea/**/dbnavigator.xml
 29 | 
 30 | # Gradle
 31 | .idea/**/gradle.xml
 32 | .idea/**/libraries
 33 | 
 34 | # Gradle and Maven with auto-import
 35 | # When using Gradle or Maven with auto-import, you should exclude module files,
 36 | # since they will be recreated, and may cause churn.  Uncomment if using
 37 | # auto-import.
 38 | # .idea/artifacts
 39 | # .idea/compiler.xml
 40 | # .idea/jarRepositories.xml
 41 | # .idea/modules.xml
 42 | # .idea/*.iml
 43 | # .idea/modules
 44 | # *.iml
 45 | # *.ipr
 46 | 
 47 | # CMake
 48 | cmake-build-*/
 49 | 
 50 | # Mongo Explorer plugin
 51 | .idea/**/mongoSettings.xml
 52 | 
 53 | # File-based project format
 54 | *.iws
 55 | 
 56 | # IntelliJ
 57 | out/
 58 | 
 59 | # mpeltonen/sbt-idea plugin
 60 | .idea_modules/
 61 | 
 62 | # JIRA plugin
 63 | atlassian-ide-plugin.xml
 64 | 
 65 | # Cursive Clojure plugin
 66 | .idea/replstate.xml
 67 | 
 68 | # SonarLint plugin
 69 | .idea/sonarlint/
 70 | 
 71 | # Crashlytics plugin (for Android Studio and IntelliJ)
 72 | com_crashlytics_export_strings.xml
 73 | crashlytics.properties
 74 | crashlytics-build.properties
 75 | fabric.properties
 76 | 
 77 | # Editor-based Rest Client
 78 | .idea/httpRequests
 79 | 
 80 | # Android studio 3.1+ serialized cache file
 81 | .idea/caches/build_file_checksums.ser
 82 | 
 83 | ### PyCharm+all Patch ###
 84 | # Ignore everything but code style settings and run configurations
 85 | # that are supposed to be shared within teams.
 86 | 
 87 | .idea/*
 88 | 
 89 | !.idea/codeStyles
 90 | !.idea/runConfigurations
 91 | 
 92 | ### Vim ###
 93 | # Swap
 94 | [._]*.s[a-v][a-z]
 95 | !*.svg  # comment out if you don't need vector files
 96 | [._]*.sw[a-p]
 97 | [._]s[a-rt-v][a-z]
 98 | [._]ss[a-gi-z]
 99 | [._]sw[a-p]
100 | 
101 | # Session
102 | Session.vim
103 | Sessionx.vim
104 | 
105 | # Temporary
106 | .netrwhist
107 | *~
108 | # Auto-generated tag files
109 | tags
110 | # Persistent undo
111 | [._]*.un~
112 | 
113 | ### VisualStudioCode ###
114 | .vscode/*
115 | !.vscode/settings.json
116 | !.vscode/tasks.json
117 | !.vscode/launch.json
118 | !.vscode/extensions.json
119 | !.vscode/*.code-snippets
120 | 
121 | # Local History for Visual Studio Code
122 | .history/
123 | 
124 | # Built Visual Studio Code Extensions
125 | *.vsix
126 | 
127 | ### VisualStudioCode Patch ###
128 | # Ignore all local history of files
129 | .history
130 | .ionide
131 | 
132 | # Support for Project snippet scope
133 | .vscode/*.code-snippets
134 | 
135 | # Ignore code-workspaces
136 | *.code-workspace
137 | 
138 | ### Zsh ###
139 | # Zsh compiled script + zrecompile backup
140 | *.zwc
141 | *.zwc.old
142 | 
143 | # Zsh completion-optimization dumpfile
144 | *zcompdump*
145 | 
146 | # Zsh zcalc history
147 | .zcalc_history
148 | 
149 | # A popular plugin manager's files
150 | ._zinit
151 | .zinit_lstupd
152 | 
153 | # zdharma/zshelldoc tool's files
154 | zsdoc/data
155 | 
156 | # robbyrussell/oh-my-zsh/plugins/per-directory-history plugin's files
157 | # (when set-up to store the history in the local directory)
158 | .directory_history
159 | 
160 | # MichaelAquilina/zsh-autoswitch-virtualenv plugin's files
161 | # (for Zsh plugins using Python)
162 | .venv
163 | 
164 | # Zunit tests' output
165 | /tests/_output/*
166 | !/tests/_output/.gitkeep
167 | 
168 | # End of https://www.toptal.com/developers/gitignore/api/vim,zsh,visualstudiocode,pycharm+all
169 | 
170 | # Python cache
171 | __pycache__
172 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## v2.3.0 (20/01/2024)
 4 | 
 5 | * Add support for drives connected by MegaRAID
 6 | * Add user_capacity label (disk size in bytes) for each device
 7 | * Update prometheus-client 0.19.0
 8 | * Update Python 3.12
 9 | * Update base Docker image to Alpine 3.19
10 | 
11 | ## v2.2.0 (20/09/2022)
12 | 
13 | * Add support for USB bridged drives
14 | 
15 | ## v2.1.1 (17/09/2022)
16 | 
17 | * Handle smartctl exit code != 0 and add smartprom_exit_code metric
18 | 
19 | ## v2.1.0 (21/08/2022)
20 | 
21 | * Include new metric with SMART Health Status => smartprom_smart_passed
22 | * Add model_family, model_name, serial_number and type attributes for each device
23 | * The "drive" attribute now includes the full path. sda => /dev/sda
24 | * Add more detailed log traces about discovered devices
25 | * Update the Grafana dashboard
26 | * Update Readme to include example metrics
27 | 
28 | ## v2.0.1 (29/07/2022)
29 | 
30 | * Fix duplicated timeseries error. Resolves #36 (#37)
31 | * Add missing raw metrics for sat devices. Resolves #25 (#38)
32 | * Chore: Code cleanup
33 | 
34 | ## v2.0.0 (28/07/2022)
35 | 
36 | * Breaking change: Convert the metrics name into lower case (#13)
37 | * Update base Docker image and reduce image size. Resolves #17 (#31)
38 | * Publish Docker images for ARM architecture. Resolves #19 (#34)
 39 | * Make refresh interval configurable. Resolves #24 (#29)
40 | * Make exporter port and address configurable via environment variable (#27)
41 | * Include zero value raw metrics (#15)
42 | * Return more information on smartctl error. Resolves #23 (#28)
43 | * Handle error when devices are not detected (#32)
44 | * Using SMART tool to get the devices instead of glob (#14)
45 | * Avoid Python stdout buffering (#33)
46 | * Add Grafana dashboard. Resolves #18 (#30)
47 | * Added gitignore (#12)
48 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.12-alpine3.19
 2 | 
 3 | # Install smartmontools
 4 | RUN apk add --no-cache smartmontools
 5 | 
 6 | # Install Python dependencies
 7 | COPY requirements.txt /
 8 | RUN pip install -r /requirements.txt \
 9 |     # remove temporary files
10 |     && rm -rf /root/.cache
11 | 
12 | COPY ./smartprom.py /megaraid.py /
13 | 
14 | EXPOSE 9902
15 | ENTRYPOINT ["/usr/local/bin/python", "-u", "/smartprom.py"]
16 | 
17 | # HELP
18 | # docker build -t matusnovak/prometheus-smartctl:test .
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Prometheus S.M.A.R.T ctl metrics exporter
  2 | 
  3 | ![build](https://github.com/matusnovak/prometheus-smartctl/workflows/build/badge.svg)
  4 | 
  5 | This is a simple exporter for [Prometheus metrics](https://prometheus.io/) using [smartctl](https://www.smartmontools.org/). The script `smartprom.py` also comes with `smartprom.service` so that you can run this script in the background on your Linux OS via `systemctl`. The script listens on port `9902` by default; you can change it with the `SMARTCTL_EXPORTER_PORT` environment variable (see [Configuration](#configuration)). This script exports all of the data available from smartctl.
  6 | 
  7 | ## Install
  8 | 
  9 | _Note: You don't have to do this if you use the Docker image._
 10 | 
 11 | 1. Copy the `smartprom.service` file into `/etc/systemd/system` folder.
 12 | 2. Copy the `smartprom.py` file anywhere into your system.
 13 | 3. Modify `ExecStart=` in the `smartprom.service` so that it points to `smartprom.py` in your system.
 14 | 4. Run `chmod +x smartprom.py`
 15 | 5. Install Python dependencies for the root user, example: `sudo -H python3 -m pip install -r requirements.txt`
 16 | 6. Run `systemctl enable smartprom` and `systemctl start smartprom`
 17 | 7. Your metrics will now be available at `http://localhost:9902`
 18 | 
 19 | ## Docker usage
 20 | 
 21 | No extra configuration needed, should work out of the box. The `privileged: true` is required in order for `smartctl` to be able to access drives from the host.
 22 | 
 23 | Docker image is here: <https://hub.docker.com/r/matusnovak/prometheus-smartctl>
 24 | 
 25 | The architectures supported by this image are: linux/386, linux/amd64, linux/arm/v6, linux/arm/v7, linux/arm64/v8, linux/ppc64le, linux/s390x
 26 | 
 27 | Example docker-compose.yml:
 28 | 
 29 | ```yml
 30 | version: '3'
 31 | services:
 32 |   smartctl-exporter:
 33 |     image: matusnovak/prometheus-smartctl:latest
 34 |     container_name: smartctl-exporter
 35 |     privileged: true
 36 |     ports:
 37 |       - "9902:9902"
 38 |     restart: unless-stopped
 39 | ```
 40 | 
 41 | Example docker-compose.yml with node-exporter and file export:
 42 | 
 43 | 
 44 | ```yml
 45 | version: "3"
 46 | services:
 47 |     node-exporter:
 48 |         image: quay.io/prometheus/node-exporter
 49 |         restart: always
 50 |         volumes:
 51 |             - '/:/host:ro,rslave'
 52 |             - './tmp/:/tmp/'
 53 |         network_mode: "host"
 54 |         pid: "host"
 55 |         command:
 56 |             - "--path.rootfs=/host"
 57 |             - "--collector.textfile.directory=/tmp/"
 58 |     smartctl-exporter:
 59 |       image: matusnovak/prometheus-smartctl:latest
 60 |       container_name: smartctl-exporter
 61 |       privileged: true
 62 |       environment:
 63 |         - "SMARTCTL_METRICS_FILE_ENABLE=True"
 64 |       volumes:
 65 |         - ./tmp/:/metrics/
 66 |       restart: unless-stopped
 67 | ```
 68 | 
 69 | 
 70 | 
 71 | Your metrics will be available at <http://localhost:9902/metrics>
 72 | 
 73 | The exported metrics look like this:
 74 | 
 75 | ```shell
 76 | smartprom_smart_passed{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 1.0
 77 | smartprom_exit_code{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 0.0
 78 | smartprom_raw_read_error_rate{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 83.0
 79 | smartprom_raw_read_error_rate_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 2.23179896e+08
 80 | smartprom_power_on_hours{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 73.0
 81 | smartprom_power_on_hours_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 24299.0
 82 | smartprom_airflow_temperature_cel{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 60.0
 83 | smartprom_airflow_temperature_cel_raw{drive="/dev/sda",model_family="Seagate BarraCuda 3.5 (SMR)",model_name="ST6000DM003-2CY296",serial_number="WCT362XM",type="sat",user_capacity="6001175126016"} 40.0
 84 | ...
 85 | ```
 86 | 
 87 | If you are using a MegaRAID card to connect the drives, the exported metrics will look like this:
 88 | 
 89 | ```shell
 90 | smartprom_power_on_hours_raw{drive="megaraid,0",model_family="Western Digital Ultrastar He10/12",model_name="WDC WD80EMAZ-00M9AA0",serial_number="XXXXXXXX",type="sat",user_capacity="6001175126016"} 28522.0
 91 | smartprom_power_on_time_hours{drive="megaraid,1",model_family="Unknown",model_name="HGST HUH728080AL5200",serial_number="XXXXXXXX",type="scsi",user_capacity="6001175126016"} 37341.0
 92 | ```
 93 | 
 94 | ## Configuration
 95 | 
 96 | All configuration is done with environment variables.
 97 | 
 98 | - `SMARTCTL_REFRESH_INTERVAL`: (Optional) The refresh interval of the metrics. A larger value reduces CPU usage. The default is `60` seconds.
 99 | - `SMARTCTL_EXPORTER_PORT`: (Optional) The port the exporter should listen on. The default is `9902`.
100 | - `SMARTCTL_EXPORTER_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses.
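101 | - `SMARTCTL_METRICS_FILE_ENABLE`: (Optional) Enables writing the metrics to a file. This is useful if you already run a node exporter, which can then read the file (for example through its textfile collector, as in the docker-compose example above). The default is `False`.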
102 | - `SMARTCTL_METRICS_FILE_PATH`: (Optional) The path the metrics file is written to. When running in Docker, this path must also be mounted as a volume in the docker-compose file. The default is `/metrics/`.
103 | 
104 | ## Grafana dashboard
105 | 
106 | There is a reference Grafana dashboard in [grafana/grafana_dashboard.json](./grafana/grafana_dashboard.json).
107 | 
108 | ![](./grafana/grafana_dashboard_1.png)
109 | ![](./grafana/grafana_dashboard_2.png)
110 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 |   smartctl-metrics:
4 |     build: ./
5 |     restart: unless-stopped
6 |     privileged: true
7 |     ports:
8 |       - 9902:9902
9 | 


--------------------------------------------------------------------------------
/grafana/grafana_dashboard.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "__inputs": [
  3 |     {
  4 |       "name": "DS_PROMETHEUS",
  5 |       "label": "Prometheus",
  6 |       "description": "",
  7 |       "type": "datasource",
  8 |       "pluginId": "prometheus",
  9 |       "pluginName": "Prometheus"
 10 |     }
 11 |   ],
 12 |   "__elements": {},
 13 |   "__requires": [
 14 |     {
 15 |       "type": "grafana",
 16 |       "id": "grafana",
 17 |       "name": "Grafana",
 18 |       "version": "10.2.3"
 19 |     },
 20 |     {
 21 |       "type": "datasource",
 22 |       "id": "prometheus",
 23 |       "name": "Prometheus",
 24 |       "version": "1.0.0"
 25 |     },
 26 |     {
 27 |       "type": "panel",
 28 |       "id": "table",
 29 |       "name": "Table",
 30 |       "version": ""
 31 |     },
 32 |     {
 33 |       "type": "panel",
 34 |       "id": "timeseries",
 35 |       "name": "Time series",
 36 |       "version": ""
 37 |     }
 38 |   ],
 39 |   "annotations": {
 40 |     "list": [
 41 |       {
 42 |         "builtIn": 1,
 43 |         "datasource": {
 44 |           "type": "datasource",
 45 |           "uid": "grafana"
 46 |         },
 47 |         "enable": true,
 48 |         "hide": true,
 49 |         "iconColor": "rgba(0, 211, 255, 1)",
 50 |         "name": "Annotations & Alerts",
 51 |         "target": {
 52 |           "limit": 100,
 53 |           "matchAny": false,
 54 |           "tags": [],
 55 |           "type": "dashboard"
 56 |         },
 57 |         "type": "dashboard"
 58 |       }
 59 |     ]
 60 |   },
 61 |   "description": "",
 62 |   "editable": true,
 63 |   "fiscalYearStartMonth": 0,
 64 |   "gnetId": 9846,
 65 |   "graphTooltip": 0,
 66 |   "id": null,
 67 |   "links": [
 68 |     {
 69 |       "icon": "external link",
 70 |       "tags": [
 71 |         "system"
 72 |       ],
 73 |       "type": "dashboards"
 74 |     }
 75 |   ],
 76 |   "liveNow": false,
 77 |   "panels": [
 78 |     {
 79 |       "datasource": {
 80 |         "type": "prometheus",
 81 |         "uid": "${DS_PROMETHEUS}"
 82 |       },
 83 |       "description": "",
 84 |       "fieldConfig": {
 85 |         "defaults": {
 86 |           "color": {
 87 |             "mode": "thresholds"
 88 |           },
 89 |           "custom": {
 90 |             "align": "auto",
 91 |             "cellOptions": {
 92 |               "type": "auto"
 93 |             },
 94 |             "filterable": false,
 95 |             "inspect": false
 96 |           },
 97 |           "mappings": [
 98 |             {
 99 |               "options": {
100 |                 "0": {
101 |                   "index": 1,
102 |                   "text": "ERROR"
103 |                 },
104 |                 "1": {
105 |                   "index": 2,
106 |                   "text": "OK"
107 |                 },
108 |                 "-1": {
109 |                   "index": 0,
110 |                   "text": "UNKNOWN"
111 |                 }
112 |               },
113 |               "type": "value"
114 |             }
115 |           ],
116 |           "thresholds": {
117 |             "mode": "absolute",
118 |             "steps": [
119 |               {
120 |                 "color": "red",
121 |                 "value": null
122 |               },
123 |               {
124 |                 "color": "red",
125 |                 "value": 0
126 |               },
127 |               {
128 |                 "color": "green",
129 |                 "value": 1
130 |               }
131 |             ]
132 |           },
133 |           "unit": "none"
134 |         },
135 |         "overrides": [
136 |           {
137 |             "matcher": {
138 |               "id": "byRegexp",
139 |               "options": "health"
140 |             },
141 |             "properties": [
142 |               {
143 |                 "id": "custom.cellOptions",
144 |                 "value": {
145 |                   "mode": "gradient",
146 |                   "type": "color-background"
147 |                 }
148 |               }
149 |             ]
150 |           },
151 |           {
152 |             "matcher": {
153 |               "id": "byName",
154 |               "options": "user_capacity"
155 |             },
156 |             "properties": [
157 |               {
158 |                 "id": "unit",
159 |                 "value": "decbytes"
160 |               }
161 |             ]
162 |           }
163 |         ]
164 |       },
165 |       "gridPos": {
166 |         "h": 7,
167 |         "w": 24,
168 |         "x": 0,
169 |         "y": 0
170 |       },
171 |       "id": 38,
172 |       "links": [],
173 |       "maxDataPoints": 100,
174 |       "options": {
175 |         "cellHeight": "sm",
176 |         "footer": {
177 |           "countRows": false,
178 |           "fields": "",
179 |           "reducer": [
180 |             "sum"
181 |           ],
182 |           "show": false
183 |         },
184 |         "frameIndex": 1,
185 |         "showHeader": true,
186 |         "sortBy": [
187 |           {
188 |             "desc": false,
189 |             "displayName": "instance"
190 |           }
191 |         ]
192 |       },
193 |       "pluginVersion": "10.2.3",
194 |       "targets": [
195 |         {
196 |           "datasource": {
197 |             "type": "prometheus",
198 |             "uid": "${DS_PROMETHEUS}"
199 |           },
200 |           "exemplar": true,
201 |           "expr": "smartprom_smart_passed",
202 |           "format": "table",
203 |           "hide": false,
204 |           "instant": true,
205 |           "interval": "",
206 |           "intervalFactor": 1,
207 |           "legendFormat": "",
208 |           "refId": "A"
209 |         }
210 |       ],
211 |       "title": "SMART Health",
212 |       "transformations": [
213 |         {
214 |           "id": "groupBy",
215 |           "options": {
216 |             "fields": {
217 |               "Value": {
218 |                 "aggregations": [
219 |                   "last"
220 |                 ],
221 |                 "operation": "aggregate"
222 |               },
223 |               "Value #A": {
224 |                 "aggregations": [
225 |                   "sum"
226 |                 ],
227 |                 "operation": "aggregate"
228 |               },
229 |               "Value #B": {
230 |                 "aggregations": [
231 |                   "sum"
232 |                 ],
233 |                 "operation": "aggregate"
234 |               },
235 |               "Value #C": {
236 |                 "aggregations": [
237 |                   "sum"
238 |                 ],
239 |                 "operation": "aggregate"
240 |               },
241 |               "Value #D": {
242 |                 "aggregations": [
243 |                   "sum"
244 |                 ],
245 |                 "operation": "aggregate"
246 |               },
247 |               "Value #E": {
248 |                 "aggregations": [
249 |                   "sum"
250 |                 ],
251 |                 "operation": "aggregate"
252 |               },
253 |               "drive": {
254 |                 "aggregations": [],
255 |                 "operation": "groupby"
256 |               },
257 |               "instance": {
258 |                 "aggregations": [],
259 |                 "operation": "groupby"
260 |               },
261 |               "model_family": {
262 |                 "aggregations": [],
263 |                 "operation": "groupby"
264 |               },
265 |               "model_name": {
266 |                 "aggregations": [],
267 |                 "operation": "groupby"
268 |               },
269 |               "serial_number": {
270 |                 "aggregations": [],
271 |                 "operation": "groupby"
272 |               },
273 |               "type": {
274 |                 "aggregations": [],
275 |                 "operation": "groupby"
276 |               },
277 |               "user_capacity": {
278 |                 "aggregations": [],
279 |                 "operation": "groupby"
280 |               }
281 |             }
282 |           }
283 |         },
284 |         {
285 |           "id": "organize",
286 |           "options": {
287 |             "excludeByName": {
288 |               "Time": true,
289 |               "__name__": false,
290 |               "backup_id": true,
291 |               "backup_type": true,
292 |               "client_hostname": false,
293 |               "client_id": true,
294 |               "client_os_version": true,
295 |               "client_username": true,
296 |               "client_version": true,
297 |               "instance": false,
298 |               "job": true,
299 |               "snapshot_id": true
300 |             },
301 |             "includeByName": {},
302 |             "indexByName": {
303 |               "Value (last)": 7,
304 |               "drive": 1,
305 |               "instance": 0,
306 |               "model_family": 2,
307 |               "model_name": 3,
308 |               "serial_number": 4,
309 |               "type": 6,
310 |               "user_capacity": 5
311 |             },
312 |             "renameByName": {
313 |               "Value": "",
314 |               "Value #A": "reallocated_sector_ct",
315 |               "Value #A (sum)": "reallocated_sector_ct",
316 |               "Value #B": "reported_uncorrect",
317 |               "Value #B (sum)": "reported_uncorrect",
318 |               "Value #C (sum)": "command_timeout",
319 |               "Value #D (sum)": "current_pending_sector",
320 |               "Value #E (sum)": "offline_uncorrectable",
321 |               "Value (last)": "health",
322 |               "job": "",
323 |               "serial_number": "",
324 |               "user_capacity": ""
325 |             }
326 |           }
327 |         }
328 |       ],
329 |       "type": "table"
330 |     },
331 |     {
332 |       "datasource": {
333 |         "type": "prometheus",
334 |         "uid": "${DS_PROMETHEUS}"
335 |       },
336 |       "description": "",
337 |       "fieldConfig": {
338 |         "defaults": {
339 |           "color": {
340 |             "mode": "thresholds"
341 |           },
342 |           "custom": {
343 |             "align": "auto",
344 |             "cellOptions": {
345 |               "type": "auto"
346 |             },
347 |             "filterable": false,
348 |             "inspect": false
349 |           },
350 |           "mappings": [],
351 |           "thresholds": {
352 |             "mode": "absolute",
353 |             "steps": [
354 |               {
355 |                 "color": "green",
356 |                 "value": null
357 |               },
358 |               {
359 |                 "color": "#EAB839",
360 |                 "value": 1
361 |               },
362 |               {
363 |                 "color": "red",
364 |                 "value": 2
365 |               }
366 |             ]
367 |           },
368 |           "unit": "none"
369 |         },
370 |         "overrides": [
371 |           {
372 |             "matcher": {
373 |               "id": "byRegexp",
374 |               "options": ".*_.*"
375 |             },
376 |             "properties": [
377 |               {
378 |                 "id": "custom.cellOptions",
379 |                 "value": {
380 |                   "mode": "gradient",
381 |                   "type": "color-background"
382 |                 }
383 |               }
384 |             ]
385 |           }
386 |         ]
387 |       },
388 |       "gridPos": {
389 |         "h": 7,
390 |         "w": 24,
391 |         "x": 0,
392 |         "y": 7
393 |       },
394 |       "id": 27,
395 |       "links": [],
396 |       "maxDataPoints": 100,
397 |       "options": {
398 |         "cellHeight": "sm",
399 |         "footer": {
400 |           "countRows": false,
401 |           "fields": "",
402 |           "reducer": [
403 |             "sum"
404 |           ],
405 |           "show": false
406 |         },
407 |         "frameIndex": 1,
408 |         "showHeader": true,
409 |         "sortBy": [
410 |           {
411 |             "desc": false,
412 |             "displayName": "instance"
413 |           }
414 |         ]
415 |       },
416 |       "pluginVersion": "10.2.3",
417 |       "targets": [
418 |         {
419 |           "exemplar": true,
420 |           "expr": "smartprom_reallocated_sector_ct_raw",
421 |           "format": "table",
422 |           "hide": false,
423 |           "instant": true,
424 |           "interval": "",
425 |           "intervalFactor": 1,
426 |           "legendFormat": "",
427 |           "refId": "A",
428 |           "datasource": {
429 |             "type": "prometheus",
430 |             "uid": "${DS_PROMETHEUS}"
431 |           }
432 |         },
433 |         {
434 |           "exemplar": true,
435 |           "expr": "smartprom_reported_uncorrect_raw",
436 |           "format": "table",
437 |           "hide": false,
438 |           "instant": true,
439 |           "interval": "",
440 |           "intervalFactor": 1,
441 |           "legendFormat": "",
442 |           "refId": "B",
443 |           "datasource": {
444 |             "type": "prometheus",
445 |             "uid": "${DS_PROMETHEUS}"
446 |           }
447 |         },
448 |         {
449 |           "exemplar": true,
450 |           "expr": "smartprom_command_timeout_raw",
451 |           "format": "table",
452 |           "hide": false,
453 |           "instant": true,
454 |           "interval": "",
455 |           "intervalFactor": 1,
456 |           "legendFormat": "",
457 |           "refId": "C",
458 |           "datasource": {
459 |             "type": "prometheus",
460 |             "uid": "${DS_PROMETHEUS}"
461 |           }
462 |         },
463 |         {
464 |           "exemplar": true,
465 |           "expr": "smartprom_current_pending_sector_raw",
466 |           "format": "table",
467 |           "hide": false,
468 |           "instant": true,
469 |           "interval": "",
470 |           "intervalFactor": 1,
471 |           "legendFormat": "",
472 |           "refId": "D",
473 |           "datasource": {
474 |             "type": "prometheus",
475 |             "uid": "${DS_PROMETHEUS}"
476 |           }
477 |         },
478 |         {
479 |           "exemplar": true,
480 |           "expr": "smartprom_offline_uncorrectable_raw",
481 |           "format": "table",
482 |           "hide": false,
483 |           "instant": true,
484 |           "interval": "",
485 |           "intervalFactor": 1,
486 |           "legendFormat": "",
487 |           "refId": "E",
488 |           "datasource": {
489 |             "type": "prometheus",
490 |             "uid": "${DS_PROMETHEUS}"
491 |           }
492 |         }
493 |       ],
494 |       "title": "Error Metrics",
495 |       "transformations": [
496 |         {
497 |           "id": "merge",
498 |           "options": {}
499 |         },
500 |         {
501 |           "id": "groupBy",
502 |           "options": {
503 |             "fields": {
504 |               "Value #A": {
505 |                 "aggregations": [
506 |                   "sum"
507 |                 ],
508 |                 "operation": "aggregate"
509 |               },
510 |               "Value #B": {
511 |                 "aggregations": [
512 |                   "sum"
513 |                 ],
514 |                 "operation": "aggregate"
515 |               },
516 |               "Value #C": {
517 |                 "aggregations": [
518 |                   "sum"
519 |                 ],
520 |                 "operation": "aggregate"
521 |               },
522 |               "Value #D": {
523 |                 "aggregations": [
524 |                   "sum"
525 |                 ],
526 |                 "operation": "aggregate"
527 |               },
528 |               "Value #E": {
529 |                 "aggregations": [
530 |                   "sum"
531 |                 ],
532 |                 "operation": "aggregate"
533 |               },
534 |               "drive": {
535 |                 "aggregations": [],
536 |                 "operation": "groupby"
537 |               },
538 |               "instance": {
539 |                 "aggregations": [],
540 |                 "operation": "groupby"
541 |               }
542 |             }
543 |           }
544 |         },
545 |         {
546 |           "id": "organize",
547 |           "options": {
548 |             "excludeByName": {
549 |               "Time": true,
550 |               "__name__": false,
551 |               "backup_id": true,
552 |               "backup_type": true,
553 |               "client_hostname": false,
554 |               "client_id": true,
555 |               "client_os_version": true,
556 |               "client_username": true,
557 |               "client_version": true,
558 |               "instance": false,
559 |               "job": true,
560 |               "snapshot_id": true
561 |             },
562 |             "indexByName": {
563 |               "Time": 0,
564 |               "Value #A": 5,
565 |               "Value #B": 6,
566 |               "__name__": 1,
567 |               "drive": 3,
568 |               "instance": 2,
569 |               "job": 4
570 |             },
571 |             "renameByName": {
572 |               "Value": "",
573 |               "Value #A": "reallocated_sector_ct",
574 |               "Value #A (sum)": "reallocated_sector_ct",
575 |               "Value #B": "reported_uncorrect",
576 |               "Value #B (sum)": "reported_uncorrect",
577 |               "Value #C (sum)": "command_timeout",
578 |               "Value #D (sum)": "current_pending_sector",
579 |               "Value #E (sum)": "offline_uncorrectable",
580 |               "job": ""
581 |             }
582 |           }
583 |         }
584 |       ],
585 |       "type": "table"
586 |     },
587 |     {
588 |       "datasource": {
589 |         "type": "prometheus",
590 |         "uid": "${DS_PROMETHEUS}"
591 |       },
592 |       "description": "",
593 |       "fieldConfig": {
594 |         "defaults": {
595 |           "color": {
596 |             "mode": "thresholds"
597 |           },
598 |           "custom": {
599 |             "align": "auto",
600 |             "cellOptions": {
601 |               "type": "auto"
602 |             },
603 |             "filterable": false,
604 |             "inspect": false
605 |           },
606 |           "mappings": [],
607 |           "thresholds": {
608 |             "mode": "absolute",
609 |             "steps": [
610 |               {
611 |                 "color": "green",
612 |                 "value": null
613 |               },
614 |               {
615 |                 "color": "#EAB839",
616 |                 "value": 50
617 |               },
618 |               {
619 |                 "color": "red",
620 |                 "value": 60
621 |               }
622 |             ]
623 |           },
624 |           "unit": "none"
625 |         },
626 |         "overrides": [
627 |           {
628 |             "matcher": {
629 |               "id": "byName",
630 |               "options": "temperature_celsius"
631 |             },
632 |             "properties": [
633 |               {
634 |                 "id": "custom.cellOptions",
635 |                 "value": {
636 |                   "mode": "gradient",
637 |                   "type": "color-background"
638 |                 }
639 |               }
640 |             ]
641 |           },
642 |           {
643 |             "matcher": {
644 |               "id": "byName",
645 |               "options": "power_on_hours"
646 |             },
647 |             "properties": [
648 |               {
649 |                 "id": "unit",
650 |                 "value": "h"
651 |               }
652 |             ]
653 |           }
654 |         ]
655 |       },
656 |       "gridPos": {
657 |         "h": 7,
658 |         "w": 24,
659 |         "x": 0,
660 |         "y": 14
661 |       },
662 |       "id": 37,
663 |       "links": [],
664 |       "maxDataPoints": 100,
665 |       "options": {
666 |         "cellHeight": "sm",
667 |         "footer": {
668 |           "countRows": false,
669 |           "fields": "",
670 |           "reducer": [
671 |             "sum"
672 |           ],
673 |           "show": false
674 |         },
675 |         "frameIndex": 1,
676 |         "showHeader": true,
677 |         "sortBy": [
678 |           {
679 |             "desc": false,
680 |             "displayName": "instance"
681 |           }
682 |         ]
683 |       },
684 |       "pluginVersion": "10.2.3",
685 |       "targets": [
686 |         {
687 |           "exemplar": true,
688 |           "expr": "smartprom_temperature_celsius_raw",
689 |           "format": "table",
690 |           "hide": false,
691 |           "instant": true,
692 |           "interval": "",
693 |           "intervalFactor": 1,
694 |           "legendFormat": "",
695 |           "refId": "A",
696 |           "datasource": {
697 |             "type": "prometheus",
698 |             "uid": "${DS_PROMETHEUS}"
699 |           }
700 |         },
701 |         {
702 |           "exemplar": true,
703 |           "expr": "smartprom_power_cycle_count_raw",
704 |           "format": "table",
705 |           "hide": false,
706 |           "instant": true,
707 |           "interval": "",
708 |           "intervalFactor": 1,
709 |           "legendFormat": "",
710 |           "refId": "B",
711 |           "datasource": {
712 |             "type": "prometheus",
713 |             "uid": "${DS_PROMETHEUS}"
714 |           }
715 |         },
716 |         {
717 |           "exemplar": true,
718 |           "expr": "smartprom_power_on_hours_raw",
719 |           "format": "table",
720 |           "hide": false,
721 |           "instant": true,
722 |           "interval": "",
723 |           "intervalFactor": 1,
724 |           "legendFormat": "",
725 |           "refId": "C",
726 |           "datasource": {
727 |             "type": "prometheus",
728 |             "uid": "${DS_PROMETHEUS}"
729 |           }
730 |         }
731 |       ],
732 |       "title": "Info Metrics",
733 |       "transformations": [
734 |         {
735 |           "id": "merge",
736 |           "options": {}
737 |         },
738 |         {
739 |           "id": "groupBy",
740 |           "options": {
741 |             "fields": {
742 |               "Value #A": {
743 |                 "aggregations": [
744 |                   "sum"
745 |                 ],
746 |                 "operation": "aggregate"
747 |               },
748 |               "Value #B": {
749 |                 "aggregations": [
750 |                   "sum"
751 |                 ],
752 |                 "operation": "aggregate"
753 |               },
754 |               "Value #C": {
755 |                 "aggregations": [
756 |                   "sum"
757 |                 ],
758 |                 "operation": "aggregate"
759 |               },
760 |               "Value #D": {
761 |                 "aggregations": [
762 |                   "sum"
763 |                 ],
764 |                 "operation": "aggregate"
765 |               },
766 |               "Value #E": {
767 |                 "aggregations": [
768 |                   "sum"
769 |                 ],
770 |                 "operation": "aggregate"
771 |               },
772 |               "drive": {
773 |                 "aggregations": [],
774 |                 "operation": "groupby"
775 |               },
776 |               "instance": {
777 |                 "aggregations": [],
778 |                 "operation": "groupby"
779 |               }
780 |             }
781 |           }
782 |         },
783 |         {
784 |           "id": "organize",
785 |           "options": {
786 |             "excludeByName": {
787 |               "Time": true,
788 |               "__name__": false,
789 |               "backup_id": true,
790 |               "backup_type": true,
791 |               "client_hostname": false,
792 |               "client_id": true,
793 |               "client_os_version": true,
794 |               "client_username": true,
795 |               "client_version": true,
796 |               "instance": false,
797 |               "job": true,
798 |               "snapshot_id": true
799 |             },
800 |             "indexByName": {
801 |               "Time": 0,
802 |               "Value #A": 5,
803 |               "Value #B": 6,
804 |               "__name__": 1,
805 |               "drive": 3,
806 |               "instance": 2,
807 |               "job": 4
808 |             },
809 |             "renameByName": {
810 |               "Value": "",
811 |               "Value #A": "reallocated_sector_ct",
812 |               "Value #A (sum)": "temperature_celsius",
813 |               "Value #B": "reported_uncorrect",
814 |               "Value #B (sum)": "power_cycle_count",
815 |               "Value #C (sum)": "power_on_hours",
816 |               "Value #D (sum)": "current_pending_sector",
817 |               "Value #E (sum)": "offline_uncorrectable",
818 |               "job": ""
819 |             }
820 |           }
821 |         }
822 |       ],
823 |       "type": "table"
824 |     },
825 |     {
826 |       "datasource": {
827 |         "type": "prometheus",
828 |         "uid": "${DS_PROMETHEUS}"
829 |       },
830 |       "fieldConfig": {
831 |         "defaults": {
832 |           "color": {
833 |             "mode": "palette-classic"
834 |           },
835 |           "custom": {
836 |             "axisBorderShow": false,
837 |             "axisCenteredZero": false,
838 |             "axisColorMode": "text",
839 |             "axisLabel": "",
840 |             "axisPlacement": "auto",
841 |             "barAlignment": 0,
842 |             "drawStyle": "line",
843 |             "fillOpacity": 0,
844 |             "gradientMode": "none",
845 |             "hideFrom": {
846 |               "legend": false,
847 |               "tooltip": false,
848 |               "viz": false
849 |             },
850 |             "insertNulls": false,
851 |             "lineInterpolation": "linear",
852 |             "lineWidth": 1,
853 |             "pointSize": 5,
854 |             "scaleDistribution": {
855 |               "type": "linear"
856 |             },
857 |             "showPoints": "never",
858 |             "spanNulls": false,
859 |             "stacking": {
860 |               "group": "A",
861 |               "mode": "none"
862 |             },
863 |             "thresholdsStyle": {
864 |               "mode": "off"
865 |             }
866 |           },
867 |           "links": [],
868 |           "mappings": [],
869 |           "thresholds": {
870 |             "mode": "absolute",
871 |             "steps": [
872 |               {
873 |                 "color": "green",
874 |                 "value": null
875 |               },
876 |               {
877 |                 "color": "red",
878 |                 "value": 80
879 |               }
880 |             ]
881 |           },
882 |           "unit": "celsius"
883 |         },
884 |         "overrides": []
885 |       },
886 |       "gridPos": {
887 |         "h": 11,
888 |         "w": 24,
889 |         "x": 0,
890 |         "y": 21
891 |       },
892 |       "id": 36,
893 |       "links": [],
894 |       "options": {
895 |         "legend": {
896 |           "calcs": [
897 |             "mean",
898 |             "lastNotNull",
899 |             "max",
900 |             "min"
901 |           ],
902 |           "displayMode": "table",
903 |           "placement": "bottom",
904 |           "showLegend": true
905 |         },
906 |         "tooltip": {
907 |           "mode": "multi",
908 |           "sort": "none"
909 |         }
910 |       },
911 |       "pluginVersion": "8.5.0",
912 |       "targets": [
913 |         {
914 |           "datasource": {
915 |             "type": "prometheus",
916 |             "uid": "${DS_PROMETHEUS}"
917 |           },
918 |           "editorMode": "code",
919 |           "exemplar": true,
920 |           "expr": "smartprom_temperature_celsius_raw",
921 |           "format": "time_series",
922 |           "interval": "",
923 |           "intervalFactor": 2,
924 |           "legendFormat": "{{instance}} {{drive}}",
925 |           "range": true,
926 |           "refId": "A"
927 |         }
928 |       ],
929 |       "title": "Temperature",
930 |       "type": "timeseries"
931 |     }
932 |   ],
933 |   "refresh": "",
934 |   "schemaVersion": 39,
935 |   "tags": [
936 |     "system"
937 |   ],
938 |   "templating": {
939 |     "list": []
940 |   },
941 |   "time": {
942 |     "from": "now-30m",
943 |     "to": "now"
944 |   },
945 |   "timepicker": {
946 |     "refresh_intervals": [
947 |       "5s",
948 |       "10s",
949 |       "30s",
950 |       "1m",
951 |       "5m",
952 |       "15m",
953 |       "30m",
954 |       "1h",
955 |       "2h",
956 |       "1d"
957 |     ],
958 |     "time_options": [
959 |       "5m",
960 |       "15m",
961 |       "1h",
962 |       "6h",
963 |       "12h",
964 |       "24h",
965 |       "2d",
966 |       "7d",
967 |       "30d"
968 |     ]
969 |   },
970 |   "timezone": "",
971 |   "title": "SMART Exporter",
972 |   "uid": "hmXXiWPnk",
973 |   "version": 12,
974 |   "weekStart": ""
975 | }


--------------------------------------------------------------------------------
/grafana/grafana_dashboard_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_1.png


--------------------------------------------------------------------------------
/grafana/grafana_dashboard_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matusnovak/prometheus-smartctl/e3e2f6f8121d53d79bf41dc9d6597a9d05284eee/grafana/grafana_dashboard_2.png


--------------------------------------------------------------------------------
/megaraid.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import re
 3 | from typing import Optional
 4 | 
 5 | import smartprom
 6 | 
# Regex for a smartctl device type that addresses a drive behind a MegaRAID
# controller, e.g. "megaraid,0" or "sat+megaraid,0". Group 1 is the optional
# "sat+" prefix, group 2 is the "megaraid,N" device id.
MEGARAID_TYPE_PATTERN = r"(sat\+)?(megaraid,\d+)"
 8 | 
 9 | 
def get_megaraid_device_info(dev: str, typ: str) -> dict:
    """
    Query identity details of a drive attached through MegaRAID.

    dev is the device handed to smartctl and typ is the type string the
    "megaraid,N" id is taken from. The result uses the same keys as
    get_device_info (model_family, model_name, serial_number, user_capacity).
    An empty dict is returned when typ carries no MegaRAID id.
    """
    device_id = get_megaraid_device_id(typ)
    if device_id is None:
        return {}

    cmd = ["smartctl", "-i", "--json=c", "-d", device_id, dev]
    output, _ = smartprom.run_smartctl_cmd(cmd)
    info = json.loads(output)

    # When using SAS drive and smartmontools r5286 and later,
    # scsi_ prefix is added to model_name field.
    # https://sourceforge.net/p/smartmontools/code/5286/
    if "scsi_model_name" in info:
        model = info["scsi_model_name"]
    else:
        model = info.get("model_name", "Unknown")

    # The capacity is exported as a string label; fall back to "Unknown"
    # when smartctl did not report a byte count.
    capacity = "Unknown"
    if "bytes" in info.get("user_capacity", ()):
        capacity = str(info["user_capacity"]["bytes"])

    return {
        "model_family": info.get("model_family", "Unknown"),
        "model_name": model,
        "serial_number": info.get("serial_number", "Unknown"),
        "user_capacity": capacity,
    }
44 | 
45 | 
def get_megaraid_device_type(dev: str, typ: str) -> str:
    """
    Classify a drive attached through MegaRAID as "sat" or "scsi".

    Returns "unknown" when typ carries no MegaRAID id or when smartctl does
    not report a device protocol. Any protocol other than "ATA" maps to "scsi".
    """
    device_id = get_megaraid_device_id(typ)
    if device_id is None:
        return "unknown"

    cmd = ["smartctl", "-i", "--json=c", "-d", device_id, dev]
    output, _ = smartprom.run_smartctl_cmd(cmd)
    info = json.loads(output)

    device = info.get("device", ())
    if "protocol" not in device:
        return "unknown"

    if device["protocol"] == "ATA":
        return "sat"
    return "scsi"
59 | 
60 | 
def get_megaraid_device_id(typ: str) -> Optional[str]:
    """
    Extract the "megaraid,N" device id from a smartctl type string.

    Returns None when typ does not contain a MegaRAID id.
    """
    found = re.search(MEGARAID_TYPE_PATTERN, typ)
    return found.group(2) if found else None
70 | 
71 | 
def smart_megaraid(dev: str, megaraid_id: str) -> dict:
    """
    Read SMART attributes and health of a drive attached through MegaRAID.

    ATA drives yield (code, value) tuples like smart_sat, SCSI drives yield
    plain values like smart_scsi. Any other protocol yields an empty dict.
    """
    output, exit_code = smartprom.run_smartctl_cmd(
        ["smartctl", "-A", "-H", "-d", megaraid_id, "--json=c", dev]
    )
    report = json.loads(output)
    protocol = report["device"]["protocol"]

    if protocol == "ATA":
        # SATA device on MegaRAID
        attributes = smartprom.table_to_attributes_sat(
            report["ata_smart_attributes"]["table"]
        )
        attributes["smart_passed"] = (0, smartprom.get_smart_status(report))
        attributes["exit_code"] = (0, exit_code)
        return attributes

    if protocol == "SCSI":
        # SAS device on MegaRAID
        attributes = smartprom.results_to_attributes_scsi(report)
        attributes["smart_passed"] = smartprom.get_smart_status(report)
        attributes["exit_code"] = exit_code
        return attributes

    return {}
96 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | prometheus-client==0.19.0
2 | 


--------------------------------------------------------------------------------
/smartprom.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import json
  3 | import os
  4 | import subprocess
  5 | import time
  6 | import re
  7 | from typing import Tuple
  8 | 
  9 | import prometheus_client
 10 | 
 11 | import megaraid
 12 | 
# Label names attached to every Gauge created in collect().
LABELS = [
    "drive",
    "type",
    "model_family",
    "model_name",
    "serial_number",
    "user_capacity",
]
# Discovered drives keyed by device name; assigned in main() from get_drives().
DRIVES = {}
# Cache of metric name -> Gauge, filled lazily by collect().
METRICS = {}

# smartctl device types grouped by the query function collect() uses for them.
# https://www.smartmontools.org/wiki/USB
SAT_TYPES = ["sat", "usbjmicron", "usbprolific", "usbsunplus"]
NVME_TYPES = ["nvme", "sntasmedia", "sntjmicron", "sntrealtek"]
SCSI_TYPES = ["scsi"]
 28 | 
 29 | 
 30 | def run_smartctl_cmd(args: list) -> Tuple[str, int]:
 31 |     """
 32 |     Runs the smartctl command on the system
 33 |     """
 34 |     out = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 35 |     stdout, stderr = out.communicate()
 36 | 
 37 |     # exit code can be != 0 even if the command returned valid data
 38 |     # see EXIT STATUS in
 39 |     # https://www.smartmontools.org/browser/trunk/smartmontools/smartctl.8.in
 40 |     if out.returncode != 0:
 41 |         stdout_msg = stdout.decode("utf-8") if stdout is not None else ""
 42 |         stderr_msg = stderr.decode("utf-8") if stderr is not None else ""
 43 |         print(
 44 |             f"WARNING: Command returned exit code {out.returncode}. "
 45 |             f"Stdout: '{stdout_msg}' Stderr: '{stderr_msg}'"
 46 |         )
 47 | 
 48 |     return stdout.decode("utf-8"), out.returncode
 49 | 
 50 | 
 51 | def get_drives() -> dict:
 52 |     """
 53 |     Returns a dictionary of devices and its types
 54 |     """
 55 |     disks = {}
 56 |     result, _ = run_smartctl_cmd(["smartctl", "--scan-open", "--json=c"])
 57 |     result_json = json.loads(result)
 58 | 
 59 |     if "devices" in result_json:
 60 |         devices = result_json["devices"]
 61 | 
 62 |         # Ignore devices that fail on open, such as Virtual Drives created by MegaRAID.
 63 |         devices = list(
 64 |             filter(
 65 |                 lambda x: (
 66 |                     x.get("open_error", "")
 67 |                     != "DELL or MegaRaid controller, please try adding '-d megaraid,N'"
 68 |                 ),
 69 |                 devices,
 70 |             )
 71 |         )
 72 | 
 73 |         for device in devices:
 74 |             dev = device["name"]
 75 |             if re.match(megaraid.MEGARAID_TYPE_PATTERN, device["type"]):
 76 |                 # If drive is connected by MegaRAID, dev has a bus name like "/dev/bus/0".
 77 |                 # After retrieving the disk information using the bus name,
 78 |                 # replace dev with a disk ID such as "megaraid,0".
 79 |                 disk_attrs = megaraid.get_megaraid_device_info(dev, device["type"])
 80 |                 disk_attrs["type"] = megaraid.get_megaraid_device_type(
 81 |                     dev, device["type"]
 82 |                 )
 83 |                 disk_attrs["bus_device"] = dev
 84 |                 disk_attrs["megaraid_id"] = megaraid.get_megaraid_device_id(
 85 |                     device["type"]
 86 |                 )
 87 |                 dev = disk_attrs["megaraid_id"]
 88 |             else:
 89 |                 disk_attrs = get_device_info(dev)
 90 |                 disk_attrs["type"] = device["type"]
 91 |             disks[dev] = disk_attrs
 92 |             print("Discovered device", dev, "with attributes", disk_attrs)
 93 |     else:
 94 |         print("No devices found. Make sure you have enough privileges.")
 95 |     return disks
 96 | 
 97 | 
 98 | def get_device_info(dev: str) -> dict:
 99 |     """
100 |     Returns a dictionary of device info
101 |     """
102 |     results, _ = run_smartctl_cmd(["smartctl", "-i", "--json=c", dev])
103 |     results = json.loads(results)
104 |     user_capacity = "Unknown"
105 |     if "user_capacity" in results and "bytes" in results["user_capacity"]:
106 |         user_capacity = str(results["user_capacity"]["bytes"])
107 |     return {
108 |         "model_family": results.get("model_family", "Unknown"),
109 |         "model_name": results.get("model_name", "Unknown"),
110 |         "serial_number": results.get("serial_number", "Unknown"),
111 |         "user_capacity": user_capacity,
112 |     }
113 | 
114 | 
def get_smart_status(results: dict) -> int:
    """
    Returns a 1, 0 or -1 depending on if result from
    smart status is True, False or unknown.

    "Unknown" covers both a missing "smart_status" entry and a
    "smart_status" entry that carries no "passed" field.
    """
    status = results.get("smart_status")
    if not isinstance(status, dict):
        return -1

    passed = status.get("passed")
    if passed is None:
        # smartctl reported a status object without a verdict
        return -1
    return int(bool(passed))
122 | 
123 | 
def smart_sat(dev: str) -> dict:
    """
    Query a "sat" device (internal or external) with smartctl.

    Returns a dict of attribute name -> (attribute code, value). The
    synthetic entries "smart_passed" and "exit_code" use code 0.
    """
    cmd = ["smartctl", "-A", "-H", "-d", "sat", "--json=c", dev]
    output, exit_code = run_smartctl_cmd(cmd)
    report = json.loads(output)

    attributes = table_to_attributes_sat(report["ata_smart_attributes"]["table"])
    attributes.update(
        smart_passed=(0, get_smart_status(report)),
        exit_code=(0, exit_code),
    )
    return attributes
138 | 
139 | 
def _parse_sat_raw_string(raw_string: str):
    """
    Return the leading integer of a smartctl raw string, or None when the
    string has no parsable integer.
    """
    try:
        # example raw_string: "33" or "43 (Min/Max 39/46)"
        return int(raw_string.split()[0])
    except (ValueError, IndexError):
        pass

    # example raw_string: "20071h+27m+15.375s"
    if "h+" in raw_string:
        try:
            return int(raw_string.split("h+")[0])
        except ValueError:
            pass
    return None


def table_to_attributes_sat(data: list) -> dict:
    """
    Returns a results["ata_smart_attributes"]["table"]
    processed into an attributes dict

    data is the list of attribute entries from smartctl's JSON output.
    For every entry the result holds name -> (code, normalized value) and,
    when the raw string can be parsed, name + "_raw" -> (code, raw value).
    An unparsable raw string is reported and skipped instead of raising.
    """
    attributes = {}
    for metric in data:
        code = int(metric["id"])
        name = metric["name"]
        attributes[name] = (code, metric["value"])

        # metric['raw']['value'] contains values difficult to understand for temperatures and time up
        # that's why the string value is parsed instead
        raw_string = metric["raw"]["string"]
        value_raw = _parse_sat_raw_string(raw_string)
        if value_raw is None:
            print(
                f"Raw value of sat metric '{name}' can't be parsed. raw_string: {raw_string} "
                f"raw_int: {metric['raw']['value']}"
            )
            continue
        attributes[f"{name}_raw"] = (code, value_raw)
    return attributes
172 | 
173 | 
def smart_nvme(dev: str) -> dict:
    """
    Query an "nvme" device (internal or external) with smartctl.

    Returns the entries of the NVMe health information log as a flat dict,
    plus "smart_passed" and "exit_code". The "temperature_sensors" list is
    expanded into temperature_sensor1, temperature_sensor2, ...
    """
    cmd = ["smartctl", "-A", "-H", "-d", "nvme", "--json=c", dev]
    output, exit_code = run_smartctl_cmd(cmd)
    report = json.loads(output)

    attributes = {"smart_passed": get_smart_status(report), "exit_code": exit_code}
    health_log = report["nvme_smart_health_information_log"]
    for key, value in health_log.items():
        if key != "temperature_sensors":
            attributes[key] = value
            continue
        # One metric per sensor, numbered from 1
        attributes.update(
            (f"temperature_sensor{index}", reading)
            for index, reading in enumerate(value, start=1)
        )
    return attributes
193 | 
194 | 
def smart_scsi(dev: str) -> dict:
    """
    Query a "scsi" device with smartctl.

    Returns the flattened integer fields of the smartctl result,
    plus "smart_passed" and "exit_code".
    """
    cmd = ["smartctl", "-A", "-H", "-d", "scsi", "--json=c", dev]
    output, exit_code = run_smartctl_cmd(cmd)
    report = json.loads(output)

    attributes = results_to_attributes_scsi(report)
    attributes.update(smart_passed=get_smart_status(report), exit_code=exit_code)
    return attributes
209 | 
210 | 
def results_to_attributes_scsi(data: dict) -> dict:
    """
    Flatten the smartctl JSON result of a SCSI device into an attributes dict.

    Top-level integers are kept under their own key. Integers found one level
    down inside a dict are kept as "<key>_<label>". Everything else
    (strings, lists, booleans, deeper nesting) is dropped.
    """
    flattened = {}
    for section, payload in data.items():
        # Exact type checks on purpose: bool is a subclass of int and must
        # not be exported as a metric here.
        if type(payload) is int:
            flattened[section] = payload
            continue
        if type(payload) is not dict:
            continue
        flattened.update(
            {
                f"{section}_{field}": number
                for field, number in payload.items()
                if type(number) is int
            }
        )
    return flattened
225 | 
226 | 
def collect():
    """
    Query every drive in DRIVES and publish its attributes as Gauge metrics.

    Gauges are created on first use and cached in METRICS. A failure while
    handling one drive is printed and does not stop the remaining drives.
    """
    # Characters replaced (or removed) to turn an attribute name into a metric name
    substitutions = (("-", "_"), (" ", "_"), (".", ""), ("/", "_"))

    for drive, drive_attrs in DRIVES.items():
        typ = drive_attrs["type"]
        # SAT-style attributes are (code, value) tuples, all others plain values
        sat_like = typ in SAT_TYPES
        try:
            if "megaraid_id" in drive_attrs:
                attrs = megaraid.smart_megaraid(
                    drive_attrs["bus_device"], drive_attrs["megaraid_id"]
                )
            elif sat_like:
                attrs = smart_sat(drive)
            elif typ in NVME_TYPES:
                attrs = smart_nvme(drive)
            elif typ in SCSI_TYPES:
                attrs = smart_scsi(drive)
            else:
                continue

            for key, values in attrs.items():
                # Metric name in lower case
                sanitized = key
                for old, new in substitutions:
                    sanitized = sanitized.replace(old, new)
                metric = "smartprom_" + sanitized.lower()

                # Create metric if it does not exist
                if metric not in METRICS:
                    desc = key.replace("_", " ")
                    code = hex(values[0] if sat_like else values)
                    print(f"Adding new gauge {metric} ({code})")
                    METRICS[metric] = prometheus_client.Gauge(
                        metric, f"({code}) {desc}", LABELS
                    )

                # Update metric
                metric_val = values[1] if sat_like else values
                gauge = METRICS[metric].labels(
                    drive=drive,
                    type=typ,
                    model_family=drive_attrs["model_family"],
                    model_name=drive_attrs["model_name"],
                    serial_number=drive_attrs["serial_number"],
                    user_capacity=drive_attrs["user_capacity"],
                )
                gauge.set(metric_val)

        except Exception as e:
            print("Exception:", e)
284 | 
285 | 
def main():
    """
    Starts a server and exposes the metrics

    Configuration is read from the environment:
    SMARTCTL_EXPORTER_ADDRESS (default "0.0.0.0"),
    SMARTCTL_EXPORTER_PORT (default 9902),
    SMARTCTL_REFRESH_INTERVAL in seconds (default 60),
    SMARTCTL_METRICS_FILE_ENABLE (default disabled) and
    SMARTCTL_METRICS_FILE_PATH, the directory that receives "smartctl.prom"
    (default "/metrics/").

    Never returns: metrics are refreshed in an endless loop.
    """
    global DRIVES

    # Validate configuration
    exporter_address = os.environ.get("SMARTCTL_EXPORTER_ADDRESS", "0.0.0.0")
    exporter_port = int(os.environ.get("SMARTCTL_EXPORTER_PORT", 9902))
    refresh_interval = int(os.environ.get("SMARTCTL_REFRESH_INTERVAL", 60))

    # Environment values are strings, so "false" or "0" would be truthy if
    # used directly. Treat the usual "off" spellings as disabled and any
    # other non-empty value as enabled.
    metrics_file_flag = os.environ.get("SMARTCTL_METRICS_FILE_ENABLE", "")
    metrics_file_enable = metrics_file_flag.strip().lower() not in (
        "",
        "0",
        "false",
        "no",
        "off",
    )

    # os.path.join keeps the result valid whether or not the configured
    # directory ends with a path separator.
    metrics_file_path = os.environ.get("SMARTCTL_METRICS_FILE_PATH", "/metrics/")
    metrics_file = os.path.join(metrics_file_path, "smartctl.prom")

    # Get drives (test smartctl)
    DRIVES = get_drives()

    # Start Prometheus server
    prometheus_client.start_http_server(exporter_port, exporter_address)
    print(f"Server listening in http://{exporter_address}:{exporter_port}/metrics")

    while True:
        collect()
        if metrics_file_enable:
            prometheus_client.write_to_textfile(
                metrics_file, prometheus_client.REGISTRY
            )
        time.sleep(refresh_interval)
311 | 
312 | 
# Start the exporter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
315 | 


--------------------------------------------------------------------------------
/smartprom.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=SMART Prometheus metrics
 3 | 
 4 | [Service]
 5 | ExecStart=/var/lib/homelab/smartprom.py
 6 | Restart=always
 7 | 
 8 | [Install]
 9 | WantedBy=multi-user.target
10 | 


--------------------------------------------------------------------------------