├── .github └── workflows │ ├── docker-images.yml │ ├── go-binaries.yml │ └── helm-charts.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── charts ├── idrac-exporter │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ │ ├── _helpers.tpl │ │ ├── config.yaml │ │ ├── deployment.yaml │ │ ├── prometheusrule.yaml │ │ ├── service.yaml │ │ ├── serviceaccount.yaml │ │ └── servicemonitor.yaml │ └── values.yaml └── release-config.yaml ├── cmd └── idrac_exporter │ ├── handler.go │ └── main.go ├── default-config.yml ├── entrypoint.sh ├── go.mod ├── go.sum ├── grafana ├── alerts.yaml ├── bmc1.png ├── bmc2.png ├── idrac.json ├── idrac_overview.json └── status-alternative.json ├── internal ├── collector │ ├── client.go │ ├── collector.go │ ├── metrics.go │ ├── model.go │ ├── redfish.go │ └── unmarshal.go ├── config │ ├── config.go │ ├── discover.go │ ├── env.go │ └── model.go ├── log │ ├── default.go │ └── logger.go └── version │ └── version.go └── sample-config.yml /.github/workflows/docker-images.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Images 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | push-to-registry: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Docker login 27 | uses: docker/login-action@v3 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_PASSWORD }} 31 | 32 | - name: Docker metadata 33 | id: meta 34 | uses: docker/metadata-action@v5 35 | with: 36 | images: mrlhansen/idrac_exporter 37 | tags: type=semver,pattern={{version}} 38 | 39 | - name: Build and push 40 | id: push 41 | uses: docker/build-push-action@v5 42 | with: 43 | context: . 
44 | file: Dockerfile 45 | platforms: linux/amd64,linux/arm64 46 | push: true 47 | tags: ${{ steps.meta.outputs.tags }} 48 | labels: ${{ steps.meta.outputs.labels }} 49 | -------------------------------------------------------------------------------- /.github/workflows/go-binaries.yml: -------------------------------------------------------------------------------- 1 | name: Release Go Binaries 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | permissions: 9 | contents: write 10 | packages: write 11 | 12 | jobs: 13 | release-go-binaries: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | goos: [linux, windows, darwin] 18 | goarch: [amd64, arm64] 19 | exclude: 20 | - goarch: arm64 21 | goos: windows 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 27 | 28 | - name: Environment variables 29 | run: | 30 | echo "VERSION=$(git tag --points-at HEAD | grep -oP 'v\K[0-9.]+')" >> $GITHUB_ENV 31 | echo "REVISION=$(git rev-parse HEAD)" >> $GITHUB_ENV 32 | 33 | - name: Build and release 34 | uses: wangyoucao577/go-release-action@v1 35 | with: 36 | github_token: ${{ secrets.GITHUB_TOKEN }} 37 | goos: ${{ matrix.goos }} 38 | goarch: ${{ matrix.goarch }} 39 | project_path: "./cmd/idrac_exporter" 40 | binary_name: "idrac_exporter" 41 | md5sum: false 42 | compress_assets: false 43 | ldflags: > 44 | -X github.com/mrlhansen/idrac_exporter/internal/version.Version=${{ env.VERSION }} 45 | -X github.com/mrlhansen/idrac_exporter/internal/version.Revision=${{ env.REVISION }} 46 | -------------------------------------------------------------------------------- /.github/workflows/helm-charts.yml: -------------------------------------------------------------------------------- 1 | name: Release Helm Charts 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'charts/**' 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | release-helm-charts: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: 
actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Configure git 21 | run: | 22 | git config user.name "$GITHUB_ACTOR" 23 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com" 24 | 25 | - name: Install helm 26 | uses: azure/setup-helm@v3 27 | 28 | - name: Release charts 29 | uses: helm/chart-releaser-action@v1.6.0 30 | env: 31 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 32 | with: 33 | skip_existing: true 34 | mark_as_latest: false 35 | config: charts/release-config.yaml 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ARCH= 2 | FROM ${ARCH}golang:1.24-alpine3.20 AS builder 3 | 4 | WORKDIR /app/src 5 | RUN apk add -U make git grep 6 | COPY . . 
7 | RUN make build 8 | 9 | FROM ${ARCH}alpine:3.20 AS container 10 | 11 | WORKDIR /app 12 | COPY --from=builder /app/src/idrac_exporter /app/bin/ 13 | RUN apk add -U bash 14 | COPY default-config.yml /etc/prometheus/idrac.yml 15 | COPY entrypoint.sh /app 16 | ENTRYPOINT ["/app/entrypoint.sh"] 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Martin Hansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION = $(or $(shell git tag --points-at HEAD | grep -oP 'v\K[0-9.]+'), unknown) 2 | REVISION = $(shell git rev-parse HEAD) 3 | 4 | REPOSITORY := github.com/mrlhansen/idrac_exporter 5 | LDFLAGS := -X $(REPOSITORY)/internal/version.Version=$(VERSION) 6 | LDFLAGS += -X $(REPOSITORY)/internal/version.Revision=$(REVISION) 7 | GOFLAGS := -ldflags "$(LDFLAGS)" 8 | RUNFLAGS ?= -config config.yml -verbose 9 | 10 | build: 11 | go build $(GOFLAGS) -o idrac_exporter ./cmd/idrac_exporter 12 | 13 | run: 14 | go run ./cmd/idrac_exporter $(RUNFLAGS) 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iDRAC Exporter 2 | This is a simple Redfish (iDRAC, iLO, XClarity) exporter for [Prometheus](https://prometheus.io). The exporter uses the Redfish API to collect information and it supports the regular `/metrics` endpoint to expose metrics from the host passed via the `target` parameter. For example, to scrape metrics from a Redfish instance on the IP address `192.168.1.1` call the following URL address. 3 | 4 | ```text 5 | http://localhost:9348/metrics?target=192.168.1.1 6 | ``` 7 | 8 | Every time the exporter is called with a new target, it tries to establish a connection to the Redfish API. If the target is unreachable or if the authentication fails, the status code 500 is returned together with an error message. 9 | 10 | 11 | ## Supported Systems 12 | The program supports several different systems, because they all follow the Redfish standard. The exporter has been tested on the following systems. 
13 | 14 | * HPE iLO 15 | * Dell iDRAC 16 | * Lenovo XClarity 17 | 18 | 19 | ## Installation 20 | The exporter is written in [Go](https://golang.org) and it can be downloaded and compiled using: 21 | 22 | ```sh 23 | go install github.com/mrlhansen/idrac_exporter/cmd/idrac_exporter@latest 24 | ``` 25 | 26 | ### Docker 27 | There is a `Dockerfile` in the repository for building a container image. To build it locally use: 28 | 29 | ```sh 30 | docker build -t idrac_exporter . 31 | ``` 32 | 33 | There are also pre-built images available on [Docker Hub](https://hub.docker.com/r/mrlhansen/idrac_exporter). To download and run these images, simply use the following command. 34 | 35 | ```sh 36 | docker run -v /host-path/config.yml:/etc/prometheus/idrac.yml -p 9348:9348 mrlhansen/idrac_exporter 37 | ``` 38 | 39 | Remember to set the listen address to `0.0.0.0` when running inside a container. 40 | 41 | ### Helm Chart 42 | There is also an official [Helm](https://helm.sh/docs/) chart for installing the exporter in a Kubernetes cluster. 43 | 44 | ```sh 45 | helm repo add idrac-exporter https://mrlhansen.github.io/idrac_exporter 46 | helm install idrac-exporter idrac-exporter/idrac-exporter 47 | ``` 48 | 49 | 50 | ## Configuration 51 | There are many [configuration options](sample-config.yml) for the exporter, but most importantly you need to provide a username and password for all remote hosts, and you can select which metrics should be exported. By default, the exporter looks for the configuration file in `/etc/prometheus/idrac.yml` but the path can be specified using the `-config` option. 
52 | 53 | ```yaml 54 | address: 127.0.0.1 # Listen address 55 | port: 9348 # Listen port 56 | timeout: 10 # HTTP timeout (in seconds) for Redfish API calls 57 | hosts: 58 | default: 59 | username: user 60 | password: pass 61 | 123.45.6.78: 62 | username: user 63 | password: pass 64 | metrics: 65 | all: true 66 | ``` 67 | 68 | As shown in the above example, under `hosts` you can specify login information for individual hosts via their IP address or hostname, otherwise the exporter will attempt to use the login information under `default`. The login user only needs read-only permissions. Under `metrics` you can select which kinds of metrics should be returned. 69 | 70 | **For a detailed description of the configuration, please see the [sample-config.yml](sample-config.yml) file. In this file you can also find the corresponding environment variables for the different configuration options.** 71 | 72 | Because the metrics are collected on-demand it can take several minutes to scrape the metrics endpoint, depending on how many metrics groups are selected in the configuration file. For this reason, you should carefully select the metrics of interest and make sure Prometheus is configured with a sufficiently high scrape timeout value. 73 | 74 | 75 | ## List of Metrics 76 | The exporter can expose the metrics described in the sections below. For each metric you can see the name and the associated labels. For all `_health` metrics the value has the following mapping. 77 | 78 | * 0 = OK 79 | * 1 = Warning 80 | * 2 = Critical 81 | 82 | ### System 83 | These metrics include power, health, and LED state, total memory size, number of physical processors, BIOS version and machine information. 
84 | 85 | ```c 86 | idrac_system_power_on 87 | idrac_system_health{status} 88 | idrac_system_indicator_led_on{state} // deprecated, new metric below 89 | idrac_system_indicator_active 90 | idrac_system_memory_size_bytes 91 | idrac_system_cpu_count{model} 92 | idrac_system_bios_info{version} 93 | idrac_system_machine_info{manufacturer,model,serial,sku} 94 | ``` 95 | 96 | ### Sensors 97 | These metrics include temperature and FAN health and speeds. 98 | 99 | ```text 100 | idrac_sensors_temperature{id,name,units} 101 | idrac_sensors_fan_health{id,name,status} 102 | idrac_sensors_fan_speed{id,name,units} 103 | ``` 104 | 105 | ### Power 106 | These metrics include two sets of power readings. The first set is PSU power readings, such as power usage, total power capacity, input voltage and efficiency. 107 | 108 | ```text 109 | idrac_power_supply_health{id,status} 110 | idrac_power_supply_output_watts{id} 111 | idrac_power_supply_input_watts{id} 112 | idrac_power_supply_capacity_watts{id} 113 | idrac_power_supply_input_voltage{id} 114 | idrac_power_supply_efficiency_percent{id} 115 | ``` 116 | 117 | The second set is the power consumption for the entire system (and sometimes also for certain subsystems, such as the CPUs). The first two metrics are instantaneous readings, while the last four metrics are the minimum, maximum and average power consumption as measured over the reported interval. 118 | 119 | ```text 120 | idrac_power_control_consumed_watts{id,name} 121 | idrac_power_control_capacity_watts{id,name} 122 | idrac_power_control_min_consumed_watts{id,name} 123 | idrac_power_control_max_consumed_watts{id,name} 124 | idrac_power_control_avg_consumed_watts{id,name} 125 | idrac_power_control_interval_in_minutes{id,name} 126 | ``` 127 | 128 | ### Processors 129 | These metrics include information about the CPUs in the system. 
130 | 131 | ```text 132 | idrac_cpu_health{id,status} 133 | idrac_cpu_info{arch,id,manufacturer,model,socket} 134 | idrac_cpu_voltage{id} 135 | idrac_cpu_max_speed_mhz{id} 136 | idrac_cpu_operating_speed_mhz{id} 137 | idrac_cpu_total_cores{id} 138 | idrac_cpu_total_threads{id} 139 | ``` 140 | 141 | ### System Event Log 142 | This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the Unix timestamp for when the entry was created. 143 | 144 | ```text 145 | idrac_events_log_entry{id,message,severity} 146 | ``` 147 | 148 | ### Storage 149 | The storage metrics are divided into four different groups. 150 | 151 | * The first group defines a storage subgroup inside Redfish. All other storage metrics are children of this subgroup. 152 | * The second group is information about physical drives. 153 | * The third group is information about storage controllers. 154 | * The fourth group is information about virtual volumes, such as RAIDs. 155 | 156 | There is one last metric for Dell systems, which reports the health status of an associated RAID controller battery (when present). 
157 | 158 | ```text 159 | idrac_storage_info{id,name} 160 | idrac_storage_health{id,status} 161 | 162 | idrac_storage_drive_info{id,manufacturer,mediatype,model,name,protocol,serial,slot,storage_id} 163 | idrac_storage_drive_health{id,status,storage_id} 164 | idrac_storage_drive_capacity_bytes{id,storage_id} 165 | idrac_storage_drive_life_left_percent{id,storage_id} 166 | idrac_storage_drive_indicator_active{id,storage_id} 167 | 168 | idrac_storage_controller_info{firmware,id,manufacturer,model,name,storage_id} 169 | idrac_storage_controller_health{id,status,storage_id} 170 | idrac_storage_controller_speed_mbps{id,storage_id} 171 | 172 | idrac_storage_volume_info{id,name,raidtype,storage_id,volumetype} 173 | idrac_storage_volume_health{id,status,storage_id} 174 | idrac_storage_volume_capacity_bytes{id,storage_id} 175 | idrac_storage_volume_media_span_count{id,storage_id} 176 | 177 | idrac_dell_controller_battery_health{id,name,status,storage_id} 178 | ``` 179 | 180 | ### Memory 181 | These metrics include information about memory modules in the machine. 182 | 183 | ```text 184 | idrac_memory_module_info{ecc,id,manufacturer,name,rank,serial,type} 185 | idrac_memory_module_health{id,status} 186 | idrac_memory_module_capacity_bytes{id} 187 | idrac_memory_module_speed_mhz{id} 188 | ``` 189 | 190 | ### Network 191 | These metrics include health of network interfaces, as well as health, link speed, and link status for each of the network ports. 192 | 193 | ```text 194 | idrac_network_interface_health{id,status} 195 | idrac_network_port_health{id,interface_id,status} 196 | idrac_network_port_link_up{id,interface_id,status} 197 | idrac_network_port_speed_mbps{id,interface_id} 198 | ``` 199 | 200 | ### Extra 201 | These metrics do not belong anywhere else and they might be OEM specific. At the moment only two Dell specific metrics are exported. 
202 | 203 | ```text 204 | idrac_dell_battery_rollup_health{status} 205 | idrac_dell_estimated_system_airflow_cfm 206 | ``` 207 | 208 | ### Exporter 209 | These metrics contain information about the exporter itself, such as build information and how many errors have been encountered when scraping the Redfish API. 210 | 211 | ```text 212 | idrac_exporter_build_info{goversion,revision,version} 213 | idrac_exporter_scrape_errors_total 214 | ``` 215 | 216 | 217 | ## Endpoints 218 | The exporter currently has four different endpoints. 219 | 220 | | Endpoint | Parameters | Description | 221 | | ------------ | ---------- | --------------------------------------------------- | 222 | | `/metrics` | `target` | Metrics for the specified target | 223 | | `/reset` | `target` | Reset internal state for the specified target | 224 | | `/discover` | | Endpoint for Prometheus Service Discovery | 225 | | `/health` | | Returns http status 200 and nothing else | 226 | 227 | 228 | ## Prometheus Configuration 229 | For the situation where you have a single `idrac_exporter` and multiple hosts to query, the following `prometheus.yml` snippet can be used. Here `192.168.1.1` and `192.168.1.2` are the hosts to query, and `exporter:9348` is the address and port where `idrac_exporter` is running. 230 | 231 | ```yaml 232 | scrape_configs: 233 | - job_name: idrac 234 | static_configs: 235 | - targets: ['192.168.1.1', '192.168.1.2'] 236 | relabel_configs: 237 | - source_labels: [__address__] 238 | target_label: __param_target 239 | - source_labels: [__param_target] 240 | target_label: instance 241 | - target_label: __address__ 242 | replacement: exporter:9348 243 | ``` 244 | 245 | You can also use the service discovery mechanism in Prometheus to automatically discover all the targets configured in the configuration file for the exporter. The configuration is quite similar, but instead of static targets we query the discovery endpoint. 
246 | 247 | ```yaml 248 | scrape_configs: 249 | - job_name: idrac 250 | http_sd_configs: 251 | - url: http://exporter:9348/discover 252 | relabel_configs: 253 | - source_labels: [__address__] 254 | target_label: __param_target 255 | - source_labels: [__param_target] 256 | target_label: instance 257 | - source_labels: [__meta_url] 258 | target_label: __address__ 259 | regex: (https?.{3})([^\/]+)(.+) 260 | replacement: $2 261 | ``` 262 | 263 | 264 | ## Grafana Dashboard 265 | There are two Grafana Dashboards in the `grafana` folder, one that shows an overview of all systems and one that shows information for a specific machine. Thanks to [@7840vz](https://www.github.com/7840vz) for creating these! 266 | 267 | ![bmc1.png](grafana/bmc1.png) 268 | 269 | ![bmc2.png](grafana/bmc2.png) 270 | -------------------------------------------------------------------------------- /charts/idrac-exporter/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/idrac-exporter/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: idrac-exporter 3 | description: Chart for iDRAC Exporter 4 | type: application 5 | 6 | # This is the chart version. This version number should be incremented each time you make changes 7 | # to the chart and its templates, including the app version. 
8 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 9 | version: "2.0.0" 10 | 11 | # This is the version number of the application being deployed. This version number should be 12 | # incremented each time you make changes to the application. Versions are not expected to 13 | # follow Semantic Versioning. They should reflect the version the application is using. 14 | # It is recommended to use it with quotes. 15 | appVersion: "2.0.0" 16 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "idrac-exporter.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "idrac-exporter.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "idrac-exporter.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "idrac-exporter.labels" -}} 37 | helm.sh/chart: {{ include "idrac-exporter.chart" . 
}} 38 | {{ include "idrac-exporter.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "idrac-exporter.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "idrac-exporter.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "idrac-exporter.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "idrac-exporter.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/config.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.idracConfig }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "idrac-exporter.fullname" . }}-config 6 | labels: 7 | {{- include "idrac-exporter.labels" . | nindent 4 }} 8 | stringData: 9 | idrac.yml: {{ tpl .Values.idracConfig . | quote }} 10 | {{ end }} 11 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "idrac-exporter.fullname" . }} 5 | labels: 6 | {{- include "idrac-exporter.labels" . | nindent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | {{- include "idrac-exporter.selectorLabels" . 
| nindent 6 }} 12 | template: 13 | metadata: 14 | {{- with .Values.podAnnotations }} 15 | annotations: 16 | {{- toYaml . | nindent 8 }} 17 | {{- end }} 18 | labels: 19 | {{- include "idrac-exporter.labels" . | nindent 8 }} 20 | {{- with .Values.podLabels }} 21 | {{- toYaml . | nindent 8 }} 22 | {{- end }} 23 | spec: 24 | {{- with .Values.imagePullSecrets }} 25 | imagePullSecrets: 26 | {{- toYaml . | nindent 8 }} 27 | {{- end }} 28 | serviceAccountName: {{ include "idrac-exporter.serviceAccountName" . }} 29 | {{- with .Values.podSecurityContext }} 30 | securityContext: 31 | {{- toYaml . | nindent 8 }} 32 | {{- end }} 33 | containers: 34 | - name: {{ .Chart.Name }} 35 | {{- with .Values.securityContext }} 36 | securityContext: 37 | {{- toYaml . | nindent 12 }} 38 | {{- end }} 39 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 40 | imagePullPolicy: {{ .Values.image.pullPolicy }} 41 | {{- with .Values.extraArgs }} 42 | args: 43 | {{- toYaml . | nindent 12 }} 44 | {{- end }} 45 | ports: 46 | - name: http 47 | containerPort: {{ .Values.service.port }} 48 | protocol: TCP 49 | livenessProbe: 50 | {{- toYaml .Values.livenessProbe | nindent 12 }} 51 | readinessProbe: 52 | {{- toYaml .Values.readinessProbe | nindent 12 }} 53 | {{- with .Values.resources }} 54 | resources: 55 | {{- toYaml . | nindent 12 }} 56 | {{- end }} 57 | {{- with .Values.env }} 58 | env: 59 | {{- toYaml . | nindent 12 }} 60 | {{- end }} 61 | {{- if or (.Values.idracConfig) (.Values.volumeMounts) }} 62 | volumeMounts: 63 | {{- end }} 64 | {{- if .Values.idracConfig }} 65 | - mountPath: "/etc/prometheus" 66 | name: config 67 | readOnly: true 68 | {{- end }} 69 | {{- range .Values.volumeMounts }} 70 | - {{ toYaml . 
| indent 14 | trim }} 71 | {{- end -}} 72 | {{- if or (.Values.idracConfig) (.Values.volumes) }} 73 | volumes: 74 | {{- end }} 75 | {{- if or .Values.idracConfig }} 76 | - name: config 77 | secret: 78 | secretName: {{ include "idrac-exporter.fullname" . }}-config 79 | {{- end }} 80 | {{- range .Values.volumes }} 81 | - {{ toYaml . | indent 10 | trim }} 82 | {{- end }} 83 | {{- with .Values.nodeSelector }} 84 | nodeSelector: 85 | {{- toYaml . | nindent 8 }} 86 | {{- end }} 87 | {{- with .Values.affinity }} 88 | affinity: 89 | {{- toYaml . | nindent 8 }} 90 | {{- end }} 91 | {{- with .Values.tolerations }} 92 | tolerations: 93 | {{- toYaml . | nindent 8 }} 94 | {{- end }} 95 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/prometheusrule.yaml: -------------------------------------------------------------------------------- 1 | # Source https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-rabbitmq-exporter/templates/prometheusrule.yaml 2 | {{- if .Values.prometheus.rules.enabled }} 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | name: {{ template "idrac-exporter.fullname" . }} 7 | {{- with .Values.prometheus.rules.namespace }} 8 | namespace: {{ . }} 9 | {{- end }} 10 | labels: 11 | app: {{ template "idrac-exporter.name" . }} 12 | chart: {{ template "idrac-exporter.chart" . }} 13 | heritage: {{ .Release.Service }} 14 | {{- if .Values.prometheus.rules.additionalLabels }} 15 | {{ toYaml .Values.prometheus.rules.additionalLabels | indent 4 }} 16 | {{- end }} 17 | spec: 18 | {{- with .Values.prometheus.rules.additionalRules }} 19 | groups: 20 | - name: {{ template "idrac-exporter.fullname" $ }} 21 | rules: {{ tpl (toYaml .) 
$ | nindent 8 }} 22 | {{- end }} 23 | {{- end }} 24 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "idrac-exporter.fullname" . }} 5 | labels: 6 | {{- include "idrac-exporter.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: http 12 | protocol: TCP 13 | name: http 14 | selector: 15 | {{- include "idrac-exporter.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "idrac-exporter.serviceAccountName" . }} 6 | labels: 7 | {{- include "idrac-exporter.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /charts/idrac-exporter/templates/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | # Source https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-rabbitmq-exporter/templates/servicemonitor.yaml 2 | {{- if .Values.prometheus.monitor.enabled }} 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | name: {{ template "idrac-exporter.fullname" . }} 7 | labels: 8 | app: {{ template "idrac-exporter.name" . }} 9 | chart: {{ template "idrac-exporter.chart" . 
}} 10 | heritage: {{ .Release.Service }} 11 | {{- if .Values.prometheus.monitor.additionalLabels }} 12 | {{ toYaml .Values.prometheus.monitor.additionalLabels | indent 4 }} 13 | {{- end }} 14 | spec: 15 | selector: 16 | matchLabels: 17 | app: {{ template "idrac-exporter.name" . }} 18 | release: {{ .Release.Name }} 19 | endpoints: 20 | - port: http 21 | {{- if .Values.prometheus.monitor.interval }} 22 | interval: {{ .Values.prometheus.monitor.interval }} 23 | {{- end }} 24 | {{- with .Values.prometheus.monitor.metricRelabelings }} 25 | metricRelabelings: 26 | {{- toYaml . | nindent 6 }} 27 | {{- end }} 28 | {{- with .Values.prometheus.monitor.relabelings }} 29 | relabelings: 30 | {{- toYaml . | nindent 6 }} 31 | {{- end }} 32 | {{- if .Values.prometheus.monitor.namespace }} 33 | namespaceSelector: 34 | matchNames: 35 | {{- range .Values.prometheus.monitor.namespace }} 36 | - {{ . }} 37 | {{- end }} 38 | {{- with .Values.prometheus.monitor.targetLabels }} 39 | targetLabels: 40 | {{- toYaml . | nindent 4 }} 41 | {{- end }} 42 | {{- end }} 43 | {{- end }} 44 | -------------------------------------------------------------------------------- /charts/idrac-exporter/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: mrlhansen/idrac_exporter 5 | pullPolicy: IfNotPresent 6 | # Overrides the image tag whose default is the chart appVersion. 7 | tag: "" 8 | 9 | imagePullSecrets: [] 10 | nameOverride: "" 11 | fullnameOverride: "" 12 | 13 | serviceAccount: 14 | create: true 15 | automount: true 16 | annotations: {} 17 | # The name of the service account to use. 
18 | # If not set and create is true, a name is generated using the fullname template 19 | name: "" 20 | 21 | podLabels: {} 22 | 23 | podSecurityContext: {} 24 | # fsGroup: 2000 25 | 26 | securityContext: {} 27 | # capabilities: 28 | # drop: 29 | # - ALL 30 | # readOnlyRootFilesystem: true 31 | # runAsNonRoot: true 32 | # runAsUser: 1000 33 | 34 | service: 35 | type: ClusterIP 36 | port: 9348 37 | 38 | resources: {} 39 | # We usually recommend not to specify default resources and to leave this as a conscious 40 | # choice for the user. This also increases chances charts run on environments with little 41 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 42 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 43 | # limits: 44 | # cpu: 100m 45 | # memory: 128Mi 46 | # requests: 47 | # cpu: 100m 48 | # memory: 128Mi 49 | 50 | livenessProbe: 51 | httpGet: 52 | path: /health 53 | port: http 54 | readinessProbe: 55 | httpGet: 56 | path: /health 57 | port: http 58 | 59 | volumes: [] 60 | # - name: config-templated 61 | # emptyDir: 62 | # sizeLimit: 16Mi 63 | # - name: foo 64 | # secret: 65 | # secretName: mysecret 66 | # optional: false 67 | 68 | volumeMounts: [] 69 | # - mountPath: /app/config 70 | # name: config-templated 71 | # - name: foo 72 | # mountPath: "/etc/foo" 73 | # readOnly: true 74 | 75 | nodeSelector: {} 76 | 77 | tolerations: [] 78 | 79 | affinity: {} 80 | 81 | extraArgs: [] 82 | 83 | idracConfig: | 84 | address: 0.0.0.0 85 | port: 9348 86 | timeout: 60 87 | hosts: 88 | default: 89 | username: USERNAME 90 | password: PASSWORD 91 | metrics: 92 | all: true 93 | 94 | env: [] 95 | # - name: CONFIG_DEFAULT_USERNAME 96 | # value: root 97 | # - name: CONFIG_DEFAULT_PASSWORD 98 | # valueFrom: 99 | # secretKeyRef: 100 | # name: my-external-idrac-secret 101 | # key: idrac-password 102 | 103 | podAnnotations: 104 | prometheus.io/scrape: "false" 105 | prometheus.io/path: "/metrics" 106 | 
prometheus.io/port: "9348" 107 | 108 | # TODO: Create defaults using relabel configs mentioned in README. 109 | # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig 110 | # Use {{ tpl "{{ include \"idrac-exporter.fullname\" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.service.port }}" . }} for the service endpoint 111 | 112 | prometheus: 113 | monitor: 114 | enabled: false 115 | additionalLabels: {} 116 | interval: 15s 117 | namespace: [] 118 | metricRelabelings: [] 119 | relabelings: [] 120 | targetLabels: [] 121 | 122 | rules: 123 | enabled: false 124 | additionalLabels: {} 125 | namespace: "" 126 | additionalRules: [] 127 | -------------------------------------------------------------------------------- /charts/release-config.yaml: -------------------------------------------------------------------------------- 1 | release-name-template: "helm-{{ .Name }}-{{ .Version }}" 2 | -------------------------------------------------------------------------------- /cmd/idrac_exporter/handler.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "compress/gzip" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "strings" 9 | "sync" 10 | 11 | "github.com/mrlhansen/idrac_exporter/internal/collector" 12 | "github.com/mrlhansen/idrac_exporter/internal/config" 13 | "github.com/mrlhansen/idrac_exporter/internal/log" 14 | "github.com/mrlhansen/idrac_exporter/internal/version" 15 | ) 16 | 17 | const ( 18 | contentTypeHeader = "Content-Type" 19 | contentEncodingHeader = "Content-Encoding" 20 | acceptEncodingHeader = "Accept-Encoding" 21 | ) 22 | 23 | var gzipPool = sync.Pool{ 24 | New: func() any { 25 | return gzip.NewWriter(nil) 26 | }, 27 | } 28 | 29 | const landingPageTemplate = ` 30 | iDRAC Exporter 31 | 32 |

iDRAC Exporter

33 |
Build information: version=%s revision=%s
34 | 35 | 36 | 37 | ` 38 | // rootHandler writes the landing page, filling in the build version and revision. 39 | func rootHandler(rsp http.ResponseWriter, req *http.Request) { 40 | fmt.Fprintf(rsp, landingPageTemplate, version.Version, version.Revision) 41 | } 42 | // healthHandler writes nothing, so net/http answers with an empty 200 OK. 43 | func healthHandler(rsp http.ResponseWriter, req *http.Request) { 44 | // just return a simple 200 for now 45 | } 46 | // resetHandler calls collector.Reset for the 'target' query parameter; a missing target yields 400. 47 | func resetHandler(rsp http.ResponseWriter, req *http.Request) { 48 | target := req.URL.Query().Get("target") 49 | if target == "" { 50 | log.Error("Received request from %s without 'target' parameter", req.RemoteAddr) // RemoteAddr is the client; req.Host is the address the client asked for 51 | http.Error(rsp, "Query parameter 'target' is mandatory", http.StatusBadRequest) 52 | return 53 | } 54 | 55 | log.Debug("Handling reset-request from %s for host %s", req.RemoteAddr, target) 56 | 57 | collector.Reset(target) 58 | } 59 | // discoverHandler writes the output of config.GetDiscover with a JSON content type. 60 | func discoverHandler(rsp http.ResponseWriter, req *http.Request) { 61 | rsp.Header().Set(contentTypeHeader, "application/json") 62 | fmt.Fprint(rsp, config.GetDiscover()) 63 | } 64 | // metricsHandler gathers metrics for the 'target' query parameter; 400 if it is missing, 500 if collection fails. 65 | func metricsHandler(rsp http.ResponseWriter, req *http.Request) { 66 | target := req.URL.Query().Get("target") 67 | if target == "" { 68 | log.Error("Received request from %s without 'target' parameter", req.RemoteAddr) 69 | http.Error(rsp, "Query parameter 'target' is mandatory", http.StatusBadRequest) 70 | return 71 | } 72 | 73 | log.Debug("Handling request from %s for host %s", req.RemoteAddr, target) 74 | 75 | c, err := collector.GetCollector(target) 76 | if err != nil { 77 | errorMsg := fmt.Sprintf("Error instantiating metrics collector for host %s: %v", target, err) 78 | log.Error("%v", errorMsg) 79 | http.Error(rsp, errorMsg, http.StatusInternalServerError) 80 | return 81 | } 82 | 83 | log.Debug("Collecting metrics for host %s", target) 84 | 85 | metrics, err := c.Gather() 86 | if err != nil { 87 | errorMsg := fmt.Sprintf("Error collecting metrics for host %s: %v", target, err) 88 | log.Error("%v", errorMsg) 89 | http.Error(rsp, errorMsg, http.StatusInternalServerError) 90 | return 91 | } 92 | 93 | log.Debug("Metrics for host %s
collected", target) 94 | 95 | header := rsp.Header() 96 | header.Set(contentTypeHeader, "text/plain") 97 | 98 | // Code inspired by the official Prometheus metrics http handler 99 | w := io.Writer(rsp) 100 | if gzipAccepted(req.Header) { 101 | header.Set(contentEncodingHeader, "gzip") 102 | gz := gzipPool.Get().(*gzip.Writer) 103 | defer gzipPool.Put(gz) 104 | 105 | gz.Reset(w) 106 | defer gz.Close() 107 | 108 | w = gz 109 | } 110 | 111 | fmt.Fprint(w, metrics) 112 | } 113 | 114 | // gzipAccepted returns whether the client will accept gzip-encoded content. 115 | func gzipAccepted(header http.Header) bool { 116 | a := header.Get(acceptEncodingHeader) 117 | parts := strings.Split(a, ",") 118 | for _, part := range parts { 119 | part = strings.TrimSpace(part) 120 | if part == "gzip" || strings.HasPrefix(part, "gzip;") { 121 | return true 122 | } 123 | } 124 | return false 125 | } 126 | -------------------------------------------------------------------------------- /cmd/idrac_exporter/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "net" 7 | "net/http" 8 | "strings" 9 | 10 | "github.com/mrlhansen/idrac_exporter/internal/config" 11 | "github.com/mrlhansen/idrac_exporter/internal/log" 12 | "github.com/mrlhansen/idrac_exporter/internal/version" 13 | ) 14 | 15 | func main() { 16 | var verbose bool 17 | var debug bool 18 | var configFile string 19 | var err error 20 | 21 | flag.BoolVar(&verbose, "verbose", false, "Enable more verbose logging") 22 | flag.BoolVar(&debug, "debug", false, "Dump JSON response from Redfish requests (only for debugging purpose)") 23 | flag.StringVar(&configFile, "config", "/etc/prometheus/idrac.yml", "Path to idrac exporter configuration file") 24 | flag.Parse() 25 | 26 | log.Info("Build information: version=%s revision=%s", version.Version, version.Revision) 27 | config.ReadConfig(configFile) 28 | 29 | if debug { 30 | config.Debug = true 31 | 
verbose = true 32 | } 33 | 34 | if verbose { 35 | log.SetLevel(log.LevelDebug) 36 | } 37 | 38 | http.HandleFunc("/discover", discoverHandler) 39 | http.HandleFunc("/metrics", metricsHandler) 40 | http.HandleFunc("/health", healthHandler) 41 | http.HandleFunc("/reset", resetHandler) 42 | http.HandleFunc("/", rootHandler) 43 | 44 | port := fmt.Sprintf("%d", config.Config.Port) 45 | host := strings.Trim(config.Config.Address, "[]") 46 | bind := net.JoinHostPort(host, port) 47 | log.Info("Server listening on %s (TLS: %v)", bind, config.Config.TLS.Enabled) 48 | 49 | if config.Config.TLS.Enabled { 50 | err = http.ListenAndServeTLS(bind, config.Config.TLS.CertFile, config.Config.TLS.KeyFile, nil) 51 | } else { 52 | err = http.ListenAndServe(bind, nil) 53 | } 54 | 55 | if err != nil { 56 | log.Fatal("%v", err) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /default-config.yml: -------------------------------------------------------------------------------- 1 | address: 0.0.0.0 2 | port: 9348 3 | hosts: 4 | default: 5 | username: root 6 | password: calvin 7 | metrics: 8 | all: true 9 | events: 10 | severity: warning 11 | maxage: 7d 12 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | auth_file="/authconfig/$NODE_NAME" 4 | if [ -f "$auth_file" ]; then 5 | export CONFIG_DEFAULT_USERNAME=$(cut -f1 -d= $auth_file) 6 | export CONFIG_DEFAULT_PASSWORD=$(cut -f2- -d= $auth_file) 7 | fi 8 | 9 | exec bin/idrac_exporter "$@" 10 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mrlhansen/idrac_exporter 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/prometheus/client_golang v1.20.5 7 | github.com/prometheus/common v0.62.0 8 | 
github.com/xhit/go-str2duration/v2 v2.1.0 9 | gopkg.in/yaml.v3 v3.0.1 10 | ) 11 | 12 | require ( 13 | github.com/beorn7/perks v1.0.1 // indirect 14 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 15 | github.com/kr/text v0.2.0 // indirect 16 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 17 | github.com/prometheus/client_model v0.6.1 // indirect 18 | github.com/prometheus/procfs v0.15.1 // indirect 19 | golang.org/x/sys v0.29.0 // indirect 20 | google.golang.org/protobuf v1.36.4 // indirect 21 | ) 22 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 4 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 5 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 6 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 7 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 9 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 10 | github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= 11 | github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= 12 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 13 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 14 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 15 | github.com/kr/text v0.2.0/go.mod 
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 16 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 17 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 18 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 19 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 20 | github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= 21 | github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= 22 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= 23 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 24 | github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= 25 | github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= 26 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 27 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= 28 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 29 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 30 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 31 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 32 | github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= 33 | github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= 34 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 35 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 36 | 
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM= 37 | google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 38 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 39 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 40 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 41 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 42 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 43 | -------------------------------------------------------------------------------- /grafana/alerts.yaml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: "idrac-alerts" 3 | rules: 4 | - alert: ServerNotHealthy 5 | expr: idrac_system_health{status!="OK"} 6 | for: 3m 7 | keep_firing_for: 5m 8 | labels: 9 | severity: > 10 | {{- if $labels.status -}} 11 | {{- if eq $labels.status "Warning" -}} 12 | warning 13 | {{- else if eq $labels.status "Critical" -}} 14 | critical 15 | {{- end -}} 16 | {{- else -}} 17 | critical 18 | {{- end -}} 19 | annotations: 20 | description: Server {{ $labels.instance }} is not healthy, current status is {{ $labels.status }}. Please check server BMC system event log. 21 | summary: Hardware server status is not healthy. 22 | - alert: ServerNotReporting 23 | expr: up{job=~".*idrac.*"} == 0 24 | for: 15m 25 | keep_firing_for: 5m 26 | labels: 27 | severity: warning 28 | annotations: 29 | description: Server {{ $labels.instance }} failed to reply via Redfish API. Please check idrac_exporter logs for more details. 30 | summary: Hardware server failed to reply to monitoring via Redfish API. 
31 | -------------------------------------------------------------------------------- /grafana/bmc1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrlhansen/idrac_exporter/f875ebbeacfc83802895bd053047eca56c8ff997/grafana/bmc1.png -------------------------------------------------------------------------------- /grafana/bmc2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mrlhansen/idrac_exporter/f875ebbeacfc83802895bd053047eca56c8ff997/grafana/bmc2.png -------------------------------------------------------------------------------- /grafana/status-alternative.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "type": "dashboard" 15 | } 16 | ] 17 | }, 18 | "editable": true, 19 | "fiscalYearStartMonth": 0, 20 | "graphTooltip": 0, 21 | "id": 59, 22 | "links": [], 23 | "panels": [ 24 | { 25 | "collapsed": false, 26 | "gridPos": { 27 | "h": 1, 28 | "w": 24, 29 | "x": 0, 30 | "y": 0 31 | }, 32 | "id": 7, 33 | "panels": [], 34 | "title": "Summary", 35 | "type": "row" 36 | }, 37 | { 38 | "datasource": { 39 | "type": "prometheus", 40 | "uid": "${DS_PROMETHEUS}" 41 | }, 42 | "description": "", 43 | "fieldConfig": { 44 | "defaults": { 45 | "color": { 46 | "mode": "thresholds" 47 | }, 48 | "mappings": [], 49 | "thresholds": { 50 | "mode": "absolute", 51 | "steps": [ 52 | { 53 | "color": "green", 54 | "value": null 55 | }, 56 | { 57 | "color": "red", 58 | "value": 80 59 | } 60 | ] 61 | } 62 | }, 63 | "overrides": [] 64 | }, 65 | "gridPos": { 66 | "h": 2, 67 | "w": 3, 68 | "x": 0, 69 | "y": 1 70 | }, 71 | "id": 8, 72 | "options": { 73 | 
"colorMode": "value", 74 | "graphMode": "area", 75 | "justifyMode": "auto", 76 | "orientation": "auto", 77 | "percentChangeColorMode": "standard", 78 | "reduceOptions": { 79 | "calcs": [ 80 | "lastNotNull" 81 | ], 82 | "fields": "", 83 | "values": false 84 | }, 85 | "showPercentChange": false, 86 | "textMode": "name", 87 | "wideLayout": true 88 | }, 89 | "pluginVersion": "11.1.4", 90 | "targets": [ 91 | { 92 | "datasource": { 93 | "type": "prometheus", 94 | "uid": "${DS_PROMETHEUS}" 95 | }, 96 | "editorMode": "code", 97 | "exemplar": false, 98 | "expr": "idrac_system_machine_info{instance=\"$instance\"}", 99 | "instant": true, 100 | "legendFormat": "{{manufacturer}}", 101 | "range": false, 102 | "refId": "A" 103 | } 104 | ], 105 | "title": "Vendor", 106 | "type": "stat" 107 | }, 108 | { 109 | "datasource": { 110 | "type": "prometheus", 111 | "uid": "${DS_PROMETHEUS}" 112 | }, 113 | "fieldConfig": { 114 | "defaults": { 115 | "color": { 116 | "mode": "fixed" 117 | }, 118 | "mappings": [ 119 | { 120 | "options": { 121 | "0": { 122 | "color": "green", 123 | "index": 0, 124 | "text": "OK" 125 | }, 126 | "1": { 127 | "color": "orange", 128 | "index": 1, 129 | "text": "Warning" 130 | }, 131 | "2": { 132 | "color": "red", 133 | "index": 2, 134 | "text": "Error" 135 | } 136 | }, 137 | "type": "value" 138 | } 139 | ], 140 | "noValue": "N/A", 141 | "thresholds": { 142 | "mode": "absolute", 143 | "steps": [ 144 | { 145 | "color": "green", 146 | "value": null 147 | }, 148 | { 149 | "color": "red", 150 | "value": 80 151 | } 152 | ] 153 | } 154 | }, 155 | "overrides": [] 156 | }, 157 | "gridPos": { 158 | "h": 4, 159 | "w": 3, 160 | "x": 3, 161 | "y": 1 162 | }, 163 | "id": 1, 164 | "options": { 165 | "colorMode": "value", 166 | "graphMode": "area", 167 | "justifyMode": "auto", 168 | "orientation": "auto", 169 | "percentChangeColorMode": "standard", 170 | "reduceOptions": { 171 | "calcs": [ 172 | "lastNotNull" 173 | ], 174 | "fields": "", 175 | "values": false 176 | }, 177 | 
"showPercentChange": false, 178 | "textMode": "auto", 179 | "wideLayout": true 180 | }, 181 | "pluginVersion": "11.1.4", 182 | "targets": [ 183 | { 184 | "datasource": { 185 | "type": "prometheus", 186 | "uid": "${DS_PROMETHEUS}" 187 | }, 188 | "editorMode": "code", 189 | "exemplar": false, 190 | "expr": "idrac_system_health{instance=\"$instance\"}", 191 | "instant": true, 192 | "legendFormat": "Health", 193 | "range": false, 194 | "refId": "A" 195 | } 196 | ], 197 | "title": "Global Health", 198 | "type": "stat" 199 | }, 200 | { 201 | "datasource": { 202 | "type": "prometheus", 203 | "uid": "${DS_PROMETHEUS}" 204 | }, 205 | "fieldConfig": { 206 | "defaults": { 207 | "color": { 208 | "mode": "fixed" 209 | }, 210 | "mappings": [ 211 | { 212 | "options": { 213 | "0": { 214 | "color": "green", 215 | "index": 0, 216 | "text": "OK" 217 | }, 218 | "1": { 219 | "color": "orange", 220 | "index": 1, 221 | "text": "Warning" 222 | }, 223 | "2": { 224 | "color": "red", 225 | "index": 2, 226 | "text": "Error" 227 | } 228 | }, 229 | "type": "value" 230 | } 231 | ], 232 | "noValue": "N/A", 233 | "thresholds": { 234 | "mode": "absolute", 235 | "steps": [ 236 | { 237 | "color": "green", 238 | "value": null 239 | }, 240 | { 241 | "color": "red", 242 | "value": 80 243 | } 244 | ] 245 | } 246 | }, 247 | "overrides": [] 248 | }, 249 | "gridPos": { 250 | "h": 4, 251 | "w": 3, 252 | "x": 6, 253 | "y": 1 254 | }, 255 | "id": 5, 256 | "options": { 257 | "colorMode": "value", 258 | "graphMode": "area", 259 | "justifyMode": "auto", 260 | "orientation": "auto", 261 | "percentChangeColorMode": "standard", 262 | "reduceOptions": { 263 | "calcs": [ 264 | "lastNotNull" 265 | ], 266 | "fields": "", 267 | "values": false 268 | }, 269 | "showPercentChange": false, 270 | "textMode": "auto", 271 | "wideLayout": true 272 | }, 273 | "pluginVersion": "11.1.4", 274 | "targets": [ 275 | { 276 | "datasource": { 277 | "type": "prometheus", 278 | "uid": "${DS_PROMETHEUS}" 279 | }, 280 | "editorMode": "code", 
281 | "exemplar": false, 282 | "expr": "max(idrac_memory_module_health{instance=\"$instance\"} < 10)", 283 | "instant": true, 284 | "legendFormat": "Health", 285 | "range": false, 286 | "refId": "A" 287 | } 288 | ], 289 | "title": "Memory Health", 290 | "type": "stat" 291 | }, 292 | { 293 | "datasource": { 294 | "type": "prometheus", 295 | "uid": "${DS_PROMETHEUS}" 296 | }, 297 | "fieldConfig": { 298 | "defaults": { 299 | "color": { 300 | "mode": "fixed" 301 | }, 302 | "mappings": [ 303 | { 304 | "options": { 305 | "0": { 306 | "color": "green", 307 | "index": 0, 308 | "text": "OK" 309 | }, 310 | "1": { 311 | "color": "orange", 312 | "index": 1, 313 | "text": "Warning" 314 | }, 315 | "2": { 316 | "color": "red", 317 | "index": 2, 318 | "text": "Error" 319 | } 320 | }, 321 | "type": "value" 322 | } 323 | ], 324 | "noValue": "N/A", 325 | "thresholds": { 326 | "mode": "absolute", 327 | "steps": [ 328 | { 329 | "color": "green", 330 | "value": null 331 | }, 332 | { 333 | "color": "red", 334 | "value": 80 335 | } 336 | ] 337 | } 338 | }, 339 | "overrides": [] 340 | }, 341 | "gridPos": { 342 | "h": 4, 343 | "w": 3, 344 | "x": 9, 345 | "y": 1 346 | }, 347 | "id": 6, 348 | "options": { 349 | "colorMode": "value", 350 | "graphMode": "area", 351 | "justifyMode": "auto", 352 | "orientation": "auto", 353 | "percentChangeColorMode": "standard", 354 | "reduceOptions": { 355 | "calcs": [ 356 | "lastNotNull" 357 | ], 358 | "fields": "", 359 | "values": false 360 | }, 361 | "showPercentChange": false, 362 | "textMode": "auto", 363 | "wideLayout": true 364 | }, 365 | "pluginVersion": "11.1.4", 366 | "targets": [ 367 | { 368 | "datasource": { 369 | "type": "prometheus", 370 | "uid": "${DS_PROMETHEUS}" 371 | }, 372 | "editorMode": "code", 373 | "exemplar": false, 374 | "expr": "max(idrac_network_interface_health{instance=\"$instance\"} < 10)", 375 | "instant": true, 376 | "legendFormat": "Health", 377 | "range": false, 378 | "refId": "A" 379 | } 380 | ], 381 | "title": "Network 
Health", 382 | "type": "stat" 383 | }, 384 | { 385 | "datasource": { 386 | "type": "prometheus", 387 | "uid": "${DS_PROMETHEUS}" 388 | }, 389 | "fieldConfig": { 390 | "defaults": { 391 | "color": { 392 | "mode": "thresholds" 393 | }, 394 | "custom": { 395 | "align": "auto", 396 | "cellOptions": { 397 | "type": "auto" 398 | }, 399 | "inspect": false 400 | }, 401 | "mappings": [], 402 | "thresholds": { 403 | "mode": "absolute", 404 | "steps": [ 405 | { 406 | "color": "green", 407 | "value": null 408 | }, 409 | { 410 | "color": "red", 411 | "value": 80 412 | } 413 | ] 414 | } 415 | }, 416 | "overrides": [ 417 | { 418 | "matcher": { 419 | "id": "byName", 420 | "options": "Severity" 421 | }, 422 | "properties": [ 423 | { 424 | "id": "custom.width", 425 | "value": 100 426 | } 427 | ] 428 | }, 429 | { 430 | "matcher": { 431 | "id": "byName", 432 | "options": "Timestamp" 433 | }, 434 | "properties": [ 435 | { 436 | "id": "custom.width", 437 | "value": 120 438 | }, 439 | { 440 | "id": "unit", 441 | "value": "dateTimeFromNow" 442 | } 443 | ] 444 | } 445 | ] 446 | }, 447 | "gridPos": { 448 | "h": 8, 449 | "w": 12, 450 | "x": 12, 451 | "y": 1 452 | }, 453 | "id": 12, 454 | "options": { 455 | "cellHeight": "sm", 456 | "footer": { 457 | "countRows": false, 458 | "fields": "", 459 | "reducer": [ 460 | "sum" 461 | ], 462 | "show": false 463 | }, 464 | "showHeader": false, 465 | "sortBy": [] 466 | }, 467 | "pluginVersion": "11.1.4", 468 | "targets": [ 469 | { 470 | "datasource": { 471 | "type": "prometheus", 472 | "uid": "${DS_PROMETHEUS}" 473 | }, 474 | "editorMode": "code", 475 | "exemplar": false, 476 | "expr": "sort_desc(sum by (id,message,severity) (idrac_events_log_entry{instance=\"$instance\"}*1000))", 477 | "format": "table", 478 | "instant": true, 479 | "legendFormat": "__auto", 480 | "range": false, 481 | "refId": "A" 482 | } 483 | ], 484 | "title": "System Event Log", 485 | "transformations": [ 486 | { 487 | "id": "organize", 488 | "options": { 489 | "excludeByName": { 
490 | "Time": true, 491 | "id": true 492 | }, 493 | "includeByName": {}, 494 | "indexByName": { 495 | "Time": 0, 496 | "Value": 4, 497 | "id": 1, 498 | "message": 3, 499 | "severity": 2 500 | }, 501 | "renameByName": { 502 | "Value": "Timestamp", 503 | "message": "Message", 504 | "severity": "Severity" 505 | } 506 | } 507 | } 508 | ], 509 | "type": "table" 510 | }, 511 | { 512 | "datasource": { 513 | "type": "prometheus", 514 | "uid": "${DS_PROMETHEUS}" 515 | }, 516 | "description": "", 517 | "fieldConfig": { 518 | "defaults": { 519 | "color": { 520 | "mode": "thresholds" 521 | }, 522 | "mappings": [], 523 | "thresholds": { 524 | "mode": "absolute", 525 | "steps": [ 526 | { 527 | "color": "green", 528 | "value": null 529 | }, 530 | { 531 | "color": "red", 532 | "value": 80 533 | } 534 | ] 535 | } 536 | }, 537 | "overrides": [] 538 | }, 539 | "gridPos": { 540 | "h": 2, 541 | "w": 3, 542 | "x": 0, 543 | "y": 3 544 | }, 545 | "id": 9, 546 | "options": { 547 | "colorMode": "value", 548 | "graphMode": "area", 549 | "justifyMode": "auto", 550 | "orientation": "auto", 551 | "percentChangeColorMode": "standard", 552 | "reduceOptions": { 553 | "calcs": [ 554 | "lastNotNull" 555 | ], 556 | "fields": "", 557 | "values": false 558 | }, 559 | "showPercentChange": false, 560 | "textMode": "name", 561 | "wideLayout": true 562 | }, 563 | "pluginVersion": "11.1.4", 564 | "targets": [ 565 | { 566 | "datasource": { 567 | "type": "prometheus", 568 | "uid": "${DS_PROMETHEUS}" 569 | }, 570 | "editorMode": "code", 571 | "exemplar": false, 572 | "expr": "idrac_system_machine_info{instance=\"$instance\"}", 573 | "instant": true, 574 | "legendFormat": "{{model}}", 575 | "range": false, 576 | "refId": "A" 577 | } 578 | ], 579 | "title": "Model", 580 | "type": "stat" 581 | }, 582 | { 583 | "datasource": { 584 | "type": "prometheus", 585 | "uid": "${DS_PROMETHEUS}" 586 | }, 587 | "description": "", 588 | "fieldConfig": { 589 | "defaults": { 590 | "color": { 591 | "mode": "thresholds" 592 | }, 
593 | "mappings": [], 594 | "thresholds": { 595 | "mode": "absolute", 596 | "steps": [ 597 | { 598 | "color": "green", 599 | "value": null 600 | }, 601 | { 602 | "color": "red", 603 | "value": 80 604 | } 605 | ] 606 | } 607 | }, 608 | "overrides": [] 609 | }, 610 | "gridPos": { 611 | "h": 2, 612 | "w": 3, 613 | "x": 0, 614 | "y": 5 615 | }, 616 | "id": 10, 617 | "options": { 618 | "colorMode": "value", 619 | "graphMode": "area", 620 | "justifyMode": "auto", 621 | "orientation": "auto", 622 | "percentChangeColorMode": "standard", 623 | "reduceOptions": { 624 | "calcs": [ 625 | "lastNotNull" 626 | ], 627 | "fields": "", 628 | "values": false 629 | }, 630 | "showPercentChange": false, 631 | "textMode": "name", 632 | "wideLayout": true 633 | }, 634 | "pluginVersion": "11.1.4", 635 | "targets": [ 636 | { 637 | "datasource": { 638 | "type": "prometheus", 639 | "uid": "${DS_PROMETHEUS}" 640 | }, 641 | "editorMode": "code", 642 | "exemplar": false, 643 | "expr": "idrac_system_machine_info{instance=\"$instance\"}", 644 | "instant": true, 645 | "legendFormat": "{{serial}}", 646 | "range": false, 647 | "refId": "A" 648 | } 649 | ], 650 | "title": "Serial Number", 651 | "type": "stat" 652 | }, 653 | { 654 | "datasource": { 655 | "type": "prometheus", 656 | "uid": "${DS_PROMETHEUS}" 657 | }, 658 | "fieldConfig": { 659 | "defaults": { 660 | "color": { 661 | "mode": "fixed" 662 | }, 663 | "mappings": [ 664 | { 665 | "options": { 666 | "0": { 667 | "color": "green", 668 | "index": 0, 669 | "text": "OK" 670 | }, 671 | "1": { 672 | "color": "orange", 673 | "index": 1, 674 | "text": "Warning" 675 | }, 676 | "2": { 677 | "color": "red", 678 | "index": 2, 679 | "text": "Error" 680 | } 681 | }, 682 | "type": "value" 683 | } 684 | ], 685 | "noValue": "N/A", 686 | "thresholds": { 687 | "mode": "absolute", 688 | "steps": [ 689 | { 690 | "color": "green", 691 | "value": null 692 | }, 693 | { 694 | "color": "red", 695 | "value": 80 696 | } 697 | ] 698 | } 699 | }, 700 | "overrides": [] 701 | 
}, 702 | "gridPos": { 703 | "h": 4, 704 | "w": 3, 705 | "x": 3, 706 | "y": 5 707 | }, 708 | "id": 2, 709 | "options": { 710 | "colorMode": "value", 711 | "graphMode": "area", 712 | "justifyMode": "auto", 713 | "orientation": "auto", 714 | "percentChangeColorMode": "standard", 715 | "reduceOptions": { 716 | "calcs": [ 717 | "lastNotNull" 718 | ], 719 | "fields": "", 720 | "values": false 721 | }, 722 | "showPercentChange": false, 723 | "textMode": "auto", 724 | "wideLayout": true 725 | }, 726 | "pluginVersion": "11.1.4", 727 | "targets": [ 728 | { 729 | "datasource": { 730 | "type": "prometheus", 731 | "uid": "${DS_PROMETHEUS}" 732 | }, 733 | "editorMode": "code", 734 | "exemplar": false, 735 | "expr": "max(idrac_sensors_fan_health{instance=\"$instance\"} < 10)", 736 | "instant": true, 737 | "legendFormat": "Health", 738 | "range": false, 739 | "refId": "A" 740 | } 741 | ], 742 | "title": "Fan Health", 743 | "type": "stat" 744 | }, 745 | { 746 | "datasource": { 747 | "type": "prometheus", 748 | "uid": "${DS_PROMETHEUS}" 749 | }, 750 | "fieldConfig": { 751 | "defaults": { 752 | "color": { 753 | "mode": "fixed" 754 | }, 755 | "mappings": [ 756 | { 757 | "options": { 758 | "0": { 759 | "color": "green", 760 | "index": 0, 761 | "text": "OK" 762 | }, 763 | "1": { 764 | "color": "orange", 765 | "index": 1, 766 | "text": "Warning" 767 | }, 768 | "2": { 769 | "color": "red", 770 | "index": 2, 771 | "text": "Error" 772 | } 773 | }, 774 | "type": "value" 775 | } 776 | ], 777 | "noValue": "N/A", 778 | "thresholds": { 779 | "mode": "absolute", 780 | "steps": [ 781 | { 782 | "color": "green", 783 | "value": null 784 | }, 785 | { 786 | "color": "red", 787 | "value": 80 788 | } 789 | ] 790 | } 791 | }, 792 | "overrides": [] 793 | }, 794 | "gridPos": { 795 | "h": 4, 796 | "w": 3, 797 | "x": 6, 798 | "y": 5 799 | }, 800 | "id": 3, 801 | "options": { 802 | "colorMode": "value", 803 | "graphMode": "area", 804 | "justifyMode": "auto", 805 | "orientation": "auto", 806 | 
"percentChangeColorMode": "standard", 807 | "reduceOptions": { 808 | "calcs": [ 809 | "lastNotNull" 810 | ], 811 | "fields": "", 812 | "values": false 813 | }, 814 | "showPercentChange": false, 815 | "textMode": "auto", 816 | "wideLayout": true 817 | }, 818 | "pluginVersion": "11.1.4", 819 | "targets": [ 820 | { 821 | "datasource": { 822 | "type": "prometheus", 823 | "uid": "${DS_PROMETHEUS}" 824 | }, 825 | "editorMode": "code", 826 | "exemplar": false, 827 | "expr": "max(idrac_power_supply_health{instance=\"$instance\"} < 10)", 828 | "instant": true, 829 | "legendFormat": "Health", 830 | "range": false, 831 | "refId": "A" 832 | } 833 | ], 834 | "title": "PSU Health", 835 | "type": "stat" 836 | }, 837 | { 838 | "datasource": { 839 | "type": "prometheus", 840 | "uid": "${DS_PROMETHEUS}" 841 | }, 842 | "fieldConfig": { 843 | "defaults": { 844 | "color": { 845 | "mode": "fixed" 846 | }, 847 | "mappings": [ 848 | { 849 | "options": { 850 | "0": { 851 | "color": "green", 852 | "index": 0, 853 | "text": "OK" 854 | }, 855 | "1": { 856 | "color": "orange", 857 | "index": 1, 858 | "text": "Warning" 859 | }, 860 | "2": { 861 | "color": "red", 862 | "index": 2, 863 | "text": "Error" 864 | } 865 | }, 866 | "type": "value" 867 | } 868 | ], 869 | "noValue": "N/A", 870 | "thresholds": { 871 | "mode": "absolute", 872 | "steps": [ 873 | { 874 | "color": "green", 875 | "value": null 876 | }, 877 | { 878 | "color": "red", 879 | "value": 80 880 | } 881 | ] 882 | } 883 | }, 884 | "overrides": [] 885 | }, 886 | "gridPos": { 887 | "h": 4, 888 | "w": 3, 889 | "x": 9, 890 | "y": 5 891 | }, 892 | "id": 4, 893 | "options": { 894 | "colorMode": "value", 895 | "graphMode": "area", 896 | "justifyMode": "auto", 897 | "orientation": "auto", 898 | "percentChangeColorMode": "standard", 899 | "reduceOptions": { 900 | "calcs": [ 901 | "lastNotNull" 902 | ], 903 | "fields": "", 904 | "values": false 905 | }, 906 | "showPercentChange": false, 907 | "textMode": "auto", 908 | "wideLayout": true 909 | }, 
910 | "pluginVersion": "11.1.4", 911 | "targets": [ 912 | { 913 | "datasource": { 914 | "type": "prometheus", 915 | "uid": "${DS_PROMETHEUS}" 916 | }, 917 | "editorMode": "code", 918 | "exemplar": false, 919 | "expr": "max(idrac_storage_drive_health{instance=\"$instance\"} < 10)", 920 | "instant": true, 921 | "legendFormat": "Health", 922 | "range": false, 923 | "refId": "A" 924 | } 925 | ], 926 | "title": "Disk Health", 927 | "type": "stat" 928 | }, 929 | { 930 | "datasource": { 931 | "type": "prometheus", 932 | "uid": "${DS_PROMETHEUS}" 933 | }, 934 | "description": "", 935 | "fieldConfig": { 936 | "defaults": { 937 | "color": { 938 | "mode": "thresholds" 939 | }, 940 | "mappings": [], 941 | "thresholds": { 942 | "mode": "absolute", 943 | "steps": [ 944 | { 945 | "color": "green", 946 | "value": null 947 | }, 948 | { 949 | "color": "red", 950 | "value": 80 951 | } 952 | ] 953 | } 954 | }, 955 | "overrides": [] 956 | }, 957 | "gridPos": { 958 | "h": 2, 959 | "w": 3, 960 | "x": 0, 961 | "y": 7 962 | }, 963 | "id": 11, 964 | "options": { 965 | "colorMode": "value", 966 | "graphMode": "area", 967 | "justifyMode": "auto", 968 | "orientation": "auto", 969 | "percentChangeColorMode": "standard", 970 | "reduceOptions": { 971 | "calcs": [ 972 | "lastNotNull" 973 | ], 974 | "fields": "", 975 | "values": false 976 | }, 977 | "showPercentChange": false, 978 | "textMode": "name", 979 | "wideLayout": true 980 | }, 981 | "pluginVersion": "11.1.4", 982 | "targets": [ 983 | { 984 | "datasource": { 985 | "type": "prometheus", 986 | "uid": "${DS_PROMETHEUS}" 987 | }, 988 | "editorMode": "code", 989 | "exemplar": false, 990 | "expr": "idrac_system_machine_info{instance=\"$instance\"}", 991 | "instant": true, 992 | "legendFormat": "{{sku}}", 993 | "range": false, 994 | "refId": "A" 995 | } 996 | ], 997 | "title": "SKU", 998 | "type": "stat" 999 | }, 1000 | { 1001 | "collapsed": false, 1002 | "gridPos": { 1003 | "h": 1, 1004 | "w": 24, 1005 | "x": 0, 1006 | "y": 9 1007 | }, 1008 | 
"id": 13, 1009 | "panels": [], 1010 | "title": "Power and Cooling", 1011 | "type": "row" 1012 | }, 1013 | { 1014 | "datasource": { 1015 | "type": "prometheus", 1016 | "uid": "${DS_PROMETHEUS}" 1017 | }, 1018 | "fieldConfig": { 1019 | "defaults": { 1020 | "color": { 1021 | "mode": "palette-classic" 1022 | }, 1023 | "custom": { 1024 | "axisBorderShow": false, 1025 | "axisCenteredZero": false, 1026 | "axisColorMode": "text", 1027 | "axisLabel": "", 1028 | "axisPlacement": "auto", 1029 | "barAlignment": 0, 1030 | "drawStyle": "line", 1031 | "fillOpacity": 0, 1032 | "gradientMode": "none", 1033 | "hideFrom": { 1034 | "legend": false, 1035 | "tooltip": false, 1036 | "viz": false 1037 | }, 1038 | "insertNulls": false, 1039 | "lineInterpolation": "linear", 1040 | "lineWidth": 1, 1041 | "pointSize": 5, 1042 | "scaleDistribution": { 1043 | "type": "linear" 1044 | }, 1045 | "showPoints": "auto", 1046 | "spanNulls": false, 1047 | "stacking": { 1048 | "group": "A", 1049 | "mode": "none" 1050 | }, 1051 | "thresholdsStyle": { 1052 | "mode": "off" 1053 | } 1054 | }, 1055 | "mappings": [], 1056 | "thresholds": { 1057 | "mode": "absolute", 1058 | "steps": [ 1059 | { 1060 | "color": "green", 1061 | "value": null 1062 | }, 1063 | { 1064 | "color": "red", 1065 | "value": 80 1066 | } 1067 | ] 1068 | }, 1069 | "unit": "celsius" 1070 | }, 1071 | "overrides": [] 1072 | }, 1073 | "gridPos": { 1074 | "h": 9, 1075 | "w": 24, 1076 | "x": 0, 1077 | "y": 10 1078 | }, 1079 | "id": 14, 1080 | "options": { 1081 | "legend": { 1082 | "calcs": [ 1083 | "min", 1084 | "max", 1085 | "mean" 1086 | ], 1087 | "displayMode": "table", 1088 | "placement": "right", 1089 | "showLegend": true 1090 | }, 1091 | "tooltip": { 1092 | "mode": "single", 1093 | "sort": "none" 1094 | } 1095 | }, 1096 | "targets": [ 1097 | { 1098 | "datasource": { 1099 | "type": "prometheus", 1100 | "uid": "${DS_PROMETHEUS}" 1101 | }, 1102 | "editorMode": "code", 1103 | "expr": 
"idrac_sensors_temperature{instance=\"$instance\",name=~\".*(Inlet|Ambient|Exhaust).*\"}", 1104 | "instant": false, 1105 | "legendFormat": "{{name}}", 1106 | "range": true, 1107 | "refId": "A" 1108 | } 1109 | ], 1110 | "title": "Air Temperature", 1111 | "type": "timeseries" 1112 | }, 1113 | { 1114 | "datasource": { 1115 | "type": "prometheus", 1116 | "uid": "${DS_PROMETHEUS}" 1117 | }, 1118 | "fieldConfig": { 1119 | "defaults": { 1120 | "color": { 1121 | "mode": "palette-classic" 1122 | }, 1123 | "custom": { 1124 | "axisBorderShow": false, 1125 | "axisCenteredZero": false, 1126 | "axisColorMode": "text", 1127 | "axisLabel": "", 1128 | "axisPlacement": "auto", 1129 | "barAlignment": 0, 1130 | "drawStyle": "line", 1131 | "fillOpacity": 0, 1132 | "gradientMode": "none", 1133 | "hideFrom": { 1134 | "legend": false, 1135 | "tooltip": false, 1136 | "viz": false 1137 | }, 1138 | "insertNulls": false, 1139 | "lineInterpolation": "linear", 1140 | "lineWidth": 1, 1141 | "pointSize": 5, 1142 | "scaleDistribution": { 1143 | "type": "linear" 1144 | }, 1145 | "showPoints": "auto", 1146 | "spanNulls": false, 1147 | "stacking": { 1148 | "group": "A", 1149 | "mode": "none" 1150 | }, 1151 | "thresholdsStyle": { 1152 | "mode": "off" 1153 | } 1154 | }, 1155 | "mappings": [], 1156 | "thresholds": { 1157 | "mode": "absolute", 1158 | "steps": [ 1159 | { 1160 | "color": "green", 1161 | "value": null 1162 | }, 1163 | { 1164 | "color": "red", 1165 | "value": 80 1166 | } 1167 | ] 1168 | }, 1169 | "unit": "celsius" 1170 | }, 1171 | "overrides": [] 1172 | }, 1173 | "gridPos": { 1174 | "h": 9, 1175 | "w": 24, 1176 | "x": 0, 1177 | "y": 19 1178 | }, 1179 | "id": 15, 1180 | "options": { 1181 | "legend": { 1182 | "calcs": [ 1183 | "min", 1184 | "max", 1185 | "mean" 1186 | ], 1187 | "displayMode": "table", 1188 | "placement": "right", 1189 | "showLegend": true 1190 | }, 1191 | "tooltip": { 1192 | "mode": "single", 1193 | "sort": "none" 1194 | } 1195 | }, 1196 | "targets": [ 1197 | { 1198 | 
"datasource": { 1199 | "type": "prometheus", 1200 | "uid": "${DS_PROMETHEUS}" 1201 | }, 1202 | "editorMode": "code", 1203 | "expr": "idrac_sensors_temperature{instance=\"$instance\",name=~\".*(CPU).*\"}", 1204 | "instant": false, 1205 | "legendFormat": "{{name}}", 1206 | "range": true, 1207 | "refId": "A" 1208 | } 1209 | ], 1210 | "title": "CPU Temperature", 1211 | "type": "timeseries" 1212 | }, 1213 | { 1214 | "datasource": { 1215 | "type": "prometheus", 1216 | "uid": "${DS_PROMETHEUS}" 1217 | }, 1218 | "fieldConfig": { 1219 | "defaults": { 1220 | "color": { 1221 | "mode": "palette-classic" 1222 | }, 1223 | "custom": { 1224 | "axisBorderShow": false, 1225 | "axisCenteredZero": false, 1226 | "axisColorMode": "text", 1227 | "axisLabel": "", 1228 | "axisPlacement": "auto", 1229 | "barAlignment": 0, 1230 | "drawStyle": "line", 1231 | "fillOpacity": 0, 1232 | "gradientMode": "none", 1233 | "hideFrom": { 1234 | "legend": false, 1235 | "tooltip": false, 1236 | "viz": false 1237 | }, 1238 | "insertNulls": false, 1239 | "lineInterpolation": "linear", 1240 | "lineWidth": 1, 1241 | "pointSize": 5, 1242 | "scaleDistribution": { 1243 | "type": "linear" 1244 | }, 1245 | "showPoints": "auto", 1246 | "spanNulls": false, 1247 | "stacking": { 1248 | "group": "A", 1249 | "mode": "none" 1250 | }, 1251 | "thresholdsStyle": { 1252 | "mode": "off" 1253 | } 1254 | }, 1255 | "mappings": [], 1256 | "thresholds": { 1257 | "mode": "absolute", 1258 | "steps": [ 1259 | { 1260 | "color": "green", 1261 | "value": null 1262 | }, 1263 | { 1264 | "color": "red", 1265 | "value": 80 1266 | } 1267 | ] 1268 | }, 1269 | "unit": "watt" 1270 | }, 1271 | "overrides": [] 1272 | }, 1273 | "gridPos": { 1274 | "h": 8, 1275 | "w": 12, 1276 | "x": 0, 1277 | "y": 28 1278 | }, 1279 | "id": 16, 1280 | "options": { 1281 | "legend": { 1282 | "calcs": [], 1283 | "displayMode": "table", 1284 | "placement": "right", 1285 | "showLegend": false 1286 | }, 1287 | "tooltip": { 1288 | "mode": "single", 1289 | "sort": "none" 
1290 | } 1291 | }, 1292 | "targets": [ 1293 | { 1294 | "datasource": { 1295 | "type": "prometheus", 1296 | "uid": "${DS_PROMETHEUS}" 1297 | }, 1298 | "editorMode": "code", 1299 | "expr": "avg by (instance) (idrac_power_control_consumed_watts{instance=\"$instance\",name=~\".*Power Control|\"} > 0 or idrac_power_control_avg_consumed_watts{instance=\"$instance\",name=~\".*Power Control|\"})", 1300 | "instant": false, 1301 | "legendFormat": "Power", 1302 | "range": true, 1303 | "refId": "A" 1304 | } 1305 | ], 1306 | "title": "Power Consumption", 1307 | "type": "timeseries" 1308 | }, 1309 | { 1310 | "datasource": { 1311 | "type": "prometheus", 1312 | "uid": "${DS_PROMETHEUS}" 1313 | }, 1314 | "fieldConfig": { 1315 | "defaults": { 1316 | "color": { 1317 | "mode": "palette-classic" 1318 | }, 1319 | "custom": { 1320 | "axisBorderShow": false, 1321 | "axisCenteredZero": false, 1322 | "axisColorMode": "text", 1323 | "axisLabel": "", 1324 | "axisPlacement": "auto", 1325 | "barAlignment": 0, 1326 | "drawStyle": "line", 1327 | "fillOpacity": 0, 1328 | "gradientMode": "none", 1329 | "hideFrom": { 1330 | "legend": false, 1331 | "tooltip": false, 1332 | "viz": false 1333 | }, 1334 | "insertNulls": false, 1335 | "lineInterpolation": "linear", 1336 | "lineWidth": 1, 1337 | "pointSize": 5, 1338 | "scaleDistribution": { 1339 | "type": "linear" 1340 | }, 1341 | "showPoints": "auto", 1342 | "spanNulls": false, 1343 | "stacking": { 1344 | "group": "A", 1345 | "mode": "none" 1346 | }, 1347 | "thresholdsStyle": { 1348 | "mode": "off" 1349 | } 1350 | }, 1351 | "mappings": [], 1352 | "thresholds": { 1353 | "mode": "absolute", 1354 | "steps": [ 1355 | { 1356 | "color": "green", 1357 | "value": null 1358 | }, 1359 | { 1360 | "color": "red", 1361 | "value": 80 1362 | } 1363 | ] 1364 | }, 1365 | "unit": "rotrpm" 1366 | }, 1367 | "overrides": [] 1368 | }, 1369 | "gridPos": { 1370 | "h": 8, 1371 | "w": 12, 1372 | "x": 12, 1373 | "y": 28 1374 | }, 1375 | "id": 17, 1376 | "options": { 1377 | 
"legend": { 1378 | "calcs": [], 1379 | "displayMode": "table", 1380 | "placement": "right", 1381 | "showLegend": false 1382 | }, 1383 | "tooltip": { 1384 | "mode": "single", 1385 | "sort": "none" 1386 | } 1387 | }, 1388 | "targets": [ 1389 | { 1390 | "datasource": { 1391 | "type": "prometheus", 1392 | "uid": "${DS_PROMETHEUS}" 1393 | }, 1394 | "editorMode": "code", 1395 | "expr": "idrac_sensors_fan_speed{instance=\"$instance\"}", 1396 | "instant": false, 1397 | "legendFormat": "{{name}}", 1398 | "range": true, 1399 | "refId": "A" 1400 | } 1401 | ], 1402 | "title": "Fan Speed", 1403 | "type": "timeseries" 1404 | }, 1405 | { 1406 | "collapsed": false, 1407 | "gridPos": { 1408 | "h": 1, 1409 | "w": 24, 1410 | "x": 0, 1411 | "y": 36 1412 | }, 1413 | "id": 18, 1414 | "panels": [], 1415 | "title": "Devices", 1416 | "type": "row" 1417 | }, 1418 | { 1419 | "datasource": { 1420 | "type": "prometheus", 1421 | "uid": "${DS_PROMETHEUS}" 1422 | }, 1423 | "fieldConfig": { 1424 | "defaults": { 1425 | "color": { 1426 | "mode": "thresholds" 1427 | }, 1428 | "custom": { 1429 | "align": "auto", 1430 | "cellOptions": { 1431 | "type": "auto", 1432 | "wrapText": false 1433 | }, 1434 | "filterable": false, 1435 | "inspect": false 1436 | }, 1437 | "mappings": [], 1438 | "thresholds": { 1439 | "mode": "absolute", 1440 | "steps": [ 1441 | { 1442 | "color": "green", 1443 | "value": null 1444 | }, 1445 | { 1446 | "color": "red", 1447 | "value": 80 1448 | } 1449 | ] 1450 | } 1451 | }, 1452 | "overrides": [ 1453 | { 1454 | "matcher": { 1455 | "id": "byName", 1456 | "options": "Capacity" 1457 | }, 1458 | "properties": [ 1459 | { 1460 | "id": "unit", 1461 | "value": "bytes" 1462 | }, 1463 | { 1464 | "id": "custom.width", 1465 | "value": 100 1466 | } 1467 | ] 1468 | }, 1469 | { 1470 | "matcher": { 1471 | "id": "byName", 1472 | "options": "Speed" 1473 | }, 1474 | "properties": [ 1475 | { 1476 | "id": "unit", 1477 | "value": "Mhz" 1478 | }, 1479 | { 1480 | "id": "custom.width", 1481 | "value": 150 
1482 | } 1483 | ] 1484 | }, 1485 | { 1486 | "matcher": { 1487 | "id": "byName", 1488 | "options": "Health" 1489 | }, 1490 | "properties": [ 1491 | { 1492 | "id": "custom.width", 1493 | "value": 150 1494 | } 1495 | ] 1496 | }, 1497 | { 1498 | "matcher": { 1499 | "id": "byName", 1500 | "options": "Type" 1501 | }, 1502 | "properties": [ 1503 | { 1504 | "id": "custom.width", 1505 | "value": 100 1506 | } 1507 | ] 1508 | } 1509 | ] 1510 | }, 1511 | "gridPos": { 1512 | "h": 10, 1513 | "w": 12, 1514 | "x": 0, 1515 | "y": 37 1516 | }, 1517 | "id": 19, 1518 | "options": { 1519 | "cellHeight": "sm", 1520 | "footer": { 1521 | "countRows": false, 1522 | "fields": "", 1523 | "reducer": [ 1524 | "sum" 1525 | ], 1526 | "show": false 1527 | }, 1528 | "frameIndex": 3, 1529 | "showHeader": true, 1530 | "sortBy": [] 1531 | }, 1532 | "pluginVersion": "11.1.4", 1533 | "targets": [ 1534 | { 1535 | "datasource": { 1536 | "type": "prometheus", 1537 | "uid": "${DS_PROMETHEUS}" 1538 | }, 1539 | "editorMode": "code", 1540 | "exemplar": false, 1541 | "expr": "sum by (id,manufacturer,name,serial,type) (idrac_memory_module_info{instance=\"$instance\"})", 1542 | "format": "table", 1543 | "instant": true, 1544 | "legendFormat": "__auto", 1545 | "range": false, 1546 | "refId": "A" 1547 | }, 1548 | { 1549 | "datasource": { 1550 | "type": "prometheus", 1551 | "uid": "${DS_PROMETHEUS}" 1552 | }, 1553 | "editorMode": "code", 1554 | "exemplar": false, 1555 | "expr": "sum by (id,status) (idrac_memory_module_health{instance=\"$instance\"})", 1556 | "format": "table", 1557 | "hide": false, 1558 | "instant": true, 1559 | "legendFormat": "__auto", 1560 | "range": false, 1561 | "refId": "B" 1562 | }, 1563 | { 1564 | "datasource": { 1565 | "type": "prometheus", 1566 | "uid": "${DS_PROMETHEUS}" 1567 | }, 1568 | "editorMode": "code", 1569 | "exemplar": false, 1570 | "expr": "sum by (id) (idrac_memory_module_capacity_bytes{instance=\"$instance\"})", 1571 | "format": "table", 1572 | "hide": false, 1573 | 
"instant": true, 1574 | "legendFormat": "__auto", 1575 | "range": false, 1576 | "refId": "C" 1577 | }, 1578 | { 1579 | "datasource": { 1580 | "type": "prometheus", 1581 | "uid": "${DS_PROMETHEUS}" 1582 | }, 1583 | "editorMode": "code", 1584 | "exemplar": false, 1585 | "expr": "sum by (id) (idrac_memory_module_speed_mhz{instance=\"$instance\"})", 1586 | "format": "table", 1587 | "hide": false, 1588 | "instant": true, 1589 | "legendFormat": "__auto", 1590 | "range": false, 1591 | "refId": "D" 1592 | } 1593 | ], 1594 | "title": "DIMMs", 1595 | "transformations": [ 1596 | { 1597 | "id": "merge", 1598 | "options": {} 1599 | }, 1600 | { 1601 | "id": "convertFieldType", 1602 | "options": { 1603 | "conversions": [ 1604 | { 1605 | "destinationType": "number", 1606 | "targetField": "id" 1607 | } 1608 | ], 1609 | "fields": {} 1610 | } 1611 | }, 1612 | { 1613 | "id": "sortBy", 1614 | "options": { 1615 | "fields": {}, 1616 | "sort": [ 1617 | { 1618 | "desc": false, 1619 | "field": "id" 1620 | } 1621 | ] 1622 | } 1623 | }, 1624 | { 1625 | "id": "organize", 1626 | "options": { 1627 | "excludeByName": { 1628 | "Time": true, 1629 | "Value #A": true, 1630 | "Value #B": true, 1631 | "id": true 1632 | }, 1633 | "includeByName": {}, 1634 | "indexByName": { 1635 | "Time": 0, 1636 | "Value #A": 7, 1637 | "Value #B": 8, 1638 | "Value #C": 9, 1639 | "Value #D": 10, 1640 | "id": 2, 1641 | "manufacturer": 3, 1642 | "name": 1, 1643 | "serial": 5, 1644 | "status": 4, 1645 | "type": 6 1646 | }, 1647 | "renameByName": { 1648 | "Value #A": "", 1649 | "Value #B": "", 1650 | "Value #C": "Capacity", 1651 | "Value #D": "Speed", 1652 | "manufacturer": "Vendor", 1653 | "name": "Name", 1654 | "serial": "Serial Number", 1655 | "status": "Health", 1656 | "type": "Type" 1657 | } 1658 | } 1659 | } 1660 | ], 1661 | "type": "table" 1662 | }, 1663 | { 1664 | "datasource": { 1665 | "type": "prometheus", 1666 | "uid": "${DS_PROMETHEUS}" 1667 | }, 1668 | "fieldConfig": { 1669 | "defaults": { 1670 | "color": { 
1671 | "mode": "thresholds" 1672 | }, 1673 | "custom": { 1674 | "align": "auto", 1675 | "cellOptions": { 1676 | "type": "auto", 1677 | "wrapText": false 1678 | }, 1679 | "filterable": false, 1680 | "inspect": false 1681 | }, 1682 | "mappings": [], 1683 | "thresholds": { 1684 | "mode": "absolute", 1685 | "steps": [ 1686 | { 1687 | "color": "green", 1688 | "value": null 1689 | }, 1690 | { 1691 | "color": "red", 1692 | "value": 80 1693 | } 1694 | ] 1695 | } 1696 | }, 1697 | "overrides": [ 1698 | { 1699 | "matcher": { 1700 | "id": "byName", 1701 | "options": "Capacity" 1702 | }, 1703 | "properties": [ 1704 | { 1705 | "id": "unit", 1706 | "value": "decbytes" 1707 | } 1708 | ] 1709 | }, 1710 | { 1711 | "matcher": { 1712 | "id": "byName", 1713 | "options": "Life Left" 1714 | }, 1715 | "properties": [ 1716 | { 1717 | "id": "unit", 1718 | "value": "percent" 1719 | } 1720 | ] 1721 | } 1722 | ] 1723 | }, 1724 | "gridPos": { 1725 | "h": 10, 1726 | "w": 12, 1727 | "x": 12, 1728 | "y": 37 1729 | }, 1730 | "id": 21, 1731 | "options": { 1732 | "cellHeight": "sm", 1733 | "footer": { 1734 | "countRows": false, 1735 | "fields": "", 1736 | "reducer": [ 1737 | "sum" 1738 | ], 1739 | "show": false 1740 | }, 1741 | "frameIndex": 3, 1742 | "showHeader": true, 1743 | "sortBy": [] 1744 | }, 1745 | "pluginVersion": "11.1.4", 1746 | "targets": [ 1747 | { 1748 | "datasource": { 1749 | "type": "prometheus", 1750 | "uid": "${DS_PROMETHEUS}" 1751 | }, 1752 | "editorMode": "code", 1753 | "exemplar": false, 1754 | "expr": "sum by (id,manufacturer,mediatype,model,name,protocol,serial) (idrac_storage_drive_info{instance=\"$instance\"})", 1755 | "format": "table", 1756 | "instant": true, 1757 | "legendFormat": "__auto", 1758 | "range": false, 1759 | "refId": "A" 1760 | }, 1761 | { 1762 | "datasource": { 1763 | "type": "prometheus", 1764 | "uid": "${DS_PROMETHEUS}" 1765 | }, 1766 | "editorMode": "code", 1767 | "exemplar": false, 1768 | "expr": "sum by (id,status) 
(idrac_storage_drive_health{instance=\"$instance\"})", 1769 | "format": "table", 1770 | "hide": false, 1771 | "instant": true, 1772 | "legendFormat": "__auto", 1773 | "range": false, 1774 | "refId": "B" 1775 | }, 1776 | { 1777 | "datasource": { 1778 | "type": "prometheus", 1779 | "uid": "${DS_PROMETHEUS}" 1780 | }, 1781 | "editorMode": "code", 1782 | "exemplar": false, 1783 | "expr": "sum by (id) (idrac_storage_drive_capacity_bytes{instance=\"$instance\"})", 1784 | "format": "table", 1785 | "hide": false, 1786 | "instant": true, 1787 | "legendFormat": "__auto", 1788 | "range": false, 1789 | "refId": "C" 1790 | }, 1791 | { 1792 | "datasource": { 1793 | "type": "prometheus", 1794 | "uid": "${DS_PROMETHEUS}" 1795 | }, 1796 | "editorMode": "code", 1797 | "exemplar": false, 1798 | "expr": "sum by (id) (idrac_storage_drive_life_left_percent{instance=\"$instance\"})", 1799 | "format": "table", 1800 | "hide": false, 1801 | "instant": true, 1802 | "legendFormat": "__auto", 1803 | "range": false, 1804 | "refId": "D" 1805 | } 1806 | ], 1807 | "title": "Disks", 1808 | "transformations": [ 1809 | { 1810 | "id": "merge", 1811 | "options": {} 1812 | }, 1813 | { 1814 | "id": "organize", 1815 | "options": { 1816 | "excludeByName": { 1817 | "Time": true, 1818 | "Value #A": true, 1819 | "Value #B": true, 1820 | "id": true 1821 | }, 1822 | "includeByName": {}, 1823 | "indexByName": { 1824 | "Time": 0, 1825 | "Value #A": 7, 1826 | "Value #B": 8, 1827 | "Value #C": 9, 1828 | "Value #D": 10, 1829 | "id": 2, 1830 | "manufacturer": 3, 1831 | "mediatype": 11, 1832 | "model": 4, 1833 | "name": 1, 1834 | "protocol": 12, 1835 | "serial": 5, 1836 | "status": 6 1837 | }, 1838 | "renameByName": { 1839 | "Value #A": "", 1840 | "Value #B": "", 1841 | "Value #C": "Capacity", 1842 | "Value #D": "Life Left", 1843 | "manufacturer": "Vendor", 1844 | "mediatype": "Media", 1845 | "model": "Model", 1846 | "name": "Name", 1847 | "protocol": "Protocol", 1848 | "serial": "Serial Number", 1849 | "status": 
"Health", 1850 | "type": "Type" 1851 | } 1852 | } 1853 | } 1854 | ], 1855 | "type": "table" 1856 | }, 1857 | { 1858 | "datasource": { 1859 | "type": "prometheus", 1860 | "uid": "${DS_PROMETHEUS}" 1861 | }, 1862 | "fieldConfig": { 1863 | "defaults": { 1864 | "color": { 1865 | "mode": "thresholds" 1866 | }, 1867 | "custom": { 1868 | "align": "auto", 1869 | "cellOptions": { 1870 | "type": "auto" 1871 | }, 1872 | "inspect": false 1873 | }, 1874 | "mappings": [], 1875 | "thresholds": { 1876 | "mode": "absolute", 1877 | "steps": [ 1878 | { 1879 | "color": "green", 1880 | "value": null 1881 | }, 1882 | { 1883 | "color": "red", 1884 | "value": 80 1885 | } 1886 | ] 1887 | } 1888 | }, 1889 | "overrides": [ 1890 | { 1891 | "matcher": { 1892 | "id": "byName", 1893 | "options": "Speed" 1894 | }, 1895 | "properties": [ 1896 | { 1897 | "id": "unit", 1898 | "value": "Mbits" 1899 | } 1900 | ] 1901 | }, 1902 | { 1903 | "matcher": { 1904 | "id": "byName", 1905 | "options": "Link Status" 1906 | }, 1907 | "properties": [ 1908 | { 1909 | "id": "mappings", 1910 | "value": [ 1911 | { 1912 | "options": { 1913 | "0": { 1914 | "color": "red", 1915 | "index": 0, 1916 | "text": "Down" 1917 | }, 1918 | "1": { 1919 | "color": "green", 1920 | "index": 1, 1921 | "text": "Up" 1922 | } 1923 | }, 1924 | "type": "value" 1925 | } 1926 | ] 1927 | } 1928 | ] 1929 | } 1930 | ] 1931 | }, 1932 | "gridPos": { 1933 | "h": 8, 1934 | "w": 12, 1935 | "x": 0, 1936 | "y": 47 1937 | }, 1938 | "id": 20, 1939 | "options": { 1940 | "cellHeight": "sm", 1941 | "footer": { 1942 | "countRows": false, 1943 | "fields": "", 1944 | "reducer": [ 1945 | "sum" 1946 | ], 1947 | "show": false 1948 | }, 1949 | "frameIndex": 3, 1950 | "showHeader": true, 1951 | "sortBy": [] 1952 | }, 1953 | "pluginVersion": "11.1.4", 1954 | "targets": [ 1955 | { 1956 | "datasource": { 1957 | "type": "prometheus", 1958 | "uid": "${DS_PROMETHEUS}" 1959 | }, 1960 | "editorMode": "code", 1961 | "exemplar": false, 1962 | "expr": "sum by (id,interface_id) 
(idrac_network_port_link_up{instance=\"$instance\"})", 1963 | "format": "table", 1964 | "instant": true, 1965 | "legendFormat": "__auto", 1966 | "range": false, 1967 | "refId": "A" 1968 | }, 1969 | { 1970 | "datasource": { 1971 | "type": "prometheus", 1972 | "uid": "${DS_PROMETHEUS}" 1973 | }, 1974 | "editorMode": "code", 1975 | "exemplar": false, 1976 | "expr": "sum by (id,interface_id,status) (idrac_network_port_health{instance=\"$instance\"})", 1977 | "format": "table", 1978 | "hide": false, 1979 | "instant": true, 1980 | "legendFormat": "__auto", 1981 | "range": false, 1982 | "refId": "B" 1983 | }, 1984 | { 1985 | "datasource": { 1986 | "type": "prometheus", 1987 | "uid": "${DS_PROMETHEUS}" 1988 | }, 1989 | "editorMode": "code", 1990 | "exemplar": false, 1991 | "expr": "sum by (id,interface_id) (idrac_network_port_speed_mbps{instance=\"$instance\"})", 1992 | "format": "table", 1993 | "hide": false, 1994 | "instant": true, 1995 | "legendFormat": "__auto", 1996 | "range": false, 1997 | "refId": "D" 1998 | } 1999 | ], 2000 | "title": "Network Ports", 2001 | "transformations": [ 2002 | { 2003 | "id": "merge", 2004 | "options": {} 2005 | }, 2006 | { 2007 | "id": "sortBy", 2008 | "options": { 2009 | "fields": {}, 2010 | "sort": [ 2011 | { 2012 | "desc": false, 2013 | "field": "interface_id" 2014 | } 2015 | ] 2016 | } 2017 | }, 2018 | { 2019 | "id": "organize", 2020 | "options": { 2021 | "excludeByName": { 2022 | "Time": true, 2023 | "Value #A": false, 2024 | "Value #B": true, 2025 | "id": false 2026 | }, 2027 | "includeByName": {}, 2028 | "indexByName": { 2029 | "Time": 0, 2030 | "Value #A": 4, 2031 | "Value #B": 5, 2032 | "Value #D": 6, 2033 | "id": 2, 2034 | "interface_id": 1, 2035 | "status": 3 2036 | }, 2037 | "renameByName": { 2038 | "Value #A": "Link Status", 2039 | "Value #B": "", 2040 | "Value #C": "Capacity", 2041 | "Value #D": "Speed", 2042 | "id": "Port", 2043 | "interface_id": "Interface", 2044 | "manufacturer": "Vendor", 2045 | "name": "Name", 2046 | 
"serial": "Serial Number", 2047 | "status": "Health", 2048 | "type": "Type" 2049 | } 2050 | } 2051 | } 2052 | ], 2053 | "type": "table" 2054 | } 2055 | ], 2056 | "refresh": "5m", 2057 | "schemaVersion": 39, 2058 | "tags": [], 2059 | "templating": { 2060 | "list": [ 2061 | { 2062 | "current": { 2063 | "selected": false, 2064 | "text": "Infrastructure", 2065 | "value": "000000006" 2066 | }, 2067 | "hide": 0, 2068 | "includeAll": false, 2069 | "label": "Datasource", 2070 | "multi": false, 2071 | "name": "DS_PROMETHEUS", 2072 | "options": [], 2073 | "query": "prometheus", 2074 | "queryValue": "", 2075 | "refresh": 1, 2076 | "regex": "", 2077 | "skipUrlSync": false, 2078 | "type": "datasource" 2079 | }, 2080 | { 2081 | "datasource": { 2082 | "type": "prometheus", 2083 | "uid": "${DS_PROMETHEUS}" 2084 | }, 2085 | "definition": "label_values(idrac_exporter_build_info,instance)", 2086 | "hide": 0, 2087 | "includeAll": false, 2088 | "label": "Instance", 2089 | "multi": false, 2090 | "name": "instance", 2091 | "options": [], 2092 | "query": { 2093 | "qryType": 1, 2094 | "query": "label_values(idrac_exporter_build_info,instance)", 2095 | "refId": "PrometheusVariableQueryEditor-VariableQuery" 2096 | }, 2097 | "refresh": 1, 2098 | "regex": "", 2099 | "skipUrlSync": false, 2100 | "sort": 7, 2101 | "type": "query" 2102 | } 2103 | ] 2104 | }, 2105 | "time": { 2106 | "from": "now-6h", 2107 | "to": "now" 2108 | }, 2109 | "timepicker": {}, 2110 | "timezone": "browser", 2111 | "title": "BMC Status", 2112 | "uid": "ae1q0f1m5ys5cc", 2113 | "version": 3, 2114 | "weekStart": "" 2115 | } 2116 | -------------------------------------------------------------------------------- /internal/collector/client.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | "time" 7 | 8 | "github.com/mrlhansen/idrac_exporter/internal/config" 9 | "github.com/prometheus/client_golang/prometheus" 10 | ) 11 | 12 | 
const ( 13 | UNKNOWN = iota 14 | DELL 15 | HPE 16 | LENOVO 17 | INSPUR 18 | H3C 19 | INVENTEC 20 | FUJITSU 21 | ) 22 | 23 | type Client struct { 24 | redfish *Redfish 25 | vendor int 26 | version int 27 | systemPath string 28 | thermalPath string 29 | powerPath string 30 | storagePath string 31 | memoryPath string 32 | networkPath string 33 | eventPath string 34 | procPath string 35 | dellPath string 36 | } 37 | 38 | func NewClient(h *config.HostConfig) *Client { 39 | client := &Client{ 40 | redfish: NewRedfish( 41 | h.Scheme, 42 | h.Hostname, 43 | h.Username, 44 | h.Password, 45 | ), 46 | } 47 | 48 | client.redfish.CreateSession() 49 | ok := client.findAllEndpoints() 50 | if !ok { 51 | client.redfish.DeleteSession() 52 | return nil 53 | } 54 | 55 | return client 56 | } 57 | 58 | func (client *Client) findAllEndpoints() bool { 59 | var root V1Response 60 | var group GroupResponse 61 | var chassis ChassisResponse 62 | var system SystemResponse 63 | var ok bool 64 | 65 | // Root 66 | ok = client.redfish.Get(redfishRootPath, &root) 67 | if !ok { 68 | return false 69 | } 70 | 71 | // System 72 | ok = client.redfish.Get(root.Systems.OdataId, &group) 73 | if !ok { 74 | return false 75 | } 76 | 77 | client.systemPath = group.Members[0].OdataId 78 | 79 | // Chassis 80 | ok = client.redfish.Get(root.Chassis.OdataId, &group) 81 | if !ok { 82 | return false 83 | } 84 | 85 | // Thermal and Power 86 | ok = client.redfish.Get(group.Members[0].OdataId, &chassis) 87 | if !ok { 88 | return false 89 | } 90 | 91 | ok = client.redfish.Get(client.systemPath, &system) 92 | if !ok { 93 | return false 94 | } 95 | 96 | client.storagePath = system.Storage.OdataId 97 | client.memoryPath = system.Memory.OdataId 98 | client.networkPath = system.NetworkInterfaces.OdataId 99 | client.thermalPath = chassis.Thermal.OdataId 100 | client.powerPath = chassis.Power.OdataId 101 | client.procPath = system.Processors.OdataId 102 | 103 | // Vendor 104 | m := strings.ToLower(system.Manufacturer) 105 | if 
strings.Contains(m, "dell") { 106 | client.vendor = DELL 107 | } else if strings.Contains(m, "hpe") { 108 | client.vendor = HPE 109 | } else if strings.Contains(m, "lenovo") { 110 | client.vendor = LENOVO 111 | } else if strings.Contains(m, "inspur") { 112 | client.vendor = INSPUR 113 | } else if strings.Contains(m, "h3c") { 114 | client.vendor = H3C 115 | } else if strings.Contains(m, "inventec") { 116 | client.vendor = INVENTEC 117 | } else if strings.Contains(m, "fujitsu") { 118 | client.vendor = FUJITSU 119 | } 120 | 121 | // Path for event log 122 | if config.Config.Collect.Events { 123 | switch client.vendor { 124 | case DELL: 125 | client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries" 126 | case LENOVO: 127 | { 128 | if client.redfish.Exists("/redfish/v1/Systems/1/LogServices/PlatformLog/Entries") { 129 | client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries" 130 | } else if client.redfish.Exists("/redfish/v1/Systems/1/LogServices/StandardLog/Entries") { 131 | client.eventPath = "/redfish/v1/Systems/1/LogServices/StandardLog/Entries" 132 | } 133 | } 134 | case HPE: 135 | client.eventPath = "/redfish/v1/Systems/1/LogServices/IML/Entries" 136 | case FUJITSU: 137 | client.eventPath = "/redfish/v1/Managers/iRMC/LogServices/SystemEventLog/Entries" 138 | } 139 | } 140 | 141 | // Dell OEM 142 | if config.Config.Collect.Extra { 143 | if client.vendor == DELL { 144 | if client.redfish.Exists(DellSystemPath) { 145 | client.dellPath = DellSystemPath 146 | } 147 | } 148 | } 149 | 150 | // Issue #50 151 | if client.vendor == INSPUR { 152 | client.storagePath = strings.ReplaceAll(client.storagePath, "Storages", "Storage") 153 | } 154 | 155 | // Fix for iLO 4 machines 156 | if client.vendor == HPE { 157 | if strings.Contains(root.Name, "HP RESTful") { 158 | client.memoryPath = "/redfish/v1/Systems/1/Memory/" 159 | client.storagePath = "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/" 160 | client.eventPath = "" 161 | 
client.version = 4 162 | } 163 | } 164 | 165 | return true 166 | } 167 | 168 | func (client *Client) RefreshSensors(mc *Collector, ch chan<- prometheus.Metric) bool { 169 | resp := ThermalResponse{} 170 | ok := client.redfish.Get(client.thermalPath, &resp) 171 | if !ok { 172 | return false 173 | } 174 | 175 | for n, t := range resp.Temperatures { 176 | if t.Status.State != StateEnabled { 177 | continue 178 | } 179 | 180 | if t.ReadingCelsius < 0 { 181 | continue 182 | } 183 | 184 | id := t.GetId(n) 185 | mc.NewSensorsTemperature(ch, t.ReadingCelsius, id, t.Name, "celsius") 186 | } 187 | 188 | for n, f := range resp.Fans { 189 | if f.Status.State != StateEnabled { 190 | continue 191 | } 192 | 193 | name := f.GetName() 194 | if name == "" { 195 | continue 196 | } 197 | 198 | units := f.GetUnits() 199 | if units == "" { 200 | continue 201 | } 202 | 203 | id := f.GetId(n) 204 | mc.NewSensorsFanHealth(ch, id, name, f.Status.Health) 205 | mc.NewSensorsFanSpeed(ch, f.GetReading(), id, name, strings.ToLower(units)) 206 | } 207 | 208 | return true 209 | } 210 | 211 | func (client *Client) RefreshSystem(mc *Collector, ch chan<- prometheus.Metric) bool { 212 | resp := SystemResponse{} 213 | ok := client.redfish.Get(client.systemPath, &resp) 214 | if !ok { 215 | return false 216 | } 217 | 218 | // Need on iLO 6 219 | if client.vendor == HPE && resp.IndicatorLED == "" { 220 | resp.IndicatorLED = resp.Oem.Hpe.IndicatorLED 221 | } 222 | 223 | mc.NewSystemPowerOn(ch, &resp) 224 | mc.NewSystemHealth(ch, &resp) 225 | mc.NewSystemIndicatorLED(ch, &resp) 226 | mc.NewSystemIndicatorActive(ch, &resp) 227 | mc.NewSystemMemorySize(ch, &resp) 228 | mc.NewSystemCpuCount(ch, &resp) 229 | mc.NewSystemBiosInfo(ch, &resp) 230 | mc.NewSystemMachineInfo(ch, &resp) 231 | 232 | return true 233 | } 234 | 235 | func (client *Client) RefreshProcessors(mc *Collector, ch chan<- prometheus.Metric) bool { 236 | group := GroupResponse{} 237 | ok := client.redfish.Get(client.procPath, &group) 238 | if !ok { 
239 | return false 240 | } 241 | 242 | for _, c := range group.Members.GetLinks() { 243 | resp := Processor{} 244 | ok = client.redfish.Get(c, &resp) 245 | if !ok { 246 | return false 247 | } 248 | 249 | if resp.ProcessorType != "CPU" { 250 | continue 251 | } 252 | 253 | if resp.Status.State != StateEnabled { 254 | continue 255 | } 256 | 257 | mc.NewCpuInfo(ch, &resp) 258 | mc.NewCpuHealth(ch, &resp) 259 | mc.NewCpuVoltage(ch, &resp) 260 | mc.NewCpuMaxSpeed(ch, &resp) 261 | mc.NewCpuCurrentSpeed(ch, &resp) 262 | mc.NewCpuTotalCores(ch, &resp) 263 | mc.NewCpuTotalThreads(ch, &resp) 264 | } 265 | 266 | return true 267 | } 268 | 269 | func (client *Client) RefreshNetwork(mc *Collector, ch chan<- prometheus.Metric) bool { 270 | group := GroupResponse{} 271 | ok := client.redfish.Get(client.networkPath, &group) 272 | if !ok { 273 | return false 274 | } 275 | 276 | for _, c := range group.Members.GetLinks() { 277 | ni := NetworkInterface{} 278 | ok = client.redfish.Get(c, &ni) 279 | if !ok { 280 | return false 281 | } 282 | 283 | if ni.Status.State != StateEnabled { 284 | continue 285 | } 286 | 287 | mc.NewNetworkInterfaceHealth(ch, &ni) 288 | 289 | ports := GroupResponse{} 290 | ok = client.redfish.Get(ni.GetPorts(), &ports) 291 | if !ok { 292 | return false 293 | } 294 | 295 | for _, c := range ports.Members.GetLinks() { 296 | port := NetworkPort{} 297 | ok = client.redfish.Get(c, &port) 298 | if !ok { 299 | return false 300 | } 301 | 302 | // Issue #92 303 | if client.vendor == DELL { 304 | if ni.Id == port.Id { 305 | s := strings.Split(c, "/") 306 | port.Id = s[len(s)-1] 307 | } 308 | } 309 | 310 | mc.NewNetworkPortHealth(ch, ni.Id, &port) 311 | mc.NewNetworkPortSpeed(ch, ni.Id, &port) 312 | mc.NewNetworkPortLinkUp(ch, ni.Id, &port) 313 | } 314 | } 315 | 316 | return true 317 | } 318 | 319 | func (client *Client) RefreshPower(mc *Collector, ch chan<- prometheus.Metric) bool { 320 | resp := PowerResponse{} 321 | ok := client.redfish.Get(client.powerPath, &resp) 322 | 
if !ok { 323 | return false 324 | } 325 | 326 | // Issue #121 327 | if (client.vendor == FUJITSU) && (resp.Oem.TsFujitsu != nil) { 328 | for n, p := range resp.PowerSupplies { 329 | if len(p.Name) == 0 { 330 | continue 331 | } 332 | for _, v := range resp.Oem.TsFujitsu.ChassisPowerSensors { 333 | if (v.EntityID == "Power Supply") && strings.HasPrefix(v.Designation, p.Name) { 334 | resp.PowerSupplies[n].PowerInputWatts = v.CurrentPowerConsumptionW 335 | } 336 | } 337 | } 338 | if cp := resp.Oem.TsFujitsu.ChassisPowerConsumption; cp != nil { 339 | if len(resp.PowerControl) > 0 { 340 | pc := &resp.PowerControl[0] 341 | if cp.CurrentPowerConsumptionW > 0 { 342 | pc.PowerConsumedWatts = cp.CurrentPowerConsumptionW 343 | } 344 | if cp.CurrentMaximumPowerW > 0 { 345 | pc.PowerCapacityWatts = cp.CurrentMaximumPowerW 346 | } 347 | if pc.PowerMetrics == nil { 348 | pc.PowerMetrics = &PowerMetrics{ 349 | AvgConsumedWatts: cp.AveragePowerW, 350 | MaxConsumedWatts: cp.PeakPowerW, 351 | MinConsumedWatts: cp.MinimumPowerW, 352 | } 353 | } 354 | } 355 | } 356 | } 357 | 358 | for i, psu := range resp.PowerSupplies { 359 | // Status is missing, but information is there 360 | if client.vendor == INVENTEC { 361 | psu.Status.State = StateEnabled 362 | } 363 | 364 | // Issue #116 365 | if (client.vendor == HPE) && (client.version == 4) { 366 | if psu.FirmwareVersion == "0.00" { 367 | continue 368 | } 369 | } 370 | 371 | if psu.Status.State != StateEnabled { 372 | continue 373 | } 374 | 375 | id := strconv.Itoa(i) 376 | mc.NewPowerSupplyHealth(ch, psu.Status.Health, id) 377 | mc.NewPowerSupplyInputWatts(ch, psu.PowerInputWatts, id) 378 | mc.NewPowerSupplyInputVoltage(ch, psu.LineInputVoltage, id) 379 | mc.NewPowerSupplyOutputWatts(ch, psu.GetOutputPower(), id) 380 | mc.NewPowerSupplyCapacityWatts(ch, psu.PowerCapacityWatts, id) 381 | mc.NewPowerSupplyEfficiencyPercent(ch, psu.EfficiencyPercent, id) 382 | } 383 | 384 | for i, pc := range resp.PowerControl { 385 | id := strconv.Itoa(i) 386 
| mc.NewPowerControlConsumedWatts(ch, pc.PowerConsumedWatts, id, pc.Name) 387 | mc.NewPowerControlCapacityWatts(ch, pc.PowerCapacityWatts, id, pc.Name) 388 | 389 | if pc.PowerMetrics == nil { 390 | continue 391 | } 392 | 393 | pm := pc.PowerMetrics 394 | mc.NewPowerControlMinConsumedWatts(ch, pm.MinConsumedWatts, id, pc.Name) 395 | mc.NewPowerControlMaxConsumedWatts(ch, pm.MaxConsumedWatts, id, pc.Name) 396 | mc.NewPowerControlAvgConsumedWatts(ch, pm.AvgConsumedWatts, id, pc.Name) 397 | mc.NewPowerControlInterval(ch, pm.IntervalInMinutes, id, pc.Name) 398 | } 399 | 400 | return true 401 | } 402 | 403 | func (client *Client) RefreshEventLog(mc *Collector, ch chan<- prometheus.Metric) bool { 404 | if client.eventPath == "" { 405 | return true 406 | } 407 | 408 | resp := EventLogResponse{} 409 | ok := client.redfish.Get(client.eventPath, &resp) 410 | if !ok { 411 | return false 412 | } 413 | 414 | level := config.Config.Event.SeverityLevel 415 | maxage := config.Config.Event.MaxAgeSeconds 416 | 417 | for _, e := range resp.Members { 418 | t, err := time.Parse(time.RFC3339, e.Created) 419 | if err != nil { 420 | continue 421 | } 422 | 423 | d := time.Since(t) 424 | if d.Seconds() > maxage { 425 | continue 426 | } 427 | 428 | severity := health2value(e.Severity) 429 | if severity < level { 430 | continue 431 | } 432 | 433 | mc.NewEventLogEntry(ch, e.Id, e.Message, e.Severity, t) 434 | } 435 | 436 | return true 437 | } 438 | 439 | func (client *Client) RefreshStorage(mc *Collector, ch chan<- prometheus.Metric) bool { 440 | group := GroupResponse{} 441 | ok := client.redfish.Get(client.storagePath, &group) 442 | if !ok { 443 | return false 444 | } 445 | 446 | for _, c := range group.Members.GetLinks() { 447 | storage := Storage{} 448 | ok = client.redfish.Get(c, &storage) 449 | if !ok { 450 | return false 451 | } 452 | 453 | // iLO 4 454 | if (client.vendor == HPE) && (client.version == 4) { 455 | grp := GroupResponse{} 456 | ok = client.redfish.Get(c+"DiskDrives/", &grp) 
457 | if !ok { 458 | return false 459 | } 460 | storage.Drives = grp.Members 461 | } 462 | 463 | mc.NewStorageInfo(ch, &storage) 464 | mc.NewStorageHealth(ch, &storage) 465 | mc.NewDellControllerBatteryHealth(ch, &storage) 466 | 467 | // Drives 468 | for _, c := range storage.Drives.GetLinks() { 469 | drive := StorageDrive{} 470 | ok = client.redfish.Get(c, &drive) 471 | if !ok { 472 | return false 473 | } 474 | 475 | if drive.Status.State == StateAbsent { 476 | continue 477 | } 478 | 479 | // iLO 4 480 | if (client.vendor == HPE) && (client.version == 4) { 481 | drive.CapacityBytes = 1024 * 1024 * drive.CapacityMiB 482 | drive.Protocol = drive.InterfaceType 483 | drive.PredictedLifeLeft = 100.0 - drive.SSDEnduranceUtilizationPercentage 484 | } 485 | 486 | mc.NewStorageDriveInfo(ch, storage.Id, &drive) 487 | mc.NewStorageDriveHealth(ch, storage.Id, &drive) 488 | mc.NewStorageDriveCapacity(ch, storage.Id, &drive) 489 | mc.NewStorageDriveLifeLeft(ch, storage.Id, &drive) 490 | mc.NewStorageDriveIndicatorActive(ch, storage.Id, &drive) 491 | } 492 | 493 | // iLO 4 494 | if (client.vendor == HPE) && (client.version == 4) { 495 | continue 496 | } 497 | 498 | // Controllers 499 | if c := storage.Controllers.OdataId; len(c) > 0 { 500 | grp := GroupResponse{} 501 | ok = client.redfish.Get(c, &grp) 502 | if !ok { 503 | return false 504 | } 505 | 506 | for _, c := range grp.Members.GetLinks() { 507 | ctlr := StorageController{} 508 | ok = client.redfish.Get(c, &ctlr) 509 | if !ok { 510 | return false 511 | } 512 | 513 | mc.NewStorageControllerInfo(ch, storage.Id, &ctlr) 514 | mc.NewStorageControllerSpeed(ch, storage.Id, &ctlr) 515 | mc.NewStorageControllerHealth(ch, storage.Id, &ctlr) 516 | } 517 | } 518 | 519 | // Volumes 520 | if c := storage.Volumes.OdataId; len(c) > 0 { 521 | grp := GroupResponse{} 522 | ok = client.redfish.Get(c, &grp) 523 | if !ok { 524 | return false 525 | } 526 | 527 | for _, c := range grp.Members.GetLinks() { 528 | vol := StorageVolume{} 529 | ok = 
client.redfish.Get(c, &vol) 530 | if !ok { 531 | return false 532 | } 533 | 534 | mc.NewStorageVolumeInfo(ch, storage.Id, &vol) 535 | mc.NewStorageVolumeHealth(ch, storage.Id, &vol) 536 | mc.NewStorageVolumeCapacity(ch, storage.Id, &vol) 537 | mc.NewStorageVolumeMediaSpan(ch, storage.Id, &vol) 538 | } 539 | } 540 | } 541 | 542 | return true 543 | } 544 | 545 | func (client *Client) RefreshMemory(mc *Collector, ch chan<- prometheus.Metric) bool { 546 | group := GroupResponse{} 547 | ok := client.redfish.Get(client.memoryPath, &group) 548 | if !ok { 549 | return false 550 | } 551 | 552 | for _, c := range group.Members.GetLinks() { 553 | m := Memory{} 554 | ok = client.redfish.Get(c, &m) 555 | if !ok { 556 | return false 557 | } 558 | 559 | if (m.Status.State == StateAbsent) || (m.Id == "") { 560 | continue 561 | } 562 | 563 | // iLO 4 564 | if (client.vendor == HPE) && (client.version == 4) { 565 | m.Manufacturer = strings.TrimSpace(m.Manufacturer) 566 | m.RankCount = m.Rank 567 | m.MemoryDeviceType = m.DIMMType 568 | m.Status.Health = m.DIMMStatus 569 | m.CapacityMiB = m.SizeMB 570 | } 571 | 572 | mc.NewMemoryModuleInfo(ch, &m) 573 | mc.NewMemoryModuleHealth(ch, &m) 574 | mc.NewMemoryModuleCapacity(ch, &m) 575 | mc.NewMemoryModuleSpeed(ch, &m) 576 | } 577 | 578 | return true 579 | } 580 | 581 | func (client *Client) RefreshDell(mc *Collector, ch chan<- prometheus.Metric) bool { 582 | if client.dellPath == "" { 583 | return true 584 | } 585 | 586 | resp := DellSystem{} 587 | ok := client.redfish.Get(client.dellPath, &resp) 588 | if !ok { 589 | return false 590 | } 591 | 592 | mc.NewDellBatteryRollupHealth(ch, &resp) 593 | mc.NewDellEstimatedSystemAirflowCFM(ch, &resp) 594 | 595 | return true 596 | } 597 | -------------------------------------------------------------------------------- /internal/collector/collector.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 
// mu guards the collectors map.
var mu sync.Mutex

// collectors holds one Collector per target; entries are created on demand
// by GetCollector and removed by Reset.
var collectors = map[string]*Collector{}

// Collector gathers Redfish metrics for a single target and exposes them
// through the prometheus.Collector interface (Describe and Collect).
type Collector struct {
	// Internal variables
	client     *Client              // Redfish client, created in GetCollector on first use
	registry   *prometheus.Registry // private registry the collector registers itself with
	collected  *sync.Cond           // lock and condition used by Gather and GetCollector
	collecting bool                 // true while a Gather call is collecting metrics
	errors     atomic.Uint64        // number of failed refresh calls, incremented in Collect
	builder    *strings.Builder     // text output produced by Gather

	// Exporter
	ExporterBuildInfo         *prometheus.Desc
	ExporterScrapeErrorsTotal *prometheus.Desc

	// System
	SystemPowerOn         *prometheus.Desc
	SystemHealth          *prometheus.Desc
	SystemIndicatorLED    *prometheus.Desc // This attribute is deprecated in Redfish
	SystemIndicatorActive *prometheus.Desc
	SystemMemorySize      *prometheus.Desc
	SystemCpuCount        *prometheus.Desc
	SystemBiosInfo        *prometheus.Desc
	SystemMachineInfo     *prometheus.Desc

	// Sensors
	SensorsTemperature *prometheus.Desc
	SensorsFanHealth   *prometheus.Desc
	SensorsFanSpeed    *prometheus.Desc

	// Power supply
	PowerSupplyHealth            *prometheus.Desc
	PowerSupplyOutputWatts       *prometheus.Desc
	PowerSupplyInputWatts        *prometheus.Desc
	PowerSupplyCapacityWatts     *prometheus.Desc
	PowerSupplyInputVoltage      *prometheus.Desc
	PowerSupplyEfficiencyPercent *prometheus.Desc

	// Power control
	PowerControlConsumedWatts    *prometheus.Desc
	PowerControlCapacityWatts    *prometheus.Desc
	PowerControlMinConsumedWatts *prometheus.Desc
	PowerControlMaxConsumedWatts *prometheus.Desc
	PowerControlAvgConsumedWatts *prometheus.Desc
	PowerControlInterval         *prometheus.Desc

	// System event log
	EventLogEntry *prometheus.Desc

	// Storage
	StorageInfo                 *prometheus.Desc
	StorageHealth               *prometheus.Desc
	StorageDriveInfo            *prometheus.Desc
	StorageDriveHealth          *prometheus.Desc
	StorageDriveCapacity        *prometheus.Desc
	StorageDriveLifeLeft        *prometheus.Desc
	StorageDriveIndicatorActive *prometheus.Desc
	StorageControllerInfo       *prometheus.Desc
	StorageControllerHealth     *prometheus.Desc
	StorageControllerSpeed      *prometheus.Desc
	StorageVolumeInfo           *prometheus.Desc
	StorageVolumeHealth         *prometheus.Desc
	StorageVolumeMediaSpan      *prometheus.Desc
	StorageVolumeCapacity       *prometheus.Desc

	// Memory modules
	MemoryModuleInfo     *prometheus.Desc
	MemoryModuleHealth   *prometheus.Desc
	MemoryModuleCapacity *prometheus.Desc
	MemoryModuleSpeed    *prometheus.Desc

	// Network
	NetworkInterfaceHealth *prometheus.Desc
	NetworkPortHealth      *prometheus.Desc
	NetworkPortSpeed       *prometheus.Desc
	NetworkPortLinkUp      *prometheus.Desc

	// Processors
	CpuInfo         *prometheus.Desc
	CpuHealth       *prometheus.Desc
	CpuVoltage      *prometheus.Desc
	CpuMaxSpeed     *prometheus.Desc
	CpuCurrentSpeed *prometheus.Desc
	CpuTotalCores   *prometheus.Desc
	CpuTotalThreads *prometheus.Desc

	// Dell OEM
	DellBatteryRollupHealth       *prometheus.Desc
	DellEstimatedSystemAirflowCFM *prometheus.Desc
	DellControllerBatteryHealth   *prometheus.Desc
}
StorageDriveHealth *prometheus.Desc 71 | StorageDriveCapacity *prometheus.Desc 72 | StorageDriveLifeLeft *prometheus.Desc 73 | StorageDriveIndicatorActive *prometheus.Desc 74 | StorageControllerInfo *prometheus.Desc 75 | StorageControllerHealth *prometheus.Desc 76 | StorageControllerSpeed *prometheus.Desc 77 | StorageVolumeInfo *prometheus.Desc 78 | StorageVolumeHealth *prometheus.Desc 79 | StorageVolumeMediaSpan *prometheus.Desc 80 | StorageVolumeCapacity *prometheus.Desc 81 | 82 | // Memory modules 83 | MemoryModuleInfo *prometheus.Desc 84 | MemoryModuleHealth *prometheus.Desc 85 | MemoryModuleCapacity *prometheus.Desc 86 | MemoryModuleSpeed *prometheus.Desc 87 | 88 | // Network 89 | NetworkInterfaceHealth *prometheus.Desc 90 | NetworkPortHealth *prometheus.Desc 91 | NetworkPortSpeed *prometheus.Desc 92 | NetworkPortLinkUp *prometheus.Desc 93 | 94 | // Processors 95 | CpuInfo *prometheus.Desc 96 | CpuHealth *prometheus.Desc 97 | CpuVoltage *prometheus.Desc 98 | CpuMaxSpeed *prometheus.Desc 99 | CpuCurrentSpeed *prometheus.Desc 100 | CpuTotalCores *prometheus.Desc 101 | CpuTotalThreads *prometheus.Desc 102 | 103 | // Dell OEM 104 | DellBatteryRollupHealth *prometheus.Desc 105 | DellEstimatedSystemAirflowCFM *prometheus.Desc 106 | DellControllerBatteryHealth *prometheus.Desc 107 | } 108 | 109 | func NewCollector() *Collector { 110 | prefix := config.Config.MetricsPrefix 111 | 112 | collector := &Collector{ 113 | ExporterBuildInfo: prometheus.NewDesc( 114 | prometheus.BuildFQName(prefix, "exporter", "build_info"), 115 | "Constant metric with build information for the exporter", 116 | nil, prometheus.Labels{ 117 | "version": version.Version, 118 | "revision": version.Revision, 119 | "goversion": runtime.Version(), 120 | }, 121 | ), 122 | ExporterScrapeErrorsTotal: prometheus.NewDesc( 123 | prometheus.BuildFQName(prefix, "exporter", "scrape_errors_total"), 124 | "Total number of errors encountered while scraping target", 125 | nil, nil, 126 | ), 127 | SystemPowerOn: 
prometheus.NewDesc( 128 | prometheus.BuildFQName(prefix, "system", "power_on"), 129 | "Power state of the system", 130 | nil, nil, 131 | ), 132 | SystemHealth: prometheus.NewDesc( 133 | prometheus.BuildFQName(prefix, "system", "health"), 134 | "Health status of the system", 135 | []string{"status"}, nil, 136 | ), 137 | SystemIndicatorLED: prometheus.NewDesc( 138 | prometheus.BuildFQName(prefix, "system", "indicator_led_on"), 139 | "Indicator LED state of the system", 140 | []string{"state"}, nil, 141 | ), 142 | SystemIndicatorActive: prometheus.NewDesc( 143 | prometheus.BuildFQName(prefix, "system", "indicator_active"), 144 | "State of the system location indicator", 145 | nil, nil, 146 | ), 147 | SystemMemorySize: prometheus.NewDesc( 148 | prometheus.BuildFQName(prefix, "system", "memory_size_bytes"), 149 | "Total memory size of the system in bytes", 150 | nil, nil, 151 | ), 152 | SystemCpuCount: prometheus.NewDesc( 153 | prometheus.BuildFQName(prefix, "system", "cpu_count"), 154 | "Total number of CPUs in the system", 155 | []string{"model"}, nil, 156 | ), 157 | SystemBiosInfo: prometheus.NewDesc( 158 | prometheus.BuildFQName(prefix, "system", "bios_info"), 159 | "Information about the BIOS", 160 | []string{"version"}, nil, 161 | ), 162 | SystemMachineInfo: prometheus.NewDesc( 163 | prometheus.BuildFQName(prefix, "system", "machine_info"), 164 | "Information about the machine", 165 | []string{"manufacturer", "model", "serial", "sku", "hostname"}, nil, 166 | ), 167 | SensorsTemperature: prometheus.NewDesc( 168 | prometheus.BuildFQName(prefix, "sensors", "temperature"), 169 | "Sensors reporting temperature measurements", 170 | []string{"id", "name", "units"}, nil, 171 | ), 172 | SensorsFanHealth: prometheus.NewDesc( 173 | prometheus.BuildFQName(prefix, "sensors", "fan_health"), 174 | "Health status for fans", 175 | []string{"id", "name", "status"}, nil, 176 | ), 177 | SensorsFanSpeed: prometheus.NewDesc( 178 | prometheus.BuildFQName(prefix, "sensors", "fan_speed"), 
179 | "Sensors reporting fan speed measurements", 180 | []string{"id", "name", "units"}, nil, 181 | ), 182 | PowerSupplyHealth: prometheus.NewDesc( 183 | prometheus.BuildFQName(prefix, "power_supply", "health"), 184 | "Power supply health status", 185 | []string{"id", "status"}, nil, 186 | ), 187 | PowerSupplyOutputWatts: prometheus.NewDesc( 188 | prometheus.BuildFQName(prefix, "power_supply", "output_watts"), 189 | "Power supply output in watts", 190 | []string{"id"}, nil, 191 | ), 192 | PowerSupplyInputWatts: prometheus.NewDesc( 193 | prometheus.BuildFQName(prefix, "power_supply", "input_watts"), 194 | "Power supply input in watts", 195 | []string{"id"}, nil, 196 | ), 197 | PowerSupplyCapacityWatts: prometheus.NewDesc( 198 | prometheus.BuildFQName(prefix, "power_supply", "capacity_watts"), 199 | "Power supply capacity in watts", 200 | []string{"id"}, nil, 201 | ), 202 | PowerSupplyInputVoltage: prometheus.NewDesc( 203 | prometheus.BuildFQName(prefix, "power_supply", "input_voltage"), 204 | "Power supply input voltage", 205 | []string{"id"}, nil, 206 | ), 207 | PowerSupplyEfficiencyPercent: prometheus.NewDesc( 208 | prometheus.BuildFQName(prefix, "power_supply", "efficiency_percent"), 209 | "Power supply efficiency in percentage", 210 | []string{"id"}, nil, 211 | ), 212 | PowerControlConsumedWatts: prometheus.NewDesc( 213 | prometheus.BuildFQName(prefix, "power_control", "consumed_watts"), 214 | "Consumption of power control system in watts", 215 | []string{"id", "name"}, nil, 216 | ), 217 | PowerControlCapacityWatts: prometheus.NewDesc( 218 | prometheus.BuildFQName(prefix, "power_control", "capacity_watts"), 219 | "Capacity of power control system in watts", 220 | []string{"id", "name"}, nil, 221 | ), 222 | PowerControlMinConsumedWatts: prometheus.NewDesc( 223 | prometheus.BuildFQName(prefix, "power_control", "min_consumed_watts"), 224 | "Minimum consumption of power control system during the reported interval", 225 | []string{"id", "name"}, nil, 226 | ), 227 | 
PowerControlMaxConsumedWatts: prometheus.NewDesc( 228 | prometheus.BuildFQName(prefix, "power_control", "max_consumed_watts"), 229 | "Maximum consumption of power control system during the reported interval", 230 | []string{"id", "name"}, nil, 231 | ), 232 | PowerControlAvgConsumedWatts: prometheus.NewDesc( 233 | prometheus.BuildFQName(prefix, "power_control", "avg_consumed_watts"), 234 | "Average consumption of power control system during the reported interval", 235 | []string{"id", "name"}, nil, 236 | ), 237 | PowerControlInterval: prometheus.NewDesc( 238 | prometheus.BuildFQName(prefix, "power_control", "interval_in_minutes"), 239 | "Interval for measurements of power control system", 240 | []string{"id", "name"}, nil, 241 | ), 242 | EventLogEntry: prometheus.NewDesc( 243 | prometheus.BuildFQName(prefix, "events", "log_entry"), 244 | "Entry from the system event log", 245 | []string{"id", "message", "severity"}, nil, 246 | ), 247 | StorageInfo: prometheus.NewDesc( 248 | prometheus.BuildFQName(prefix, "storage", "info"), 249 | "Information about storage sub systems", 250 | []string{"id", "name"}, nil, 251 | ), 252 | StorageHealth: prometheus.NewDesc( 253 | prometheus.BuildFQName(prefix, "storage", "health"), 254 | "Health status for storage sub systems", 255 | []string{"id", "status"}, nil, 256 | ), 257 | StorageDriveInfo: prometheus.NewDesc( 258 | prometheus.BuildFQName(prefix, "storage_drive", "info"), 259 | "Information about disk drives", 260 | []string{"id", "storage_id", "manufacturer", "mediatype", "model", "name", "protocol", "serial", "slot"}, nil, 261 | ), 262 | StorageDriveHealth: prometheus.NewDesc( 263 | prometheus.BuildFQName(prefix, "storage_drive", "health"), 264 | "Health status for disk drives", 265 | []string{"id", "storage_id", "status"}, nil, 266 | ), 267 | StorageDriveCapacity: prometheus.NewDesc( 268 | prometheus.BuildFQName(prefix, "storage_drive", "capacity_bytes"), 269 | "Capacity of disk drives in bytes", 270 | []string{"id", 
"storage_id"}, nil, 271 | ), 272 | StorageDriveLifeLeft: prometheus.NewDesc( 273 | prometheus.BuildFQName(prefix, "storage_drive", "life_left_percent"), 274 | "Predicted life left in percent", 275 | []string{"id", "storage_id"}, nil, 276 | ), 277 | StorageDriveIndicatorActive: prometheus.NewDesc( 278 | prometheus.BuildFQName(prefix, "storage_drive", "indicator_active"), 279 | "State of the drive location indicator", 280 | []string{"id", "storage_id"}, nil, 281 | ), 282 | StorageControllerInfo: prometheus.NewDesc( 283 | prometheus.BuildFQName(prefix, "storage_controller", "info"), 284 | "Information about storage controllers", 285 | []string{"id", "storage_id", "manufacturer", "model", "name", "firmware"}, nil, 286 | ), 287 | StorageControllerHealth: prometheus.NewDesc( 288 | prometheus.BuildFQName(prefix, "storage_controller", "health"), 289 | "Health status for storage controllers", 290 | []string{"id", "storage_id", "status"}, nil, 291 | ), 292 | StorageControllerSpeed: prometheus.NewDesc( 293 | prometheus.BuildFQName(prefix, "storage_controller", "speed_mbps"), 294 | "Speed of storage controllers in Mbps", 295 | []string{"id", "storage_id"}, nil, 296 | ), 297 | StorageVolumeInfo: prometheus.NewDesc( 298 | prometheus.BuildFQName(prefix, "storage_volume", "info"), 299 | "Information about virtual volumes", 300 | []string{"id", "storage_id", "name", "volumetype", "raidtype"}, nil, 301 | ), 302 | StorageVolumeHealth: prometheus.NewDesc( 303 | prometheus.BuildFQName(prefix, "storage_volume", "health"), 304 | "Health status for virtual volumes", 305 | []string{"id", "storage_id", "status"}, nil, 306 | ), 307 | StorageVolumeMediaSpan: prometheus.NewDesc( 308 | prometheus.BuildFQName(prefix, "storage_volume", "media_span_count"), 309 | "Number of media spanned by virtual volumes", 310 | []string{"id", "storage_id"}, nil, 311 | ), 312 | StorageVolumeCapacity: prometheus.NewDesc( 313 | prometheus.BuildFQName(prefix, "storage_volume", "capacity_bytes"), 314 | "Capacity of 
virtual volumes in bytes", 315 | []string{"id", "storage_id"}, nil, 316 | ), 317 | MemoryModuleInfo: prometheus.NewDesc( 318 | prometheus.BuildFQName(prefix, "memory_module", "info"), 319 | "Information about memory modules", 320 | []string{"id", "ecc", "manufacturer", "type", "name", "serial", "rank"}, nil, 321 | ), 322 | MemoryModuleHealth: prometheus.NewDesc( 323 | prometheus.BuildFQName(prefix, "memory_module", "health"), 324 | "Health status for memory modules", 325 | []string{"id", "status"}, nil, 326 | ), 327 | MemoryModuleCapacity: prometheus.NewDesc( 328 | prometheus.BuildFQName(prefix, "memory_module", "capacity_bytes"), 329 | "Capacity of memory modules in bytes", 330 | []string{"id"}, nil, 331 | ), 332 | MemoryModuleSpeed: prometheus.NewDesc( 333 | prometheus.BuildFQName(prefix, "memory_module", "speed_mhz"), 334 | "Speed of memory modules in Mhz", 335 | []string{"id"}, nil, 336 | ), 337 | NetworkInterfaceHealth: prometheus.NewDesc( 338 | prometheus.BuildFQName(prefix, "network_interface", "health"), 339 | "Health status for network interfaces", 340 | []string{"id", "status"}, nil, 341 | ), 342 | NetworkPortHealth: prometheus.NewDesc( 343 | prometheus.BuildFQName(prefix, "network_port", "health"), 344 | "Health status for network ports", 345 | []string{"id", "interface_id", "status"}, nil, 346 | ), 347 | NetworkPortSpeed: prometheus.NewDesc( 348 | prometheus.BuildFQName(prefix, "network_port", "speed_mbps"), 349 | "Link speed of network ports in Mbps", 350 | []string{"id", "interface_id"}, nil, 351 | ), 352 | NetworkPortLinkUp: prometheus.NewDesc( 353 | prometheus.BuildFQName(prefix, "network_port", "link_up"), 354 | "Link status of network ports (up or down)", 355 | []string{"id", "interface_id", "status"}, nil, 356 | ), 357 | CpuInfo: prometheus.NewDesc( 358 | prometheus.BuildFQName(prefix, "cpu", "info"), 359 | "Information about the CPU", 360 | []string{"id", "socket", "manufacturer", "model", "arch"}, nil, 361 | ), 362 | CpuHealth: 
prometheus.NewDesc( 363 | prometheus.BuildFQName(prefix, "cpu", "health"), 364 | "Health status of the CPU", 365 | []string{"id", "status"}, nil, 366 | ), 367 | CpuVoltage: prometheus.NewDesc( 368 | prometheus.BuildFQName(prefix, "cpu", "voltage"), 369 | "Current voltage of the CPU", 370 | []string{"id"}, nil, 371 | ), 372 | CpuMaxSpeed: prometheus.NewDesc( 373 | prometheus.BuildFQName(prefix, "cpu", "max_speed_mhz"), 374 | "Maximum speed of the CPU in Mhz", 375 | []string{"id"}, nil, 376 | ), 377 | CpuCurrentSpeed: prometheus.NewDesc( 378 | prometheus.BuildFQName(prefix, "cpu", "current_speed_mhz"), 379 | "Current speed of the CPU in Mhz", 380 | []string{"id"}, nil, 381 | ), 382 | CpuTotalCores: prometheus.NewDesc( 383 | prometheus.BuildFQName(prefix, "cpu", "total_cores"), 384 | "Total number of CPU cores", 385 | []string{"id"}, nil, 386 | ), 387 | CpuTotalThreads: prometheus.NewDesc( 388 | prometheus.BuildFQName(prefix, "cpu", "total_threads"), 389 | "Total number of CPU threads", 390 | []string{"id"}, nil, 391 | ), 392 | DellBatteryRollupHealth: prometheus.NewDesc( 393 | prometheus.BuildFQName(prefix, "dell", "battery_rollup_health"), 394 | "Health rollup status for the batteries", 395 | []string{"status"}, nil, 396 | ), 397 | DellEstimatedSystemAirflowCFM: prometheus.NewDesc( 398 | prometheus.BuildFQName(prefix, "dell", "estimated_system_airflow_cfm"), 399 | "Estimated system airflow in cubic feet per minute", 400 | nil, nil, 401 | ), 402 | DellControllerBatteryHealth: prometheus.NewDesc( 403 | prometheus.BuildFQName(prefix, "dell", "controller_battery_health"), 404 | "Health status of storage controller battery", 405 | []string{"id", "storage_id", "name", "status"}, nil, 406 | ), 407 | } 408 | 409 | collector.builder = new(strings.Builder) 410 | collector.collected = sync.NewCond(new(sync.Mutex)) 411 | collector.registry = prometheus.NewRegistry() 412 | collector.registry.Register(collector) 413 | 414 | return collector 415 | } 416 | 417 | func (collector 
*Collector) Describe(ch chan<- *prometheus.Desc) { 418 | ch <- collector.ExporterBuildInfo 419 | ch <- collector.ExporterScrapeErrorsTotal 420 | ch <- collector.SystemPowerOn 421 | ch <- collector.SystemHealth 422 | ch <- collector.SystemIndicatorLED 423 | ch <- collector.SystemIndicatorActive 424 | ch <- collector.SystemMemorySize 425 | ch <- collector.SystemCpuCount 426 | ch <- collector.SystemBiosInfo 427 | ch <- collector.SystemMachineInfo 428 | ch <- collector.SensorsTemperature 429 | ch <- collector.SensorsFanHealth 430 | ch <- collector.SensorsFanSpeed 431 | ch <- collector.PowerSupplyHealth 432 | ch <- collector.PowerSupplyOutputWatts 433 | ch <- collector.PowerSupplyInputWatts 434 | ch <- collector.PowerSupplyCapacityWatts 435 | ch <- collector.PowerSupplyInputVoltage 436 | ch <- collector.PowerSupplyEfficiencyPercent 437 | ch <- collector.PowerControlConsumedWatts 438 | ch <- collector.PowerControlCapacityWatts 439 | ch <- collector.PowerControlMinConsumedWatts 440 | ch <- collector.PowerControlMaxConsumedWatts 441 | ch <- collector.PowerControlAvgConsumedWatts 442 | ch <- collector.PowerControlInterval 443 | ch <- collector.EventLogEntry 444 | ch <- collector.StorageInfo 445 | ch <- collector.StorageHealth 446 | ch <- collector.StorageDriveInfo 447 | ch <- collector.StorageDriveHealth 448 | ch <- collector.StorageDriveCapacity 449 | ch <- collector.StorageDriveLifeLeft 450 | ch <- collector.StorageDriveIndicatorActive 451 | ch <- collector.StorageControllerInfo 452 | ch <- collector.StorageControllerHealth 453 | ch <- collector.StorageControllerSpeed 454 | ch <- collector.StorageVolumeInfo 455 | ch <- collector.StorageVolumeHealth 456 | ch <- collector.StorageVolumeMediaSpan 457 | ch <- collector.StorageVolumeCapacity 458 | ch <- collector.MemoryModuleInfo 459 | ch <- collector.MemoryModuleHealth 460 | ch <- collector.MemoryModuleCapacity 461 | ch <- collector.MemoryModuleSpeed 462 | ch <- collector.NetworkInterfaceHealth 463 | ch <- 
collector.NetworkPortHealth 464 | ch <- collector.NetworkPortSpeed 465 | ch <- collector.NetworkPortLinkUp 466 | ch <- collector.CpuInfo 467 | ch <- collector.CpuHealth 468 | ch <- collector.CpuVoltage 469 | ch <- collector.CpuMaxSpeed 470 | ch <- collector.CpuCurrentSpeed 471 | ch <- collector.CpuTotalCores 472 | ch <- collector.CpuTotalThreads 473 | ch <- collector.DellBatteryRollupHealth 474 | ch <- collector.DellEstimatedSystemAirflowCFM 475 | ch <- collector.DellControllerBatteryHealth 476 | } 477 | 478 | func (collector *Collector) Collect(ch chan<- prometheus.Metric) { 479 | var wg sync.WaitGroup 480 | 481 | collector.client.redfish.RefreshSession() 482 | collect := &config.Config.Collect 483 | 484 | if collect.System { 485 | wg.Add(1) 486 | go func() { 487 | ok := collector.client.RefreshSystem(collector, ch) 488 | if !ok { 489 | collector.errors.Add(1) 490 | } 491 | wg.Done() 492 | }() 493 | } 494 | 495 | if collect.Sensors { 496 | wg.Add(1) 497 | go func() { 498 | ok := collector.client.RefreshSensors(collector, ch) 499 | if !ok { 500 | collector.errors.Add(1) 501 | } 502 | wg.Done() 503 | }() 504 | } 505 | 506 | if collect.Power { 507 | wg.Add(1) 508 | go func() { 509 | ok := collector.client.RefreshPower(collector, ch) 510 | if !ok { 511 | collector.errors.Add(1) 512 | } 513 | wg.Done() 514 | }() 515 | } 516 | 517 | if collect.Network { 518 | wg.Add(1) 519 | go func() { 520 | ok := collector.client.RefreshNetwork(collector, ch) 521 | if !ok { 522 | collector.errors.Add(1) 523 | } 524 | wg.Done() 525 | }() 526 | } 527 | 528 | if collect.Events { 529 | wg.Add(1) 530 | go func() { 531 | ok := collector.client.RefreshEventLog(collector, ch) 532 | if !ok { 533 | collector.errors.Add(1) 534 | } 535 | wg.Done() 536 | }() 537 | } 538 | 539 | if collect.Storage { 540 | wg.Add(1) 541 | go func() { 542 | ok := collector.client.RefreshStorage(collector, ch) 543 | if !ok { 544 | collector.errors.Add(1) 545 | } 546 | wg.Done() 547 | }() 548 | } 549 | 550 | if 
collect.Memory { 551 | wg.Add(1) 552 | go func() { 553 | ok := collector.client.RefreshMemory(collector, ch) 554 | if !ok { 555 | collector.errors.Add(1) 556 | } 557 | wg.Done() 558 | }() 559 | } 560 | 561 | if collect.Processors { 562 | wg.Add(1) 563 | go func() { 564 | ok := collector.client.RefreshProcessors(collector, ch) 565 | if !ok { 566 | collector.errors.Add(1) 567 | } 568 | wg.Done() 569 | }() 570 | } 571 | 572 | if collect.Extra { 573 | wg.Add(1) 574 | go func() { 575 | ok := collector.client.RefreshDell(collector, ch) 576 | if !ok { 577 | collector.errors.Add(1) 578 | } 579 | wg.Done() 580 | }() 581 | } 582 | 583 | wg.Wait() 584 | ch <- prometheus.MustNewConstMetric(collector.ExporterBuildInfo, prometheus.UntypedValue, 1) 585 | ch <- prometheus.MustNewConstMetric(collector.ExporterScrapeErrorsTotal, prometheus.CounterValue, float64(collector.errors.Load())) 586 | } 587 | 588 | func (collector *Collector) Gather() (string, error) { 589 | collector.collected.L.Lock() 590 | 591 | // If a collection is already in progress wait for it to complete and return the cached data 592 | if collector.collecting { 593 | collector.collected.Wait() 594 | metrics := collector.builder.String() 595 | collector.collected.L.Unlock() 596 | return metrics, nil 597 | } 598 | 599 | // Set collecting to true and let other goroutines enter in critical section 600 | collector.collecting = true 601 | collector.collected.L.Unlock() 602 | 603 | // Defer set collecting to false and wake waiting goroutines 604 | defer func() { 605 | collector.collected.L.Lock() 606 | collector.collected.Broadcast() 607 | collector.collecting = false 608 | collector.collected.L.Unlock() 609 | }() 610 | 611 | // Collect metrics 612 | collector.builder.Reset() 613 | 614 | m, err := collector.registry.Gather() 615 | if err != nil { 616 | return "", err 617 | } 618 | 619 | for i := range m { 620 | expfmt.MetricFamilyToText(collector.builder, m[i]) 621 | } 622 | 623 | return collector.builder.String(), nil 
624 | } 625 | 626 | // Resets an existing collector of the given target 627 | func Reset(target string) { 628 | mu.Lock() 629 | _, ok := collectors[target] 630 | if ok { 631 | delete(collectors, target) 632 | } 633 | mu.Unlock() 634 | } 635 | 636 | func GetCollector(target string) (*Collector, error) { 637 | mu.Lock() 638 | collector, ok := collectors[target] 639 | if !ok { 640 | collector = NewCollector() 641 | collectors[target] = collector 642 | } 643 | mu.Unlock() 644 | 645 | // Do not act concurrently on the same host 646 | collector.collected.L.Lock() 647 | defer collector.collected.L.Unlock() 648 | 649 | // Try to instantiate a new Redfish host 650 | if collector.client == nil { 651 | host := config.Config.GetHostCfg(target) 652 | if host == nil { 653 | return nil, fmt.Errorf("failed to get host information") 654 | } 655 | c := NewClient(host) 656 | if c == nil { 657 | return nil, fmt.Errorf("failed to instantiate new client") 658 | } else { 659 | collector.client = c 660 | } 661 | } 662 | 663 | return collector, nil 664 | } 665 | -------------------------------------------------------------------------------- /internal/collector/metrics.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | ) 11 | 12 | func health2value(health string) int { 13 | switch health { 14 | case "": 15 | return -1 16 | case "OK", "GoodInUse": 17 | return 0 18 | case "Warning": 19 | return 1 20 | case "Critical": 21 | return 2 22 | } 23 | return 10 24 | } 25 | 26 | func linkstatus2value(status string) int { 27 | switch status { 28 | case "Up", "LinkUp": 29 | return 1 30 | } 31 | return 0 32 | } 33 | 34 | func (mc *Collector) NewSystemPowerOn(ch chan<- prometheus.Metric, m *SystemResponse) { 35 | var value float64 36 | if m.PowerState == "On" { 37 | value = 1 38 | } 39 | ch <- prometheus.MustNewConstMetric( 
40 | mc.SystemPowerOn, 41 | prometheus.GaugeValue, 42 | value, 43 | ) 44 | } 45 | 46 | func (mc *Collector) NewSystemHealth(ch chan<- prometheus.Metric, m *SystemResponse) { 47 | value := health2value(m.Status.Health) 48 | if value < 0 { 49 | return 50 | } 51 | ch <- prometheus.MustNewConstMetric( 52 | mc.SystemHealth, 53 | prometheus.GaugeValue, 54 | float64(value), 55 | m.Status.Health, 56 | ) 57 | } 58 | 59 | func (mc *Collector) NewSystemIndicatorLED(ch chan<- prometheus.Metric, m *SystemResponse) { 60 | var value float64 61 | if m.IndicatorLED != "Off" { 62 | value = 1 63 | } 64 | ch <- prometheus.MustNewConstMetric( 65 | mc.SystemIndicatorLED, 66 | prometheus.GaugeValue, 67 | value, 68 | m.IndicatorLED, 69 | ) 70 | } 71 | 72 | func (mc *Collector) NewSystemIndicatorActive(ch chan<- prometheus.Metric, m *SystemResponse) { 73 | var value float64 74 | if m.LocationIndicatorActive == nil { 75 | return 76 | } 77 | if *m.LocationIndicatorActive { 78 | value = 1 79 | } 80 | ch <- prometheus.MustNewConstMetric( 81 | mc.SystemIndicatorActive, 82 | prometheus.GaugeValue, 83 | value, 84 | ) 85 | } 86 | 87 | func (mc *Collector) NewSystemMemorySize(ch chan<- prometheus.Metric, m *SystemResponse) { 88 | if m.MemorySummary == nil { 89 | return 90 | } 91 | ch <- prometheus.MustNewConstMetric( 92 | mc.SystemMemorySize, 93 | prometheus.GaugeValue, 94 | m.MemorySummary.TotalSystemMemoryGiB*1073741824, 95 | ) 96 | } 97 | 98 | func (mc *Collector) NewSystemCpuCount(ch chan<- prometheus.Metric, m *SystemResponse) { 99 | if m.ProcessorSummary == nil { 100 | return 101 | } 102 | ch <- prometheus.MustNewConstMetric( 103 | mc.SystemCpuCount, 104 | prometheus.GaugeValue, 105 | float64(m.ProcessorSummary.Count), 106 | strings.TrimSpace(m.ProcessorSummary.Model), 107 | ) 108 | } 109 | 110 | func (mc *Collector) NewSystemBiosInfo(ch chan<- prometheus.Metric, m *SystemResponse) { 111 | ch <- prometheus.MustNewConstMetric( 112 | mc.SystemBiosInfo, 113 | prometheus.UntypedValue, 114 | 1.0, 
115 | m.BiosVersion, 116 | ) 117 | } 118 | 119 | func (mc *Collector) NewSystemMachineInfo(ch chan<- prometheus.Metric, m *SystemResponse) { 120 | ch <- prometheus.MustNewConstMetric( 121 | mc.SystemMachineInfo, 122 | prometheus.UntypedValue, 123 | 1.0, 124 | strings.TrimSpace(m.Manufacturer), 125 | strings.TrimSpace(m.Model), 126 | strings.TrimSpace(m.SerialNumber), 127 | strings.TrimSpace(m.SKU), 128 | strings.TrimSpace(m.HostName), 129 | ) 130 | } 131 | 132 | func (mc *Collector) NewSensorsTemperature(ch chan<- prometheus.Metric, temperature float64, id, name, units string) { 133 | ch <- prometheus.MustNewConstMetric( 134 | mc.SensorsTemperature, 135 | prometheus.GaugeValue, 136 | temperature, 137 | id, 138 | name, 139 | units, 140 | ) 141 | } 142 | 143 | func (mc *Collector) NewSensorsFanHealth(ch chan<- prometheus.Metric, id, name, health string) { 144 | value := health2value(health) 145 | if value < 0 { 146 | return 147 | } 148 | ch <- prometheus.MustNewConstMetric( 149 | mc.SensorsFanHealth, 150 | prometheus.GaugeValue, 151 | float64(value), 152 | id, 153 | name, 154 | health, 155 | ) 156 | } 157 | 158 | func (mc *Collector) NewSensorsFanSpeed(ch chan<- prometheus.Metric, speed float64, id, name, units string) { 159 | ch <- prometheus.MustNewConstMetric( 160 | mc.SensorsFanSpeed, 161 | prometheus.GaugeValue, 162 | speed, 163 | id, 164 | name, 165 | units, 166 | ) 167 | } 168 | 169 | func (mc *Collector) NewPowerSupplyHealth(ch chan<- prometheus.Metric, health, id string) { 170 | value := health2value(health) 171 | if value < 0 { 172 | return 173 | } 174 | ch <- prometheus.MustNewConstMetric( 175 | mc.PowerSupplyHealth, 176 | prometheus.GaugeValue, 177 | float64(value), 178 | id, 179 | health, 180 | ) 181 | } 182 | 183 | func (mc *Collector) NewPowerSupplyInputWatts(ch chan<- prometheus.Metric, value float64, id string) { 184 | ch <- prometheus.MustNewConstMetric( 185 | mc.PowerSupplyInputWatts, 186 | prometheus.GaugeValue, 187 | value, 188 | id, 189 | ) 190 
| } 191 | 192 | func (mc *Collector) NewPowerSupplyInputVoltage(ch chan<- prometheus.Metric, value float64, id string) { 193 | ch <- prometheus.MustNewConstMetric( 194 | mc.PowerSupplyInputVoltage, 195 | prometheus.GaugeValue, 196 | value, 197 | id, 198 | ) 199 | } 200 | 201 | func (mc *Collector) NewPowerSupplyOutputWatts(ch chan<- prometheus.Metric, value float64, id string) { 202 | ch <- prometheus.MustNewConstMetric( 203 | mc.PowerSupplyOutputWatts, 204 | prometheus.GaugeValue, 205 | value, 206 | id, 207 | ) 208 | } 209 | 210 | func (mc *Collector) NewPowerSupplyCapacityWatts(ch chan<- prometheus.Metric, value float64, id string) { 211 | ch <- prometheus.MustNewConstMetric( 212 | mc.PowerSupplyCapacityWatts, 213 | prometheus.GaugeValue, 214 | value, 215 | id, 216 | ) 217 | } 218 | 219 | func (mc *Collector) NewPowerSupplyEfficiencyPercent(ch chan<- prometheus.Metric, value float64, id string) { 220 | if value == 0 { 221 | return 222 | } 223 | ch <- prometheus.MustNewConstMetric( 224 | mc.PowerSupplyEfficiencyPercent, 225 | prometheus.GaugeValue, 226 | value, 227 | id, 228 | ) 229 | } 230 | 231 | func (mc *Collector) NewPowerControlConsumedWatts(ch chan<- prometheus.Metric, value float64, id, name string) { 232 | ch <- prometheus.MustNewConstMetric( 233 | mc.PowerControlConsumedWatts, 234 | prometheus.GaugeValue, 235 | value, 236 | id, 237 | name, 238 | ) 239 | } 240 | 241 | func (mc *Collector) NewPowerControlCapacityWatts(ch chan<- prometheus.Metric, value float64, id, name string) { 242 | ch <- prometheus.MustNewConstMetric( 243 | mc.PowerControlCapacityWatts, 244 | prometheus.GaugeValue, 245 | value, 246 | id, 247 | name, 248 | ) 249 | } 250 | 251 | func (mc *Collector) NewPowerControlMinConsumedWatts(ch chan<- prometheus.Metric, value float64, id, name string) { 252 | ch <- prometheus.MustNewConstMetric( 253 | mc.PowerControlMinConsumedWatts, 254 | prometheus.GaugeValue, 255 | value, 256 | id, 257 | name, 258 | ) 259 | } 260 | 261 | func (mc *Collector) 
NewPowerControlMaxConsumedWatts(ch chan<- prometheus.Metric, value float64, id, name string) { 262 | ch <- prometheus.MustNewConstMetric( 263 | mc.PowerControlMaxConsumedWatts, 264 | prometheus.GaugeValue, 265 | value, 266 | id, 267 | name, 268 | ) 269 | } 270 | 271 | func (mc *Collector) NewPowerControlAvgConsumedWatts(ch chan<- prometheus.Metric, value float64, id, name string) { 272 | ch <- prometheus.MustNewConstMetric( 273 | mc.PowerControlAvgConsumedWatts, 274 | prometheus.GaugeValue, 275 | value, 276 | id, 277 | name, 278 | ) 279 | } 280 | 281 | func (mc *Collector) NewPowerControlInterval(ch chan<- prometheus.Metric, interval int, id, name string) { 282 | ch <- prometheus.MustNewConstMetric( 283 | mc.PowerControlInterval, 284 | prometheus.GaugeValue, 285 | float64(interval), 286 | id, 287 | name, 288 | ) 289 | } 290 | 291 | func (mc *Collector) NewEventLogEntry(ch chan<- prometheus.Metric, id string, message string, severity string, created time.Time) { 292 | ch <- prometheus.MustNewConstMetric( 293 | mc.EventLogEntry, 294 | prometheus.CounterValue, 295 | float64(created.Unix()), 296 | id, 297 | strings.TrimSpace(message), 298 | severity, 299 | ) 300 | } 301 | 302 | func (mc *Collector) NewStorageInfo(ch chan<- prometheus.Metric, m *Storage) { 303 | ch <- prometheus.MustNewConstMetric( 304 | mc.StorageInfo, 305 | prometheus.UntypedValue, 306 | 1.0, 307 | m.Id, 308 | m.Name, 309 | ) 310 | } 311 | 312 | func (mc *Collector) NewStorageHealth(ch chan<- prometheus.Metric, m *Storage) { 313 | value := health2value(m.Status.Health) 314 | if value < 0 { 315 | return 316 | } 317 | ch <- prometheus.MustNewConstMetric( 318 | mc.StorageHealth, 319 | prometheus.GaugeValue, 320 | float64(value), 321 | m.Id, 322 | m.Status.Health, 323 | ) 324 | } 325 | 326 | func (mc *Collector) NewStorageDriveInfo(ch chan<- prometheus.Metric, parent string, m *StorageDrive) { 327 | var slot string 328 | 329 | if m.PhysicalLocation != nil { 330 | if m.PhysicalLocation.PartLocation != nil 
{ 331 | slot = fmt.Sprint(m.PhysicalLocation.PartLocation.LocationOrdinalValue) 332 | } 333 | } 334 | 335 | ch <- prometheus.MustNewConstMetric( 336 | mc.StorageDriveInfo, 337 | prometheus.UntypedValue, 338 | 1.0, 339 | m.Id, 340 | parent, 341 | m.Manufacturer, 342 | m.MediaType, 343 | m.Model, 344 | m.Name, 345 | m.Protocol, 346 | m.SerialNumber, 347 | slot, 348 | ) 349 | } 350 | 351 | func (mc *Collector) NewStorageDriveHealth(ch chan<- prometheus.Metric, parent string, m *StorageDrive) { 352 | value := health2value(m.Status.Health) 353 | if value < 0 { 354 | return 355 | } 356 | ch <- prometheus.MustNewConstMetric( 357 | mc.StorageDriveHealth, 358 | prometheus.GaugeValue, 359 | float64(value), 360 | m.Id, 361 | parent, 362 | m.Status.Health, 363 | ) 364 | } 365 | 366 | func (mc *Collector) NewStorageDriveCapacity(ch chan<- prometheus.Metric, parent string, m *StorageDrive) { 367 | ch <- prometheus.MustNewConstMetric( 368 | mc.StorageDriveCapacity, 369 | prometheus.GaugeValue, 370 | float64(m.CapacityBytes), 371 | m.Id, 372 | parent, 373 | ) 374 | } 375 | 376 | func (mc *Collector) NewStorageDriveLifeLeft(ch chan<- prometheus.Metric, parent string, m *StorageDrive) { 377 | if m.PredictedLifeLeft == 0 && m.MediaType == "HDD" { 378 | return 379 | } 380 | ch <- prometheus.MustNewConstMetric( 381 | mc.StorageDriveLifeLeft, 382 | prometheus.GaugeValue, 383 | m.PredictedLifeLeft, 384 | m.Id, 385 | parent, 386 | ) 387 | } 388 | 389 | func (mc *Collector) NewStorageDriveIndicatorActive(ch chan<- prometheus.Metric, parent string, m *StorageDrive) { 390 | state := false 391 | value := 0 392 | 393 | if m.LocationIndicatorActive != nil { 394 | state = *m.LocationIndicatorActive 395 | } else if len(m.IndicatorLED) > 0 { 396 | state = (m.IndicatorLED != "Off") 397 | } else { 398 | return 399 | } 400 | 401 | if state { 402 | value = 1 403 | } 404 | 405 | ch <- prometheus.MustNewConstMetric( 406 | mc.StorageDriveIndicatorActive, 407 | prometheus.GaugeValue, 408 | float64(value), 
409 | m.Id, 410 | parent, 411 | ) 412 | } 413 | 414 | func (mc *Collector) NewStorageControllerInfo(ch chan<- prometheus.Metric, parent string, m *StorageController) { 415 | ch <- prometheus.MustNewConstMetric( 416 | mc.StorageControllerInfo, 417 | prometheus.UntypedValue, 418 | 1.0, 419 | m.Id, 420 | parent, 421 | m.Manufacturer, 422 | m.Model, 423 | m.Name, 424 | m.FirmwareVersion, 425 | ) 426 | } 427 | 428 | func (mc *Collector) NewStorageControllerSpeed(ch chan<- prometheus.Metric, parent string, m *StorageController) { 429 | if m.SpeedGbps == 0 { 430 | return 431 | } 432 | ch <- prometheus.MustNewConstMetric( 433 | mc.StorageControllerSpeed, 434 | prometheus.GaugeValue, 435 | float64(1000*m.SpeedGbps), 436 | m.Id, 437 | parent, 438 | ) 439 | } 440 | 441 | func (mc *Collector) NewStorageControllerHealth(ch chan<- prometheus.Metric, parent string, m *StorageController) { 442 | value := health2value(m.Status.Health) 443 | if value < 0 { 444 | return 445 | } 446 | ch <- prometheus.MustNewConstMetric( 447 | mc.StorageControllerHealth, 448 | prometheus.GaugeValue, 449 | float64(value), 450 | m.Id, 451 | parent, 452 | m.Status.Health, 453 | ) 454 | } 455 | 456 | func (mc *Collector) NewStorageVolumeInfo(ch chan<- prometheus.Metric, parent string, m *StorageVolume) { 457 | ch <- prometheus.MustNewConstMetric( 458 | mc.StorageVolumeInfo, 459 | prometheus.UntypedValue, 460 | 1.0, 461 | m.Id, 462 | parent, 463 | strings.TrimSpace(m.Name), 464 | m.VolumeType, 465 | m.RAIDType, 466 | ) 467 | } 468 | 469 | func (mc *Collector) NewStorageVolumeHealth(ch chan<- prometheus.Metric, parent string, m *StorageVolume) { 470 | value := health2value(m.Status.Health) 471 | if value < 0 { 472 | return 473 | } 474 | ch <- prometheus.MustNewConstMetric( 475 | mc.StorageVolumeHealth, 476 | prometheus.GaugeValue, 477 | float64(value), 478 | m.Id, 479 | parent, 480 | m.Status.Health, 481 | ) 482 | } 483 | 484 | func (mc *Collector) NewStorageVolumeMediaSpan(ch chan<- prometheus.Metric, 
parent string, m *StorageVolume) { 485 | value := m.MediaSpanCount 486 | if value == 0 { 487 | value = m.Links.DrivesCount 488 | } 489 | if value == 0 { 490 | return 491 | } 492 | ch <- prometheus.MustNewConstMetric( 493 | mc.StorageVolumeMediaSpan, 494 | prometheus.GaugeValue, 495 | float64(value), 496 | m.Id, 497 | parent, 498 | ) 499 | } 500 | 501 | func (mc *Collector) NewStorageVolumeCapacity(ch chan<- prometheus.Metric, parent string, m *StorageVolume) { 502 | ch <- prometheus.MustNewConstMetric( 503 | mc.StorageVolumeCapacity, 504 | prometheus.GaugeValue, 505 | float64(m.CapacityBytes), 506 | m.Id, 507 | parent, 508 | ) 509 | } 510 | 511 | func (mc *Collector) NewMemoryModuleInfo(ch chan<- prometheus.Metric, m *Memory) { 512 | ch <- prometheus.MustNewConstMetric( 513 | mc.MemoryModuleInfo, 514 | prometheus.UntypedValue, 515 | 1.0, 516 | m.Id, 517 | m.ErrorCorrection, 518 | m.Manufacturer, 519 | m.MemoryDeviceType, 520 | m.Name, 521 | m.SerialNumber, 522 | fmt.Sprint(m.RankCount), 523 | ) 524 | } 525 | 526 | func (mc *Collector) NewMemoryModuleHealth(ch chan<- prometheus.Metric, m *Memory) { 527 | value := health2value(m.Status.Health) 528 | if value < 0 { 529 | return 530 | } 531 | ch <- prometheus.MustNewConstMetric( 532 | mc.MemoryModuleHealth, 533 | prometheus.GaugeValue, 534 | float64(value), 535 | m.Id, 536 | m.Status.Health, 537 | ) 538 | } 539 | 540 | func (mc *Collector) NewMemoryModuleCapacity(ch chan<- prometheus.Metric, m *Memory) { 541 | capacity := 1048576 * m.CapacityMiB 542 | if capacity == 0 { 543 | return 544 | } 545 | ch <- prometheus.MustNewConstMetric( 546 | mc.MemoryModuleCapacity, 547 | prometheus.GaugeValue, 548 | float64(capacity), 549 | m.Id, 550 | ) 551 | } 552 | 553 | func (mc *Collector) NewMemoryModuleSpeed(ch chan<- prometheus.Metric, m *Memory) { 554 | if m.OperatingSpeedMhz == 0 { 555 | return 556 | } 557 | ch <- prometheus.MustNewConstMetric( 558 | mc.MemoryModuleSpeed, 559 | prometheus.GaugeValue, 560 | 
float64(m.OperatingSpeedMhz), 561 | m.Id, 562 | ) 563 | } 564 | 565 | func (mc *Collector) NewNetworkInterfaceHealth(ch chan<- prometheus.Metric, m *NetworkInterface) { 566 | value := health2value(m.Status.Health) 567 | if value < 0 { 568 | return 569 | } 570 | ch <- prometheus.MustNewConstMetric( 571 | mc.NetworkInterfaceHealth, 572 | prometheus.GaugeValue, 573 | float64(value), 574 | m.Id, 575 | m.Status.Health, 576 | ) 577 | } 578 | 579 | func (mc *Collector) NewNetworkPortHealth(ch chan<- prometheus.Metric, parent string, m *NetworkPort) { 580 | value := health2value(m.Status.Health) 581 | if value < 0 { 582 | return 583 | } 584 | ch <- prometheus.MustNewConstMetric( 585 | mc.NetworkPortHealth, 586 | prometheus.GaugeValue, 587 | float64(value), 588 | m.Id, 589 | parent, 590 | m.Status.Health, 591 | ) 592 | } 593 | 594 | func (mc *Collector) NewNetworkPortSpeed(ch chan<- prometheus.Metric, parent string, m *NetworkPort) { 595 | var speed float64 596 | 597 | if m.CurrentLinkSpeedMbps > 0 { 598 | speed = m.CurrentLinkSpeedMbps 599 | } else if m.CurrentSpeedGbps > 0 { 600 | speed = 1000 * m.CurrentSpeedGbps 601 | } else if len(m.SupportedLinkCapabilities) > 0 { 602 | if s := m.SupportedLinkCapabilities[0].LinkSpeedMbps; s > 0 { 603 | speed = s 604 | } 605 | } 606 | 607 | ch <- prometheus.MustNewConstMetric( 608 | mc.NetworkPortSpeed, 609 | prometheus.GaugeValue, 610 | speed, 611 | m.Id, 612 | parent, 613 | ) 614 | } 615 | 616 | func (mc *Collector) NewNetworkPortLinkUp(ch chan<- prometheus.Metric, parent string, m *NetworkPort) { 617 | value := linkstatus2value(m.LinkStatus) 618 | ch <- prometheus.MustNewConstMetric( 619 | mc.NetworkPortLinkUp, 620 | prometheus.GaugeValue, 621 | float64(value), 622 | m.Id, 623 | parent, 624 | m.LinkStatus, 625 | ) 626 | } 627 | 628 | func (mc *Collector) NewCpuInfo(ch chan<- prometheus.Metric, m *Processor) { 629 | arch := m.InstructionSet 630 | if arch == "" { 631 | arch = m.ProcessorArchitecture 632 | } 633 | ch <- 
prometheus.MustNewConstMetric( 634 | mc.CpuInfo, 635 | prometheus.UntypedValue, 636 | 1.0, 637 | m.Id, 638 | m.Socket, 639 | strings.TrimSpace(m.Manufacturer), 640 | strings.TrimSpace(m.Model), 641 | arch, 642 | ) 643 | } 644 | 645 | func (mc *Collector) NewCpuVoltage(ch chan<- prometheus.Metric, m *Processor) { 646 | value := -1.0 647 | if m.Oem.Dell != nil { 648 | volt, err := strconv.ParseFloat(m.Oem.Dell.DellProcessor.Volts, 64) 649 | if err != nil { 650 | return 651 | } 652 | value = volt 653 | } else if m.Oem.Hpe != nil { 654 | value = 0.1 * float64(m.Oem.Hpe.VoltageVoltsX10) 655 | } 656 | if value < 0 { 657 | return 658 | } 659 | ch <- prometheus.MustNewConstMetric( 660 | mc.CpuVoltage, 661 | prometheus.GaugeValue, 662 | value, 663 | m.Id, 664 | ) 665 | } 666 | 667 | func (mc *Collector) NewCpuMaxSpeed(ch chan<- prometheus.Metric, m *Processor) { 668 | if m.MaxSpeedMHz == nil { 669 | return 670 | } 671 | ch <- prometheus.MustNewConstMetric( 672 | mc.CpuMaxSpeed, 673 | prometheus.GaugeValue, 674 | float64(*m.MaxSpeedMHz), 675 | m.Id, 676 | ) 677 | } 678 | 679 | func (mc *Collector) NewCpuCurrentSpeed(ch chan<- prometheus.Metric, m *Processor) { 680 | value := -1 681 | if m.OperatingSpeedMHz != nil { 682 | value = *m.OperatingSpeedMHz 683 | } else if m.Oem.Lenovo != nil { 684 | value = m.Oem.Lenovo.CurrentClockSpeedMHz 685 | } 686 | if value < 0 { 687 | return 688 | } 689 | ch <- prometheus.MustNewConstMetric( 690 | mc.CpuCurrentSpeed, 691 | prometheus.GaugeValue, 692 | float64(value), 693 | m.Id, 694 | ) 695 | } 696 | 697 | func (mc *Collector) NewCpuHealth(ch chan<- prometheus.Metric, m *Processor) { 698 | value := health2value(m.Status.Health) 699 | if value < 0 { 700 | return 701 | } 702 | ch <- prometheus.MustNewConstMetric( 703 | mc.CpuHealth, 704 | prometheus.GaugeValue, 705 | float64(value), 706 | m.Id, 707 | m.Status.Health, 708 | ) 709 | } 710 | 711 | func (mc *Collector) NewCpuTotalCores(ch chan<- prometheus.Metric, m *Processor) { 712 | ch <- 
prometheus.MustNewConstMetric( 713 | mc.CpuTotalCores, 714 | prometheus.GaugeValue, 715 | float64(m.TotalCores), 716 | m.Id, 717 | ) 718 | } 719 | 720 | func (mc *Collector) NewCpuTotalThreads(ch chan<- prometheus.Metric, m *Processor) { 721 | ch <- prometheus.MustNewConstMetric( 722 | mc.CpuTotalThreads, 723 | prometheus.GaugeValue, 724 | float64(m.TotalThreads), 725 | m.Id, 726 | ) 727 | } 728 | 729 | func (mc *Collector) NewDellBatteryRollupHealth(ch chan<- prometheus.Metric, m *DellSystem) { 730 | value := health2value(m.BatteryRollupStatus) 731 | if value < 0 { 732 | return 733 | } 734 | ch <- prometheus.MustNewConstMetric( 735 | mc.DellBatteryRollupHealth, 736 | prometheus.GaugeValue, 737 | float64(value), 738 | m.BatteryRollupStatus, 739 | ) 740 | } 741 | 742 | func (mc *Collector) NewDellEstimatedSystemAirflowCFM(ch chan<- prometheus.Metric, m *DellSystem) { 743 | value := m.EstimatedSystemAirflowCFM 744 | if value == 0 { 745 | return 746 | } 747 | ch <- prometheus.MustNewConstMetric( 748 | mc.DellEstimatedSystemAirflowCFM, 749 | prometheus.GaugeValue, 750 | float64(value), 751 | ) 752 | } 753 | 754 | func (mc *Collector) NewDellControllerBatteryHealth(ch chan<- prometheus.Metric, m *Storage) { 755 | if m.Oem.Dell == nil { 756 | return 757 | } 758 | value := health2value(m.Oem.Dell.DellControllerBattery.PrimaryStatus) 759 | if value < 0 { 760 | return 761 | } 762 | ch <- prometheus.MustNewConstMetric( 763 | mc.DellControllerBatteryHealth, 764 | prometheus.GaugeValue, 765 | float64(value), 766 | m.Oem.Dell.DellControllerBattery.Id, 767 | m.Id, 768 | m.Oem.Dell.DellControllerBattery.Name, 769 | m.Oem.Dell.DellControllerBattery.PrimaryStatus, 770 | ) 771 | } 772 | -------------------------------------------------------------------------------- /internal/collector/model.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "strconv" 5 | ) 6 | 7 | const ( 8 | StateEnabled = "Enabled" 9 | 
StateAbsent = "Absent" 10 | ) 11 | 12 | // Session 13 | type Session struct { 14 | Id string `json:"Id,omitempty"` 15 | Name string `json:"Name,omitempty"` 16 | Username string `json:"UserName,omitempty"` 17 | Password string `json:"Password,omitempty"` 18 | CreatedTime string `json:"CreatedTime,omitempty"` 19 | SessionType string `json:"SessionType,omitempty"` 20 | OdataId string `json:"@odata.id,omitempty"` 21 | } 22 | 23 | // Odata is a common structure to unmarshal Open Data Protocol metadata 24 | type Odata struct { 25 | OdataContext string `json:"@odata.context"` 26 | OdataId string `json:"@odata.id"` 27 | OdataType string `json:"@odata.type"` 28 | } 29 | 30 | type OdataSlice []Odata 31 | 32 | func (m *OdataSlice) GetLinks() []string { 33 | list := []string{} 34 | seen := map[string]bool{} 35 | 36 | for _, c := range *m { 37 | s := c.OdataId 38 | if ok := seen[s]; !ok { 39 | seen[s] = true 40 | list = append(list, s) 41 | } 42 | } 43 | 44 | return list 45 | } 46 | 47 | // Status is a common structure used in any entity with a status 48 | type Status struct { 49 | Health string `json:"Health"` 50 | HealthRollup string `json:"HealthRollup"` 51 | State string `json:"State"` 52 | } 53 | 54 | // Redundancy is a common structure used in any entity with redundancy 55 | type Redundancy struct { 56 | Name string `json:"Name"` 57 | MaxNumSupported int `json:"MaxNumSupported"` 58 | MinNumNeeded int `json:"MinNumNeeded"` 59 | Mode xstring `json:"Mode"` 60 | RedundancyEnabled bool `json:"RedundancyEnabled"` 61 | RedundancySet []any `json:"RedundancySet"` 62 | Status Status `json:"Status"` 63 | } 64 | 65 | // V1Response represents structure of the response body from /redfish/v1 66 | type V1Response struct { 67 | RedfishVersion string `json:"RedfishVersion"` 68 | Name string `json:"Name"` 69 | Product string `json:"Product"` 70 | Vendor string `json:"Vendor"` 71 | Description string `json:"Description"` 72 | AccountService Odata `json:"AccountService"` 73 | 
CertificateService Odata `json:"CertificateService"` 74 | Chassis Odata `json:"Chassis"` 75 | EventService Odata `json:"EventService"` 76 | Fabrics Odata `json:"Fabrics"` 77 | JobService Odata `json:"JobService"` 78 | JsonSchemas Odata `json:"JsonSchemas"` 79 | Managers Odata `json:"Managers"` 80 | Registries Odata `json:"Registries"` 81 | SessionService Odata `json:"SessionService"` 82 | Systems Odata `json:"Systems"` 83 | Tasks Odata `json:"Tasks"` 84 | TelemetryService Odata `json:"TelemetryService"` 85 | UpdateService Odata `json:"UpdateService"` 86 | } 87 | 88 | type GroupResponse struct { 89 | Name string `json:"Name"` 90 | Description string `json:"Description"` 91 | Members OdataSlice `json:"Members"` 92 | } 93 | 94 | type Processor struct { 95 | Id string `json:"Id"` 96 | Name string `json:"Name"` 97 | Description string `json:"Description"` 98 | InstructionSet string `json:"InstructionSet"` 99 | Manufacturer string `json:"Manufacturer"` 100 | MaxSpeedMHz *int `json:"MaxSpeedMHz"` 101 | Model string `json:"Model"` 102 | Family string `json:"Family"` 103 | OperatingSpeedMHz *int `json:"OperatingSpeedMHz"` 104 | PartNumber string `json:"PartNumber"` 105 | ProcessorArchitecture string `json:"ProcessorArchitecture"` 106 | ProcessorId struct { 107 | EffectiveFamily string `json:"EffectiveFamily"` 108 | EffectiveModel string `json:"EffectiveModel"` 109 | IdentificationRegisters string `json:"IdentificationRegisters"` 110 | MicrocodeInfo string `json:"MicrocodeInfo"` 111 | ProtectedIdentificationNumber string `json:"ProtectedIdentificationNumber"` 112 | Step string `json:"Step"` 113 | VendorID string `json:"VendorId"` 114 | } `json:"ProcessorId"` 115 | ProcessorType string `json:"ProcessorType"` 116 | Socket string `json:"Socket"` 117 | Status Status `json:"Status"` 118 | TDPWatts float64 `json:"TDPWatts"` 119 | TotalCores int `json:"TotalCores"` 120 | TotalEnabledCores int `json:"TotalEnabledCores"` 121 | TotalThreads int `json:"TotalThreads"` 122 | TurboState 
string `json:"TurboState"` 123 | Version string `json:"Version"` 124 | Oem struct { 125 | Lenovo *struct { 126 | CurrentClockSpeedMHz int `json:"CurrentClockSpeedMHz"` 127 | } `json:"Lenovo"` 128 | Hpe *struct { 129 | VoltageVoltsX10 int `json:"VoltageVoltsX10"` 130 | } `json:"Hpe"` 131 | Dell *struct { 132 | DellProcessor struct { 133 | Volts string `json:"Volts"` 134 | } `json:"DellProcessor"` 135 | } `json:"Dell"` 136 | } `json:"Oem"` 137 | } 138 | 139 | type ChassisResponse struct { 140 | Name string `json:"Name"` 141 | AssetTag string `json:"AssetTag"` 142 | SerialNumber string `json:"SerialNumber"` 143 | PartNumber string `json:"PartNumber"` 144 | Model string `json:"Model"` 145 | ChassisType string `json:"ChassisType"` 146 | Manufacturer string `json:"Manufacturer"` 147 | Description string `json:"Description"` 148 | SKU string `json:"SKU"` 149 | PowerState string `json:"PowerState"` 150 | EnvironmentalClass string `json:"EnvironmentalClass"` 151 | IndicatorLED string `json:"IndicatorLED"` 152 | LocationIndicatorActive *bool `json:"LocationIndicatorActive"` 153 | Assembly Odata `json:"Assembly"` 154 | Location *struct { 155 | Info string `json:"Info"` 156 | InfoFormat string `json:"InfoFormat"` 157 | Placement struct { 158 | Rack string `json:"Rack"` 159 | Row string `json:"Row"` 160 | } `json:"Placement"` 161 | PostalAddress struct { 162 | Building string `json:"Building"` 163 | Room string `json:"Room"` 164 | } `json:"PostalAddress"` 165 | } `json:"Location"` 166 | Memory Odata `json:"Memory"` 167 | NetworkAdapters Odata `json:"NetworkAdapters"` 168 | PCIeSlots Odata `json:"PCIeSlots"` 169 | Power Odata `json:"Power"` 170 | Sensors Odata `json:"Sensors"` 171 | Status Status `json:"Status"` 172 | Thermal Odata `json:"Thermal"` 173 | PhysicalSecurity *struct { 174 | IntrusionSensor string `json:"IntrusionSensor"` 175 | IntrusionSensorNumber int `json:"IntrusionSensorNumber"` 176 | IntrusionSensorReArm string `json:"IntrusionSensorReArm"` 177 | } 
`json:"PhysicalSecurity"` 178 | } 179 | 180 | type ThermalResponse struct { 181 | Name string `json:"Name"` 182 | Description string `json:"Description"` 183 | Fans []Fan `json:"Fans"` 184 | Temperatures []Temperature `json:"Temperatures"` 185 | Redundancy []Redundancy `json:"Redundancy"` 186 | } 187 | 188 | type Fan struct { 189 | Name string `json:"Name"` 190 | FanName string `json:"FanName"` 191 | MemberId string `json:"MemberId"` 192 | Assembly Odata `json:"Assembly"` 193 | HotPluggable bool `json:"HotPluggable"` 194 | MaxReadingRange any `json:"MaxReadingRange"` 195 | MinReadingRange any `json:"MinReadingRange"` 196 | PhysicalContext string `json:"PhysicalContext"` 197 | Reading float64 `json:"Reading"` 198 | CurrentReading float64 `json:"CurrentReading"` 199 | Units string `json:"Units"` 200 | ReadingUnits string `json:"ReadingUnits"` 201 | Redundancy []Redundancy `json:"Redundancy"` 202 | Status Status `json:"Status"` 203 | LowerThresholdCritical any `json:"LowerThresholdCritical"` 204 | LowerThresholdFatal any `json:"LowerThresholdFatal"` 205 | LowerThresholdNonCritical any `json:"LowerThresholdNonCritical"` 206 | UpperThresholdCritical any `json:"UpperThresholdCritical"` 207 | UpperThresholdFatal any `json:"UpperThresholdFatal"` 208 | UpperThresholdNonCritical any `json:"UpperThresholdNonCritical"` 209 | } 210 | 211 | func (f *Fan) GetName() string { 212 | if f.FanName != "" { 213 | return f.FanName 214 | } 215 | return f.Name 216 | } 217 | 218 | func (f *Fan) GetReading() float64 { 219 | if f.Reading > 0 { 220 | return f.Reading 221 | } 222 | return f.CurrentReading 223 | } 224 | 225 | func (f *Fan) GetUnits() string { 226 | if f.ReadingUnits != "" { 227 | return f.ReadingUnits 228 | } 229 | return f.Units 230 | } 231 | 232 | func (f *Fan) GetId(fallback int) string { 233 | if len(f.MemberId) > 0 { 234 | return f.MemberId 235 | } 236 | return strconv.Itoa(fallback) 237 | } 238 | 239 | type Temperature struct { 240 | Name string `json:"Name"` 241 | Number 
int `json:"Number"` 242 | MemberId string `json:"MemberId"` 243 | ReadingCelsius float64 `json:"ReadingCelsius"` 244 | MaxReadingRangeTemp float64 `json:"MaxReadingRangeTemp"` 245 | MinReadingRangeTemp float64 `json:"MinReadingRangeTemp"` 246 | PhysicalContext string `json:"PhysicalContext"` 247 | LowerThresholdCritical float64 `json:"LowerThresholdCritical"` 248 | LowerThresholdFatal float64 `json:"LowerThresholdFatal"` 249 | LowerThresholdNonCritical float64 `json:"LowerThresholdNonCritical"` 250 | UpperThresholdCritical float64 `json:"UpperThresholdCritical"` 251 | UpperThresholdFatal float64 `json:"UpperThresholdFatal"` 252 | UpperThresholdNonCritical float64 `json:"UpperThresholdNonCritical"` 253 | Status Status `json:"Status"` 254 | } 255 | 256 | func (t *Temperature) GetId(fallback int) string { 257 | if len(t.MemberId) > 0 { 258 | return t.MemberId 259 | } 260 | if t.Number > 0 { 261 | return strconv.Itoa(t.Number) 262 | } 263 | return strconv.Itoa(fallback) 264 | } 265 | 266 | type Storage struct { 267 | Id string `json:"Id"` 268 | Name string `json:"Name"` 269 | Description string `json:"Description"` 270 | Drives OdataSlice `json:"Drives"` 271 | Controllers Odata `json:"Controllers"` 272 | Volumes Odata `json:"Volumes"` 273 | Status Status `json:"Status"` 274 | StorageControllers []struct { // deprecated 275 | FirmwareVersion string `json:"FirmwareVersion"` 276 | Manufacturer string `json:"Manufacturer"` 277 | Model string `json:"Model"` 278 | Name string `json:"Name"` 279 | SpeedGbps float64 `json:"SpeedGbps"` 280 | Status Status `json:"Status"` 281 | } `json:"StorageControllers"` 282 | Oem struct { 283 | Dell *struct { 284 | DellControllerBattery struct { 285 | Id string `json:"Id"` 286 | Name string `json:"Name"` 287 | Description string `json:"Description"` 288 | PrimaryStatus string `json:"PrimaryStatus"` 289 | RAIDState string `json:"RAIDState"` 290 | } `json:"DellControllerBattery"` 291 | } `json:"Dell"` 292 | } `json:"Oem"` 293 | } 294 | 295 | 
type StorageController struct { 296 | Id string `json:"Id"` 297 | Name string `json:"Name"` 298 | Description string `json:"Description"` 299 | FirmwareVersion string `json:"FirmwareVersion"` 300 | Manufacturer string `json:"Manufacturer"` 301 | Model string `json:"Model"` 302 | SpeedGbps float64 `json:"SpeedGbps"` 303 | SerialNumber string `json:"SerialNumber"` 304 | CacheSummary struct { 305 | TotalCacheSizeMiB int `json:"TotalCacheSizeMiB"` 306 | } `json:"CacheSummary"` 307 | ControllerRates struct { 308 | ConsistencyCheckRatePercent int `json:"ConsistencyCheckRatePercent"` 309 | RebuildRatePercent int `json:"RebuildRatePercent"` 310 | } `json:"ControllerRates"` 311 | PCIeInterface struct { 312 | LanesInUse int `json:"LanesInUse"` 313 | MaxLanes int `json:"MaxLanes"` 314 | } `json:"PCIeInterface"` 315 | Status Status `json:"Status"` 316 | SupportedControllerProtocols []string `json:"SupportedControllerProtocols"` 317 | SupportedDeviceProtocols []string `json:"SupportedDeviceProtocols"` 318 | SupportedRAIDTypes []string `json:"SupportedRAIDTypes"` 319 | } 320 | 321 | type StorageDrive struct { 322 | Id string `json:"Id"` 323 | Name string `json:"Name"` 324 | Description string `json:"Description"` 325 | IndicatorLED string `json:"IndicatorLED"` 326 | LocationIndicatorActive *bool `json:"LocationIndicatorActive"` 327 | MediaType string `json:"MediaType"` 328 | Manufacturer string `json:"Manufacturer"` 329 | Model string `json:"Model"` 330 | CapacityBytes int `json:"CapacityBytes"` 331 | BlockSizeBytes int `json:"BlockSizeBytes"` 332 | CapableSpeedGbs float64 `json:"CapableSpeedGbs"` 333 | Status Status `json:"Status"` 334 | SerialNumber string `json:"SerialNumber"` 335 | Protocol string `json:"Protocol"` 336 | Revision string `json:"Revision"` 337 | PartNumber string `json:"PartNumber"` 338 | PredictedLifeLeft float64 `json:"PredictedMediaLifeLeftPercent"` 339 | RotationSpeedRPM float64 `json:"RotationSpeedRPM"` 340 | PhysicalLocation *struct { 341 | PartLocation 
*struct { 342 | LocationOrdinalValue int `json:"LocationOrdinalValue"` 343 | } `json:"PartLocation"` 344 | } `json:"PhysicalLocation"` 345 | // iLO 4 346 | CapacityMiB int `json:"CapacityMiB"` 347 | InterfaceType string `json:"InterfaceType"` 348 | SSDEnduranceUtilizationPercentage float64 `json:"SSDEnduranceUtilizationPercentage"` 349 | } 350 | 351 | type StorageVolume struct { 352 | Id string `json:"Id"` 353 | Name string `json:"Name"` 354 | Description string `json:"Description"` 355 | BlockSizeBytes int `json:"BlockSizeBytes"` 356 | CapacityBytes int `json:"CapacityBytes"` 357 | OptimumIOSizeBytes int `json:"OptimumIOSizeBytes"` 358 | StripSizeBytes int `json:"StripSizeBytes"` 359 | DisplayName string `json:"DisplayName"` 360 | Encrypted bool `json:"Encrypted"` 361 | EncryptionTypes []string `json:"EncryptionTypes"` 362 | MediaSpanCount int `json:"MediaSpanCount"` 363 | RAIDType string `json:"RAIDType"` 364 | ReadCachePolicy string `json:"ReadCachePolicy"` 365 | Status Status `json:"Status"` 366 | VolumeType string `json:"VolumeType"` 367 | WriteCachePolicy string `json:"WriteCachePolicy"` 368 | Links struct { 369 | DrivesCount int `json:"Drives@odata.count"` 370 | Drives OdataSlice `json:"Drives"` 371 | } `json:"Links"` 372 | } 373 | 374 | type Memory struct { 375 | Id string `json:"Id"` 376 | Name string `json:"Name"` 377 | Description string `json:"Description"` 378 | Manufacturer string `json:"Manufacturer"` 379 | ErrorCorrection string `json:"ErrorCorrection"` 380 | MemoryDeviceType string `json:"MemoryDeviceType"` 381 | AllowedSpeedsMHz []int `json:"AllowedSpeedsMHz"` 382 | OperatingSpeedMhz int `json:"OperatingSpeedMhz"` 383 | CapacityMiB int `json:"CapacityMiB"` 384 | PartNumber string `json:"PartNumber"` 385 | SerialNumber string `json:"SerialNumber"` 386 | DeviceLocator string `json:"DeviceLocator"` 387 | RankCount int `json:"RankCount"` 388 | BusWidthBits int `json:"BusWidthBits"` 389 | DataWidthBits int `json:"DataWidthBits"` 390 | Status Status 
`json:"Status"` 391 | // iLO 4 392 | HPMemoryType string `json:"HPMemoryType"` 393 | DIMMStatus string `json:"DIMMStatus"` 394 | DIMMType string `json:"DIMMType"` 395 | MaximumFrequencyMHz int `json:"MaximumFrequencyMHz"` 396 | Rank int `json:"Rank"` 397 | SizeMB int `json:"SizeMB"` 398 | } 399 | 400 | type NetworkInterface struct { 401 | Id string `json:"Id"` 402 | Name string `json:"Name"` 403 | Description string `json:"Description"` 404 | Status Status `json:"Status"` 405 | NetworkPorts Odata `json:"NetworkPorts"` 406 | Ports Odata `json:"Ports"` 407 | } 408 | 409 | func (n *NetworkInterface) GetPorts() string { 410 | if n.NetworkPorts.OdataId != "" { 411 | return n.NetworkPorts.OdataId 412 | } else { 413 | return n.Ports.OdataId 414 | } 415 | } 416 | 417 | type NetworkPort struct { 418 | Id string `json:"Id"` 419 | Name string `json:"Name"` 420 | Description string `json:"Description"` 421 | LinkStatus string `json:"LinkStatus"` 422 | CurrentLinkSpeedMbps float64 `json:"CurrentLinkSpeedMbps"` 423 | CurrentSpeedGbps float64 `json:"CurrentSpeedGbps"` 424 | Status Status `json:"Status"` 425 | SupportedLinkCapabilities []struct { 426 | LinkNetworkTechnology string `json:"LinkNetworkTechnology"` 427 | LinkSpeedMbps float64 `json:"LinkSpeedMbps"` 428 | } `json:"SupportedLinkCapabilities"` 429 | } 430 | 431 | type SystemResponse struct { 432 | IndicatorLED string `json:"IndicatorLED"` 433 | LocationIndicatorActive *bool `json:"LocationIndicatorActive"` 434 | Manufacturer string `json:"Manufacturer"` 435 | AssetTag string `json:"AssetTag"` 436 | PartNumber string `json:"PartNumber"` 437 | Description string `json:"Description"` 438 | HostName string `json:"HostName"` 439 | PowerState string `json:"PowerState"` 440 | Bios Odata `json:"Bios"` 441 | BiosVersion string `json:"BiosVersion"` 442 | Boot *struct { 443 | BootOptions Odata `json:"BootOptions"` 444 | Certificates Odata `json:"Certificates"` 445 | BootOrder []string `json:"BootOrder"` 446 | 
BootSourceOverrideEnabled string `json:"BootSourceOverrideEnabled"` 447 | BootSourceOverrideMode string `json:"BootSourceOverrideMode"` 448 | BootSourceOverrideTarget string `json:"BootSourceOverrideTarget"` 449 | UefiTargetBootSourceOverride any `json:"UefiTargetBootSourceOverride"` 450 | BootSourceOverrideTargetRedfishAllowableValues []string `json:"BootSourceOverrideTarget@Redfish.AllowableValues"` 451 | } `json:"Boot"` 452 | EthernetInterfaces Odata `json:"EthernetInterfaces"` 453 | HostWatchdogTimer *struct { 454 | FunctionEnabled bool `json:"FunctionEnabled"` 455 | Status Status `json:"Status"` 456 | TimeoutAction string `json:"TimeoutAction"` 457 | } `json:"HostWatchdogTimer"` 458 | HostingRoles []any `json:"HostingRoles"` 459 | Memory Odata `json:"Memory"` 460 | MemorySummary *struct { 461 | MemoryMirroring string `json:"MemoryMirroring"` 462 | Status Status `json:"Status"` 463 | TotalSystemMemoryGiB float64 `json:"TotalSystemMemoryGiB"` 464 | } `json:"MemorySummary"` 465 | Model string `json:"Model"` 466 | Name string `json:"Name"` 467 | NetworkInterfaces Odata `json:"NetworkInterfaces"` 468 | PCIeDevices OdataSlice `json:"PCIeDevices"` 469 | PCIeFunctions OdataSlice `json:"PCIeFunctions"` 470 | ProcessorSummary *struct { 471 | Count int `json:"Count"` 472 | LogicalProcessorCount int `json:"LogicalProcessorCount"` 473 | Model string `json:"Model"` 474 | Status Status `json:"Status"` 475 | } `json:"ProcessorSummary"` 476 | Processors Odata `json:"Processors"` 477 | SKU string `json:"SKU"` 478 | SecureBoot Odata `json:"SecureBoot"` 479 | SerialNumber string `json:"SerialNumber"` 480 | SimpleStorage Odata `json:"SimpleStorage"` 481 | Status Status `json:"Status"` 482 | Storage Odata `json:"Storage"` 483 | SystemType string `json:"SystemType"` 484 | TrustedModules []struct { 485 | FirmwareVersion string `json:"FirmwareVersion"` 486 | InterfaceType string `json:"InterfaceType"` 487 | Status Status `json:"Status"` 488 | } `json:"TrustedModules"` 489 | Oem struct 
{ 490 | Hpe struct { 491 | IndicatorLED string `json:"IndicatorLED"` 492 | } `json:"Hpe"` 493 | } `json:"Oem"` 494 | } 495 | 496 | type PowerResponse struct { 497 | Name string `json:"Name"` 498 | Description string `json:"Description"` 499 | PowerControl []PowerControlUnit `json:"PowerControl"` 500 | PowerSupplies []PowerSupplyUnit `json:"PowerSupplies"` 501 | Redundancy []Redundancy `json:"Redundancy"` 502 | Voltages []struct { 503 | Name string `json:"Name"` 504 | SensorNumber int `json:"SensorNumber"` 505 | PhysicalContext string `json:"PhysicalContext"` 506 | Status Status `json:"Status"` 507 | // These should be float64, but they have been seen reported as "N/A" so we use the any type 508 | ReadingVolts any `json:"ReadingVolts"` 509 | LowerThresholdCritical any `json:"LowerThresholdCritical"` 510 | LowerThresholdFatal any `json:"LowerThresholdFatal"` 511 | LowerThresholdNonCritical any `json:"LowerThresholdNonCritical"` 512 | UpperThresholdCritical any `json:"UpperThresholdCritical"` 513 | UpperThresholdFatal any `json:"UpperThresholdFatal"` 514 | UpperThresholdNonCritical any `json:"UpperThresholdNonCritical"` 515 | } `json:"Voltages"` 516 | Oem struct { 517 | TsFujitsu *struct { 518 | OdataType string `json:"@odata.type"` 519 | PsuSumStatus string `json:"PsuSumStatus"` 520 | VoltageSumStatus string `json:"VoltageSumStatus"` 521 | PowerConfigSumStatus string `json:"PowerConfigSumStatus"` 522 | ChassisPowerConsumption *struct { 523 | CurrentPowerConsumptionW float64 `json:"CurrentPowerConsumptionW"` 524 | MinimumPowerW float64 `json:"MinimumPowerW"` 525 | PeakPowerW float64 `json:"PeakPowerW"` 526 | AveragePowerW float64 `json:"AveragePowerW"` 527 | WarningThresholdW float64 `json:"WarningThresholdW"` 528 | CriticalThresholdW float64 `json:"CriticalThresholdW"` 529 | Designation string `json:"Designation"` 530 | CurrentMaximumPowerW float64 `json:"CurrentMaximumPowerW"` 531 | } `json:"ChassisPowerConsumption"` 532 | ChassisPowerSensors []struct { 533 | 
Designation string `json:"Designation"` 534 | EntityID string `json:"EntityId"` 535 | EntityInstance int `json:"EntityInstance"` 536 | CurrentPowerConsumptionW float64 `json:"CurrentPowerConsumptionW"` 537 | LegacyStatus string `json:"LegacyStatus"` 538 | } `json:"ChassisPowerSensors"` 539 | MaxUsage float64 `json:"MaxUsage"` 540 | ControlMode string `json:"ControlMode"` 541 | PsuSmartRedundancyStatusSensor string `json:"PsuSmartRedundancyStatusSensor"` 542 | PsuSmartRedundancyActivePSUSensor int `json:"PsuSmartRedundancyActivePSUSensor"` 543 | } `json:"ts_fujitsu"` 544 | } `json:"Oem"` 545 | } 546 | 547 | type PowerControlUnit struct { 548 | Id string `json:"Id"` 549 | Name string `json:"Name"` 550 | PowerAllocatedWatts float64 `json:"PowerAllocatedWatts"` 551 | PowerAvailableWatts float64 `json:"PowerAvailableWatts"` 552 | PowerCapacityWatts float64 `json:"PowerCapacityWatts"` 553 | PowerConsumedWatts float64 `json:"PowerConsumedWatts"` 554 | PowerRequestedWatts float64 `json:"PowerRequestedWatts"` 555 | PowerLimit *struct { 556 | CorrectionInMs int `json:"CorrectionInMs"` 557 | LimitException string `json:"LimitException"` 558 | LimitInWatts int `json:"LimitInWatts"` 559 | } `json:"PowerLimit"` 560 | PowerMetrics *PowerMetrics `json:"PowerMetrics"` 561 | } 562 | 563 | type PowerMetrics struct { 564 | AvgConsumedWatts float64 `json:"AverageConsumedWatts"` 565 | MaxConsumedWatts float64 `json:"MaxConsumedWatts"` 566 | MinConsumedWatts float64 `json:"MinConsumedWatts"` 567 | IntervalInMinutes int `json:"IntervalInMin"` 568 | } 569 | 570 | type PowerSupplyUnit struct { 571 | Name string `json:"Name"` 572 | Assembly Odata `json:"Assembly"` 573 | FirmwareVersion string `json:"FirmwareVersion"` 574 | InputRanges []struct { 575 | InputType string `json:"InputType"` 576 | MaximumFrequencyHz float64 `json:"MaximumFrequencyHz"` 577 | MaximumVoltage float64 `json:"MaximumVoltage"` 578 | MinimumFrequencyHz float64 `json:"MinimumFrequencyHz"` 579 | MinimumVoltage float64 
`json:"MinimumVoltage"` 580 | OutputWattage float64 `json:"OutputWattage"` 581 | } `json:"InputRanges"` 582 | HotPluggable bool `json:"HotPluggable"` 583 | EfficiencyPercent float64 `json:"EfficiencyPercent"` 584 | PowerOutputWatts float64 `json:"PowerOutputWatts"` 585 | LastPowerOutputWatts float64 `json:"LastPowerOutputWatts"` 586 | PowerInputWatts float64 `json:"PowerInputWatts"` 587 | PowerCapacityWatts float64 `json:"PowerCapacityWatts"` 588 | LineInputVoltage float64 `json:"LineInputVoltage"` 589 | LineInputVoltageType string `json:"LineInputVoltageType"` 590 | Manufacturer string `json:"Manufacturer"` 591 | Model string `json:"Model"` 592 | PartNumber string `json:"PartNumber"` 593 | PowerSupplyType string `json:"PowerSupplyType"` 594 | SerialNumber string `json:"SerialNumber"` 595 | SparePartNumber string `json:"SparePartNumber"` 596 | Status Status `json:"Status"` 597 | Redundancy []Redundancy `json:"Redundancy"` 598 | } 599 | 600 | func (psu *PowerSupplyUnit) GetOutputPower() float64 { 601 | if psu.PowerOutputWatts > 0 { 602 | return psu.PowerOutputWatts 603 | } 604 | return psu.LastPowerOutputWatts 605 | } 606 | 607 | type EventLogResponse struct { 608 | Name string `json:"Name"` 609 | Description string `json:"Description"` 610 | Members []struct { 611 | Id string `json:"Id"` 612 | EventId string `json:"EventId"` 613 | Name string `json:"Name"` 614 | Created string `json:"Created"` 615 | Description string `json:"Description"` 616 | EntryCode xstring `json:"EntryCode"` 617 | EntryType string `json:"EntryType"` 618 | Message string `json:"Message"` 619 | MessageArgs []any `json:"MessageArgs"` 620 | MessageId string `json:"MessageId"` 621 | SensorNumber int `json:"SensorNumber"` 622 | SensorType xstring `json:"SensorType"` 623 | Severity string `json:"Severity"` 624 | } `json:"Members"` 625 | } 626 | 627 | // Dell OEM 628 | const DellSystemPath string = "/redfish/v1/Systems/System.Embedded.1/Oem/Dell/DellSystem/System.Embedded.1" 629 | 630 | type 
DellSystem struct { 631 | BIOSReleaseDate string `json:"BIOSReleaseDate"` 632 | BatteryRollupStatus string `json:"BatteryRollupStatus"` 633 | CoolingRollupStatus string `json:"CoolingRollupStatus"` 634 | CurrentRollupStatus string `json:"CurrentRollupStatus"` 635 | EstimatedExhaustTemperatureCelsius int `json:"EstimatedExhaustTemperatureCelsius"` 636 | EstimatedSystemAirflowCFM int `json:"EstimatedSystemAirflowCFM"` 637 | ExpressServiceCode string `json:"ExpressServiceCode"` 638 | FanRollupStatus string `json:"FanRollupStatus"` 639 | IntrusionRollupStatus string `json:"IntrusionRollupStatus"` 640 | LicensingRollupStatus string `json:"LicensingRollupStatus"` 641 | MaxCPUSockets int `json:"MaxCPUSockets"` 642 | MaxDIMMSlots int `json:"MaxDIMMSlots"` 643 | MaxPCIeSlots int `json:"MaxPCIeSlots"` 644 | MaxSystemMemoryMiB int `json:"MaxSystemMemoryMiB"` 645 | PSRollupStatus string `json:"PSRollupStatus"` 646 | PopulatedDIMMSlots int `json:"PopulatedDIMMSlots"` 647 | PopulatedPCIeSlots int `json:"PopulatedPCIeSlots"` 648 | PowerCapEnabledState string `json:"PowerCapEnabledState"` 649 | SELRollupStatus string `json:"SELRollupStatus"` 650 | StorageRollupStatus string `json:"StorageRollupStatus"` 651 | SystemGeneration string `json:"SystemGeneration"` 652 | SystemHealthRollupStatus string `json:"SystemHealthRollupStatus"` 653 | TempRollupStatus string `json:"TempRollupStatus"` 654 | TempStatisticsRollupStatus string `json:"TempStatisticsRollupStatus"` 655 | } 656 | -------------------------------------------------------------------------------- /internal/collector/redfish.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "bytes" 5 | "crypto/tls" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | neturl "net/url" 11 | "path" 12 | "strings" 13 | "time" 14 | 15 | "github.com/mrlhansen/idrac_exporter/internal/config" 16 | "github.com/mrlhansen/idrac_exporter/internal/log" 17 | ) 18 | 19 | type 
Redfish struct { 20 | http *http.Client 21 | baseurl string 22 | hostname string 23 | username string 24 | password string 25 | session struct { 26 | disabled bool 27 | id string 28 | token string 29 | } 30 | } 31 | 32 | const redfishRootPath = "/redfish/v1" 33 | 34 | func NewRedfish(scheme, hostname, username, password string) *Redfish { 35 | return &Redfish{ 36 | baseurl: fmt.Sprintf("%s://%s", scheme, hostname), 37 | hostname: hostname, 38 | username: username, 39 | password: password, 40 | http: &http.Client{ 41 | Transport: &http.Transport{ 42 | Proxy: http.ProxyFromEnvironment, 43 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 44 | }, 45 | Timeout: time.Duration(config.Config.Timeout) * time.Second, 46 | }, 47 | } 48 | } 49 | 50 | func (r *Redfish) CreateSession() bool { 51 | url := fmt.Sprintf("%s/redfish/v1/SessionService/Sessions", r.baseurl) 52 | session := Session{ 53 | Username: r.username, 54 | Password: r.password, 55 | } 56 | body, _ := json.Marshal(&session) 57 | 58 | resp, err := r.http.Post(url, "application/json", bytes.NewBuffer(body)) 59 | if resp != nil { 60 | defer resp.Body.Close() 61 | } 62 | if err != nil { 63 | log.Error("Failed to query %q: %v", url, err) 64 | return false 65 | } 66 | 67 | if resp.StatusCode != http.StatusCreated { 68 | log.Error("Unexpected status code from %q: %s", url, resp.Status) 69 | return false 70 | } 71 | 72 | err = json.NewDecoder(resp.Body).Decode(&session) 73 | if err != nil { 74 | log.Error("Error decoding response from %q: %v", url, err) 75 | return false 76 | } 77 | 78 | r.session.id = session.OdataId 79 | r.session.token = resp.Header.Get("X-Auth-Token") 80 | 81 | // iLO 4 82 | if len(r.session.id) == 0 { 83 | u, err := neturl.Parse(resp.Header.Get("Location")) 84 | if err == nil { 85 | r.session.id = u.Path 86 | } 87 | } 88 | 89 | log.Debug("Succesfully created session: %s", path.Base(r.session.id)) 90 | return true 91 | } 92 | 93 | func (r *Redfish) DeleteSession() bool { 94 | if 
len(r.session.token) == 0 { 95 | return true 96 | } 97 | 98 | url := fmt.Sprintf("%s%s", r.baseurl, r.session.id) 99 | req, err := http.NewRequest("DELETE", url, nil) 100 | if err != nil { 101 | return false 102 | } 103 | 104 | req.Header.Add("Accept", "application/json") 105 | req.Header.Set("X-Auth-Token", r.session.token) 106 | 107 | resp, err := r.http.Do(req) 108 | if resp != nil { 109 | resp.Body.Close() 110 | } 111 | if err != nil { 112 | log.Error("Failed to query %q: %v", url, err) 113 | return false 114 | } 115 | 116 | if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent { 117 | log.Error("Unexpected status code from %q: %s", url, resp.Status) 118 | return false 119 | } 120 | 121 | log.Debug("Succesfully deleted session: %s", path.Base(r.session.id)) 122 | r.session.id = "" 123 | r.session.token = "" 124 | 125 | return true 126 | } 127 | 128 | func (r *Redfish) RefreshSession() bool { 129 | if r.session.disabled { 130 | return false 131 | } 132 | 133 | defer func() { 134 | if r.session.disabled { 135 | log.Info("Session authentication disabled for %s due to failed refresh", r.hostname) 136 | } 137 | }() 138 | 139 | if len(r.session.token) == 0 { 140 | ok := r.CreateSession() 141 | if !ok { 142 | r.session.disabled = true 143 | } 144 | return ok 145 | } 146 | 147 | url := fmt.Sprintf("%s%s", r.baseurl, r.session.id) 148 | req, err := http.NewRequest("GET", url, nil) 149 | if err != nil { 150 | return false 151 | } 152 | 153 | req.Header.Add("Accept", "application/json") 154 | req.Header.Set("X-Auth-Token", r.session.token) 155 | 156 | resp, err := r.http.Do(req) 157 | if resp != nil { 158 | resp.Body.Close() 159 | } 160 | if err != nil { 161 | return false 162 | } 163 | 164 | if resp.StatusCode == http.StatusUnauthorized { 165 | if r.CreateSession() { 166 | return true 167 | } else { 168 | r.session.disabled = true 169 | r.session.token = "" 170 | r.session.id = "" 171 | return false 172 | } 173 | } 174 | 175 | return true 176 | } 
177 | 178 | func (r *Redfish) Get(path string, res any) bool { 179 | if !strings.HasPrefix(path, redfishRootPath) { 180 | return false 181 | } 182 | 183 | url := fmt.Sprintf("%s%s", r.baseurl, path) 184 | req, err := http.NewRequest("GET", url, nil) 185 | if err != nil { 186 | return false 187 | } 188 | 189 | req.Header.Add("Accept", "application/json") 190 | if len(r.session.token) > 0 { 191 | req.Header.Set("X-Auth-Token", r.session.token) 192 | } else { 193 | req.SetBasicAuth(r.username, r.password) 194 | } 195 | 196 | log.Debug("Querying %q", url) 197 | resp, err := r.http.Do(req) 198 | if resp != nil { 199 | defer resp.Body.Close() 200 | } 201 | if err != nil { 202 | log.Error("Failed to query %q: %v", url, err) 203 | return false 204 | } 205 | 206 | if resp.StatusCode != http.StatusOK { 207 | log.Error("Unexpected status code from %q: %s", url, resp.Status) 208 | return false 209 | } 210 | 211 | body, err := io.ReadAll(resp.Body) 212 | if err != nil { 213 | log.Error("Error reading response from %q: %v", url, err) 214 | return false 215 | } 216 | 217 | if config.Debug { 218 | log.Debug("Response from %q: %s", url, body) 219 | } 220 | 221 | err = json.Unmarshal(body, res) 222 | if err != nil { 223 | log.Error("Error decoding response from %q: %v", url, err) 224 | return false 225 | } 226 | 227 | return true 228 | } 229 | 230 | func (r *Redfish) Exists(path string) bool { 231 | if !strings.HasPrefix(path, redfishRootPath) { 232 | return false 233 | } 234 | 235 | url := fmt.Sprintf("%s%s", r.baseurl, path) 236 | req, err := http.NewRequest("HEAD", url, nil) 237 | if err != nil { 238 | return false 239 | } 240 | 241 | req.Header.Add("Accept", "application/json") 242 | if len(r.session.token) > 0 { 243 | req.Header.Set("X-Auth-Token", r.session.token) 244 | } else { 245 | req.SetBasicAuth(r.username, r.password) 246 | } 247 | 248 | resp, err := r.http.Do(req) 249 | if resp != nil { 250 | resp.Body.Close() 251 | } 252 | if err != nil { 253 | return false 254 | } 
255 | 256 | if resp.StatusCode >= 400 && resp.StatusCode <= 499 { 257 | return false 258 | } 259 | 260 | return true 261 | } 262 | -------------------------------------------------------------------------------- /internal/collector/unmarshal.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | // When unmarshalling JSON from iDRAC, the "xstring" type defined here can be 8 | // one of the following: 9 | // - nil 10 | // - string 11 | // - [{"Member": "VALUE"}] 12 | type xstring string 13 | 14 | func (w *xstring) UnmarshalJSON(data []byte) error { 15 | var x any 16 | 17 | err := json.Unmarshal(data, &x) 18 | if err != nil { 19 | return err 20 | } 21 | 22 | if x == nil { 23 | *w = xstring("") 24 | return nil 25 | } 26 | 27 | s, ok := x.(string) 28 | if ok { 29 | *w = xstring(s) 30 | return nil 31 | } 32 | 33 | list := x.([]any) 34 | dict := list[0].(map[string]any) 35 | s, ok = dict["Member"].(string) 36 | if ok { 37 | *w = xstring(s) 38 | return nil 39 | } 40 | 41 | *w = xstring("") 42 | return nil 43 | } 44 | -------------------------------------------------------------------------------- /internal/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | 7 | "github.com/mrlhansen/idrac_exporter/internal/log" 8 | "github.com/xhit/go-str2duration/v2" 9 | "gopkg.in/yaml.v3" 10 | ) 11 | 12 | var Debug bool = false 13 | var Config RootConfig = RootConfig{ 14 | Hosts: make(map[string]*HostConfig), 15 | } 16 | 17 | func (c *RootConfig) GetHostCfg(target string) *HostConfig { 18 | c.mutex.Lock() 19 | defer c.mutex.Unlock() 20 | 21 | hostCfg, ok := c.Hosts[target] 22 | if !ok { 23 | def, ok := c.Hosts["default"] 24 | if !ok { 25 | log.Error("Could not find login information for host: %s", target) 26 | return nil 27 | } 28 | hostCfg = &HostConfig{ 29 | Hostname: target, 
30 | Scheme: def.Scheme, 31 | Username: def.Username, 32 | Password: def.Password, 33 | } 34 | c.Hosts[target] = hostCfg 35 | } 36 | 37 | return hostCfg 38 | } 39 | 40 | func readConfigFile(filename string) { 41 | yamlFile, err := os.Open(filename) 42 | if err != nil { 43 | log.Fatal("Failed to open configuration file: %s: %s", filename, err) 44 | } 45 | 46 | err = yaml.NewDecoder(yamlFile).Decode(&Config) 47 | yamlFile.Close() 48 | if err != nil { 49 | log.Fatal("Invalid configuration file: %s: %s", filename, err.Error()) 50 | } 51 | 52 | log.Info("Configuration file: %s", filename) 53 | } 54 | 55 | func ReadConfig(filename string) { 56 | if len(filename) > 0 { 57 | readConfigFile(filename) 58 | } 59 | 60 | readConfigEnv() 61 | 62 | // main section 63 | if Config.Address == "" { 64 | Config.Address = "0.0.0.0" 65 | } 66 | 67 | if Config.Port == 0 { 68 | Config.Port = 9348 69 | } 70 | 71 | if Config.Timeout == 0 { 72 | Config.Timeout = 10 73 | } 74 | 75 | if Config.MetricsPrefix == "" { 76 | Config.MetricsPrefix = "idrac" 77 | } 78 | 79 | if Config.HttpsProxy != "" { 80 | os.Setenv("HTTPS_PROXY", Config.HttpsProxy) 81 | } 82 | 83 | // hosts section 84 | if len(Config.Hosts) == 0 { 85 | log.Fatal("Invalid configuration: empty section: hosts") 86 | } 87 | 88 | for k, v := range Config.Hosts { 89 | if v == nil { 90 | log.Fatal("Invalid configuration: missing username and password for host: %s", k) 91 | } 92 | if v.Username == "" { 93 | log.Fatal("Invalid configuration: missing username for host: %s", k) 94 | } 95 | if v.Password == "" { 96 | log.Fatal("Invalid configuration: missing password for host: %s", k) 97 | } 98 | 99 | switch v.Scheme { 100 | case "": 101 | v.Scheme = "https" 102 | case "http", "https": 103 | default: 104 | log.Fatal("Invalid configuration: invalid scheme for host: %s", k) 105 | } 106 | 107 | v.Hostname = k 108 | } 109 | 110 | // events section 111 | switch strings.ToLower(Config.Event.Severity) { 112 | case "ok": 113 | 
Config.Event.SeverityLevel = 0 114 | case "warning", "": 115 | Config.Event.SeverityLevel = 1 116 | case "critical": 117 | Config.Event.SeverityLevel = 2 118 | default: 119 | log.Fatal("Invalid configuration: invalid value: %s", Config.Event.Severity) 120 | } 121 | 122 | if Config.Event.MaxAge == "" { 123 | Config.Event.MaxAge = "7d" 124 | } 125 | 126 | t, err := str2duration.ParseDuration(Config.Event.MaxAge) 127 | if err != nil { 128 | log.Fatal("Invalid configuration: unable to parse duration: %v", err) 129 | } 130 | Config.Event.MaxAgeSeconds = t.Seconds() 131 | 132 | // metrics 133 | if Config.Collect.All { 134 | Config.Collect.System = true 135 | Config.Collect.Sensors = true 136 | Config.Collect.Events = true 137 | Config.Collect.Power = true 138 | Config.Collect.Storage = true 139 | Config.Collect.Memory = true 140 | Config.Collect.Network = true 141 | Config.Collect.Processors = true 142 | Config.Collect.Extra = true 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /internal/config/discover.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "encoding/json" 5 | "log" 6 | ) 7 | 8 | type DiscoverItem struct { 9 | Targets []string `json:"targets"` 10 | Labels map[string]string `json:"labels,omitempty"` 11 | } 12 | 13 | func GetDiscover() string { 14 | var list []DiscoverItem 15 | for t := range Config.Hosts { 16 | if t == "default" { 17 | continue 18 | } 19 | list = append(list, DiscoverItem{ 20 | Targets: []string{t}, 21 | }) 22 | } 23 | 24 | if len(list) == 0 { 25 | return "[]" 26 | } 27 | 28 | b, err := json.Marshal(list) 29 | if err != nil { 30 | log.Printf("failed to marshal json: %v", err) 31 | return "[]" 32 | } 33 | 34 | return string(b) 35 | } 36 | -------------------------------------------------------------------------------- /internal/config/env.go: 
-------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | func getEnvString(env string, val *string) { 10 | value := os.Getenv(env) 11 | if len(value) == 0 { 12 | return 13 | } 14 | 15 | *val = value 16 | } 17 | 18 | func getEnvBool(env string, val *bool) { 19 | value := os.Getenv(env) 20 | if len(value) == 0 { 21 | return 22 | } 23 | 24 | switch strings.ToLower(value) { 25 | case "0", "false": 26 | *val = false 27 | default: 28 | *val = true 29 | } 30 | } 31 | 32 | func getEnvUint(env string, val *uint) { 33 | s := os.Getenv(env) 34 | if len(s) == 0 { 35 | return 36 | } 37 | 38 | value, err := strconv.ParseUint(s, 10, 0) 39 | if err == nil { 40 | *val = uint(value) 41 | } 42 | } 43 | 44 | func readConfigEnv() { 45 | var username string 46 | var password string 47 | var scheme string 48 | 49 | getEnvString("CONFIG_ADDRESS", &Config.Address) 50 | getEnvString("CONFIG_METRICS_PREFIX", &Config.MetricsPrefix) 51 | getEnvString("CONFIG_DEFAULT_USERNAME", &username) 52 | getEnvString("CONFIG_DEFAULT_PASSWORD", &password) 53 | getEnvString("CONFIG_DEFAULT_SCHEME", &scheme) 54 | getEnvString("CONFIG_EVENTS_SEVERITY", &Config.Event.Severity) 55 | getEnvString("CONFIG_EVENTS_MAXAGE", &Config.Event.MaxAge) 56 | getEnvString("CONFIG_TLS_CERT_FILE", &Config.TLS.CertFile) 57 | getEnvString("CONFIG_TLS_KEY_FILE", &Config.TLS.KeyFile) 58 | 59 | getEnvUint("CONFIG_PORT", &Config.Port) 60 | getEnvUint("CONFIG_TIMEOUT", &Config.Timeout) 61 | 62 | getEnvBool("CONFIG_TLS_ENABLED", &Config.TLS.Enabled) 63 | getEnvBool("CONFIG_METRICS_ALL", &Config.Collect.All) 64 | getEnvBool("CONFIG_METRICS_SYSTEM", &Config.Collect.System) 65 | getEnvBool("CONFIG_METRICS_SENSORS", &Config.Collect.Sensors) 66 | getEnvBool("CONFIG_METRICS_EVENTS", &Config.Collect.Events) 67 | getEnvBool("CONFIG_METRICS_POWER", &Config.Collect.Power) 68 | getEnvBool("CONFIG_METRICS_STORAGE", 
&Config.Collect.Storage) 69 | getEnvBool("CONFIG_METRICS_MEMORY", &Config.Collect.Memory) 70 | getEnvBool("CONFIG_METRICS_NETWORK", &Config.Collect.Network) 71 | getEnvBool("CONFIG_METRICS_PROCESSORS", &Config.Collect.Processors) 72 | getEnvBool("CONFIG_METRICS_EXTRA", &Config.Collect.Extra) 73 | 74 | def, ok := Config.Hosts["default"] 75 | if !ok { 76 | def = &HostConfig{} 77 | } 78 | 79 | if len(username) > 0 { 80 | def.Username = username 81 | ok = true 82 | } 83 | 84 | if len(password) > 0 { 85 | def.Password = password 86 | ok = true 87 | } 88 | 89 | if len(scheme) > 0 { 90 | def.Scheme = scheme 91 | ok = true 92 | } 93 | 94 | if ok { 95 | Config.Hosts["default"] = def 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /internal/config/model.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "sync" 4 | 5 | type HostConfig struct { 6 | Username string `yaml:"username"` 7 | Password string `yaml:"password"` 8 | Scheme string `yaml:"scheme"` 9 | Hostname string 10 | } 11 | 12 | type CollectConfig struct { 13 | All bool `yaml:"all"` 14 | System bool `yaml:"system"` 15 | Sensors bool `yaml:"sensors"` 16 | Events bool `yaml:"events"` 17 | Power bool `yaml:"power"` 18 | Storage bool `yaml:"storage"` 19 | Memory bool `yaml:"memory"` 20 | Network bool `yaml:"network"` 21 | Processors bool `yaml:"processors"` 22 | Extra bool `yaml:"extra"` 23 | } 24 | 25 | type EventConfig struct { 26 | Severity string `yaml:"severity"` 27 | MaxAge string `yaml:"maxage"` 28 | SeverityLevel int 29 | MaxAgeSeconds float64 30 | } 31 | 32 | type TLSConfig struct { 33 | Enabled bool `yaml:"enabled"` 34 | CertFile string `yaml:"cert_file"` 35 | KeyFile string `yaml:"key_file"` 36 | } 37 | 38 | type RootConfig struct { 39 | mutex sync.Mutex 40 | Address string `yaml:"address"` 41 | Port uint `yaml:"port"` 42 | HttpsProxy string `yaml:"https_proxy"` 43 | MetricsPrefix string 
`yaml:"metrics_prefix"` 44 | Collect CollectConfig `yaml:"metrics"` 45 | Event EventConfig `yaml:"events"` 46 | TLS TLSConfig `yaml:"tls"` 47 | Timeout uint `yaml:"timeout"` 48 | Hosts map[string]*HostConfig `yaml:"hosts"` 49 | } 50 | -------------------------------------------------------------------------------- /internal/log/default.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "os" 4 | 5 | var logger = &Logger{ 6 | level: LevelInfo, 7 | console: true, 8 | dateFormat: "2006-01-02T15:04:05.000", 9 | writer: os.Stdout, 10 | } 11 | 12 | func SetDefaultLogger(l *Logger) { 13 | logger = l 14 | } 15 | 16 | func SetLogFile(path string) error { 17 | return logger.SetLogFile(path) 18 | } 19 | 20 | func SetLevel(level int) { 21 | logger.SetLevel(level) 22 | } 23 | 24 | func Fatal(format string, args ...any) { 25 | logger.Fatal(format, args...) 26 | } 27 | 28 | func Error(format string, args ...any) { 29 | logger.Error(format, args...) 30 | } 31 | 32 | func Warn(format string, args ...any) { 33 | logger.Warn(format, args...) 34 | } 35 | 36 | func Info(format string, args ...any) { 37 | logger.Info(format, args...) 38 | } 39 | 40 | func Debug(format string, args ...any) { 41 | logger.Debug(format, args...) 
42 | } 43 | -------------------------------------------------------------------------------- /internal/log/logger.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "strings" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | const ( 13 | LevelFatal = 0 14 | LevelError = 1 15 | LevelWarn = 2 16 | LevelInfo = 3 17 | LevelDebug = 4 18 | ) 19 | 20 | type Logger struct { 21 | level int 22 | dateFormat string 23 | console bool 24 | logFile string 25 | file *os.File 26 | writer io.Writer 27 | mu sync.Mutex 28 | } 29 | 30 | func NewLogger(level int, console bool) *Logger { 31 | var w io.Writer 32 | if console { 33 | w = os.Stdout 34 | } 35 | 36 | return &Logger{ 37 | level: level, 38 | console: console, 39 | dateFormat: "2006-01-02T15:04:05.000", 40 | writer: w, 41 | } 42 | } 43 | 44 | func (log *Logger) SetLogFile(path string) error { 45 | log.mu.Lock() 46 | defer log.mu.Unlock() 47 | 48 | log.logFile = path 49 | return log.open() 50 | } 51 | 52 | func (log *Logger) SetLevel(level int) { 53 | log.level = level 54 | } 55 | 56 | func (log *Logger) Fatal(format string, args ...any) { 57 | log.write(LevelFatal, format, args...) 58 | if log.file != nil { 59 | log.file.Close() 60 | } 61 | os.Exit(1) 62 | } 63 | 64 | func (log *Logger) Error(format string, args ...any) { 65 | if LevelError > log.level { 66 | return 67 | } 68 | log.write(LevelError, format, args...) 69 | } 70 | 71 | func (log *Logger) Warn(format string, args ...any) { 72 | if LevelWarn > log.level { 73 | return 74 | } 75 | log.write(LevelWarn, format, args...) 76 | } 77 | 78 | func (log *Logger) Info(format string, args ...any) { 79 | if LevelInfo > log.level { 80 | return 81 | } 82 | log.write(LevelInfo, format, args...) 83 | } 84 | 85 | func (log *Logger) Debug(format string, args ...any) { 86 | if LevelDebug > log.level { 87 | return 88 | } 89 | log.write(LevelDebug, format, args...) 
90 | } 91 | 92 | func (log *Logger) open() error { 93 | perms := os.O_WRONLY | os.O_APPEND | os.O_CREATE 94 | 95 | f, err := os.OpenFile(log.logFile, perms, 0o640) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | if log.console { 101 | log.writer = io.MultiWriter(os.Stdout, f) 102 | } else { 103 | log.writer = f 104 | } 105 | 106 | log.file = f 107 | return nil 108 | } 109 | 110 | func (log *Logger) write(level int, format string, args ...any) { 111 | var lvlstr string 112 | 113 | if log.writer == nil { 114 | return 115 | } 116 | 117 | switch level { 118 | case LevelFatal: 119 | lvlstr = "FATAL" 120 | case LevelError: 121 | lvlstr = "ERROR" 122 | case LevelWarn: 123 | lvlstr = "WARN" 124 | case LevelInfo: 125 | lvlstr = "INFO" 126 | case LevelDebug: 127 | lvlstr = "DEBUG" 128 | } 129 | 130 | log.mu.Lock() 131 | defer log.mu.Unlock() 132 | 133 | dt := time.Now() 134 | f := fmt.Sprintf(format, args...) 135 | f = fmt.Sprintf("%s %-5s %s\n", dt.Format(log.dateFormat), lvlstr, strings.TrimSpace(f)) 136 | log.writer.Write([]byte(f)) 137 | } 138 | -------------------------------------------------------------------------------- /internal/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | var ( 4 | Version string 5 | Revision string 6 | ) 7 | -------------------------------------------------------------------------------- /sample-config.yml: -------------------------------------------------------------------------------- 1 | # This is a sample configuration file for the iDRAC exporter, 2 | # including default values and equivalent environment variables. 3 | # The environment variables take precedence over the values in 4 | # the configuration file. 
5 | 6 | # Listen address 7 | # Default value: 0.0.0.0 8 | # Environment variable CONFIG_ADDRESS=0.0.0.0 9 | address: 0.0.0.0 10 | 11 | # Listen port 12 | # Default value: 9348 13 | # Environment variable CONFIG_PORT=9348 14 | port: 9348 15 | 16 | # HTTP timeout in seconds for Redfish API calls 17 | # Default value: 10 18 | # Environment variable CONFIG_TIMEOUT=10 19 | timeout: 10 20 | 21 | # Prefix for the exported metrics 22 | # Default value: idrac 23 | # Environment variable CONFIG_METRICS_PREFIX=idrac 24 | metrics_prefix: idrac 25 | 26 | # Enable the use of an https proxy for all requests 27 | # Environment variable: HTTPS_PROXY=http://localhost:8888 28 | https_proxy: http://localhost:8888 29 | 30 | # The TLS section is used to enable HTTPS for the exporter. To enable TLS you 31 | # need a PEM encoded certificate and private key. The public certificate must 32 | # include the entire chain of trust. 33 | # TLS can also be configured using the corresponding environment variables. 34 | tls: 35 | enabled: false # CONFIG_TLS_ENABLED=false 36 | cert_file: "" # CONFIG_TLS_CERT_FILE= 37 | key_file: "" # CONFIG_TLS_KEY_FILE= 38 | 39 | # The hosts section is used to define login information for the different targets. 40 | # Hosts can be referenced either via their IP address or their hostname, as long 41 | # as it matches the "target" parameter when scraping the metrics. Optionally you 42 | # can also specify a scheme (http or https) for accessing the Redfish API, which 43 | # automatically defaults to https. 44 | # 45 | # When the "target" does not match any host, the exporter will attempt to use the 46 | # login information under "default". 
47 | # 48 | # The default username, password and scheme can be configured using the environment 49 | # variables CONFIG_DEFAULT_USERNAME, CONFIG_DEFAULT_PASSWORD and CONFIG_DEFAULT_SCHEME 50 | hosts: 51 | default: 52 | username: user 53 | password: pass 54 | 192.168.1.1: 55 | username: user 56 | password: pass 57 | scheme: http 58 | host01.example.com: 59 | username: user 60 | password: pass 61 | 62 | # The metrics section is used to select different groups of metrics. 63 | # See the README file for a detailed list of metrics in each group. 64 | # Each section can also be enabled using the shown environment variable. 65 | # The group "all" overrides all other groups and enables all metrics. 66 | metrics: 67 | all: false # CONFIG_METRICS_ALL=false 68 | processors: false # CONFIG_METRICS_PROCESSORS=false 69 | system: false # CONFIG_METRICS_SYSTEM=false 70 | sensors: false # CONFIG_METRICS_SENSORS=false 71 | power: false # CONFIG_METRICS_POWER=false 72 | events: false # CONFIG_METRICS_EVENTS=false 73 | storage: false # CONFIG_METRICS_STORAGE=false 74 | memory: false # CONFIG_METRICS_MEMORY=false 75 | network: false # CONFIG_METRICS_NETWORK=false 76 | extra: false # CONFIG_METRICS_EXTRA=false 77 | 78 | # The events section is used for filtering events when the "events" metrics group 79 | # is enabled. Events can be filtered based on minimum severity and maximum age. 80 | # Severity must be one of "ok", "warning", "critical" 81 | events: 82 | severity: warning # CONFIG_EVENTS_SEVERITY=warning 83 | maxage: 7d # CONFIG_EVENTS_MAXAGE=7d 84 | --------------------------------------------------------------------------------