├── .circleci └── config.yml ├── .editorconfig ├── .gitignore ├── .golangci.errcheck-exclude ├── .golangci.yml ├── .promu.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── Makefile.common ├── README.md ├── VERSION ├── cmd ├── gpfs_exporter │ ├── main.go │ └── main_test.go ├── gpfs_mmdf_exporter │ ├── main.go │ └── main_test.go └── gpfs_mmlssnapshot_exporter │ ├── main.go │ └── main_test.go ├── codecov.yml ├── collectors ├── collector.go ├── collector_test.go ├── config.go ├── config_test.go ├── mmces.go ├── mmces_test.go ├── mmdf.go ├── mmdf_test.go ├── mmgetstate.go ├── mmgetstate_test.go ├── mmhealth.go ├── mmhealth_test.go ├── mmlsfileset.go ├── mmlsfileset_test.go ├── mmlspool.go ├── mmlspool_test.go ├── mmlsqos.go ├── mmlsqos_test.go ├── mmlssnapshot.go ├── mmlssnapshot_test.go ├── mmpmon.go ├── mmpmon_test.go ├── mmrepquota.go ├── mmrepquota_test.go ├── mount.go ├── mount_test.go ├── verbs.go ├── verbs_test.go ├── waiter.go └── waiter_test.go ├── go.mod ├── go.sum └── systemd └── gpfs_exporter.service /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2.1 3 | 4 | orbs: 5 | prometheus: prometheus/prometheus@0.17.1 6 | codecov: codecov/codecov@3.2.2 7 | 8 | executors: 9 | # Whenever the Go version is updated here, .promu.yml should 10 | # also be updated. 
11 | golang: 12 | docker: 13 | - image: cimg/go:1.20.3 14 | 15 | jobs: 16 | test: 17 | executor: golang 18 | steps: 19 | - prometheus/setup_environment 20 | - run: make 21 | - run: make coverage 22 | - codecov/upload: 23 | file: coverage.txt 24 | - prometheus/store_artifact: 25 | file: gpfs_exporter 26 | workflows: 27 | version: 2 28 | gpfs_exporter: 29 | jobs: 30 | - test: 31 | filters: 32 | tags: 33 | only: /.*/ 34 | - prometheus/build: 35 | name: build 36 | parallelism: 1 37 | filters: 38 | tags: 39 | only: /.*/ 40 | - prometheus/publish_release: 41 | context: org-context 42 | docker_hub_organization: '' 43 | quay_io_organization: '' 44 | requires: 45 | - test 46 | - build 47 | filters: 48 | tags: 49 | only: /^v([0-9]).*/ 50 | branches: 51 | ignore: /.*/ 52 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | 11 | [*.go] 12 | indent_size = 4 13 | indent_style = tab 14 | 15 | # Tab indentation (no size specified) 16 | [Makefile] 17 | indent_style = tab 18 | 19 | [{*.md,*.yaml,*.yml}] 20 | indent_style = space 21 | indent_size = 2 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.tarballs 2 | /.build 3 | /gpfs_exporter 4 | /gpfs_mmdf_exporter 5 | /gpfs_mmlssnapshot_exporter 6 | /c.out 7 | /coverage.html 8 | /coverage.txt 9 | -------------------------------------------------------------------------------- /.golangci.errcheck-exclude: -------------------------------------------------------------------------------- 1 | // Used in HTTP handlers, any error is handled by the 
server itself. 2 | (net/http.ResponseWriter).Write 3 | // Never check for logger errors. 4 | (github.com/go-kit/log.Logger).Log 5 | // Do not check for rename errors 6 | os.Rename 7 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters-settings: 2 | errcheck: 3 | exclude: .golangci.errcheck-exclude 4 | -------------------------------------------------------------------------------- /.promu.yml: -------------------------------------------------------------------------------- 1 | go: 2 | version: 1.20 3 | cgo: false 4 | repository: 5 | path: github.com/treydock/gpfs_exporter 6 | build: 7 | binaries: 8 | - name: gpfs_exporter 9 | path: ./cmd/gpfs_exporter 10 | - name: gpfs_mmdf_exporter 11 | path: ./cmd/gpfs_mmdf_exporter 12 | - name: gpfs_mmlssnapshot_exporter 13 | path: ./cmd/gpfs_mmlssnapshot_exporter 14 | flags: -a -tags netgo 15 | ldflags: | 16 | -extldflags "-static" 17 | -X github.com/prometheus/common/version.Version={{.Version}} 18 | -X github.com/prometheus/common/version.Revision={{.Revision}} 19 | -X github.com/prometheus/common/version.Branch={{.Branch}} 20 | -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}} 21 | -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}} 22 | tarball: 23 | files: 24 | - LICENSE 25 | - CHANGELOG.md 26 | crossbuild: 27 | platforms: 28 | - linux/amd64 29 | - linux/ppc64le 30 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 3.1.0 / 2025-06-12 2 | 3 | * Add mmlspool collector (#72) 4 | 5 | ## 3.0.1 / 2024-03-21 6 | 7 | * add fileset label to user and group quotas to avoid duplicates (#68) 8 | 9 | ## 3.0.0 / 2023-11-20 10 | 11 | * [BREAKING] Change how mmhealth event filtering works and make events a metric (#62) 12 | * 
Add gpfs_health_event metric. 13 | * The collector.mmhealth.ignored-event flag will only filter events for the gpfs_health_event metric 14 | * TLS and auth support (#65) 15 | 16 | ## 3.0.0-rc.1 / 2023-07-12 17 | 18 | * [BREAKING] Change how mmhealth event filtering works and make events a metric (#62) 19 | * Add gpfs_health_event metric. 20 | * The collector.mmhealth.ignored-event flag will only filter events for the gpfs_health_event metric 21 | 22 | ## 2.6.0 / 2023-07-09 23 | 24 | * Add users and groups mmrepquota (#59) 25 | 26 | ## 2.5.1 / 2023-07-05 27 | 28 | * Skip NODE mmhealth status if filtering out related event (#61) 29 | 30 | ## 2.5.0 / 2023-06-20 31 | 32 | * Support filtering mmhealth by events (#60) 33 | 34 | ## 2.4.0 / 2023-05-06 35 | 36 | * Update to Go 1.20.3 and update Go dependencies (#58) 37 | 38 | ## 2.3.0 / 2023-04-30 39 | 40 | * Add mmlsqos collector (#56) 41 | 42 | ## 2.2.0 / 2022-10-13 43 | 44 | * Allow sudo command to be changed (#54) 45 | 46 | ## 2.1.0 / 2022-09-19 47 | 48 | * Update mmdf collector to collect pool data 49 | 50 | ## 2.0.0 / 2022-03-08 51 | 52 | * [BREAKING] Change how waiter metrics are presented and stored 53 | * Replace `gpfs_mmdiag_waiter` with `gpfs_waiter_seconds` that is a histogram, no longer use `thread` label 54 | * Replace `gpfs_mmdiag_waiter_info` with `gpfs_waiter_info_count` that is count of waiter name without `thread` or `reason` labels 55 | * Add flag `--collector.waiter.buckets` to define histogram buckets 56 | * Add flag `--collector.waiter.log-reason` to enable logging of waiter reasons 57 | * [BREAKING] Rename `mmdiag` collector to `waiter` 58 | * Replace `--collector.mmdiag` with `--collector.waiter` 59 | * Replace `--no-collector.mmdiag` with `--no-collector.waiter` 60 | * Remove `--collector.mmdiag.waiter-threshold` flag 61 | * Replace `--collector.mmdiag.waiter-exclude` with `--collector.waiter.exclude` 62 | * Replace `--collector.mmdiag.timeout` with `--collector.waiter.timeout` 63 | * [BREAKING]
The waiter exclude will only compare against waiter name 64 | * Update Go to 1.17 65 | * Update Go module dependencies 66 | 67 | ## 2.0.0-rc.2 / 2021-09-23 68 | 69 | * [BREAKING] Change how waiter metrics are presented and stored 70 | * Replace `gpfs_mmdiag_waiter` with `gpfs_waiter_seconds` that is a histogram, no longer use `thread` label 71 | * Replace `gpfs_mmdiag_waiter_info` with `gpfs_waiter_info_count` that is count of waiter name without `thread` or `reason` labels 72 | * Add flag `--collector.waiter.buckets` to define histogram buckets 73 | * Add flag `--collector.waiter.log-reason` to enable logging of waiter reasons 74 | * [BREAKING] Rename `mmdiag` collector to `waiter` 75 | * Replace `--collector.mmdiag` with `--collector.waiter` 76 | * Replace `--no-collector.mmdiag` with `--no-collector.waiter` 77 | * Remove `--collector.mmdiag.waiter-threshold` flag 78 | * Replace `--collector.mmdiag.waiter-exclude` with `--collector.waiter.exclude` 79 | * Replace `--collector.mmdiag.timeout` with `--collector.waiter.timeout` 80 | * [BREAKING] The waiter exclude will only compare against waiter name 81 | 82 | ## 1.5.1 / 2021-06-17 83 | 84 | * Fix `mmdf` collector to still write last collection metric during errors 85 | 86 | ## 1.5.0 / 2021-06-07 87 | 88 | * Add `gpfs_mmdiag_waiter_info` metric 89 | 90 | ## 1.4.0 / 2021-05-27 91 | 92 | * Add config collector, enabled by default 93 | 94 | ## 1.3.0 / 2021-04-23 95 | 96 | ### Changes 97 | 98 | * Update to Go 1.16 99 | * Add mmlsfileset collector 100 | 101 | ## 1.2.0 / 2021-04-15 102 | 103 | ### Changes 104 | 105 | * Add mmlssnapshot collector 106 | 107 | ## 1.1.2 / 2021-04-12 108 | 109 | ### Bug fixes 110 | 111 | * Do not produce errors if no metadata is reported by mmdf 112 | 113 | ## 1.1.1 / 2021-03-31 114 | 115 | ### Bug fixes 116 | 117 | * Fix possible index out of range parsing errors with mmdf collector 118 | 119 | ## 1.1.0 / 2021-01-02 120 | 121 | ### Changes 122 | 123 | * Allow mmhealth items to be filtered out 
via CLI flags 124 | * Allow mmces services to be filtered out via CLI flags 125 | 126 | ## 1.0.0 / 2020-11-24 127 | 128 | ### **Breaking Changes** 129 | 130 | * Remove --exporter.use-cache flag and all caching logic 131 | * Rename several metrics to standardize naming conventions 132 | * gpfs_perf_read_bytes to gpfs_perf_read_bytes_total 133 | * gpfs_perf_write_bytes to gpfs_perf_write_bytes_total 134 | * gpfs_perf_operations to gpfs_perf_operations_total 135 | * gpfs_fs_inodes_allocated to gpfs_fs_allocated_inodes 136 | * gpfs_fs_inodes_free to gpfs_fs_free_inodes 137 | * gpfs_fs_inodes_total to gpfs_fs_total_inodes 138 | * gpfs_fs_inodes_used to gpfs_fs_used_inodes 139 | * gpfs_fs_total_inodes to gpfs_fs_inodes 140 | * gpfs_fs_total_bytes to gpfs_fs_size_bytes 141 | * gpfs_fs_metadata_total_bytes to gpfs_fs_metadata_size_bytes 142 | * Removed metrics that can be calculated using other metrics 143 | * gpfs_fs_metadata_free_percent 144 | * gpfs_fs_free_percent 145 | * Remove nodename label from gpfs_perf_* metrics, replace with gpfs_perf_info metric 146 | * mmces state metrics will have one metric per possible state, with active state having value 1 147 | * mmhealth status metrics will have one metric per possible status with active status having value 1 148 | 149 | ### Changes 150 | 151 | * Update to Go 1.15 and update all dependencies 152 | * Improved error handling for cron gpfs_mmdf_exporter 153 | * Add mmrepquota collector to collect quota information for filesets 154 | 155 | ## 0.11.1 / 2020-04-21 156 | 157 | * Fix mount collector to avoid false positives 158 | 159 | ## 0.11.0 / 2020-04-04 160 | 161 | * Improve timeout/error handling around mmlsfs and add tests 162 | 163 | ## 0.10.0 / 2020-04-04 164 | 165 | * Simplified timeout and error handling 166 | 167 | ## 0.9.0 / 2020-03-16 168 | 169 | ### Changes 170 | 171 | * Allow caching of metrics if errors or timeouts occur 172 | * Improved testing 173 | 174 | ## 0.8.0 / 2020-03-05 175 | 176 | ### Changes 177 | 
178 | * Add mmgetstate collector and metrics 179 | * Use promlog for logging 180 | 181 | ## 0.7.0 / 2020-03-02 182 | 183 | ### Changes 184 | 185 | * Add timeouts to all collectors 186 | 187 | ## 0.6.0 / 2020-02-25 188 | 189 | ### Changes 190 | 191 | * Update client_golang dependency 192 | * Testing improvements 193 | 194 | ## 0.5.0 / 2020-02-18 195 | 196 | ### Changes 197 | 198 | * Support excluding waiters 199 | 200 | ## 0.4.0 / 2020-02-17 201 | 202 | ### Changes 203 | 204 | * Refactor mmdiag waiter metric collection 205 | 206 | ## 0.3.1 / 2020-02-17 207 | 208 | ### Fixes 209 | 210 | * Avoid duplicate metrics for collector errors 211 | 212 | ## 0.3.0 / 2020-02-15 213 | 214 | ### Changes 215 | 216 | * Add mmdiag collector with waiters metric 217 | * Add mmces collector with service state metrics 218 | 219 | ## 0.2.0 / 2020-01-30 220 | 221 | ### Changes 222 | 223 | * Move all metrics to /metrics endpoint, remove /gpfs endpoint 224 | * Add --web.disable-exporter-metrics flag 225 | 226 | ## 0.1.0 / 2020-01-29 227 | 228 | ### Changes 229 | 230 | * Rename gpfs_mmhealth_state to gpfs_mmhealth_status 231 | * Add status label to mmhealth status metric 232 | 233 | ## 0.0.1 / 2020-01-29 234 | 235 | ### Changes 236 | 237 | * Initial Release 238 | 239 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Needs to be defined before including Makefile.common to auto-generate targets 2 | DOCKER_ARCHS ?= amd64 armv7 arm64 ppc64le s390x 3 | DOCKER_REPO ?= treydock 4 | export GOPATH ?= $(firstword $(subst :, ,$(shell go env GOPATH))) 5 | 6 | include Makefile.common 7 | 8 | DOCKER_IMAGE_NAME ?= gpfs_exporter 9 | 10 | coverage: 11 | go test -race -coverprofile=coverage.txt -covermode=atomic ./... 12 | -------------------------------------------------------------------------------- /Makefile.common: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Prometheus Authors 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | 15 | # A common Makefile that includes rules to be reused in different prometheus projects. 16 | # !!! Open PRs only against the prometheus/prometheus/Makefile.common repository! 17 | 18 | # Example usage : 19 | # Create the main Makefile in the root project directory. 20 | # include Makefile.common 21 | # customTarget: 22 | # @echo ">> Running customTarget" 23 | # 24 | 25 | # Ensure GOBIN is not set during build so that promu is installed to the correct path 26 | unexport GOBIN 27 | 28 | GO ?= go 29 | GOFMT ?= $(GO)fmt 30 | FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH))) 31 | GOOPTS ?= 32 | GOHOSTOS ?= $(shell $(GO) env GOHOSTOS) 33 | GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH) 34 | 35 | GO_VERSION ?= $(shell $(GO) version) 36 | GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION)) 37 | PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.') 38 | 39 | PROMU := $(FIRST_GOPATH)/bin/promu 40 | pkgs = ./... 
41 | 42 | ifeq (arm, $(GOHOSTARCH)) 43 | GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM) 44 | GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM) 45 | else 46 | GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) 47 | endif 48 | 49 | GOTEST := $(GO) test 50 | GOTEST_DIR := 51 | ifneq ($(CIRCLE_JOB),) 52 | ifneq ($(shell which gotestsum),) 53 | GOTEST_DIR := test-results 54 | GOTEST := gotestsum --junitfile $(GOTEST_DIR)/unit-tests.xml -- 55 | endif 56 | endif 57 | 58 | PROMU_VERSION ?= 0.14.0 59 | PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz 60 | 61 | SKIP_GOLANGCI_LINT := 62 | GOLANGCI_LINT := 63 | GOLANGCI_LINT_OPTS ?= 64 | GOLANGCI_LINT_VERSION ?= v1.51.2 65 | # golangci-lint only supports linux, darwin and windows platforms on i386/amd64. 66 | # windows isn't included here because of the path separator being different. 67 | ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) 68 | ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386)) 69 | # If we're in CI and there is an Actions file, that means the linter 70 | # is being run in Actions, so we don't need to run it here. 
71 | ifneq (,$(SKIP_GOLANGCI_LINT)) 72 | GOLANGCI_LINT := 73 | else ifeq (,$(CIRCLE_JOB)) 74 | GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint 75 | else ifeq (,$(wildcard .github/workflows/golangci-lint.yml)) 76 | GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint 77 | endif 78 | endif 79 | endif 80 | 81 | PREFIX ?= $(shell pwd) 82 | BIN_DIR ?= $(shell pwd) 83 | DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) 84 | DOCKERFILE_PATH ?= ./Dockerfile 85 | DOCKERBUILD_CONTEXT ?= ./ 86 | DOCKER_REPO ?= prom 87 | 88 | DOCKER_ARCHS ?= amd64 89 | 90 | BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) 91 | PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) 92 | TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS)) 93 | 94 | SANITIZED_DOCKER_IMAGE_TAG := $(subst +,-,$(DOCKER_IMAGE_TAG)) 95 | 96 | ifeq ($(GOHOSTARCH),amd64) 97 | ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows)) 98 | # Only supported on amd64 99 | test-flags := -race 100 | endif 101 | endif 102 | 103 | # This rule is used to forward a target like "build" to "common-build". This 104 | # allows a new "build" target to be defined in a Makefile which includes this 105 | # one and override "common-build" without override warnings. 106 | %: common-% ; 107 | 108 | .PHONY: common-all 109 | common-all: precheck style check_license lint yamllint unused build test 110 | 111 | .PHONY: common-style 112 | common-style: 113 | @echo ">> checking code style" 114 | @fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \ 115 | if [ -n "$${fmtRes}" ]; then \ 116 | echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \ 117 | echo "Please ensure you are using $$($(GO) version) for formatting code."; \ 118 | exit 1; \ 119 | fi 120 | 121 | .PHONY: common-check_license 122 | common-check_license: 123 | @echo ">> checking license header" 124 | @licRes=$$(for file in $$(find . -type f -iname '*.go' ! 
-path './vendor/*') ; do \ 125 | awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \ 126 | done); \ 127 | if [ -n "$${licRes}" ]; then \ 128 | echo "license header checking failed:"; echo "$${licRes}"; \ 129 | exit 1; \ 130 | fi 131 | 132 | .PHONY: common-deps 133 | common-deps: 134 | @echo ">> getting dependencies" 135 | $(GO) mod download 136 | 137 | .PHONY: update-go-deps 138 | update-go-deps: 139 | @echo ">> updating Go dependencies" 140 | @for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \ 141 | $(GO) get -d $$m; \ 142 | done 143 | $(GO) mod tidy 144 | 145 | .PHONY: common-test-short 146 | common-test-short: $(GOTEST_DIR) 147 | @echo ">> running short tests" 148 | $(GOTEST) -short $(GOOPTS) $(pkgs) 149 | 150 | .PHONY: common-test 151 | common-test: $(GOTEST_DIR) 152 | @echo ">> running all tests" 153 | $(GOTEST) $(test-flags) $(GOOPTS) $(pkgs) 154 | 155 | $(GOTEST_DIR): 156 | @mkdir -p $@ 157 | 158 | .PHONY: common-format 159 | common-format: 160 | @echo ">> formatting code" 161 | $(GO) fmt $(pkgs) 162 | 163 | .PHONY: common-vet 164 | common-vet: 165 | @echo ">> vetting code" 166 | $(GO) vet $(GOOPTS) $(pkgs) 167 | 168 | .PHONY: common-lint 169 | common-lint: $(GOLANGCI_LINT) 170 | ifdef GOLANGCI_LINT 171 | @echo ">> running golangci-lint" 172 | # 'go list' needs to be executed before staticcheck to prepopulate the modules cache. 173 | # Otherwise staticcheck might fail randomly for some reason not yet explained. 174 | $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null 175 | $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs) 176 | endif 177 | 178 | .PHONY: common-yamllint 179 | common-yamllint: 180 | @echo ">> running yamllint on all YAML files in the repository" 181 | ifeq (, $(shell which yamllint)) 182 | @echo "yamllint not installed so skipping" 183 | else 184 | yamllint . 
185 | endif 186 | 187 | # For backward-compatibility. 188 | .PHONY: common-staticcheck 189 | common-staticcheck: lint 190 | 191 | .PHONY: common-unused 192 | common-unused: 193 | @echo ">> running check for unused/missing packages in go.mod" 194 | $(GO) mod tidy 195 | @git diff --exit-code -- go.sum go.mod 196 | 197 | .PHONY: common-build 198 | common-build: promu 199 | @echo ">> building binaries" 200 | $(PROMU) build --prefix $(PREFIX) $(PROMU_BINARIES) 201 | 202 | .PHONY: common-tarball 203 | common-tarball: promu 204 | @echo ">> building release tarball" 205 | $(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) 206 | 207 | .PHONY: common-docker $(BUILD_DOCKER_ARCHS) 208 | common-docker: $(BUILD_DOCKER_ARCHS) 209 | $(BUILD_DOCKER_ARCHS): common-docker-%: 210 | docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ 211 | -f $(DOCKERFILE_PATH) \ 212 | --build-arg ARCH="$*" \ 213 | --build-arg OS="linux" \ 214 | $(DOCKERBUILD_CONTEXT) 215 | 216 | .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) 217 | common-docker-publish: $(PUBLISH_DOCKER_ARCHS) 218 | $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: 219 | docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" 220 | 221 | DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION))) 222 | .PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) 223 | common-docker-tag-latest: $(TAG_DOCKER_ARCHS) 224 | $(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: 225 | docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest" 226 | docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)" 227 | 228 | .PHONY: common-docker-manifest 229 | common-docker-manifest: 230 | DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a 
"$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)) 231 | DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" 232 | 233 | .PHONY: promu 234 | promu: $(PROMU) 235 | 236 | $(PROMU): 237 | $(eval PROMU_TMP := $(shell mktemp -d)) 238 | curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP) 239 | mkdir -p $(FIRST_GOPATH)/bin 240 | cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu 241 | rm -r $(PROMU_TMP) 242 | 243 | .PHONY: proto 244 | proto: 245 | @echo ">> generating code from proto files" 246 | @./scripts/genproto.sh 247 | 248 | ifdef GOLANGCI_LINT 249 | $(GOLANGCI_LINT): 250 | mkdir -p $(FIRST_GOPATH)/bin 251 | curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \ 252 | | sed -e '/install -d/d' \ 253 | | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) 254 | endif 255 | 256 | .PHONY: precheck 257 | precheck:: 258 | 259 | define PRECHECK_COMMAND_template = 260 | precheck:: $(1)_precheck 261 | 262 | PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1))) 263 | .PHONY: $(1)_precheck 264 | $(1)_precheck: 265 | @if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \ 266 | echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. 
Is $(1) installed?"; \ 267 | exit 1; \ 268 | fi 269 | endef 270 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPFS Prometheus exporter 2 | 3 | [![Build Status](https://circleci.com/gh/treydock/gpfs_exporter/tree/master.svg?style=shield)](https://circleci.com/gh/treydock/gpfs_exporter) 4 | [![GitHub release](https://img.shields.io/github/v/release/treydock/gpfs_exporter?include_prereleases&sort=semver)](https://github.com/treydock/gpfs_exporter/releases/latest) 5 | ![GitHub All Releases](https://img.shields.io/github/downloads/treydock/gpfs_exporter/total) 6 | [![Go Report Card](https://goreportcard.com/badge/github.com/treydock/gpfs_exporter)](https://goreportcard.com/report/github.com/treydock/gpfs_exporter) 7 | [![codecov](https://codecov.io/gh/treydock/gpfs_exporter/branch/master/graph/badge.svg)](https://codecov.io/gh/treydock/gpfs_exporter) 8 | 9 | # GPFS Prometheus exporter 10 | 11 | The GPFS exporter collects metrics from the GPFS filesystem. 12 | The exporter supports the `/metrics` endpoint to gather GPFS metrics and metrics about the exporter. 13 | 14 | ## Collectors 15 | 16 | Collectors are enabled or disabled via `--collector.` and `--no-collector.` flags. 17 | 18 | Name | Description | Default 19 | -----|-------------|-------- 20 | mmgetstate | Collect state via mmgetstate | Enabled 21 | mmpmon| Collect metrics from `mmpmon` using `fs_io_s` | Enabled 22 | mount | Check status of GPFS mounts. | Enabled 23 | config | Collect configs via 'mmdiag --config' | Enabled 24 | verbs | Test if GPFS is using verbs interface | Disabled 25 | mmhealth | Test node health through `mmhealth` | Disabled 26 | waiter | Collect waiters via 'mmdiag --waiters' | Disabled 27 | mmdf | Collect filesystem space for inodes, block and metadata. 
| Disabled 28 | mmces | Collect state of CES | Disabled 29 | mmrepquota | Collect fileset quota information | Disabled 30 | mmlssnapshot | Collect GPFS snapshot information | Disabled 31 | mmlsfileset | Collect GPFS fileset information | Disabled 32 | mmlsqos | Collect GPFS I/O performance values of a file system, when you enable Quality of Service | Disabled 33 | mmlspool | Collect GPFS pool data | Disabled 34 | 35 | ### mount 36 | 37 | The default behavior of the `mount` collector is to collect mount statuses on GPFS mounts in /proc/mounts or /etc/fstab. The `--collector.mount.mounts` flag can be used to adjust which mount points to check. 38 | 39 | ### mmhealth 40 | 41 | The mmhealth statuses and events collected can be filtered with the following flags that all take a regex. 42 | 43 | * `--collector.mmhealth.ignored-component` - The component regex to ignore. 44 | * `--collector.mmhealth.ignored-entityname` - The entity name regex to ignore. 45 | * `--collector.mmhealth.ignored-entitytype` - The entity type regex to ignore. 46 | * `--collector.mmhealth.ignored-event` - The event regex to ignore. 47 | 48 | ### waiter 49 | 50 | The waiter's seconds are stored in Histogram buckets defined by `--collector.waiter.buckets` which is a comma separated list of durations that are converted to seconds so `1s,5s,30s,1m` would have buckets of `[]float64{1,5,30,60}`. 51 | 52 | The flag `--collector.waiter.exclude` defines a regular expression of waiter names to exclude. 53 | 54 | The flag `--collector.waiter.log-reason` can enable logging of waiter reasons. The reason can produce very high cardinality so it is not included in metrics. 55 | 56 | ### mmdf 57 | 58 | Because `mmdf` can take a long time to execute, an executable is provided that can be used to collect mmdf metrics via cron instead of during a Prometheus scrape. See `gpfs_mmdf_exporter`. 
59 | 60 | Flags: 61 | 62 | * `--output` - This is expected to be a path collected by the Prometheus node_exporter textfile collector 63 | * `--collector.mmdf.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems listed by `mmlsfs`. 64 | 65 | ### mmces 66 | 67 | The command used to collect CES states needs a specific node name. 68 | The `--collector.mmces.nodename` flag can be used to specify which CES node to check. 69 | The default is the FQDN of the host running the exporter. 70 | 71 | ### mmrepquota 72 | 73 | * `--collector.mmrepquota.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems. 74 | * `--collector.mmrepquota.quota-types` - Comma separated list of filesystem types to collect (`fileset` for FILESET, `user` for USR, `group` for GRP). Default is FILESET only. Ex: `fileset,user` collects FILESET and USR. 75 | 76 | ### mmlssnapshot 77 | 78 | * `--collector.mmlssnapshot.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems listed by `mmlsfs`. 79 | * `--collector.mmlssnapshot.get-size` - Pass this flag to collect snapshot sizes. This operation could take a long time depending on filesystem size, consider using `gpfs_mmlssnapshot_exporter` instead. 80 | 81 | The exporter `gpfs_mmlssnapshot_exporter` is provided to allow snapshot collection, including size (with `--collector.mmlssnapshot.get-size`) to be collected with cron rather than a Prometheus scrape through the normal exporter. 82 | 83 | ### mmlsfileset 84 | 85 | * `--collector.mmlsfileset.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems listed by `mmlsfs`. 86 | 87 | **NOTE**: This collector does not collect used inodes. To get used inodes look at using the [mmrepquota](#mmrepquota) collector. 
88 | 89 | ### mmlsqos 90 | 91 | Displays the I/O performance values of a file system, when you enable Quality of Service for I/O operations (QoS) with the mmchqos command. 92 | 93 | Flags: 94 | * `--collector.mmlsqos.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems listed by `mmlsfs`. 95 | * `--collector.mmlsqos.timeout` - Count of seconds for running mmlsqos command before timeout error will be raised. Default value is 60 seconds. 96 | * `--collector.mmlsqos.seconds` - Displays the I/O performance values for the previous number of seconds. The valid range of seconds is 1-999. The default value is 60 seconds. 97 | 98 | ### mmlspool 99 | 100 | Collects GPFS pool data 101 | 102 | Flags: 103 | * `--collector.mmlspool.filesystems` - A comma separated list of filesystems to collect. Default is to collect all filesystems listed by `mmlsfs`. 104 | * `--collector.mmlspool.timeout` - Count of seconds for running mmlspool command before timeout error will be raised. Default value is 30 seconds. 105 | 106 | ## Sudo 107 | 108 | Ensure the user running `gpfs_exporter` can execute GPFS commands necessary to collect metrics. 109 | The following sudo config assumes `gpfs_exporter` is running as `gpfs_exporter`. 
110 | 111 | ``` 112 | Defaults:gpfs_exporter !syslog 113 | Defaults:gpfs_exporter !requiretty 114 | # mmgetstate collector 115 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmgetstate -Y 116 | # mmpmon collector 117 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmpmon -s -p 118 | # config collector 119 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmdiag --config -Y 120 | # mmhealth collector 121 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmhealth node show -Y 122 | # verbs collector 123 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmfsadm test verbs status 124 | # mmdf/mmlssnapshot collector if filesystems not specified 125 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlsfs all -Y -T 126 | # waiter collector 127 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmdiag --waiters -Y 128 | # mmces collector 129 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmces state show * 130 | # mmdf collector, each filesystem must be listed 131 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmdf project -Y 132 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmdf scratch -Y 133 | # mmrepquota collector, filesystems not specified 134 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmrepquota -j -Y -a 135 | # mmrepquota collector, filesystems specified 136 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmrepquota -j -Y project scratch 137 | # mmlssnapshot collector, each filesystem must be listed 138 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlssnapshot project -s all -Y 139 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlssnapshot ess -s all -Y 140 | # mmlsfileset collector, each filesystem must be listed 141 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlsfileset project -Y 142 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlsfileset ess -Y 143 | # mmlsqos collector, each filesystem must be listed 144 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlsqos 
mmfs1 -Y 145 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlsqos ess -Y 146 | # mmlspool collector, each filesystem must be listed 147 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlspool mmfs1 -Y 148 | gpfs_exporter ALL=(ALL) NOPASSWD:/usr/lpp/mmfs/bin/mmlspool ess -Y 149 | ``` 150 | 151 | ## Install 152 | 153 | Download the [latest release](https://github.com/treydock/gpfs_exporter/releases) 154 | 155 | Add the user that will run `gpfs_exporter` 156 | 157 | ``` 158 | groupadd -r gpfs_exporter 159 | useradd -r -d /var/lib/gpfs_exporter -s /sbin/nologin -M -g gpfs_exporter gpfs_exporter 160 | ``` 161 | 162 | Install compiled binaries after extracting tar.gz from release page. 163 | 164 | ``` 165 | cp /tmp/gpfs_exporter /usr/local/bin/gpfs_exporter 166 | ``` 167 | 168 | Add sudo rules, see [sudo section](#sudo) 169 | 170 | Add systemd unit file and start service. Modify the `ExecStart` with desired flags. 171 | 172 | ``` 173 | cp systemd/gpfs_exporter.service /etc/systemd/system/gpfs_exporter.service 174 | systemctl daemon-reload 175 | systemctl start gpfs_exporter 176 | ``` 177 | 178 | ## Build from source 179 | 180 | To produce the `gpfs_exporter`, `gpfs_mmdf_exporter`, and `gpfs_mmlssnapshot_exporter` binaries: 181 | 182 | ``` 183 | make build 184 | ``` 185 | 186 | Or 187 | 188 | ``` 189 | go get github.com/treydock/gpfs_exporter/cmd/gpfs_exporter 190 | go get github.com/treydock/gpfs_exporter/cmd/gpfs_mmdf_exporter 191 | go get github.com/treydock/gpfs_exporter/cmd/gpfs_mmlssnapshot_exporter 192 | ``` 193 | 194 | ## TLS and basic auth 195 | 196 | `gpfs_exporter` supports TLS and basic auth using [exporter-toolkit](https://github.com/prometheus/exporter-toolkit). 
To use TLS and/or basic auth, users need to use `--web.config.file` CLI flag as follows 197 | 198 | ``` 199 | gpfs_exporter --web.config.file=web-config.yaml 200 | ``` 201 | 202 | A sample `web-config.yaml` file can be fetched from [exporter-toolkit repository](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-config.yml). The reference of the `web-config.yaml` file can be consulted in the [docs](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md). 203 | 204 | ## Grafana 205 | 206 | There is an example [GPFS Performance](https://grafana.com/grafana/dashboards/14844) dashboard. See the description on that dashboard for additional information on labels needed to utilize that dashboard. 207 | 208 | ## Prometheus Configuration 209 | 210 | This is an example scrape config with some metrics excluded for HPC compute nodes with label `role=compute`: 211 | 212 | ```yaml 213 | - job_name: gpfs 214 | scrape_timeout: 2m 215 | scrape_interval: 3m 216 | relabel_configs: 217 | - source_labels: [__address__] 218 | regex: "([^.]+)..*" 219 | replacement: "$1" 220 | target_label: host 221 | metric_relabel_configs: 222 | - source_labels: [__name__,role] 223 | regex: gpfs_(mount|health|verbs)_status;compute 224 | action: drop 225 | - source_labels: [__name__,collector,role] 226 | regex: gpfs_exporter_(collect_error|collector_duration_seconds);(mmhealth|mount|verbs);compute 227 | action: drop 228 | - source_labels: [__name__,role] 229 | regex: "^(go|process|promhttp)_.*;compute" 230 | action: drop 231 | file_sd_configs: 232 | - files: 233 | - "/etc/prometheus/file_sd_config.d/gpfs_*.yaml" 234 | ``` 235 | 236 | An example scrape target configuration: 237 | 238 | ```yaml 239 | - targets: 240 | - c0001.example.com:9303 241 | labels: 242 | host: c0001 243 | cluster: example 244 | environment: production 245 | role: compute 246 | ``` 247 | -------------------------------------------------------------------------------- /VERSION: 
-------------------------------------------------------------------------------- 1 | 3.1.0 2 | -------------------------------------------------------------------------------- /cmd/gpfs_exporter/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package main 15 | 16 | import ( 17 | "fmt" 18 | "net/http" 19 | "os" 20 | 21 | "github.com/alecthomas/kingpin/v2" 22 | "github.com/go-kit/log" 23 | "github.com/go-kit/log/level" 24 | "github.com/prometheus/client_golang/prometheus" 25 | "github.com/prometheus/client_golang/prometheus/promhttp" 26 | "github.com/prometheus/common/promlog" 27 | "github.com/prometheus/common/promlog/flag" 28 | "github.com/prometheus/common/version" 29 | "github.com/prometheus/exporter-toolkit/web" 30 | "github.com/prometheus/exporter-toolkit/web/kingpinflag" 31 | "github.com/treydock/gpfs_exporter/collectors" 32 | ) 33 | 34 | var ( 35 | listenAddr = ":9303" 36 | disableExporterMetrics = kingpin.Flag("web.disable-exporter-metrics", "Exclude metrics about the exporter (promhttp_*, process_*, go_*)").Default("false").Bool() 37 | ) 38 | 39 | func metricsHandler(logger log.Logger) http.HandlerFunc { 40 | return func(w http.ResponseWriter, r *http.Request) { 41 | registry := prometheus.NewRegistry() 42 | 43 | gpfsCollector := collectors.NewGPFSCollector(logger) 44 | 
gpfsCollector.Lock() 45 | defer gpfsCollector.Unlock() 46 | for key, collector := range gpfsCollector.Collectors { 47 | level.Debug(logger).Log("msg", fmt.Sprintf("Enabled collector %s", key)) 48 | registry.MustRegister(collector) 49 | } 50 | 51 | gatherers := prometheus.Gatherers{registry} 52 | if !*disableExporterMetrics { 53 | gatherers = append(gatherers, prometheus.DefaultGatherer) 54 | } 55 | 56 | // Delegate http serving to Prometheus client library, which will call collector.Collect. 57 | h := promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{}) 58 | h.ServeHTTP(w, r) 59 | } 60 | } 61 | 62 | func main() { 63 | var toolkitFlags = kingpinflag.AddFlags(kingpin.CommandLine, listenAddr) 64 | 65 | promlogConfig := &promlog.Config{} 66 | flag.AddFlags(kingpin.CommandLine, promlogConfig) 67 | kingpin.Version(version.Print("gpfs_exporter")) 68 | kingpin.HelpFlag.Short('h') 69 | kingpin.Parse() 70 | 71 | logger := promlog.New(promlogConfig) 72 | level.Info(logger).Log("msg", "Starting gpfs_exporter", "version", version.Info()) 73 | level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext()) 74 | level.Info(logger).Log("msg", "Starting Server", "address", listenAddr) 75 | 76 | http.Handle("/metrics", metricsHandler(logger)) 77 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 78 | w.Write([]byte(` 79 | GPFS Exporter 80 | 81 |

GPFS Metrics Exporter

82 |

Metrics

83 | 84 | `)) 85 | }) 86 | server := &http.Server{} 87 | if err := web.ListenAndServe(server, toolkitFlags, logger); err != nil { 88 | level.Error(logger).Log("err", err) 89 | os.Exit(1) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /cmd/gpfs_exporter/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package main 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "io" 20 | "net/http" 21 | "os" 22 | "strings" 23 | "testing" 24 | "time" 25 | 26 | "github.com/alecthomas/kingpin/v2" 27 | "github.com/go-kit/log" 28 | "github.com/treydock/gpfs_exporter/collectors" 29 | ) 30 | 31 | const ( 32 | address = "localhost:19303" 33 | ) 34 | 35 | var ( 36 | mmpmonStdout = ` 37 | _fs_io_s_ _n_ 10.22.0.106 _nn_ ib-pitzer-rw02.ten _rc_ 0 _t_ 1579358234 _tu_ 53212 _cl_ gpfs.domain _fs_ scratch _d_ 48 _br_ 205607400434 _bw_ 74839282351 _oc_ 2377656 _cc_ 2201576 _rdc_ 59420404 _wc_ 18874626 _dir_ 40971 _iu_ 544768 38 | _fs_io_s_ _n_ 10.22.0.106 _nn_ ib-pitzer-rw02.ten _rc_ 0 _t_ 1579358234 _tu_ 53212 _cl_ gpfs.domain _fs_ project _d_ 96 _br_ 0 _bw_ 0 _oc_ 513 _cc_ 513 _rdc_ 0 _wc_ 0 _dir_ 0 _iu_ 169 39 | ` 40 | mmgetstateStdout = ` 41 | mmgetstate::HEADER:version:reserved:reserved:nodeName:nodeNumber:state:quorum:nodesUp:totalNodes:remarks:cnfsState: 42 | mmgetstate::0:1:::ib-proj-nsd05.domain:11:active:4:7:1122::(undefined): 43 | ` 44 | configStdout = ` 45 | mmdiag:config:HEADER:version:reserved:reserved:name:value:changed: 46 | mmdiag:config:0:1:::opensslLibName:/usr/lib64/libssl.so.10%3A/usr/lib64/libssl.so.6%3A/usr/lib64/libssl.so.0.9.8%3A/lib64/libssl.so.6%3Alibssl.so%3Alibss 47 | l.so.0%3Alibssl.so.4%3A/lib64/libssl.so.1.0.0:: 48 | mmdiag:config:0:1:::pagepool:4294967296:static: 49 | mmdiag:config:0:1:::pagepoolMaxPhysMemPct:75:: 50 | mmdiag:config:0:1:::parallelMetadataWrite:0:: 51 | ` 52 | ) 53 | 54 | func TestMain(m *testing.M) { 55 | if _, err := kingpin.CommandLine.Parse([]string{}); err != nil { 56 | os.Exit(1) 57 | } 58 | varTrue := true 59 | disableExporterMetrics = &varTrue 60 | go func() { 61 | http.Handle("/metrics", metricsHandler(log.NewNopLogger())) 62 | err := http.ListenAndServe(address, nil) 63 | if err != nil { 64 | os.Exit(1) 65 | } 66 | }() 67 | time.Sleep(1 * time.Second) 68 | 69 | exitVal := m.Run() 70 | 71 | os.Exit(exitVal) 72 | } 73 | 74 | 
func TestMetricsHandler(t *testing.T) { 75 | collectors.MmgetstateExec = func(ctx context.Context) (string, error) { 76 | return mmgetstateStdout, nil 77 | } 78 | collectors.MmpmonExec = func(ctx context.Context) (string, error) { 79 | return mmpmonStdout, nil 80 | } 81 | collectors.MmdiagExec = func(arg string, ctx context.Context) (string, error) { 82 | return configStdout, nil 83 | } 84 | body, err := queryExporter() 85 | if err != nil { 86 | t.Fatalf("Unexpected error GET /metrics: %s", err.Error()) 87 | } 88 | if !strings.Contains(body, "gpfs_exporter_collect_error{collector=\"mount\"} 0") { 89 | t.Errorf("Unexpected value for gpfs_exporter_collect_error") 90 | } 91 | } 92 | 93 | func queryExporter() (string, error) { 94 | resp, err := http.Get(fmt.Sprintf("http://%s/metrics", address)) 95 | if err != nil { 96 | return "", err 97 | } 98 | b, err := io.ReadAll(resp.Body) 99 | if err != nil { 100 | return "", err 101 | } 102 | if err := resp.Body.Close(); err != nil { 103 | return "", err 104 | } 105 | if want, have := http.StatusOK, resp.StatusCode; want != have { 106 | return "", fmt.Errorf("want /metrics status code %d, have %d. Body:\n%s", want, have, b) 107 | } 108 | return string(b), nil 109 | } 110 | -------------------------------------------------------------------------------- /cmd/gpfs_mmdf_exporter/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package main 15 | 16 | import ( 17 | "fmt" 18 | "os" 19 | "path/filepath" 20 | "sort" 21 | "strings" 22 | 23 | "github.com/alecthomas/kingpin/v2" 24 | "github.com/go-kit/log" 25 | "github.com/go-kit/log/level" 26 | "github.com/gofrs/flock" 27 | "github.com/prometheus/client_golang/prometheus" 28 | dto "github.com/prometheus/client_model/go" 29 | "github.com/prometheus/common/expfmt" 30 | "github.com/prometheus/common/promlog" 31 | "github.com/prometheus/common/promlog/flag" 32 | "github.com/prometheus/common/version" 33 | "github.com/treydock/gpfs_exporter/collectors" 34 | ) 35 | 36 | var ( 37 | output = kingpin.Flag("output", "Path to node exporter collected file").Required().String() 38 | lockFile = kingpin.Flag("lockfile", "Lock file path").Default("/tmp/gpfs_mmdf_exporter.lock").String() 39 | ) 40 | 41 | func writeMetrics(mfs []*dto.MetricFamily, logger log.Logger) error { 42 | tmp, err := os.CreateTemp(filepath.Dir(*output), filepath.Base(*output)) 43 | if err != nil { 44 | level.Error(logger).Log("msg", "Unable to create temp file", "err", err) 45 | return err 46 | } 47 | defer os.Remove(tmp.Name()) 48 | for _, mf := range mfs { 49 | if _, err := expfmt.MetricFamilyToText(tmp, mf); err != nil { 50 | level.Error(logger).Log("msg", "Error generating metric text", "err", err) 51 | return err 52 | } 53 | } 54 | if err := tmp.Close(); err != nil { 55 | level.Error(logger).Log("msg", "Error closing tmp file", "err", err) 56 | return err 57 | } 58 | if err := os.Chmod(tmp.Name(), 0644); err != nil { 59 | level.Error(logger).Log("msg", "Error executing chmod 0644 on tmp file", "err", err) 60 | return err 61 | } 62 | level.Debug(logger).Log("msg", "Renaming temp file to output", "temp", tmp.Name(), "output", *output) 63 | if err := os.Rename(tmp.Name(), *output); err != nil { 64 | level.Error(logger).Log("msg", "Error renaming tmp file to 
output", "err", err) 65 | return err 66 | } 67 | return nil 68 | } 69 | 70 | func collect(logger log.Logger) error { 71 | registry := prometheus.NewRegistry() 72 | registry.MustRegister(collectors.NewMmdfCollector(logger)) 73 | var newMfs []*dto.MetricFamily 74 | var failures []string 75 | mfs, err := registry.Gather() 76 | if err != nil { 77 | level.Error(logger).Log("msg", "Error executing Gather", "err", err) 78 | return err 79 | } 80 | for _, mf := range mfs { 81 | if strings.HasPrefix(mf.GetName(), "gpfs_exporter") { 82 | newMfs = append(newMfs, mf) 83 | } 84 | if mf.GetName() != "gpfs_exporter_collect_error" && mf.GetName() != "gpfs_exporter_collect_timeout" { 85 | continue 86 | } 87 | for _, m := range mf.GetMetric() { 88 | if m.GetGauge().GetValue() != 1 { 89 | continue 90 | } 91 | for _, l := range m.GetLabel() { 92 | if l.GetName() == "collector" && strings.HasPrefix(l.GetValue(), "mmdf-") { 93 | failures = append(failures, l.GetValue()) 94 | } 95 | } 96 | } 97 | } 98 | 99 | if len(failures) != 0 && collectors.FileExists(*output) { 100 | file, err := os.Open(*output) 101 | if err != nil { 102 | level.Error(logger).Log("msg", "Error opening metrics file", "err", err) 103 | goto failure 104 | } 105 | parser := expfmt.TextParser{} 106 | prevMfs, err := parser.TextToMetricFamilies(file) 107 | file.Close() 108 | if err != nil { 109 | level.Error(logger).Log("msg", "Error parsing output metrics", "err", err) 110 | goto failure 111 | } 112 | keys := make([]string, 0, len(prevMfs)) 113 | for k := range prevMfs { 114 | keys = append(keys, k) 115 | } 116 | sort.Strings(keys) 117 | for _, n := range keys { 118 | mf := prevMfs[n] 119 | if !strings.HasPrefix(n, "gpfs_exporter") { 120 | newMfs = append(newMfs, mf) 121 | } 122 | } 123 | } else { 124 | newMfs = mfs 125 | } 126 | 127 | if err := writeMetrics(newMfs, logger); err != nil { 128 | return err 129 | } 130 | if len(failures) != 0 { 131 | return fmt.Errorf("Error with collection") 132 | } 133 | return nil 134 | 
135 | failure: 136 | if err := writeMetrics(mfs, logger); err != nil { 137 | return err 138 | } 139 | return err 140 | } 141 | 142 | func main() { 143 | promlogConfig := &promlog.Config{} 144 | flag.AddFlags(kingpin.CommandLine, promlogConfig) 145 | kingpin.Version(version.Print("gpfs_exporter")) 146 | kingpin.HelpFlag.Short('h') 147 | kingpin.Parse() 148 | 149 | logger := promlog.New(promlogConfig) 150 | 151 | fileLock := flock.New(*lockFile) 152 | locked, err := fileLock.TryLock() 153 | if err != nil { 154 | level.Error(logger).Log("msg", "Unable to obtain lock on lock file", "lockfile", *lockFile) 155 | level.Error(logger).Log("msg", err) 156 | os.Exit(1) 157 | } 158 | if !locked { 159 | level.Error(logger).Log("msg", fmt.Sprintf("Lock file %s is locked", *lockFile)) 160 | os.Exit(1) 161 | } 162 | err = collect(logger) 163 | if err != nil { 164 | os.Exit(1) 165 | } 166 | _ = fileLock.Unlock() 167 | } 168 | -------------------------------------------------------------------------------- /cmd/gpfs_mmdf_exporter/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package main 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os" 20 | "strings" 21 | "testing" 22 | 23 | "github.com/alecthomas/kingpin/v2" 24 | "github.com/go-kit/log" 25 | "github.com/treydock/gpfs_exporter/collectors" 26 | ) 27 | 28 | var ( 29 | outputPath string 30 | mmdfStdout = ` 31 | mmdf:nsd:HEADER:version:reserved:reserved:nsdName:storagePool:diskSize:failureGroup:metadata:data:freeBlocks:freeBlocksPct:freeFragments:freeFragmentsPct:diskAvailableForAlloc: 32 | mmdf:poolTotal:HEADER:version:reserved:reserved:poolName:poolSize:freeBlocks:freeBlocksPct:freeFragments:freeFragmentsPct:maxDiskSize: 33 | mmdf:data:HEADER:version:reserved:reserved:totalData:freeBlocks:freeBlocksPct:freeFragments:freeFragmentsPct: 34 | mmdf:metadata:HEADER:version:reserved:reserved:totalMetadata:freeBlocks:freeBlocksPct:freeFragments:freeFragmentsPct: 35 | mmdf:fsTotal:HEADER:version:reserved:reserved:fsSize:freeBlocks:freeBlocksPct:freeFragments:freeFragmentsPct: 36 | mmdf:inode:HEADER:version:reserved:reserved:usedInodes:freeInodes:allocatedInodes:maxInodes: 37 | mmdf:nsd:0:1:::P_META_VD102:system:771751936:300:Yes:No:320274944:41:5005384:1:: 38 | mmdf:nsd:0:1:::P_DATA_VD02:data:46766489600:200:No:Yes:6092915712:13:154966272:0:: 39 | mmdf:poolTotal:0:1:::system:783308292096:380564840448:49:10024464464:1:1153081262080: 40 | mmdf:data:0:1:::3647786188800:475190722560:13:12059515296:0: 41 | mmdf:metadata:0:1:::13891534848:6011299328:43:58139768:0: 42 | mmdf:poolTotal:0:1:::data:3064453922816:1342362296320:44:1999215152:0:10143773212672: 43 | mmdf:fsTotal:0:1:::3661677723648:481202021888:14:12117655064:0: 44 | mmdf:inode:0:1:::430741822:484301506:915043328:1332164000: 45 | ` 46 | expected = ` 47 | # HELP gpfs_fs_allocated_inodes GPFS filesystem inodes allocated 48 | # TYPE gpfs_fs_allocated_inodes gauge 49 | gpfs_fs_allocated_inodes{fs="project"} 9.15043328e+08 50 | # HELP gpfs_fs_free_bytes GPFS filesystem free size in bytes 51 | # TYPE gpfs_fs_free_bytes gauge 52 | 
gpfs_fs_free_bytes{fs="project"} 4.92750870413312e+14 53 | # HELP gpfs_fs_free_inodes GPFS filesystem inodes free 54 | # TYPE gpfs_fs_free_inodes gauge 55 | gpfs_fs_free_inodes{fs="project"} 4.84301506e+08 56 | # HELP gpfs_fs_inodes GPFS filesystem inodes total 57 | # TYPE gpfs_fs_inodes gauge 58 | gpfs_fs_inodes{fs="project"} 1.332164e+09 59 | # HELP gpfs_fs_metadata_free_bytes GPFS metadata free size in bytes 60 | # TYPE gpfs_fs_metadata_free_bytes gauge 61 | gpfs_fs_metadata_free_bytes{fs="project"} 6.155570511872e+12 62 | # HELP gpfs_fs_metadata_size_bytes GPFS total metadata size in bytes 63 | # TYPE gpfs_fs_metadata_size_bytes gauge 64 | gpfs_fs_metadata_size_bytes{fs="project"} 1.4224931684352e+13 65 | # HELP gpfs_fs_pool_free_bytes GPFS pool free size in bytes 66 | # TYPE gpfs_fs_pool_free_bytes gauge 67 | gpfs_fs_pool_free_bytes{fs="project",pool="data"} 1.37457899143168e+15 68 | gpfs_fs_pool_free_bytes{fs="project",pool="system"} 3.89698396618752e+14 69 | # HELP gpfs_fs_pool_free_fragments_bytes GPFS pool free fragments in bytes 70 | # TYPE gpfs_fs_pool_free_fragments_bytes gauge 71 | gpfs_fs_pool_free_fragments_bytes{fs="project",pool="data"} 2.047196315648e+12 72 | gpfs_fs_pool_free_fragments_bytes{fs="project",pool="system"} 1.0265051611136e+13 73 | # HELP gpfs_fs_pool_max_disk_size_bytes GPFS pool max disk size in bytes 74 | # TYPE gpfs_fs_pool_max_disk_size_bytes gauge 75 | gpfs_fs_pool_max_disk_size_bytes{fs="project",pool="data"} 1.0387223769776128e+16 76 | gpfs_fs_pool_max_disk_size_bytes{fs="project",pool="system"} 1.18075521236992e+15 77 | # HELP gpfs_fs_pool_total_bytes GPFS pool total size in bytes 78 | # TYPE gpfs_fs_pool_total_bytes gauge 79 | gpfs_fs_pool_total_bytes{fs="project",pool="data"} 3.138000816963584e+15 80 | gpfs_fs_pool_total_bytes{fs="project",pool="system"} 8.02107691106304e+14 81 | # HELP gpfs_fs_size_bytes GPFS filesystem total size in bytes 82 | # TYPE gpfs_fs_size_bytes gauge 83 | gpfs_fs_size_bytes{fs="project"} 
3.749557989015552e+15 84 | # HELP gpfs_fs_used_inodes GPFS filesystem inodes used 85 | # TYPE gpfs_fs_used_inodes gauge 86 | gpfs_fs_used_inodes{fs="project"} 4.30741822e+08` 87 | expectedNoError = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 88 | # TYPE gpfs_exporter_collect_error gauge 89 | gpfs_exporter_collect_error{collector="mmdf-project"} 0 90 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 91 | # TYPE gpfs_exporter_collect_timeout gauge 92 | gpfs_exporter_collect_timeout{collector="mmdf-project"} 0` 93 | expectedError = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 94 | # TYPE gpfs_exporter_collect_error gauge 95 | gpfs_exporter_collect_error{collector="mmdf-project"} 1 96 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 97 | # TYPE gpfs_exporter_collect_timeout gauge 98 | gpfs_exporter_collect_timeout{collector="mmdf-project"} 0` 99 | expectedTimeout = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 100 | # TYPE gpfs_exporter_collect_error gauge 101 | gpfs_exporter_collect_error{collector="mmdf-project"} 0 102 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 103 | # TYPE gpfs_exporter_collect_timeout gauge 104 | gpfs_exporter_collect_timeout{collector="mmdf-project"} 1` 105 | ) 106 | 107 | func TestMain(m *testing.M) { 108 | tmpDir, err := os.MkdirTemp(os.TempDir(), "output") 109 | if err != nil { 110 | os.Exit(1) 111 | } 112 | outputPath = tmpDir + "/output" 113 | defer os.RemoveAll(tmpDir) 114 | if _, err := kingpin.CommandLine.Parse([]string{fmt.Sprintf("--output=%s", outputPath), "--collector.mmdf.filesystems=project"}); err != nil { 115 | os.Exit(1) 116 | } 117 | exitVal := m.Run() 118 | os.Exit(exitVal) 119 | } 120 | 121 | func TestCollect(t *testing.T) { 122 | collectors.MmdfExec = func(fs string, ctx context.Context) (string, error) { 123 | return mmdfStdout, 
nil 124 | } 125 | err := collect(log.NewNopLogger()) 126 | if err != nil { 127 | t.Errorf("Unexpected error: %s", err.Error()) 128 | return 129 | } 130 | content, err := os.ReadFile(outputPath) 131 | if err != nil { 132 | t.Errorf("Unexpected error: %s", err.Error()) 133 | return 134 | } 135 | if !strings.Contains(string(content), expected) { 136 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 137 | } 138 | if !strings.Contains(string(content), expectedNoError) { 139 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 140 | } 141 | } 142 | 143 | func TestCollectError(t *testing.T) { 144 | collectors.MmdfExec = func(fs string, ctx context.Context) (string, error) { 145 | return "", fmt.Errorf("Error") 146 | } 147 | w := log.NewSyncWriter(os.Stderr) 148 | logger := log.NewLogfmtLogger(w) 149 | err := collect(logger) 150 | if err == nil { 151 | t.Errorf("Expected error") 152 | return 153 | } 154 | content, err := os.ReadFile(outputPath) 155 | if err != nil { 156 | t.Errorf("Unexpected error: %s", err.Error()) 157 | return 158 | } 159 | if !strings.Contains(string(content), expected) { 160 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 161 | } 162 | if !strings.Contains(string(content), expectedError) { 163 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 164 | } 165 | } 166 | 167 | func TestCollectTimeout(t *testing.T) { 168 | collectors.MmdfExec = func(fs string, ctx context.Context) (string, error) { 169 | return "", context.DeadlineExceeded 170 | } 171 | w := log.NewSyncWriter(os.Stderr) 172 | logger := log.NewLogfmtLogger(w) 173 | err := collect(logger) 174 | if err == nil { 175 | t.Errorf("Expected error") 176 | return 177 | } 178 | content, err := os.ReadFile(outputPath) 179 | if err != nil { 180 | t.Errorf("Unexpected error: %s", err.Error()) 181 | return 182 | } 183 | if !strings.Contains(string(content), expected) { 
184 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 185 | } 186 | if !strings.Contains(string(content), expectedTimeout) { 187 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /cmd/gpfs_mmlssnapshot_exporter/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package main 15 | 16 | import ( 17 | "fmt" 18 | "os" 19 | "path/filepath" 20 | "sort" 21 | "strings" 22 | 23 | "github.com/alecthomas/kingpin/v2" 24 | "github.com/go-kit/log" 25 | "github.com/go-kit/log/level" 26 | "github.com/gofrs/flock" 27 | "github.com/prometheus/client_golang/prometheus" 28 | dto "github.com/prometheus/client_model/go" 29 | "github.com/prometheus/common/expfmt" 30 | "github.com/prometheus/common/promlog" 31 | "github.com/prometheus/common/promlog/flag" 32 | "github.com/prometheus/common/version" 33 | "github.com/treydock/gpfs_exporter/collectors" 34 | ) 35 | 36 | var ( 37 | output = kingpin.Flag("output", "Path to node exporter collected file").Required().String() 38 | lockFile = kingpin.Flag("lockfile", "Lock file path").Default("/tmp/gpfs_mmdf_exporter.lock").String() 39 | ) 40 | 41 | func writeMetrics(mfs []*dto.MetricFamily, logger log.Logger) error { 42 | tmp, err := os.CreateTemp(filepath.Dir(*output), filepath.Base(*output)) 43 | if err != nil { 44 | level.Error(logger).Log("msg", "Unable to create temp file", "err", err) 45 | return err 46 | } 47 | defer os.Remove(tmp.Name()) 48 | for _, mf := range mfs { 49 | if _, err := expfmt.MetricFamilyToText(tmp, mf); err != nil { 50 | level.Error(logger).Log("msg", "Error generating metric text", "err", err) 51 | return err 52 | } 53 | } 54 | if err := tmp.Close(); err != nil { 55 | level.Error(logger).Log("msg", "Error closing tmp file", "err", err) 56 | return err 57 | } 58 | if err := os.Chmod(tmp.Name(), 0644); err != nil { 59 | level.Error(logger).Log("msg", "Error executing chmod 0644 on tmp file", "err", err) 60 | return err 61 | } 62 | level.Debug(logger).Log("msg", "Renaming temp file to output", "temp", tmp.Name(), "output", *output) 63 | if err := os.Rename(tmp.Name(), *output); err != nil { 64 | level.Error(logger).Log("msg", "Error renaming tmp file to output", "err", err) 65 | return err 66 | } 67 | return nil 68 | } 69 | 70 | func collect(logger log.Logger) error { 71 | 
registry := prometheus.NewRegistry() 72 | registry.MustRegister(collectors.NewMmlssnapshotCollector(logger)) 73 | var newMfs []*dto.MetricFamily 74 | var failures []string 75 | mfs, err := registry.Gather() 76 | if err != nil { 77 | level.Error(logger).Log("msg", "Error executing Gather", "err", err) 78 | return err 79 | } 80 | for _, mf := range mfs { 81 | if strings.HasPrefix(mf.GetName(), "gpfs_exporter") { 82 | newMfs = append(newMfs, mf) 83 | } 84 | if mf.GetName() != "gpfs_exporter_collect_error" && mf.GetName() != "gpfs_exporter_collect_timeout" { 85 | continue 86 | } 87 | for _, m := range mf.GetMetric() { 88 | if m.GetGauge().GetValue() != 1 { 89 | continue 90 | } 91 | for _, l := range m.GetLabel() { 92 | if l.GetName() == "collector" && strings.HasPrefix(l.GetValue(), "mmlssnapshot-") { 93 | failures = append(failures, l.GetValue()) 94 | } 95 | } 96 | } 97 | } 98 | 99 | if len(failures) != 0 && collectors.FileExists(*output) { 100 | file, err := os.Open(*output) 101 | if err != nil { 102 | level.Error(logger).Log("msg", "Error opening metrics file", "err", err) 103 | goto failure 104 | } 105 | parser := expfmt.TextParser{} 106 | prevMfs, err := parser.TextToMetricFamilies(file) 107 | file.Close() 108 | if err != nil { 109 | level.Error(logger).Log("msg", "Error parsing output metrics", "err", err) 110 | goto failure 111 | } 112 | keys := make([]string, 0, len(prevMfs)) 113 | for k := range prevMfs { 114 | keys = append(keys, k) 115 | } 116 | sort.Strings(keys) 117 | for _, n := range keys { 118 | mf := prevMfs[n] 119 | if !strings.HasPrefix(n, "gpfs_exporter") { 120 | newMfs = append(newMfs, mf) 121 | } 122 | } 123 | } else { 124 | newMfs = mfs 125 | } 126 | 127 | if err := writeMetrics(newMfs, logger); err != nil { 128 | return err 129 | } 130 | if len(failures) != 0 { 131 | return fmt.Errorf("Error with collection") 132 | } 133 | return nil 134 | 135 | failure: 136 | if err := writeMetrics(mfs, logger); err != nil { 137 | return err 138 | } 139 | 
return err 140 | } 141 | 142 | func main() { 143 | promlogConfig := &promlog.Config{} 144 | flag.AddFlags(kingpin.CommandLine, promlogConfig) 145 | kingpin.Version(version.Print("gpfs_exporter")) 146 | kingpin.HelpFlag.Short('h') 147 | kingpin.Parse() 148 | 149 | logger := promlog.New(promlogConfig) 150 | 151 | fileLock := flock.New(*lockFile) 152 | locked, err := fileLock.TryLock() 153 | if err != nil { 154 | level.Error(logger).Log("msg", "Unable to obtain lock on lock file", "lockfile", *lockFile) 155 | level.Error(logger).Log("msg", err) 156 | os.Exit(1) 157 | } 158 | if !locked { 159 | level.Error(logger).Log("msg", fmt.Sprintf("Lock file %s is locked", *lockFile)) 160 | os.Exit(1) 161 | } 162 | err = collect(logger) 163 | if err != nil { 164 | os.Exit(1) 165 | } 166 | _ = fileLock.Unlock() 167 | } 168 | -------------------------------------------------------------------------------- /cmd/gpfs_mmlssnapshot_exporter/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package main 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os" 20 | "strings" 21 | "testing" 22 | "time" 23 | 24 | "github.com/alecthomas/kingpin/v2" 25 | "github.com/go-kit/log" 26 | "github.com/treydock/gpfs_exporter/collectors" 27 | ) 28 | 29 | var ( 30 | outputPath string 31 | mmlssnapshotStdout = ` 32 | mmlssnapshot::HEADER:version:reserved:reserved:filesystemName:directory:snapID:status:created:quotas:data:metadata:fileset:snapType: 33 | mmlssnapshot::0:1:::ess:20210120:27107:Valid:Wed Jan 20 00%3A30%3A02 2021::823587352320:529437984::: 34 | mmlssnapshot::0:1:::ess:20201115_PAS1736:16337:Valid:Sun Nov 15 02%3A47%3A48 2020::0:205184:PAS1736:: 35 | ` 36 | expected = ` 37 | # HELP gpfs_snapshot_created_timestamp_seconds GPFS snapshot creation timestamp 38 | # TYPE gpfs_snapshot_created_timestamp_seconds gauge 39 | gpfs_snapshot_created_timestamp_seconds{fileset="",fs="ess",id="27107",snapshot="20210120"} 1.611120602e+09 40 | gpfs_snapshot_created_timestamp_seconds{fileset="PAS1736",fs="ess",id="16337",snapshot="20201115_PAS1736"} 1.605426468e+09 41 | # HELP gpfs_snapshot_data_size_bytes GPFS snapshot data size 42 | # TYPE gpfs_snapshot_data_size_bytes gauge 43 | gpfs_snapshot_data_size_bytes{fileset="",fs="ess",id="27107",snapshot="20210120"} 8.4335344877568e+14 44 | gpfs_snapshot_data_size_bytes{fileset="PAS1736",fs="ess",id="16337",snapshot="20201115_PAS1736"} 0 45 | # HELP gpfs_snapshot_metadata_size_bytes GPFS snapshot metadata size 46 | # TYPE gpfs_snapshot_metadata_size_bytes gauge 47 | gpfs_snapshot_metadata_size_bytes{fileset="",fs="ess",id="27107",snapshot="20210120"} 5.42144495616e+11 48 | gpfs_snapshot_metadata_size_bytes{fileset="PAS1736",fs="ess",id="16337",snapshot="20201115_PAS1736"} 2.10108416e+08 49 | # HELP gpfs_snapshot_status_info GPFS snapshot status 50 | # TYPE gpfs_snapshot_status_info gauge 51 | gpfs_snapshot_status_info{fileset="",fs="ess",id="27107",snapshot="20210120",status="Valid"} 1 52 | 
gpfs_snapshot_status_info{fileset="PAS1736",fs="ess",id="16337",snapshot="20201115_PAS1736",status="Valid"} 1` 53 | expectedNoError = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 54 | # TYPE gpfs_exporter_collect_error gauge 55 | gpfs_exporter_collect_error{collector="mmlssnapshot-ess"} 0 56 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 57 | # TYPE gpfs_exporter_collect_timeout gauge 58 | gpfs_exporter_collect_timeout{collector="mmlssnapshot-ess"} 0` 59 | expectedError = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 60 | # TYPE gpfs_exporter_collect_error gauge 61 | gpfs_exporter_collect_error{collector="mmlssnapshot-ess"} 1 62 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 63 | # TYPE gpfs_exporter_collect_timeout gauge 64 | gpfs_exporter_collect_timeout{collector="mmlssnapshot-ess"} 0` 65 | expectedTimeout = `# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 66 | # TYPE gpfs_exporter_collect_error gauge 67 | gpfs_exporter_collect_error{collector="mmlssnapshot-ess"} 0 68 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 69 | # TYPE gpfs_exporter_collect_timeout gauge 70 | gpfs_exporter_collect_timeout{collector="mmlssnapshot-ess"} 1` 71 | ) 72 | 73 | func TestMain(m *testing.M) { 74 | tmpDir, err := os.MkdirTemp(os.TempDir(), "output") 75 | if err != nil { 76 | os.Exit(1) 77 | } 78 | outputPath = tmpDir + "/output" 79 | defer os.RemoveAll(tmpDir) 80 | if _, err := kingpin.CommandLine.Parse([]string{fmt.Sprintf("--output=%s", outputPath), "--collector.mmlssnapshot.filesystems=ess", "--collector.mmlssnapshot.get-size"}); err != nil { 81 | os.Exit(1) 82 | } 83 | collectors.NowLocation = func() *time.Location { 84 | return time.FixedZone("EST", -5*60*60) 85 | } 86 | exitVal := m.Run() 87 | os.Exit(exitVal) 88 | } 89 | 90 | func TestCollect(t *testing.T) { 91 | 
collectors.MmlssnapshotExec = func(fs string, ctx context.Context) (string, error) { 92 | return mmlssnapshotStdout, nil 93 | } 94 | err := collect(log.NewNopLogger()) 95 | if err != nil { 96 | t.Errorf("Unexpected error: %s", err.Error()) 97 | return 98 | } 99 | content, err := os.ReadFile(outputPath) 100 | if err != nil { 101 | t.Errorf("Unexpected error: %s", err.Error()) 102 | return 103 | } 104 | if !strings.Contains(string(content), expected) { 105 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 106 | } 107 | if !strings.Contains(string(content), expectedNoError) { 108 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 109 | } 110 | } 111 | 112 | func TestCollectError(t *testing.T) { 113 | collectors.MmlssnapshotExec = func(fs string, ctx context.Context) (string, error) { 114 | return "", fmt.Errorf("Error") 115 | } 116 | w := log.NewSyncWriter(os.Stderr) 117 | logger := log.NewLogfmtLogger(w) 118 | err := collect(logger) 119 | if err == nil { 120 | t.Errorf("Expected error") 121 | return 122 | } 123 | content, err := os.ReadFile(outputPath) 124 | if err != nil { 125 | t.Errorf("Unexpected error: %s", err.Error()) 126 | return 127 | } 128 | if !strings.Contains(string(content), expected) { 129 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 130 | } 131 | if !strings.Contains(string(content), expectedError) { 132 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 133 | } 134 | } 135 | 136 | func TestCollectTimeout(t *testing.T) { 137 | collectors.MmlssnapshotExec = func(fs string, ctx context.Context) (string, error) { 138 | return "", context.DeadlineExceeded 139 | } 140 | w := log.NewSyncWriter(os.Stderr) 141 | logger := log.NewLogfmtLogger(w) 142 | err := collect(logger) 143 | if err == nil { 144 | t.Errorf("Expected error") 145 | return 146 | } 147 | content, err := os.ReadFile(outputPath) 148 | if err != nil { 
149 | t.Errorf("Unexpected error: %s", err.Error()) 150 | return 151 | } 152 | if !strings.Contains(string(content), expected) { 153 | t.Errorf("Unexpected content:\n%s\nExpected:\n%s", string(content), expected) 154 | } 155 | if !strings.Contains(string(content), expectedTimeout) { 156 | t.Errorf("Unexpected error metrics:\n%s\nExpected:\n%s", string(content), expectedError) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /collectors/collector.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "net/url" 21 | "os" 22 | "os/exec" 23 | "sort" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | "time" 28 | 29 | "github.com/alecthomas/kingpin/v2" 30 | "github.com/go-kit/log" 31 | "github.com/go-kit/log/level" 32 | "github.com/prometheus/client_golang/prometheus" 33 | ) 34 | 35 | const ( 36 | namespace = "gpfs" 37 | ) 38 | 39 | var ( 40 | collectorState = make(map[string]*bool) 41 | factories = make(map[string]func(logger log.Logger) Collector) 42 | execCommand = exec.CommandContext 43 | MmlsfsExec = mmlsfs 44 | MmdiagExec = mmdiag 45 | NowLocation = func() *time.Location { 46 | return time.Now().Location() 47 | } 48 | collectDuration = prometheus.NewDesc( 49 | prometheus.BuildFQName(namespace, "exporter", "collector_duration_seconds"), 50 | "Collector time duration.", 51 | []string{"collector"}, nil) 52 | collectError = prometheus.NewDesc( 53 | prometheus.BuildFQName(namespace, "exporter", "collect_error"), 54 | "Indicates if error has occurred during collection", 55 | []string{"collector"}, nil) 56 | collecTimeout = prometheus.NewDesc( 57 | prometheus.BuildFQName(namespace, "exporter", "collect_timeout"), 58 | "Indicates the collector timed out", 59 | []string{"collector"}, nil) 60 | lastExecution = prometheus.NewDesc( 61 | prometheus.BuildFQName(namespace, "exporter", "last_execution"), 62 | "Last execution time of ", []string{"collector"}, nil) 63 | sudoCmd = kingpin.Flag("config.sudo.command", "The command to run sudo").Default("sudo").String() 64 | mmlsfsTimeout = kingpin.Flag("config.mmlsfs.timeout", "Timeout for mmlsfs execution").Default("5").Int() 65 | ) 66 | 67 | type DurationBucketValues []float64 68 | 69 | func (d *DurationBucketValues) Set(value string) error { 70 | buckets := []float64{} 71 | bucketDurations := strings.Split(value, ",") 72 | for _, bucketDuration := range bucketDurations { 73 | duration, err := time.ParseDuration(bucketDuration) 74 | if err != 
nil { 75 | return fmt.Errorf("'%s' is not a valid bucket duration", value) 76 | } 77 | buckets = append(buckets, duration.Seconds()) 78 | } 79 | sort.Float64s(buckets) 80 | *d = buckets 81 | return nil 82 | } 83 | 84 | func (d *DurationBucketValues) String() string { 85 | return "" 86 | } 87 | 88 | func DurationBuckets(s kingpin.Settings) (target *[]float64) { 89 | target = &[]float64{} 90 | s.SetValue((*DurationBucketValues)(target)) 91 | return 92 | } 93 | 94 | type GPFSFilesystem struct { 95 | Name string 96 | Mountpoint string 97 | } 98 | 99 | type GPFSCollector struct { 100 | sync.Mutex 101 | Collectors map[string]Collector 102 | } 103 | 104 | type Collector interface { 105 | // Get new metrics and expose them via prometheus registry. 106 | Describe(ch chan<- *prometheus.Desc) 107 | Collect(ch chan<- prometheus.Metric) 108 | } 109 | 110 | func registerCollector(collector string, isDefaultEnabled bool, factory func(logger log.Logger) Collector) { 111 | var helpDefaultState string 112 | if isDefaultEnabled { 113 | helpDefaultState = "enabled" 114 | } else { 115 | helpDefaultState = "disabled" 116 | } 117 | flagName := fmt.Sprintf("collector.%s", collector) 118 | flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", collector, helpDefaultState) 119 | defaultValue := fmt.Sprintf("%v", isDefaultEnabled) 120 | flag := kingpin.Flag(flagName, flagHelp).Default(defaultValue).Bool() 121 | collectorState[collector] = flag 122 | factories[collector] = factory 123 | } 124 | 125 | func NewGPFSCollector(logger log.Logger) *GPFSCollector { 126 | collectors := make(map[string]Collector) 127 | for key, enabled := range collectorState { 128 | var collector Collector 129 | if *enabled { 130 | collector = factories[key](log.With(logger, "collector", key)) 131 | collectors[key] = collector 132 | } 133 | } 134 | return &GPFSCollector{Collectors: collectors} 135 | } 136 | 137 | func SliceContains(slice []string, str string) bool { 138 | for _, s := range slice { 139 | if 
str == s { 140 | return true 141 | } 142 | } 143 | return false 144 | } 145 | 146 | func SliceIndex(slice []string, str string) int { 147 | for i, v := range slice { 148 | if v == str { 149 | return i 150 | } 151 | } 152 | return -1 153 | } 154 | 155 | func ParseFloat(str string, toBytes bool, logger log.Logger) (float64, error) { 156 | if val, err := strconv.ParseFloat(str, 64); err == nil { 157 | if toBytes { 158 | val = val * 1024 159 | } 160 | return val, nil 161 | } else { 162 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s: %s", str, err.Error())) 163 | return 0, err 164 | } 165 | } 166 | 167 | func FileExists(filename string) bool { 168 | info, err := os.Stat(filename) 169 | if os.IsNotExist(err) { 170 | return false 171 | } 172 | return !info.IsDir() 173 | } 174 | 175 | func mmdiag(arg string, ctx context.Context) (string, error) { 176 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmdiag", arg, "-Y") 177 | var out bytes.Buffer 178 | cmd.Stdout = &out 179 | err := cmd.Run() 180 | if ctx.Err() == context.DeadlineExceeded { 181 | return "", ctx.Err() 182 | } else if err != nil { 183 | return "", err 184 | } 185 | return out.String(), nil 186 | } 187 | 188 | func mmlfsfsFilesystems(ctx context.Context, logger log.Logger) ([]string, error) { 189 | var filesystems []string 190 | out, err := MmlsfsExec(ctx) 191 | if err != nil { 192 | return nil, err 193 | } 194 | mmlsfs_filesystems := parse_mmlsfs(out) 195 | for _, fs := range mmlsfs_filesystems { 196 | filesystems = append(filesystems, fs.Name) 197 | } 198 | return filesystems, nil 199 | } 200 | 201 | func mmlsfs(ctx context.Context) (string, error) { 202 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmlsfs", "all", "-Y", "-T") 203 | var out bytes.Buffer 204 | cmd.Stdout = &out 205 | err := cmd.Run() 206 | if ctx.Err() == context.DeadlineExceeded { 207 | return "", ctx.Err() 208 | } else if err != nil { 209 | return "", err 210 | } 211 | return out.String(), nil 212 | } 213 | 214 
| func parse_mmlsfs(out string) []GPFSFilesystem { 215 | var filesystems []GPFSFilesystem 216 | lines := strings.Split(out, "\n") 217 | for _, line := range lines { 218 | items := strings.Split(line, ":") 219 | if len(items) < 7 { 220 | continue 221 | } 222 | if items[2] == "HEADER" { 223 | continue 224 | } 225 | var fs GPFSFilesystem 226 | fs.Name = items[6] 227 | mountpoint, err := url.QueryUnescape(items[8]) 228 | if err != nil { 229 | continue 230 | } 231 | fs.Mountpoint = mountpoint 232 | filesystems = append(filesystems, fs) 233 | } 234 | return filesystems 235 | } 236 | -------------------------------------------------------------------------------- /collectors/collector_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package collectors 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os" 20 | "os/exec" 21 | "strconv" 22 | "testing" 23 | "time" 24 | 25 | "github.com/alecthomas/kingpin/v2" 26 | "github.com/prometheus/client_golang/prometheus" 27 | ) 28 | 29 | var ( 30 | mockedExitStatus = 0 31 | mockedStdout string 32 | _, cancel = context.WithTimeout(context.Background(), 5*time.Second) 33 | mmlsfsStdout = ` 34 | fs::HEADER:version:reserved:reserved:deviceName:fieldName:data:remarks: 35 | mmlsfs::0:1:::project:defaultMountPoint:%2Ffs%2Fproject:: 36 | mmlsfs::0:1:::scratch:defaultMountPoint:%2Ffs%2Fscratch:: 37 | mmlsfs::0:1:::ess:defaultMountPoint:%2Ffs%2Fess:: 38 | ` 39 | ) 40 | 41 | func TestMain(m *testing.M) { 42 | NowLocation = func() *time.Location { 43 | return time.FixedZone("EST", -5*60*60) 44 | } 45 | exitVal := m.Run() 46 | os.Exit(exitVal) 47 | } 48 | 49 | func TestArgs(t *testing.T) { 50 | if _, err := kingpin.CommandLine.Parse([]string{"--collector.waiter.buckets=foo"}); err == nil { 51 | t.Errorf("Expected error, none given") 52 | } 53 | } 54 | 55 | func fakeExecCommand(ctx context.Context, command string, args ...string) *exec.Cmd { 56 | cs := []string{"-test.run=TestExecCommandHelper", "--", command} 57 | cs = append(cs, args...) 58 | defer cancel() 59 | cmd := exec.CommandContext(ctx, os.Args[0], cs...) 
60 | es := strconv.Itoa(mockedExitStatus) 61 | cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1", 62 | "STDOUT=" + mockedStdout, 63 | "EXIT_STATUS=" + es} 64 | return cmd 65 | } 66 | 67 | func TestExecCommandHelper(t *testing.T) { 68 | if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { 69 | return 70 | } 71 | 72 | //nolint:staticcheck 73 | fmt.Fprintf(os.Stdout, os.Getenv("STDOUT")) 74 | i, _ := strconv.Atoi(os.Getenv("EXIT_STATUS")) 75 | os.Exit(i) 76 | } 77 | 78 | func setupGatherer(collector Collector) prometheus.Gatherer { 79 | registry := prometheus.NewRegistry() 80 | registry.MustRegister(collector) 81 | gatherers := prometheus.Gatherers{registry} 82 | return gatherers 83 | } 84 | 85 | func TestMmdiag(t *testing.T) { 86 | execCommand = fakeExecCommand 87 | mockedExitStatus = 0 88 | mockedStdout = "foo" 89 | defer func() { execCommand = exec.CommandContext }() 90 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 91 | defer cancel() 92 | out, err := mmdiag("--waiters", ctx) 93 | if err != nil { 94 | t.Errorf("Unexpected error: %s", err.Error()) 95 | } 96 | if out != mockedStdout { 97 | t.Errorf("Unexpected out: %s", out) 98 | } 99 | } 100 | 101 | func TestMmdiagError(t *testing.T) { 102 | execCommand = fakeExecCommand 103 | mockedExitStatus = 1 104 | mockedStdout = "foo" 105 | defer func() { execCommand = exec.CommandContext }() 106 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 107 | defer cancel() 108 | out, err := mmdiag("--waiters", ctx) 109 | if err == nil { 110 | t.Errorf("Expected error") 111 | } 112 | if out != "" { 113 | t.Errorf("Unexpected out: %s", out) 114 | } 115 | } 116 | 117 | func TestMmdiagTimeout(t *testing.T) { 118 | execCommand = fakeExecCommand 119 | mockedExitStatus = 1 120 | mockedStdout = "foo" 121 | defer func() { execCommand = exec.CommandContext }() 122 | ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second) 123 | defer cancel() 124 | out, err := mmdiag("--waiters", ctx) 
125 | if err != context.DeadlineExceeded { 126 | t.Errorf("Expected DeadlineExceeded") 127 | } 128 | if out != "" { 129 | t.Errorf("Unexpected out: %s", out) 130 | } 131 | } 132 | 133 | func TestMmlsfs(t *testing.T) { 134 | execCommand = fakeExecCommand 135 | mockedExitStatus = 0 136 | mockedStdout = "foo" 137 | defer func() { execCommand = exec.CommandContext }() 138 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 139 | defer cancel() 140 | out, err := mmlsfs(ctx) 141 | if err != nil { 142 | t.Errorf("Unexpected error: %s", err.Error()) 143 | } 144 | if out != mockedStdout { 145 | t.Errorf("Unexpected out: %s", out) 146 | } 147 | } 148 | 149 | func TestMmlsfsError(t *testing.T) { 150 | execCommand = fakeExecCommand 151 | mockedExitStatus = 1 152 | mockedStdout = "foo" 153 | defer func() { execCommand = exec.CommandContext }() 154 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 155 | defer cancel() 156 | out, err := mmlsfs(ctx) 157 | if err == nil { 158 | t.Errorf("Expected error") 159 | } 160 | if out != "" { 161 | t.Errorf("Unexpected out: %s", out) 162 | } 163 | } 164 | 165 | func TestMmlsfsTimeout(t *testing.T) { 166 | execCommand = fakeExecCommand 167 | mockedExitStatus = 1 168 | mockedStdout = "foo" 169 | defer func() { execCommand = exec.CommandContext }() 170 | ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second) 171 | defer cancel() 172 | out, err := mmlsfs(ctx) 173 | if err != context.DeadlineExceeded { 174 | t.Errorf("Expected DeadlineExceeded") 175 | } 176 | if out != "" { 177 | t.Errorf("Unexpected out: %s", out) 178 | } 179 | } 180 | 181 | func TestParseMmlsfs(t *testing.T) { 182 | execCommand = fakeExecCommand 183 | mockedExitStatus = 0 184 | defer func() { execCommand = exec.CommandContext }() 185 | filesystems := parse_mmlsfs(mmlsfsStdout) 186 | if len(filesystems) != 3 { 187 | t.Errorf("Expected 3 perfs returned, got %d", len(filesystems)) 188 | return 189 | } 190 | if val := 
filesystems[0].Name; val != "project" { 191 | t.Errorf("Unexpected Name, got %v", val) 192 | } 193 | if val := filesystems[0].Mountpoint; val != "/fs/project" { 194 | t.Errorf("Unexpected Mounpoint, got %v", val) 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /collectors/config.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package collectors 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "strconv" 20 | "strings" 21 | "time" 22 | 23 | "github.com/alecthomas/kingpin/v2" 24 | "github.com/go-kit/log" 25 | "github.com/go-kit/log/level" 26 | "github.com/prometheus/client_golang/prometheus" 27 | ) 28 | 29 | var ( 30 | configs = []string{"pagepool"} 31 | configTimeout = kingpin.Flag("collector.config.timeout", "Timeout for 'mmdiag --config' execution").Default("5").Int() 32 | ) 33 | 34 | type ConfigMetric struct { 35 | PagePool float64 36 | } 37 | 38 | type ConfigCollector struct { 39 | PagePool *prometheus.Desc 40 | logger log.Logger 41 | } 42 | 43 | func init() { 44 | registerCollector("config", true, NewConfigCollector) 45 | } 46 | 47 | func NewConfigCollector(logger log.Logger) Collector { 48 | return &ConfigCollector{ 49 | PagePool: prometheus.NewDesc(prometheus.BuildFQName(namespace, "config", "page_pool_bytes"), 50 | "GPFS configured page pool size", nil, nil), 51 | logger: logger, 52 | } 53 | } 54 | 55 | func (c *ConfigCollector) Describe(ch chan<- *prometheus.Desc) { 56 | ch <- c.PagePool 57 | } 58 | 59 | func (c *ConfigCollector) Collect(ch chan<- prometheus.Metric) { 60 | level.Debug(c.logger).Log("msg", "Collecting config metrics") 61 | collectTime := time.Now() 62 | timeout := 0 63 | errorMetric := 0 64 | metrics, err := c.collect() 65 | if err == context.DeadlineExceeded { 66 | level.Error(c.logger).Log("msg", "Timeout executing 'mmdiag --config'") 67 | timeout = 1 68 | } else if err != nil { 69 | level.Error(c.logger).Log("msg", err) 70 | errorMetric = 1 71 | } 72 | 73 | if err == nil { 74 | ch <- prometheus.MustNewConstMetric(c.PagePool, prometheus.GaugeValue, metrics.PagePool) 75 | } 76 | 77 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "config") 78 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "config") 79 | ch <- prometheus.MustNewConstMetric(collectDuration, 
prometheus.GaugeValue, time.Since(collectTime).Seconds(), "config") 80 | } 81 | 82 | func (c *ConfigCollector) collect() (ConfigMetric, error) { 83 | var configMetric ConfigMetric 84 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*configTimeout)*time.Second) 85 | defer cancel() 86 | out, err := MmdiagExec("--config", ctx) 87 | if err != nil { 88 | return configMetric, err 89 | } 90 | parse_mmdiag_config(out, &configMetric, c.logger) 91 | return configMetric, nil 92 | } 93 | 94 | func parse_mmdiag_config(out string, configMetric *ConfigMetric, logger log.Logger) { 95 | lines := strings.Split(out, "\n") 96 | var keyIdx int 97 | var valueIdx int 98 | for _, line := range lines { 99 | items := strings.Split(line, ":") 100 | if len(items) < 3 { 101 | continue 102 | } 103 | if items[2] == "HEADER" { 104 | for i, header := range items { 105 | if header == "name" { 106 | keyIdx = i 107 | } else if header == "value" { 108 | valueIdx = i 109 | } 110 | } 111 | continue 112 | } 113 | if (len(items) - 1) < keyIdx { 114 | continue 115 | } 116 | if !SliceContains(configs, items[keyIdx]) { 117 | continue 118 | } 119 | value, err := strconv.ParseFloat(items[valueIdx], 64) 120 | if err != nil { 121 | level.Error(logger).Log("msg", fmt.Sprintf("Unable to convert %s to float64", items[valueIdx]), "err", err) 122 | continue 123 | } 124 | switch items[keyIdx] { 125 | case "pagepool": 126 | configMetric.PagePool = value 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /collectors/config_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collectors

import (
	"context"
	"fmt"
	"strings"
	"testing"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

var (
	// configStdout is sample 'mmdiag --config -Y' output used by the tests.
	// NOTE(review): the opensslLibName record is split across two lines inside
	// the raw string (ends "libss" / starts "l.so.0"), so that record is not
	// parseable as a single line; only pagepool is asserted below.
	configStdout = `
mmdiag:config:HEADER:version:reserved:reserved:name:value:changed:
mmdiag:config:0:1:::opensslLibName:/usr/lib64/libssl.so.10%3A/usr/lib64/libssl.so.6%3A/usr/lib64/libssl.so.0.9.8%3A/lib64/libssl.so.6%3Alibssl.so%3Alibss
l.so.0%3Alibssl.so.4%3A/lib64/libssl.so.1.0.0::
mmdiag:config:0:1:::pagepool:4294967296:static:
mmdiag:config:0:1:::pagepoolMaxPhysMemPct:75::
mmdiag:config:0:1:::parallelMetadataWrite:0::
`
)

// TestParseMmdiagConfig verifies parse_mmdiag_config extracts pagepool from
// sample mmdiag output.
func TestParseMmdiagConfig(t *testing.T) {
	var metric ConfigMetric
	configs = []string{"pagepool", "opensslLibName"}
	parse_mmdiag_config(configStdout, &metric, log.NewNopLogger())
	if val := metric.PagePool; val != 4294967296 {
		t.Errorf("Unexpected page pool value %v", val)
	}
}

// TestConfigCollector verifies the happy path: 4 metrics total
// (page pool + error + timeout + duration gauges).
func TestConfigCollector(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	// Stub out the mmdiag execution so no GPFS binary is needed.
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return configStdout, nil
	}
	expected := `
# HELP gpfs_config_page_pool_bytes GPFS configured page pool size
# TYPE gpfs_config_page_pool_bytes gauge
gpfs_config_page_pool_bytes 4294967296
`
	collector := NewConfigCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 4 {
		t.Errorf("Unexpected collection count %d, expected 4", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_config_page_pool_bytes"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestConfigCollectorError verifies the error gauge is set and the value
// metric is omitted when execution fails (3 metrics total).
func TestConfigCollectorError(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return "", fmt.Errorf("Error")
	}
	expected := `
# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection
# TYPE gpfs_exporter_collect_error gauge
gpfs_exporter_collect_error{collector="config"} 1
`
	collector := NewConfigCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_error"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestConfigCollectorTimeout verifies the timeout gauge is set when the
// command deadline is exceeded (3 metrics total).
func TestConfigCollectorTimeout(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return "", context.DeadlineExceeded
	}
	expected := `
# HELP gpfs_exporter_collect_timeout Indicates the collector timed out
# TYPE gpfs_exporter_collect_timeout gauge
gpfs_exporter_collect_timeout{collector="config"} 1
`
	collector := NewConfigCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_timeout"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
--------------------------------------------------------------------------------
/collectors/mmces.go:
--------------------------------------------------------------------------------
// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "os" 21 | "regexp" 22 | "strings" 23 | "time" 24 | 25 | "github.com/alecthomas/kingpin/v2" 26 | "github.com/go-kit/log" 27 | "github.com/go-kit/log/level" 28 | "github.com/prometheus/client_golang/prometheus" 29 | ) 30 | 31 | var ( 32 | osHostname = os.Hostname 33 | configNodeName = kingpin.Flag("collector.mmces.nodename", "CES node name to check, defaults to FQDN").Default("").String() 34 | mmcesTimeout = kingpin.Flag("collector.mmces.timeout", "Timeout for mmces execution").Default("5").Int() 35 | mmcesIgnoredServices = kingpin.Flag("collector.mmces.ignored-services", "Regex of services to ignore").Default("^$").String() 36 | cesServices = []string{"AUTH", "BLOCK", "NETWORK", "AUTH_OBJ", "NFS", "OBJ", "SMB", "CES"} 37 | cesStates = []string{"DEGRADED", "DEPEND", "DISABLED", "FAILED", "HEALTHY", "STARTING", "STOPPED", "SUSPENDED"} 38 | mmcesExec = mmces 39 | ) 40 | 41 | func getFQDN(logger log.Logger) string { 42 | hostname, err := osHostname() 43 | if err != nil { 44 | level.Info(logger).Log("msg", fmt.Sprintf("Unable to determine FQDN: %s", err.Error())) 45 | return "" 46 | } 47 | return hostname 48 | } 49 | 50 | type CESMetric struct { 51 | Service string 52 | State string 53 | } 54 | 55 | type MmcesCollector struct { 56 | State *prometheus.Desc 57 | logger log.Logger 58 | } 59 | 60 | func init() { 61 | registerCollector("mmces", false, NewMmcesCollector) 62 | } 63 | 64 | func NewMmcesCollector(logger log.Logger) Collector { 65 | return &MmcesCollector{ 66 | State: prometheus.NewDesc(prometheus.BuildFQName(namespace, "ces", "state"), 67 | "GPFS CES health status", []string{"service", "state"}, nil), 68 | logger: logger, 69 | } 70 | } 71 | 72 | func (c *MmcesCollector) Describe(ch chan<- *prometheus.Desc) { 73 | ch <- c.State 74 | } 75 | 76 | func (c *MmcesCollector) Collect(ch chan<- prometheus.Metric) { 77 | level.Debug(c.logger).Log("msg", "Collecting mmces metrics") 
78 | collectTime := time.Now() 79 | timeout := 0 80 | errorMetric := 0 81 | var nodename string 82 | if *configNodeName == "" { 83 | nodename = getFQDN(c.logger) 84 | if nodename == "" { 85 | level.Error(c.logger).Log("msg", "collector.mmces.nodename must be defined and could not be determined") 86 | os.Exit(1) 87 | } 88 | } else { 89 | nodename = *configNodeName 90 | } 91 | metrics, err := c.collect(nodename) 92 | if err == context.DeadlineExceeded { 93 | level.Error(c.logger).Log("msg", "Timeout executing mmces") 94 | timeout = 1 95 | } else if err != nil { 96 | level.Error(c.logger).Log("msg", err) 97 | errorMetric = 1 98 | } 99 | for _, m := range metrics { 100 | for _, s := range cesStates { 101 | var value float64 102 | if s == m.State { 103 | value = 1 104 | } 105 | ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, value, m.Service, s) 106 | } 107 | var unknown float64 108 | if !SliceContains(cesStates, m.State) { 109 | unknown = 1 110 | level.Warn(c.logger).Log("msg", "Unknown state encountered", "state", m.State, "service", m.Service) 111 | } 112 | ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, unknown, m.Service, "UNKNOWN") 113 | } 114 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmces") 115 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmces") 116 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "mmces") 117 | } 118 | 119 | func (c *MmcesCollector) collect(nodename string) ([]CESMetric, error) { 120 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmcesTimeout)*time.Second) 121 | defer cancel() 122 | mmces_state_out, err := mmcesExec(nodename, ctx) 123 | if err != nil { 124 | return nil, err 125 | } 126 | metrics := mmces_state_show_parse(mmces_state_out, c.logger) 127 | return metrics, nil 128 | } 129 | 130 | func 
mmces(nodename string, ctx context.Context) (string, error) { 131 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmces", "state", "show", "-N", nodename, "-Y") 132 | var out bytes.Buffer 133 | cmd.Stdout = &out 134 | err := cmd.Run() 135 | if ctx.Err() == context.DeadlineExceeded { 136 | return "", ctx.Err() 137 | } else if err != nil { 138 | return "", err 139 | } 140 | return out.String(), nil 141 | } 142 | 143 | func mmces_state_show_parse(out string, logger log.Logger) []CESMetric { 144 | mmcesIgnoredServicesPattern := regexp.MustCompile(*mmcesIgnoredServices) 145 | var metrics []CESMetric 146 | lines := strings.Split(out, "\n") 147 | var headers []string 148 | var values []string 149 | for _, l := range lines { 150 | if !strings.HasPrefix(l, "mmcesstate") { 151 | continue 152 | } 153 | items := strings.Split(l, ":") 154 | if len(items) < 3 { 155 | continue 156 | } 157 | if items[2] == "HEADER" { 158 | headers = append(headers, items...) 159 | } else { 160 | values = append(values, items...) 161 | } 162 | } 163 | for i, h := range headers { 164 | if !SliceContains(cesServices, h) { 165 | continue 166 | } 167 | if mmcesIgnoredServicesPattern.MatchString(h) { 168 | level.Debug(logger).Log("msg", "Skipping service due to ignored pattern", "service", h) 169 | continue 170 | } 171 | var metric CESMetric 172 | metric.Service = h 173 | metric.State = values[i] 174 | metrics = append(metrics, metric) 175 | } 176 | return metrics 177 | } 178 | -------------------------------------------------------------------------------- /collectors/mmgetstate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "strings" 20 | "time" 21 | 22 | "github.com/alecthomas/kingpin/v2" 23 | "github.com/go-kit/log" 24 | "github.com/go-kit/log/level" 25 | "github.com/prometheus/client_golang/prometheus" 26 | ) 27 | 28 | var ( 29 | mmgetstateTimeout = kingpin.Flag("collector.mmgetstate.timeout", "Timeout for executing mmgetstate").Default("5").Int() 30 | mmgetstateStates = []string{"active", "arbitrating", "down"} 31 | MmgetstateExec = mmgetstate 32 | ) 33 | 34 | type MmgetstateMetrics struct { 35 | state string 36 | } 37 | 38 | type MmgetstateCollector struct { 39 | state *prometheus.Desc 40 | logger log.Logger 41 | } 42 | 43 | func init() { 44 | registerCollector("mmgetstate", true, NewMmgetstateCollector) 45 | } 46 | 47 | func NewMmgetstateCollector(logger log.Logger) Collector { 48 | return &MmgetstateCollector{ 49 | state: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "state"), 50 | "GPFS state", []string{"state"}, nil), 51 | logger: logger, 52 | } 53 | } 54 | 55 | func (c *MmgetstateCollector) Describe(ch chan<- *prometheus.Desc) { 56 | ch <- c.state 57 | } 58 | 59 | func (c *MmgetstateCollector) Collect(ch chan<- prometheus.Metric) { 60 | level.Debug(c.logger).Log("msg", "Collecting mmgetstate metrics") 61 | collectTime := time.Now() 62 | timeout := 0 63 | errorMetric := 0 64 | metric, err := c.collect() 65 | if err == context.DeadlineExceeded { 66 | level.Error(c.logger).Log("msg", "Timeout executing mmgetstate") 67 | timeout = 1 68 | } 
else if err != nil { 69 | level.Error(c.logger).Log("msg", err) 70 | errorMetric = 1 71 | } 72 | for _, state := range mmgetstateStates { 73 | if state == metric.state { 74 | ch <- prometheus.MustNewConstMetric(c.state, prometheus.GaugeValue, 1, state) 75 | } else { 76 | ch <- prometheus.MustNewConstMetric(c.state, prometheus.GaugeValue, 0, state) 77 | } 78 | } 79 | if !SliceContains(mmgetstateStates, metric.state) { 80 | ch <- prometheus.MustNewConstMetric(c.state, prometheus.GaugeValue, 1, "unknown") 81 | } else { 82 | ch <- prometheus.MustNewConstMetric(c.state, prometheus.GaugeValue, 0, "unknown") 83 | } 84 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmgetstate") 85 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmgetstate") 86 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "mmgetstate") 87 | } 88 | 89 | func (c *MmgetstateCollector) collect() (MmgetstateMetrics, error) { 90 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmgetstateTimeout)*time.Second) 91 | defer cancel() 92 | out, err := MmgetstateExec(ctx) 93 | if err != nil { 94 | return MmgetstateMetrics{}, err 95 | } 96 | metric := mmgetstate_parse(out) 97 | return metric, nil 98 | } 99 | 100 | func mmgetstate(ctx context.Context) (string, error) { 101 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmgetstate", "-Y") 102 | var out bytes.Buffer 103 | cmd.Stdout = &out 104 | err := cmd.Run() 105 | if ctx.Err() == context.DeadlineExceeded { 106 | return "", ctx.Err() 107 | } else if err != nil { 108 | return "", err 109 | } 110 | return out.String(), nil 111 | } 112 | 113 | func mmgetstate_parse(out string) MmgetstateMetrics { 114 | metric := MmgetstateMetrics{} 115 | lines := strings.Split(out, "\n") 116 | var headers []string 117 | for _, l := range lines { 118 | if !strings.HasPrefix(l, "mmgetstate") { 119 
| continue 120 | } 121 | items := strings.Split(l, ":") 122 | if len(items) < 3 { 123 | continue 124 | } 125 | var values []string 126 | if items[2] == "HEADER" { 127 | headers = append(headers, items...) 128 | continue 129 | } else { 130 | values = append(values, items...) 131 | } 132 | for i, h := range headers { 133 | switch h { 134 | case "state": 135 | metric.state = values[i] 136 | } 137 | } 138 | } 139 | return metric 140 | } 141 | -------------------------------------------------------------------------------- /collectors/mmgetstate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 

package collectors

import (
	"context"
	"fmt"
	"os/exec"
	"strings"
	"testing"
	"time"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

var (
	// mmgetstateStdout is sample 'mmgetstate -Y' output with state "active".
	mmgetstateStdout = `
mmgetstate::HEADER:version:reserved:reserved:nodeName:nodeNumber:state:quorum:nodesUp:totalNodes:remarks:cnfsState:
mmgetstate::0:1:::ib-proj-nsd05.domain:11:active:4:7:1122::(undefined):
`
)

// TestNewGPFSCollector verifies the default-enabled collector count.
func TestNewGPFSCollector(t *testing.T) {
	ret := NewGPFSCollector(log.NewNopLogger())
	if len(ret.Collectors) != 4 {
		t.Errorf("Unexpected number of collectors, expected 4, got %d", len(ret.Collectors))
	}
}

// TestMmgetstate verifies mmgetstate returns mocked stdout on success.
func TestMmgetstate(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 0
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	out, err := mmgetstate(ctx)
	if err != nil {
		t.Errorf("Unexpected error: %s", err.Error())
	}
	if out != mockedStdout {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestMmgetstateError verifies a non-zero exit status yields an error and
// empty output.
func TestMmgetstateError(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 1
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	out, err := mmgetstate(ctx)
	if err == nil {
		t.Errorf("Expected error")
	}
	if out != "" {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestMmgetstateTimeout verifies an already-expired context surfaces as
// context.DeadlineExceeded.
func TestMmgetstateTimeout(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 1
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second)
	defer cancel()
	out, err := mmgetstate(ctx)
	if err != context.DeadlineExceeded {
		t.Errorf("Expected DeadlineExceeded")
	}
	if out != "" {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestParseMmgetstate verifies mmgetstate_parse extracts the state column.
func TestParseMmgetstate(t *testing.T) {
	metric := mmgetstate_parse(mmgetstateStdout)
	if val := metric.state; val != "active" {
		t.Errorf("Unexpected state got %s", val)
	}
}

// TestMmgetstateCollector verifies the happy path: 7 metrics total
// (4 state gauges + error + timeout + duration).
func TestMmgetstateCollector(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	// Stub out the mmgetstate execution so no GPFS binary is needed.
	MmgetstateExec = func(ctx context.Context) (string, error) {
		return mmgetstateStdout, nil
	}
	expected := `
# HELP gpfs_state GPFS state
# TYPE gpfs_state gauge
gpfs_state{state="active"} 1
gpfs_state{state="arbitrating"} 0
gpfs_state{state="down"} 0
gpfs_state{state="unknown"} 0
`
	collector := NewMmgetstateCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 7 {
		t.Errorf("Unexpected collection count %d, expected 7", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_state"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestMMgetstateCollectorError verifies the error gauge is set when execution
// fails; state gauges are still emitted (7 metrics total).
func TestMMgetstateCollectorError(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmgetstateExec = func(ctx context.Context) (string, error) {
		return "", fmt.Errorf("Error")
	}
	expected := `
# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection
# TYPE gpfs_exporter_collect_error gauge
gpfs_exporter_collect_error{collector="mmgetstate"} 1
`
	collector := NewMmgetstateCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 7 {
		t.Errorf("Unexpected collection count %d, expected 7", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_error"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestMMgetstateCollectorTimeout verifies the timeout gauge is set when the
// command deadline is exceeded (7 metrics total).
func TestMMgetstateCollectorTimeout(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmgetstateExec = func(ctx context.Context) (string, error) {
		return "", context.DeadlineExceeded
	}
	expected := `
# HELP gpfs_exporter_collect_timeout Indicates the collector timed out
# TYPE gpfs_exporter_collect_timeout gauge
gpfs_exporter_collect_timeout{collector="mmgetstate"} 1
`
	collector := NewMmgetstateCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 7 {
		t.Errorf("Unexpected collection count %d, expected 7", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_timeout"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
--------------------------------------------------------------------------------
/collectors/mmhealth.go:
--------------------------------------------------------------------------------
// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "reflect" 21 | "regexp" 22 | "strconv" 23 | "strings" 24 | "time" 25 | 26 | "github.com/alecthomas/kingpin/v2" 27 | "github.com/go-kit/log" 28 | "github.com/go-kit/log/level" 29 | "github.com/prometheus/client_golang/prometheus" 30 | ) 31 | 32 | var ( 33 | mmhealthTimeout = kingpin.Flag("collector.mmhealth.timeout", "Timeout for mmhealth execution").Default("5").Int() 34 | mmhealthIgnoredComponent = kingpin.Flag("collector.mmhealth.ignored-component", "Regex of components to ignore").Default("^$").String() 35 | mmhealthIgnoredEntityName = kingpin.Flag("collector.mmhealth.ignored-entityname", "Regex of entity names to ignore").Default("^$").String() 36 | mmhealthIgnoredEntityType = kingpin.Flag("collector.mmhealth.ignored-entitytype", "Regex of entity types to ignore").Default("^$").String() 37 | mmhealthIgnoredEvent = kingpin.Flag("collector.mmhealth.ignored-event", "Regex of events to ignore").Default("").String() 38 | mmhealthMap = map[string]string{ 39 | "component": "Component", 40 | "entityname": "EntityName", 41 | "entitytype": "EntityType", 42 | "status": "Status", 43 | "event": "Event", 44 | } 45 | mmhealthStatuses = []string{"CHECKING", "DEGRADED", "DEPEND", "DISABLED", "FAILED", "HEALTHY", "STARTING", "STOPPED", "SUSPENDED", "TIPS"} 46 | mmhealthExec = mmhealth 47 | ) 48 | 49 | type HealthMetric struct { 50 | Type string 51 | Component string 52 | EntityName string 53 | EntityType string 54 | Status string 55 | Event 
string 56 | } 57 | 58 | type MmhealthCollector struct { 59 | State *prometheus.Desc 60 | Event *prometheus.Desc 61 | logger log.Logger 62 | } 63 | 64 | func init() { 65 | registerCollector("mmhealth", false, NewMmhealthCollector) 66 | } 67 | 68 | func NewMmhealthCollector(logger log.Logger) Collector { 69 | return &MmhealthCollector{ 70 | State: prometheus.NewDesc(prometheus.BuildFQName(namespace, "health", "status"), 71 | "GPFS health status", []string{"component", "entityname", "entitytype", "status"}, nil), 72 | Event: prometheus.NewDesc(prometheus.BuildFQName(namespace, "health", "event"), 73 | "GPFS health event", []string{"component", "entityname", "entitytype", "event"}, nil), 74 | logger: logger, 75 | } 76 | } 77 | 78 | func (c *MmhealthCollector) Describe(ch chan<- *prometheus.Desc) { 79 | ch <- c.State 80 | ch <- c.Event 81 | } 82 | 83 | func (c *MmhealthCollector) Collect(ch chan<- prometheus.Metric) { 84 | level.Debug(c.logger).Log("msg", "Collecting mmhealth metrics") 85 | collectTime := time.Now() 86 | timeout := 0 87 | errorMetric := 0 88 | metrics, err := c.collect() 89 | if err == context.DeadlineExceeded { 90 | timeout = 1 91 | level.Error(c.logger).Log("msg", "Timeout executing mmhealth") 92 | } else if err != nil { 93 | level.Error(c.logger).Log("msg", err) 94 | errorMetric = 1 95 | } 96 | for _, m := range metrics { 97 | if m.Type == "Event" { 98 | ch <- prometheus.MustNewConstMetric(c.Event, prometheus.GaugeValue, 1, m.Component, m.EntityName, m.EntityType, m.Event) 99 | continue 100 | } 101 | for _, s := range mmhealthStatuses { 102 | var value float64 103 | if s == m.Status { 104 | value = 1 105 | } 106 | ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, value, m.Component, m.EntityName, m.EntityType, s) 107 | } 108 | var unknown float64 109 | if !SliceContains(mmhealthStatuses, m.Status) { 110 | unknown = 1 111 | level.Warn(c.logger).Log("msg", "Unknown status encountered", "status", m.Status, 112 | "component", 
m.Component, "entityname", m.EntityName, "entitytype", m.EntityType) 113 | } 114 | ch <- prometheus.MustNewConstMetric(c.State, prometheus.GaugeValue, unknown, m.Component, m.EntityName, m.EntityType, "UNKNOWN") 115 | } 116 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmhealth") 117 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmhealth") 118 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "mmhealth") 119 | } 120 | 121 | func (c *MmhealthCollector) collect() ([]HealthMetric, error) { 122 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmhealthTimeout)*time.Second) 123 | defer cancel() 124 | mmhealth_out, err := mmhealthExec(ctx) 125 | if err != nil { 126 | return nil, err 127 | } 128 | metrics := mmhealth_parse(mmhealth_out, c.logger) 129 | return metrics, nil 130 | } 131 | 132 | func mmhealth(ctx context.Context) (string, error) { 133 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmhealth", "node", "show", "-Y") 134 | var out bytes.Buffer 135 | cmd.Stdout = &out 136 | err := cmd.Run() 137 | if ctx.Err() == context.DeadlineExceeded { 138 | return "", ctx.Err() 139 | } else if err != nil { 140 | return "", err 141 | } 142 | return out.String(), nil 143 | } 144 | 145 | func mmhealth_parse(out string, logger log.Logger) []HealthMetric { 146 | mmhealthIgnoredComponentPattern := regexp.MustCompile(*mmhealthIgnoredComponent) 147 | mmhealthIgnoredEntityNamePattern := regexp.MustCompile(*mmhealthIgnoredEntityName) 148 | mmhealthIgnoredEntityTypePattern := regexp.MustCompile(*mmhealthIgnoredEntityType) 149 | mmhealthIgnoredEventPattern := regexp.MustCompile(*mmhealthIgnoredEvent) 150 | var metrics []HealthMetric 151 | var eventKeys []string 152 | lines := strings.Split(out, "\n") 153 | typeHeaders := make(map[string][]string) 154 | for _, line := range lines { 155 | l := 
strings.TrimSpace(line) 156 | if !strings.HasPrefix(l, "mmhealth") { 157 | level.Debug(logger).Log("msg", "Skip due to prefix", "line", l) 158 | continue 159 | } 160 | items := strings.Split(l, ":") 161 | if len(items) < 3 { 162 | level.Debug(logger).Log("msg", "Skip due to length", "len", len(items), "line", l) 163 | continue 164 | } 165 | var metric HealthMetric 166 | metric.Type = items[1] 167 | if metric.Type != "State" && metric.Type != "Event" { 168 | level.Debug(logger).Log("msg", "Skip due to type", "type", metric.Type, "line", l) 169 | continue 170 | } 171 | var headers []string 172 | var values []string 173 | if items[2] == "HEADER" { 174 | typeHeaders[metric.Type] = items 175 | continue 176 | } else { 177 | headers = typeHeaders[metric.Type] 178 | values = items 179 | } 180 | ps := reflect.ValueOf(&metric) // pointer to struct - addressable 181 | s := ps.Elem() // struct 182 | for i, h := range headers { 183 | if field, ok := mmhealthMap[h]; ok { 184 | f := s.FieldByName(field) 185 | if f.Kind() == reflect.String { 186 | f.SetString(values[i]) 187 | } else if f.Kind() == reflect.Int64 { 188 | if val, err := strconv.ParseInt(values[i], 10, 64); err == nil { 189 | f.SetInt(val) 190 | } else { 191 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error())) 192 | } 193 | } 194 | } 195 | } 196 | if mmhealthIgnoredComponentPattern.MatchString(metric.Component) { 197 | level.Debug(logger).Log("msg", "Skipping component due to ignored pattern", "component", metric.Component) 198 | continue 199 | } 200 | if mmhealthIgnoredEntityNamePattern.MatchString(metric.EntityName) { 201 | level.Debug(logger).Log("msg", "Skipping entity name due to ignored pattern", "entityname", metric.EntityName) 202 | continue 203 | } 204 | if mmhealthIgnoredEntityTypePattern.MatchString(metric.EntityType) { 205 | level.Debug(logger).Log("msg", "Skipping entity type due to ignored pattern", "entitytype", metric.EntityType) 206 | continue 207 | 
} 208 | if metric.Type == "Event" && *mmhealthIgnoredEvent != "" && mmhealthIgnoredEventPattern.MatchString(metric.Event) { 209 | level.Debug(logger).Log("msg", "Skipping event due to ignored pattern", "event", metric.Event) 210 | continue 211 | } 212 | if metric.Type == "Event" { 213 | eventKey := fmt.Sprintf("%s-%s-%s-%s", metric.Component, metric.EntityName, metric.EntityType, metric.Event) 214 | if SliceContains(eventKeys, eventKey) { 215 | level.Debug(logger).Log("msg", "Skipping event as already encountered", "event", metric.Event) 216 | continue 217 | } else { 218 | eventKeys = append(eventKeys, eventKey) 219 | } 220 | } 221 | metrics = append(metrics, metric) 222 | } 223 | return metrics 224 | } 225 | -------------------------------------------------------------------------------- /collectors/mmlsfileset.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "net/url" 21 | "reflect" 22 | "strconv" 23 | "strings" 24 | "sync" 25 | "time" 26 | 27 | "github.com/alecthomas/kingpin/v2" 28 | "github.com/go-kit/log" 29 | "github.com/go-kit/log/level" 30 | "github.com/prometheus/client_golang/prometheus" 31 | ) 32 | 33 | var ( 34 | filesetFilesystems = kingpin.Flag("collector.mmlsfileset.filesystems", "Filesystems to query with mmlsfileset, comma separated. Defaults to all filesystems.").Default("").String() 35 | filesetTimeout = kingpin.Flag("collector.mmlsfileset.timeout", "Timeout for mmlsfileset execution").Default("60").Int() 36 | filesetMap = map[string]string{ 37 | "filesystemName": "FS", 38 | "filesetName": "Fileset", 39 | "status": "Status", 40 | "path": "Path", 41 | "created": "Created", 42 | "maxInodes": "MaxInodes", 43 | "allocInodes": "AllocInodes", 44 | "freeInodes": "FreeInodes", 45 | } 46 | MmlsfilesetExec = mmlsfileset 47 | ) 48 | 49 | type FilesetMetric struct { 50 | FS string 51 | Fileset string 52 | Status string 53 | Path string 54 | Created float64 55 | MaxInodes float64 56 | AllocInodes float64 57 | FreeInodes float64 58 | } 59 | 60 | type MmlsfilesetCollector struct { 61 | Status *prometheus.Desc 62 | Path *prometheus.Desc 63 | Created *prometheus.Desc 64 | MaxInodes *prometheus.Desc 65 | AllocInodes *prometheus.Desc 66 | FreeInodes *prometheus.Desc 67 | logger log.Logger 68 | } 69 | 70 | func init() { 71 | registerCollector("mmlsfileset", false, NewMmlsfilesetCollector) 72 | } 73 | 74 | func NewMmlsfilesetCollector(logger log.Logger) Collector { 75 | labels := []string{"fs", "fileset"} 76 | return &MmlsfilesetCollector{ 77 | Status: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "status_info"), 78 | "GPFS fileset status", append(labels, []string{"status"}...), nil), 79 | Path: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "path_info"), 80 | "GPFS fileset path", append(labels, 
[]string{"path"}...), nil), 81 | Created: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "created_timestamp_seconds"), 82 | "GPFS fileset creation timestamp", labels, nil), 83 | MaxInodes: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "max_inodes"), 84 | "GPFS fileset max inodes", labels, nil), 85 | AllocInodes: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "alloc_inodes"), 86 | "GPFS fileset alloc inodes", labels, nil), 87 | FreeInodes: prometheus.NewDesc(prometheus.BuildFQName(namespace, "fileset", "free_inodes"), 88 | "GPFS fileset free inodes", labels, nil), 89 | logger: logger, 90 | } 91 | } 92 | 93 | func (c *MmlsfilesetCollector) Describe(ch chan<- *prometheus.Desc) { 94 | ch <- c.Status 95 | ch <- c.Path 96 | ch <- c.Created 97 | ch <- c.MaxInodes 98 | ch <- c.AllocInodes 99 | ch <- c.FreeInodes 100 | } 101 | 102 | func (c *MmlsfilesetCollector) Collect(ch chan<- prometheus.Metric) { 103 | wg := &sync.WaitGroup{} 104 | var filesystems []string 105 | if *filesetFilesystems == "" { 106 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmlsfsTimeout)*time.Second) 107 | defer cancel() 108 | var mmlsfsTimeout float64 109 | var mmlsfsError float64 110 | mmlfsfs_filesystems, err := mmlfsfsFilesystems(ctx, c.logger) 111 | if err == context.DeadlineExceeded { 112 | mmlsfsTimeout = 1 113 | level.Error(c.logger).Log("msg", "Timeout executing mmlsfs") 114 | } else if err != nil { 115 | mmlsfsError = 1 116 | level.Error(c.logger).Log("msg", err) 117 | } 118 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, mmlsfsTimeout, "mmlsfileset-mmlsfs") 119 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, mmlsfsError, "mmlsfileset-mmlsfs") 120 | filesystems = mmlfsfs_filesystems 121 | } else { 122 | filesystems = strings.Split(*filesetFilesystems, ",") 123 | } 124 | for _, fs := range filesystems { 125 | level.Debug(c.logger).Log("msg", "Collecting 
mmlsfileset metrics", "fs", fs) 126 | wg.Add(1) 127 | collectTime := time.Now() 128 | go func(fs string) { 129 | defer wg.Done() 130 | label := fmt.Sprintf("mmlsfileset-%s", fs) 131 | timeout := 0 132 | errorMetric := 0 133 | metrics, err := c.mmlsfilesetCollect(fs) 134 | if err == context.DeadlineExceeded { 135 | level.Error(c.logger).Log("msg", fmt.Sprintf("Timeout executing %s", label)) 136 | timeout = 1 137 | } else if err != nil { 138 | level.Error(c.logger).Log("msg", err, "fs", fs) 139 | errorMetric = 1 140 | } 141 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), label) 142 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), label) 143 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), label) 144 | if err != nil { 145 | return 146 | } 147 | for _, m := range metrics { 148 | ch <- prometheus.MustNewConstMetric(c.Status, prometheus.GaugeValue, 1, m.FS, m.Fileset, m.Status) 149 | ch <- prometheus.MustNewConstMetric(c.Path, prometheus.GaugeValue, 1, m.FS, m.Fileset, m.Path) 150 | ch <- prometheus.MustNewConstMetric(c.Created, prometheus.GaugeValue, m.Created, m.FS, m.Fileset) 151 | ch <- prometheus.MustNewConstMetric(c.MaxInodes, prometheus.GaugeValue, m.MaxInodes, m.FS, m.Fileset) 152 | ch <- prometheus.MustNewConstMetric(c.AllocInodes, prometheus.GaugeValue, m.AllocInodes, m.FS, m.Fileset) 153 | ch <- prometheus.MustNewConstMetric(c.FreeInodes, prometheus.GaugeValue, m.FreeInodes, m.FS, m.Fileset) 154 | } 155 | }(fs) 156 | } 157 | wg.Wait() 158 | } 159 | 160 | func (c *MmlsfilesetCollector) mmlsfilesetCollect(fs string) ([]FilesetMetric, error) { 161 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*filesetTimeout)*time.Second) 162 | defer cancel() 163 | out, err := MmlsfilesetExec(fs, ctx) 164 | if err != nil { 165 | return nil, err 166 | } 167 | metrics, err := parse_mmlsfileset(out, 
c.logger) 168 | return metrics, err 169 | } 170 | 171 | func mmlsfileset(fs string, ctx context.Context) (string, error) { 172 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmlsfileset", fs, "-Y") 173 | var out bytes.Buffer 174 | cmd.Stdout = &out 175 | err := cmd.Run() 176 | if ctx.Err() == context.DeadlineExceeded { 177 | return "", ctx.Err() 178 | } else if err != nil { 179 | return "", err 180 | } 181 | return out.String(), nil 182 | } 183 | 184 | func parse_mmlsfileset(out string, logger log.Logger) ([]FilesetMetric, error) { 185 | var metrics []FilesetMetric 186 | headers := []string{} 187 | lines := strings.Split(out, "\n") 188 | for _, l := range lines { 189 | if !strings.HasPrefix(l, "mmlsfileset") { 190 | continue 191 | } 192 | items := strings.Split(l, ":") 193 | if len(items) < 3 { 194 | continue 195 | } 196 | var values []string 197 | if items[2] == "HEADER" { 198 | headers = append(headers, items...) 199 | continue 200 | } else { 201 | values = append(values, items...) 
202 | } 203 | var metric FilesetMetric 204 | ps := reflect.ValueOf(&metric) // pointer to struct - addressable 205 | s := ps.Elem() // struct 206 | for i, h := range headers { 207 | if field, ok := filesetMap[h]; ok { 208 | f := s.FieldByName(field) 209 | if f.Kind() == reflect.String { 210 | value := values[i] 211 | if h == "path" { 212 | pathParsed, err := url.QueryUnescape(values[i]) 213 | if err != nil { 214 | level.Error(logger).Log("msg", "Unable to unescape path", "value", values[i]) 215 | return nil, err 216 | } 217 | value = pathParsed 218 | } 219 | f.SetString(value) 220 | } else if f.Kind() == reflect.Float64 { 221 | var value float64 222 | if h == "created" { 223 | createdStr, err := url.QueryUnescape(values[i]) 224 | if err != nil { 225 | level.Error(logger).Log("msg", "Unable to unescape created time", "value", values[i]) 226 | return nil, err 227 | } 228 | createdTime, err := time.ParseInLocation(time.ANSIC, createdStr, NowLocation()) 229 | if err != nil { 230 | level.Error(logger).Log("msg", "Unable to parse time", "value", createdStr) 231 | return nil, err 232 | } 233 | value = float64(createdTime.Unix()) 234 | } else if val, err := strconv.ParseFloat(values[i], 64); err == nil { 235 | value = val 236 | } else { 237 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error())) 238 | return nil, err 239 | } 240 | f.SetFloat(value) 241 | } 242 | } 243 | } 244 | 245 | metrics = append(metrics, metric) 246 | } 247 | return metrics, nil 248 | } 249 | -------------------------------------------------------------------------------- /collectors/mmlspool.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "strings" 21 | "sync" 22 | "time" 23 | 24 | "github.com/alecthomas/kingpin/v2" 25 | "github.com/go-kit/log" 26 | "github.com/go-kit/log/level" 27 | "github.com/prometheus/client_golang/prometheus" 28 | ) 29 | 30 | var ( 31 | poolFilesystems = kingpin.Flag("collector.mmlspool.filesystems", "Filesystems to query with mmlspool, comma separated. Defaults to all filesystems.").Default("").String() 32 | mmlspoolTimeout = kingpin.Flag("collector.mmlspool.timeout", "Timeout for mmlspool execution").Default("30").Int() 33 | MmlspoolExec = mmlspool 34 | ) 35 | 36 | type PoolMetric struct { 37 | FS string 38 | PoolName string 39 | PoolTotal float64 40 | PoolFree float64 41 | PoolFreePercent float64 42 | Meta bool 43 | MetaTotal float64 44 | MetaFree float64 45 | MetaFreePercent float64 46 | } 47 | 48 | type MmlspoolCollector struct { 49 | PoolTotal *prometheus.Desc 50 | PoolFree *prometheus.Desc 51 | PoolFreePercent *prometheus.Desc 52 | MetaTotal *prometheus.Desc 53 | MetaFree *prometheus.Desc 54 | MetaFreePercent *prometheus.Desc 55 | logger log.Logger 56 | } 57 | 58 | func init() { 59 | registerCollector("mmlspool", false, NewMmlspoolCollector) 60 | } 61 | 62 | func NewMmlspoolCollector(logger log.Logger) Collector { 63 | return &MmlspoolCollector{ 64 | PoolTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "total_bytes"), 65 | "GPFS pool total size in bytes", []string{"fs", "pool"}, nil), 66 | PoolFree: 
prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "free_bytes"), 67 | "GPFS pool free size in bytes", []string{"fs", "pool"}, nil), 68 | PoolFreePercent: prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "free_percent"), 69 | "GPFS pool free percent", []string{"fs", "pool"}, nil), 70 | MetaTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "metadata_total_bytes"), 71 | "GPFS pool total metadata in bytes", []string{"fs", "pool"}, nil), 72 | MetaFree: prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "metadata_free_bytes"), 73 | "GPFS pool free metadata in bytes", []string{"fs", "pool"}, nil), 74 | MetaFreePercent: prometheus.NewDesc(prometheus.BuildFQName(namespace, "pool", "metadata_free_percent"), 75 | "GPFS pool free percent", []string{"fs", "pool"}, nil), 76 | logger: logger, 77 | } 78 | } 79 | 80 | func (c *MmlspoolCollector) Describe(ch chan<- *prometheus.Desc) { 81 | ch <- c.PoolTotal 82 | ch <- c.PoolFree 83 | ch <- c.PoolFreePercent 84 | ch <- c.MetaTotal 85 | ch <- c.MetaFree 86 | ch <- c.MetaFreePercent 87 | } 88 | 89 | func (c *MmlspoolCollector) Collect(ch chan<- prometheus.Metric) { 90 | wg := &sync.WaitGroup{} 91 | var filesystems []string 92 | if *poolFilesystems == "" { 93 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmlsfsTimeout)*time.Second) 94 | defer cancel() 95 | var mmlsfsTimeout float64 96 | var mmlsfsError float64 97 | mmlfsfs_filesystems, err := mmlfsfsFilesystems(ctx, c.logger) 98 | if err == context.DeadlineExceeded { 99 | mmlsfsTimeout = 1 100 | level.Error(c.logger).Log("msg", "Timeout executing mmlsfs") 101 | } else if err != nil { 102 | mmlsfsError = 1 103 | level.Error(c.logger).Log("msg", err) 104 | } 105 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, mmlsfsTimeout, "mmlspool-mmlsfs") 106 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, mmlsfsError, "mmlspool-mmlsfs") 107 | filesystems = 
mmlfsfs_filesystems 108 | } else { 109 | filesystems = strings.Split(*poolFilesystems, ",") 110 | } 111 | for _, fs := range filesystems { 112 | level.Debug(c.logger).Log("msg", "Collecting mmlspool metrics", "fs", fs) 113 | wg.Add(1) 114 | collectTime := time.Now() 115 | go func(fs string) { 116 | defer wg.Done() 117 | label := fmt.Sprintf("mmlspool-%s", fs) 118 | timeout := 0 119 | errorMetric := 0 120 | metrics, err := c.mmlspoolCollect(fs) 121 | if err == context.DeadlineExceeded { 122 | level.Error(c.logger).Log("msg", fmt.Sprintf("Timeout executing %s", label)) 123 | timeout = 1 124 | } else if err != nil { 125 | level.Error(c.logger).Log("msg", err, "fs", fs) 126 | errorMetric = 1 127 | } 128 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), label) 129 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), label) 130 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), label) 131 | if err == nil { 132 | for _, pool := range metrics { 133 | ch <- prometheus.MustNewConstMetric(c.PoolTotal, prometheus.GaugeValue, pool.PoolTotal, fs, pool.PoolName) 134 | ch <- prometheus.MustNewConstMetric(c.PoolFree, prometheus.GaugeValue, pool.PoolFree, fs, pool.PoolName) 135 | ch <- prometheus.MustNewConstMetric(c.PoolFreePercent, prometheus.GaugeValue, pool.PoolFreePercent, fs, pool.PoolName) 136 | if pool.Meta { 137 | ch <- prometheus.MustNewConstMetric(c.MetaTotal, prometheus.GaugeValue, pool.MetaTotal, fs, pool.PoolName) 138 | ch <- prometheus.MustNewConstMetric(c.MetaFree, prometheus.GaugeValue, pool.MetaFree, fs, pool.PoolName) 139 | ch <- prometheus.MustNewConstMetric(c.MetaFreePercent, prometheus.GaugeValue, pool.MetaFreePercent, fs, pool.PoolName) 140 | } 141 | } 142 | } 143 | ch <- prometheus.MustNewConstMetric(lastExecution, prometheus.GaugeValue, float64(time.Now().Unix()), label) 144 | }(fs) 145 | } 146 | wg.Wait() 147 
| } 148 | 149 | func (c *MmlspoolCollector) mmlspoolCollect(fs string) ([]PoolMetric, error) { 150 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmlspoolTimeout)*time.Second) 151 | defer cancel() 152 | out, err := MmlspoolExec(fs, ctx) 153 | if err != nil { 154 | return nil, err 155 | } 156 | metrics, err := parse_mmlspool(fs, out, c.logger) 157 | if err != nil { 158 | return nil, err 159 | } 160 | return metrics, nil 161 | } 162 | 163 | func mmlspool(fs string, ctx context.Context) (string, error) { 164 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmlspool", fs) 165 | var out bytes.Buffer 166 | cmd.Stdout = &out 167 | err := cmd.Run() 168 | if ctx.Err() == context.DeadlineExceeded { 169 | return "", ctx.Err() 170 | } else if err != nil { 171 | return "", err 172 | } 173 | return out.String(), nil 174 | } 175 | 176 | func parse_mmlspool(fs string, out string, logger log.Logger) ([]PoolMetric, error) { 177 | pools := []PoolMetric{} 178 | headers := []string{} 179 | lines := strings.Split(out, "\n") 180 | for _, l := range lines { 181 | // Replace beginning of percent ( N%) 182 | line := strings.Replace(l, "(", "", -1) 183 | // Replace percent N %) with just N 184 | line = strings.Replace(line, "%)", "", -1) 185 | // Replace '8 MB' with just '8' 186 | line = strings.Replace(line, " MB", "", -1) 187 | items := strings.Fields(line) 188 | if len(items) < 8 { 189 | continue 190 | } 191 | if items[0] == "Name" { 192 | headers = parse_mmlspool_headers(items) 193 | level.Debug(logger).Log("msg", "headers", "headers", fmt.Sprintf("%v", headers), "line", line) 194 | continue 195 | } 196 | level.Debug(logger).Log("msg", "items", "items", fmt.Sprintf("%v", items), "line", line) 197 | pool := PoolMetric{ 198 | FS: fs, 199 | } 200 | for i, item := range items { 201 | field := headers[i] 202 | switch field { 203 | case "Name": 204 | pool.PoolName = item 205 | case "Meta": 206 | if item == "yes" { 207 | pool.Meta = true 208 | } 209 | case 
"TotalData": 210 | poolTotal, err := ParseFloat(item, false, logger) 211 | if err != nil { 212 | return nil, err 213 | } 214 | pool.PoolTotal = poolTotal * 1024 215 | case "FreeData": 216 | poolFree, err := ParseFloat(item, false, logger) 217 | if err != nil { 218 | return nil, err 219 | } 220 | pool.PoolFree = poolFree * 1024 221 | case "FreeDataPercent": 222 | poolFreePercent, err := ParseFloat(item, false, logger) 223 | if err != nil { 224 | return nil, err 225 | } 226 | pool.PoolFreePercent = poolFreePercent 227 | case "TotalMeta": 228 | metaTotal, err := ParseFloat(item, false, logger) 229 | if err != nil { 230 | return nil, err 231 | } 232 | pool.MetaTotal = metaTotal * 1024 233 | case "FreeMeta": 234 | metaFree, err := ParseFloat(item, false, logger) 235 | if err != nil { 236 | return nil, err 237 | } 238 | pool.MetaFree = metaFree * 1024 239 | case "FreeMetaPercent": 240 | metaFreePercent, err := ParseFloat(item, false, logger) 241 | if err != nil { 242 | return nil, err 243 | } 244 | pool.MetaFreePercent = metaFreePercent 245 | } 246 | } 247 | pools = append(pools, pool) 248 | } 249 | return pools, nil 250 | } 251 | 252 | func parse_mmlspool_headers(items []string) []string { 253 | skip := 0 254 | headers := []string{} 255 | for i := 0; i < len(items); i++ { 256 | if skip > 0 { 257 | skip-- 258 | continue 259 | } 260 | item := items[i] 261 | if item == "Total" && items[i+1] == "Data" { 262 | item = "TotalData" 263 | skip = 3 264 | } 265 | if item == "Free" && items[i+1] == "Data" { 266 | item = "FreeData" 267 | skip = 3 268 | } 269 | if item == "Total" && items[i+1] == "Meta" { 270 | item = "TotalMeta" 271 | skip = 3 272 | } 273 | if item == "Free" && items[i+1] == "Meta" { 274 | item = "FreeMeta" 275 | skip = 3 276 | } 277 | headers = append(headers, item) 278 | if strings.HasPrefix(item, "Free") { 279 | percentItem := fmt.Sprintf("%sPercent", item) 280 | headers = append(headers, percentItem) 281 | } 282 | } 283 | return headers 284 | } 285 | 
-------------------------------------------------------------------------------- /collectors/mmlsqos.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | package collectors 14 | 15 | import ( 16 | "bytes" 17 | "context" 18 | "fmt" 19 | "reflect" 20 | "strconv" 21 | "strings" 22 | "sync" 23 | "time" 24 | 25 | "github.com/alecthomas/kingpin/v2" 26 | "github.com/go-kit/log" 27 | "github.com/go-kit/log/level" 28 | "github.com/prometheus/client_golang/prometheus" 29 | ) 30 | 31 | var ( 32 | qosFilesystems = kingpin.Flag("collector.mmlsqos.filesystems", "Filesystems to query with mmlsqos, comma separated. Defaults to all filesystems.").Default("").String() 33 | qosTimeout = kingpin.Flag("collector.mmlsqos.timeout", "Timeout for mmlsqos execution").Default("60").Int() 34 | qosSeconds = kingpin.Flag("collector.mmlsqos.seconds", "Display the I/O performance values for the previous number of seconds. 
The valid range of seconds is 1-999").Default("60").Int() 35 | qosMap = map[string]string{ 36 | "pool": "Pool", 37 | "timeEpoch": "Time", 38 | "class": "Class", 39 | "iops": "Iops", 40 | "ioql": "AvegarePendingRequests", 41 | "qsdl": "AvegareQueuedRequests", 42 | "et": "MeasurementInterval", 43 | "MBs": "Bs", 44 | } 45 | MmlsqosExec = mmlsqos 46 | ) 47 | 48 | type QosMetric struct { 49 | Pool string 50 | Time float64 51 | Class string 52 | Iops float64 53 | AvegarePendingRequests float64 54 | AvegareQueuedRequests float64 55 | MeasurementInterval float64 56 | Bs float64 57 | } 58 | 59 | type MmlsqosCollector struct { 60 | Iops *prometheus.Desc 61 | AvegarePendingRequests *prometheus.Desc 62 | AvegareQueuedRequests *prometheus.Desc 63 | MeasurementInterval *prometheus.Desc 64 | Bs *prometheus.Desc 65 | logger log.Logger 66 | } 67 | 68 | func init() { 69 | registerCollector("mmlsqos", false, NewMmlsqosCollector) 70 | } 71 | 72 | func NewMmlsqosCollector(logger log.Logger) Collector { 73 | labels := []string{"fs", "pool", "class", "measurement_period_seconds"} 74 | return &MmlsqosCollector{ 75 | Iops: prometheus.NewDesc(prometheus.BuildFQName(namespace, "qos", "iops"), 76 | "GPFS performance of the class in I/O operations per second", labels, nil), 77 | AvegarePendingRequests: prometheus.NewDesc(prometheus.BuildFQName(namespace, "qos", "average_pending_requests"), 78 | "GPFS average number of I/O requests in the class that are pending for reasons other than being queued by QoS", labels, nil), 79 | AvegareQueuedRequests: prometheus.NewDesc(prometheus.BuildFQName(namespace, "qos", "average_queued_requests"), 80 | "GPFS average number of I/O requests in the class that are queued by QoS", labels, nil), 81 | MeasurementInterval: prometheus.NewDesc(prometheus.BuildFQName(namespace, "qos", "measurement_interval_seconds"), 82 | "GPFS interval in seconds during which the measurement was made", labels, nil), 83 | Bs: prometheus.NewDesc(prometheus.BuildFQName(namespace, "qos", 
"bytes_per_second"), 84 | "GPFS performance of the class in Bytes per second", labels, nil), 85 | logger: logger, 86 | } 87 | } 88 | 89 | func (c *MmlsqosCollector) Describe(ch chan<- *prometheus.Desc) { 90 | ch <- c.Iops 91 | ch <- c.AvegarePendingRequests 92 | ch <- c.AvegareQueuedRequests 93 | ch <- c.MeasurementInterval 94 | ch <- c.Bs 95 | } 96 | 97 | func (c *MmlsqosCollector) Collect(ch chan<- prometheus.Metric) { 98 | wg := &sync.WaitGroup{} 99 | var filesystems []string 100 | if *qosFilesystems == "" { 101 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmlsfsTimeout)*time.Second) 102 | defer cancel() 103 | var mmlsfsTimeout float64 104 | var mmlsfsError float64 105 | mmlfsfs_filesystems, err := mmlfsfsFilesystems(ctx, c.logger) 106 | if err == context.DeadlineExceeded { 107 | mmlsfsTimeout = 1 108 | level.Error(c.logger).Log("msg", "Timeout executing mmlsfs") 109 | } else if err != nil { 110 | mmlsfsError = 1 111 | level.Error(c.logger).Log("msg", err) 112 | } 113 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, mmlsfsTimeout, "mmlsqos-mmlsfs") 114 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, mmlsfsError, "mmlsqos-mmlsfs") 115 | filesystems = mmlfsfs_filesystems 116 | } else { 117 | filesystems = strings.Split(*qosFilesystems, ",") 118 | } 119 | for _, fs := range filesystems { 120 | level.Debug(c.logger).Log("msg", "Collecting mmlsqos metrics", "fs", fs) 121 | wg.Add(1) 122 | collectTime := time.Now() 123 | go func(fs string) { 124 | defer wg.Done() 125 | label := fmt.Sprintf("mmlsqos-%s", fs) 126 | timeout := 0 127 | errorMetric := 0 128 | metrics, err := c.mmlsqosCollect(fs) 129 | if err == context.DeadlineExceeded { 130 | level.Error(c.logger).Log("msg", fmt.Sprintf("Timeout executing %s", label)) 131 | timeout = 1 132 | } else if err != nil { 133 | level.Error(c.logger).Log("msg", err, "fs", fs) 134 | errorMetric = 1 135 | } 136 | ch <- 
prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), label) 137 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), label) 138 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), label) 139 | if err != nil { 140 | return 141 | } 142 | for _, m := range metrics { 143 | ch <- prometheus.MustNewConstMetric(c.Iops, prometheus.GaugeValue, m.Iops, fs, m.Pool, m.Class, fmt.Sprintf("%.f", m.Time)) 144 | ch <- prometheus.MustNewConstMetric(c.AvegarePendingRequests, prometheus.GaugeValue, m.AvegarePendingRequests, fs, m.Pool, m.Class, fmt.Sprintf("%.f", m.Time)) 145 | ch <- prometheus.MustNewConstMetric(c.AvegareQueuedRequests, prometheus.GaugeValue, m.AvegareQueuedRequests, fs, m.Pool, m.Class, fmt.Sprintf("%.f", m.Time)) 146 | ch <- prometheus.MustNewConstMetric(c.MeasurementInterval, prometheus.GaugeValue, m.MeasurementInterval, fs, m.Pool, m.Class, fmt.Sprintf("%.f", m.Time)) 147 | ch <- prometheus.MustNewConstMetric(c.Bs, prometheus.GaugeValue, m.Bs, fs, m.Pool, m.Class, fmt.Sprintf("%.f", m.Time)) 148 | } 149 | }(fs) 150 | } 151 | wg.Wait() 152 | } 153 | 154 | func (c *MmlsqosCollector) mmlsqosCollect(fs string) ([]QosMetric, error) { 155 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*qosTimeout)*time.Second) 156 | defer cancel() 157 | out, err := MmlsqosExec(fs, ctx) 158 | if err != nil { 159 | return nil, err 160 | } 161 | metrics, err := parse_mmlsqos(out, c.logger) 162 | return metrics, err 163 | } 164 | 165 | func mmlsqos(fs string, ctx context.Context) (string, error) { 166 | args := []string{"/usr/lpp/mmfs/bin/mmlsqos", fs, "-Y", "--seconds", strconv.Itoa(*qosSeconds)} 167 | cmd := execCommand(ctx, *sudoCmd, args...) 
168 | var out bytes.Buffer 169 | cmd.Stdout = &out 170 | err := cmd.Run() 171 | if ctx.Err() == context.DeadlineExceeded { 172 | return "", ctx.Err() 173 | } else if err != nil { 174 | return "", err 175 | } 176 | return out.String(), nil 177 | } 178 | 179 | func parse_mmlsqos(out string, logger log.Logger) ([]QosMetric, error) { 180 | var metrics []QosMetric 181 | headers := []string{} 182 | lines := strings.Split(out, "\n") 183 | for _, l := range lines { 184 | if !strings.HasPrefix(l, "mmlsqos") { 185 | continue 186 | } 187 | items := strings.Split(l, ":") 188 | if len(items) < 3 { 189 | continue 190 | } 191 | if items[1] != "stats" { 192 | continue 193 | } 194 | var values []string 195 | if items[2] == "HEADER" { 196 | headers = append(headers, items...) 197 | continue 198 | } else { 199 | values = append(values, items...) 200 | } 201 | var metric QosMetric 202 | ps := reflect.ValueOf(&metric) // pointer to struct - addressable 203 | s := ps.Elem() // struct 204 | for i, h := range headers { 205 | if field, ok := qosMap[h]; ok { 206 | f := s.FieldByName(field) 207 | if f.Kind() == reflect.String { 208 | f.SetString(values[i]) 209 | } else if f.Kind() == reflect.Float64 { 210 | if strings.Contains(values[i], "nan") { 211 | f.SetFloat(0) 212 | } else if val, err := strconv.ParseFloat(strings.Replace(values[i], ",", ".", -1), 64); err == nil { 213 | if field == "Bs" { 214 | val = val * 1024 * 1024 215 | } 216 | f.SetFloat(val) 217 | } else { 218 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error())) 219 | return nil, err 220 | } 221 | } 222 | } 223 | } 224 | 225 | metrics = append(metrics, metric) 226 | } 227 | return metrics, nil 228 | } 229 | -------------------------------------------------------------------------------- /collectors/mmlssnapshot.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, 
Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "net/url" 21 | "reflect" 22 | "strconv" 23 | "strings" 24 | "sync" 25 | "time" 26 | 27 | "github.com/alecthomas/kingpin/v2" 28 | "github.com/go-kit/log" 29 | "github.com/go-kit/log/level" 30 | "github.com/prometheus/client_golang/prometheus" 31 | ) 32 | 33 | var ( 34 | snapshotFilesystems = kingpin.Flag("collector.mmlssnapshot.filesystems", "Filesystems to query with mmlssnapshot, comma separated. 
Defaults to all filesystems.").Default("").String() 35 | snapshotTimeout = kingpin.Flag("collector.mmlssnapshot.timeout", "Timeout for mmlssnapshot execution").Default("60").Int() 36 | snapshotGetSize = kingpin.Flag("collector.mmlssnapshot.get-size", "Collect snapshot sizes, long running operation").Default("false").Bool() 37 | SnapshotKbToBytes = []string{"data", "metadata"} 38 | snapshotMap = map[string]string{ 39 | "filesystemName": "FS", 40 | "directory": "Name", 41 | "snapID": "ID", 42 | "status": "Status", 43 | "created": "Created", 44 | "fileset": "Fileset", 45 | "data": "Data", 46 | "metadata": "Metadata", 47 | } 48 | MmlssnapshotExec = mmlssnapshot 49 | ) 50 | 51 | type SnapshotMetric struct { 52 | FS string 53 | Name string 54 | ID string 55 | Status string 56 | Created float64 57 | Fileset string 58 | Data float64 59 | Metadata float64 60 | } 61 | 62 | type MmlssnapshotCollector struct { 63 | Status *prometheus.Desc 64 | Created *prometheus.Desc 65 | Data *prometheus.Desc 66 | Metadata *prometheus.Desc 67 | logger log.Logger 68 | } 69 | 70 | func init() { 71 | registerCollector("mmlssnapshot", false, NewMmlssnapshotCollector) 72 | } 73 | 74 | func NewMmlssnapshotCollector(logger log.Logger) Collector { 75 | labels := []string{"fs", "fileset", "snapshot", "id"} 76 | return &MmlssnapshotCollector{ 77 | Status: prometheus.NewDesc(prometheus.BuildFQName(namespace, "snapshot", "status_info"), 78 | "GPFS snapshot status", append(labels, []string{"status"}...), nil), 79 | Created: prometheus.NewDesc(prometheus.BuildFQName(namespace, "snapshot", "created_timestamp_seconds"), 80 | "GPFS snapshot creation timestamp", labels, nil), 81 | Data: prometheus.NewDesc(prometheus.BuildFQName(namespace, "snapshot", "data_size_bytes"), 82 | "GPFS snapshot data size", labels, nil), 83 | Metadata: prometheus.NewDesc(prometheus.BuildFQName(namespace, "snapshot", "metadata_size_bytes"), 84 | "GPFS snapshot metadata size", labels, nil), 85 | logger: logger, 86 | } 87 | } 88 | 89 
| func (c *MmlssnapshotCollector) Describe(ch chan<- *prometheus.Desc) { 90 | ch <- c.Status 91 | ch <- c.Created 92 | if *snapshotGetSize { 93 | ch <- c.Data 94 | ch <- c.Metadata 95 | } 96 | } 97 | 98 | func (c *MmlssnapshotCollector) Collect(ch chan<- prometheus.Metric) { 99 | wg := &sync.WaitGroup{} 100 | var filesystems []string 101 | if *snapshotFilesystems == "" { 102 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmlsfsTimeout)*time.Second) 103 | defer cancel() 104 | var mmlsfsTimeout float64 105 | var mmlsfsError float64 106 | mmlfsfs_filesystems, err := mmlfsfsFilesystems(ctx, c.logger) 107 | if err == context.DeadlineExceeded { 108 | mmlsfsTimeout = 1 109 | level.Error(c.logger).Log("msg", "Timeout executing mmlsfs") 110 | } else if err != nil { 111 | mmlsfsError = 1 112 | level.Error(c.logger).Log("msg", err) 113 | } 114 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, mmlsfsTimeout, "mmlssnapshot-mmlsfs") 115 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, mmlsfsError, "mmlssnapshot-mmlsfs") 116 | filesystems = mmlfsfs_filesystems 117 | } else { 118 | filesystems = strings.Split(*snapshotFilesystems, ",") 119 | } 120 | for _, fs := range filesystems { 121 | level.Debug(c.logger).Log("msg", "Collecting mmlssnapshot metrics", "fs", fs) 122 | wg.Add(1) 123 | collectTime := time.Now() 124 | go func(fs string) { 125 | defer wg.Done() 126 | label := fmt.Sprintf("mmlssnapshot-%s", fs) 127 | timeout := 0 128 | errorMetric := 0 129 | metrics, err := c.mmlssnapshotCollect(fs) 130 | if err == context.DeadlineExceeded { 131 | level.Error(c.logger).Log("msg", fmt.Sprintf("Timeout executing %s", label)) 132 | timeout = 1 133 | } else if err != nil { 134 | level.Error(c.logger).Log("msg", err, "fs", fs) 135 | errorMetric = 1 136 | } 137 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), label) 138 | ch <- 
prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), label) 139 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), label) 140 | if err != nil { 141 | return 142 | } 143 | for _, m := range metrics { 144 | ch <- prometheus.MustNewConstMetric(c.Status, prometheus.GaugeValue, 1, m.FS, m.Fileset, m.Name, m.ID, m.Status) 145 | ch <- prometheus.MustNewConstMetric(c.Created, prometheus.GaugeValue, m.Created, m.FS, m.Fileset, m.Name, m.ID) 146 | if *snapshotGetSize { 147 | ch <- prometheus.MustNewConstMetric(c.Data, prometheus.GaugeValue, m.Data, m.FS, m.Fileset, m.Name, m.ID) 148 | ch <- prometheus.MustNewConstMetric(c.Metadata, prometheus.GaugeValue, m.Metadata, m.FS, m.Fileset, m.Name, m.ID) 149 | } 150 | } 151 | ch <- prometheus.MustNewConstMetric(lastExecution, prometheus.GaugeValue, float64(time.Now().Unix()), label) 152 | }(fs) 153 | } 154 | wg.Wait() 155 | } 156 | 157 | func (c *MmlssnapshotCollector) mmlssnapshotCollect(fs string) ([]SnapshotMetric, error) { 158 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*snapshotTimeout)*time.Second) 159 | defer cancel() 160 | out, err := MmlssnapshotExec(fs, ctx) 161 | if err != nil { 162 | return nil, err 163 | } 164 | metrics, err := parse_mmlssnapshot(out, c.logger) 165 | return metrics, err 166 | } 167 | 168 | func mmlssnapshot(fs string, ctx context.Context) (string, error) { 169 | args := []string{"/usr/lpp/mmfs/bin/mmlssnapshot", fs, "-s", "all", "-Y"} 170 | if *snapshotGetSize { 171 | args = append(args, "-d") 172 | } 173 | cmd := execCommand(ctx, *sudoCmd, args...) 
174 | var out bytes.Buffer 175 | cmd.Stdout = &out 176 | err := cmd.Run() 177 | if ctx.Err() == context.DeadlineExceeded { 178 | return "", ctx.Err() 179 | } else if err != nil { 180 | return "", err 181 | } 182 | return out.String(), nil 183 | } 184 | 185 | func parse_mmlssnapshot(out string, logger log.Logger) ([]SnapshotMetric, error) { 186 | var metrics []SnapshotMetric 187 | headers := []string{} 188 | lines := strings.Split(out, "\n") 189 | for _, l := range lines { 190 | if !strings.HasPrefix(l, "mmlssnapshot") { 191 | continue 192 | } 193 | items := strings.Split(l, ":") 194 | if len(items) < 3 { 195 | continue 196 | } 197 | var values []string 198 | if items[2] == "HEADER" { 199 | headers = append(headers, items...) 200 | continue 201 | } else { 202 | values = append(values, items...) 203 | } 204 | var metric SnapshotMetric 205 | ps := reflect.ValueOf(&metric) // pointer to struct - addressable 206 | s := ps.Elem() // struct 207 | for i, h := range headers { 208 | if field, ok := snapshotMap[h]; ok { 209 | f := s.FieldByName(field) 210 | if f.Kind() == reflect.String { 211 | f.SetString(values[i]) 212 | } else if f.Kind() == reflect.Float64 { 213 | if h == "created" { 214 | createdStr, err := url.QueryUnescape(values[i]) 215 | if err != nil { 216 | level.Error(logger).Log("msg", "Unable to unescape created time", "value", values[i]) 217 | return nil, err 218 | } 219 | createdTime, err := time.ParseInLocation(time.ANSIC, createdStr, NowLocation()) 220 | if err != nil { 221 | level.Error(logger).Log("msg", "Unable to parse time", "value", createdStr) 222 | return nil, err 223 | } 224 | f.SetFloat(float64(createdTime.Unix())) 225 | continue 226 | } 227 | if val, err := strconv.ParseFloat(values[i], 64); err == nil { 228 | if SliceContains(SnapshotKbToBytes, h) { 229 | val = val * 1024 230 | } 231 | f.SetFloat(val) 232 | } else { 233 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error())) 234 | return nil, err 
235 | } 236 | } 237 | } 238 | } 239 | 240 | metrics = append(metrics, metric) 241 | } 242 | return metrics, nil 243 | } 244 | -------------------------------------------------------------------------------- /collectors/mmpmon.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "reflect" 21 | "strconv" 22 | "strings" 23 | "time" 24 | 25 | "github.com/alecthomas/kingpin/v2" 26 | "github.com/go-kit/log" 27 | "github.com/go-kit/log/level" 28 | "github.com/prometheus/client_golang/prometheus" 29 | ) 30 | 31 | var ( 32 | mmpmonTimeout = kingpin.Flag("collector.mmpmon.timeout", "Timeout for mmpmon execution").Default("5").Int() 33 | mmpmonMap = map[string]string{ 34 | "_fs_": "FS", 35 | "_nn_": "NodeName", 36 | "_br_": "ReadBytes", 37 | "_bw_": "WriteBytes", 38 | "_rdc_": "Reads", 39 | "_wc_": "Writes", 40 | "_oc_": "Opens", 41 | "_cc_": "Closes", 42 | "_dir_": "ReadDir", 43 | "_iu_": "InodeUpdates", 44 | } 45 | MmpmonExec = mmpmon 46 | ) 47 | 48 | type PerfMetrics struct { 49 | FS string 50 | NodeName string 51 | ReadBytes int64 52 | WriteBytes int64 53 | Reads int64 54 | Writes int64 55 | Opens int64 56 | Closes int64 57 | ReadDir int64 58 | InodeUpdates int64 59 | } 60 | 61 | type MmpmonCollector struct { 62 | read_bytes 
*prometheus.Desc
	write_bytes *prometheus.Desc
	operations  *prometheus.Desc
	info        *prometheus.Desc
	logger      log.Logger
}

func init() {
	// Registered enabled by default.
	registerCollector("mmpmon", true, NewMmpmonCollector)
}

// NewMmpmonCollector builds the mmpmon performance collector.
func NewMmpmonCollector(logger log.Logger) Collector {
	return &MmpmonCollector{
		read_bytes: prometheus.NewDesc(prometheus.BuildFQName(namespace, "perf", "read_bytes_total"),
			"GPFS read bytes", []string{"fs"}, nil),
		write_bytes: prometheus.NewDesc(prometheus.BuildFQName(namespace, "perf", "write_bytes_total"),
			"GPFS write bytes", []string{"fs"}, nil),
		// NOTE(review): "operationgs" typo is exposed metric help text and is
		// pinned by mmpmon_test.go; fixing it requires updating the test too.
		operations: prometheus.NewDesc(prometheus.BuildFQName(namespace, "perf", "operations_total"),
			"GPFS operationgs reported by mmpmon", []string{"fs", "operation"}, nil),
		info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "perf", "info"),
			"GPFS client information", []string{"fs", "nodename"}, nil),
		logger: logger,
	}
}

// Describe announces all descriptors this collector emits.
func (c *MmpmonCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- c.read_bytes
	ch <- c.write_bytes
	ch <- c.operations
	ch <- c.info
}

// Collect runs mmpmon and converts its per-filesystem counters into metrics,
// always emitting the collector error/timeout/duration meta-metrics.
func (c *MmpmonCollector) Collect(ch chan<- prometheus.Metric) {
	level.Debug(c.logger).Log("msg", "Collecting mmpmon metrics")
	collectTime := time.Now()
	timeout := 0
	errorMetric := 0
	perfs, err := c.collect()
	if err == context.DeadlineExceeded {
		timeout = 1
		level.Error(c.logger).Log("msg", "Timeout executing mmpmon")
	} else if err != nil {
		level.Error(c.logger).Log("msg", err)
		errorMetric = 1
	}
	for _, perf := range perfs {
		ch <- prometheus.MustNewConstMetric(c.read_bytes, prometheus.CounterValue, float64(perf.ReadBytes), perf.FS)
		ch <- prometheus.MustNewConstMetric(c.write_bytes, prometheus.CounterValue, float64(perf.WriteBytes), perf.FS)
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.Reads), perf.FS, "reads")
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.Writes), perf.FS, "writes")
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.Opens), perf.FS, "opens")
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.Closes), perf.FS, "closes")
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.ReadDir), perf.FS, "read_dir")
		ch <- prometheus.MustNewConstMetric(c.operations, prometheus.CounterValue, float64(perf.InodeUpdates), perf.FS, "inode_updates")
		ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, 1, perf.FS, perf.NodeName)
	}
	ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "mmpmon")
	ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "mmpmon")
	ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "mmpmon")
}

// collect executes mmpmon (bounded by --collector.mmpmon.timeout) and parses
// the output into PerfMetrics.
func (c *MmpmonCollector) collect() ([]PerfMetrics, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*mmpmonTimeout)*time.Second)
	defer cancel()
	mmpmon_out, err := MmpmonExec(ctx)
	if err != nil {
		return nil, err
	}
	perfs := mmpmon_parse(mmpmon_out, c.logger)
	return perfs, nil
}

// mmpmon runs "mmpmon -s -p" feeding the fs_io_s request on stdin and returns
// stdout. context.DeadlineExceeded is returned when ctx expires so callers
// can distinguish timeouts.
func mmpmon(ctx context.Context) (string, error) {
	cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmpmon", "-s", "-p")
	cmd.Stdin = strings.NewReader("fs_io_s\n")
	var out bytes.Buffer
	cmd.Stdout = &out
	err := cmd.Run()
	if ctx.Err() == context.DeadlineExceeded {
		return "", ctx.Err()
	} else if err != nil {
		return "", err
	}
	return out.String(), nil
}

// mmpmon_parse parses mmpmon -p output: each line interleaves "_xx_" header
// tokens with their values; tokens are paired positionally and mapped onto
// PerfMetrics fields via mmpmonMap/reflection.
func mmpmon_parse(out string, logger log.Logger) []PerfMetrics {
	var metrics []PerfMetrics
	lines := strings.Split(out, "\n")
	for _, l := range lines {
		if !strings.HasPrefix(l, "_") {
			continue
		}
		var headers []string
		var values []string
		items := strings.Split(l, " ")
		for _, i := range items[1:] {
			if strings.HasPrefix(i, "_") {
				headers = append(headers, i)
			} else {
				values = append(values, i)
			}
		}
		var perf PerfMetrics
		ps := reflect.ValueOf(&perf) // pointer to struct - addressable
		s := ps.Elem()               // struct
		for i, h := range headers {
			// FIX: a line whose last "_xx_" token has no following value
			// yields len(values) < len(headers); guard before indexing to
			// avoid an index-out-of-range panic on truncated output.
			if i >= len(values) {
				break
			}
			if field, ok := mmpmonMap[h]; ok {
				f := s.FieldByName(field)
				if f.Kind() == reflect.String {
					f.SetString(values[i])
				} else if f.Kind() == reflect.Int64 {
					if val, err := strconv.ParseInt(values[i], 10, 64); err == nil {
						f.SetInt(val)
					} else {
						level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error()))
					}
				}
			}
		}
		metrics = append(metrics, perf)
	}
	return metrics
}
--------------------------------------------------------------------------------
/collectors/mmpmon_test.go:
--------------------------------------------------------------------------------
// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collectors

import (
	"context"
	"fmt"
	"os/exec"
	"strings"
	"testing"
	"time"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

var (
	// Captured real-world mmpmon fs_io_s output for two filesystems
	// (scratch, project); used by the parse and collector tests below.
	mmpmonStdout = `
_fs_io_s_ _n_ 10.22.0.106 _nn_ ib-pitzer-rw02.ten _rc_ 0 _t_ 1579358234 _tu_ 53212 _cl_ gpfs.domain _fs_ scratch _d_ 48 _br_ 205607400434 _bw_ 74839282351 _oc_ 2377656 _cc_ 2201576 _rdc_ 59420404 _wc_ 18874626 _dir_ 40971 _iu_ 544768
_fs_io_s_ _n_ 10.22.0.106 _nn_ ib-pitzer-rw02.ten _rc_ 0 _t_ 1579358234 _tu_ 53212 _cl_ gpfs.domain _fs_ project _d_ 96 _br_ 0 _bw_ 0 _oc_ 513 _cc_ 513 _rdc_ 0 _wc_ 0 _dir_ 0 _iu_ 169
`
)

// TestMmpmon exercises the happy path of the mmpmon exec wrapper using the
// fake exec harness (presumably defined in this package's shared test
// helpers): stdout should be returned verbatim.
func TestMmpmon(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 0
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	out, err := mmpmon(ctx)
	if err != nil {
		t.Errorf("Unexpected error: %s", err.Error())
	}
	if out != mockedStdout {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestMmpmonError verifies a non-zero exit status surfaces as an error with
// empty output.
func TestMmpmonError(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 1
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	out, err := mmpmon(ctx)
	if err == nil {
		t.Errorf("Expected error")
	}
	if out != "" {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestMmpmonTimeout uses an already-expired context (0s timeout) to assert
// the wrapper reports context.DeadlineExceeded rather than the exec error.
func TestMmpmonTimeout(t *testing.T) {
	execCommand = fakeExecCommand
	mockedExitStatus = 1
	mockedStdout = "foo"
	defer func() { execCommand = exec.CommandContext }()
	ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second)
	defer cancel()
	out, err := mmpmon(ctx)
	if err != context.DeadlineExceeded {
		t.Errorf("Expected DeadlineExceeded")
	}
	if out != "" {
		t.Errorf("Unexpected out: %s", out)
	}
}

// TestParsePerf pins the parsed field values for the two-filesystem fixture.
func TestParsePerf(t *testing.T) {
	perfs := mmpmon_parse(mmpmonStdout, log.NewNopLogger())
	if len(perfs) != 2 {
		t.Errorf("Expected 2 perfs returned, got %d", len(perfs))
		return
	}
	if val := perfs[0].FS; val != "scratch" {
		t.Errorf("Unexpected FS got %s", val)
	}
	if val := perfs[1].FS; val != "project" {
		t.Errorf("Unexpected FS got %s", val)
	}
	if val := perfs[0].NodeName; val != "ib-pitzer-rw02.ten" {
		t.Errorf("Unexpected NodeName got %s", val)
	}
	if val := perfs[1].NodeName; val != "ib-pitzer-rw02.ten" {
		t.Errorf("Unexpected NodeName got %s", val)
	}
	if val := perfs[0].ReadBytes; val != 205607400434 {
		t.Errorf("Unexpected ReadBytes got %d", val)
	}
}

// TestMmpmonCollector is an end-to-end collector test: MmpmonExec is stubbed
// with the fixture and the registered metric output is compared exactly
// (note the expected text pins the "operationgs" help-string typo).
func TestMmpmonCollector(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmpmonExec = func(ctx context.Context) (string, error) {
		return mmpmonStdout, nil
	}
	expected := `
		# HELP gpfs_perf_info GPFS client information
		# TYPE gpfs_perf_info gauge
		gpfs_perf_info{fs="project",nodename="ib-pitzer-rw02.ten"} 1
		gpfs_perf_info{fs="scratch",nodename="ib-pitzer-rw02.ten"} 1
		# HELP gpfs_perf_operations_total GPFS operationgs reported by mmpmon
		# TYPE gpfs_perf_operations_total counter
		gpfs_perf_operations_total{fs="project",operation="closes"} 513
		gpfs_perf_operations_total{fs="project",operation="inode_updates"} 169
		gpfs_perf_operations_total{fs="project",operation="opens"} 513
		gpfs_perf_operations_total{fs="project",operation="read_dir"} 0
		gpfs_perf_operations_total{fs="project",operation="reads"} 0
		gpfs_perf_operations_total{fs="project",operation="writes"} 0
		gpfs_perf_operations_total{fs="scratch",operation="closes"} 2201576
		gpfs_perf_operations_total{fs="scratch",operation="inode_updates"} 544768
		gpfs_perf_operations_total{fs="scratch",operation="opens"} 2377656
		gpfs_perf_operations_total{fs="scratch",operation="read_dir"} 40971
		gpfs_perf_operations_total{fs="scratch",operation="reads"} 59420404
		gpfs_perf_operations_total{fs="scratch",operation="writes"} 18874626
		# HELP gpfs_perf_read_bytes_total GPFS read bytes
		# TYPE gpfs_perf_read_bytes_total counter
		gpfs_perf_read_bytes_total{fs="project"} 0
		gpfs_perf_read_bytes_total{fs="scratch"} 2.05607400434e+11
		# HELP gpfs_perf_write_bytes_total GPFS write bytes
		# TYPE gpfs_perf_write_bytes_total counter
		gpfs_perf_write_bytes_total{fs="project"} 0
		gpfs_perf_write_bytes_total{fs="scratch"} 74839282351
	`
	collector := NewMmpmonCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 21 {
		t.Errorf("Unexpected collection count %d, expected 21", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected),
		"gpfs_perf_info",
		"gpfs_perf_read_bytes_total", "gpfs_perf_write_bytes_total", "gpfs_perf_operations_total"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestMMpmonCollectorError checks that an exec failure sets the collect_error
// meta-metric and still emits the 3 meta-metrics.
func TestMMpmonCollectorError(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmpmonExec = func(ctx context.Context) (string, error) {
		return "", fmt.Errorf("Error")
	}
	expected := `
		# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection
		# TYPE gpfs_exporter_collect_error gauge
		gpfs_exporter_collect_error{collector="mmpmon"} 1
	`
	collector := NewMmpmonCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_error"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestMMpmonCollectorTimeout checks that DeadlineExceeded sets the
// collect_timeout meta-metric (and not collect_error).
func TestMMpmonCollectorTimeout(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmpmonExec = func(ctx context.Context) (string, error) {
		return "", context.DeadlineExceeded
	}
	expected := `
		# HELP gpfs_exporter_collect_timeout Indicates the collector timed out
		# TYPE gpfs_exporter_collect_timeout gauge
		gpfs_exporter_collect_timeout{collector="mmpmon"} 1
	`
	collector := NewMmpmonCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_timeout"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
--------------------------------------------------------------------------------
/collectors/mount.go:
--------------------------------------------------------------------------------
// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "fmt" 18 | "strings" 19 | "time" 20 | 21 | "github.com/alecthomas/kingpin/v2" 22 | linuxproc "github.com/c9s/goprocinfo/linux" 23 | fstab "github.com/deniswernert/go-fstab" 24 | "github.com/go-kit/log" 25 | "github.com/go-kit/log/level" 26 | "github.com/prometheus/client_golang/prometheus" 27 | ) 28 | 29 | var ( 30 | procMounts = "/proc/mounts" 31 | fstabPath = "/etc/fstab" 32 | configMounts = kingpin.Flag("collector.mount.mounts", "Mountpoints to monitor, comma separated. Defaults to all filesystems.").Default("").String() 33 | mountTimeout = kingpin.Flag("collector.mount.timeout", "Timeout for mount collection").Default("5").Int() 34 | ) 35 | 36 | type MountCollector struct { 37 | fs_mount_status *prometheus.Desc 38 | logger log.Logger 39 | } 40 | 41 | func init() { 42 | registerCollector("mount", true, NewMountCollector) 43 | } 44 | 45 | func NewMountCollector(logger log.Logger) Collector { 46 | return &MountCollector{ 47 | fs_mount_status: prometheus.NewDesc(prometheus.BuildFQName(namespace, "mount", "status"), 48 | "Status of GPFS filesystems, 1=mounted 0=not mounted", []string{"mount"}, nil), 49 | logger: logger, 50 | } 51 | } 52 | 53 | func (c *MountCollector) Describe(ch chan<- *prometheus.Desc) { 54 | ch <- c.fs_mount_status 55 | } 56 | 57 | func (c *MountCollector) Collect(ch chan<- prometheus.Metric) { 58 | level.Debug(c.logger).Log("msg", "Collecting mount metrics") 59 | err := c.collect(ch) 60 | if err != nil { 61 | level.Error(c.logger).Log("msg", err) 
62 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, 1, "mount") 63 | } else { 64 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, 0, "mount") 65 | } 66 | } 67 | 68 | func (c *MountCollector) collect(ch chan<- prometheus.Metric) error { 69 | collectTime := time.Now() 70 | var gpfsMounts []string 71 | var gpfsMountsFstab []string 72 | var err error 73 | 74 | c1 := make(chan int, 1) 75 | timeout := false 76 | 77 | go func() { 78 | gpfsMounts, err = getGPFSMounts() 79 | if err != nil { 80 | return 81 | } 82 | gpfsMountsFstab, err = getGPFSMountsFSTab() 83 | if err != nil { 84 | return 85 | } 86 | if !timeout { 87 | c1 <- 1 88 | } 89 | }() 90 | 91 | select { 92 | case <-c1: 93 | case <-time.After(time.Duration(*mountTimeout) * time.Second): 94 | timeout = true 95 | close(c1) 96 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, 1, "mount") 97 | level.Error(c.logger).Log("msg", "Timeout collecting mount information") 98 | return nil 99 | } 100 | close(c1) 101 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, 0, "mount") 102 | 103 | if err != nil { 104 | level.Error(c.logger).Log("msg", err) 105 | return err 106 | } 107 | 108 | var gpfsFoundMounts []string 109 | for _, m := range gpfsMountsFstab { 110 | if !SliceContains(gpfsFoundMounts, m) { 111 | gpfsFoundMounts = append(gpfsFoundMounts, m) 112 | } 113 | } 114 | var checkMounts []string 115 | if *configMounts == "" { 116 | checkMounts = gpfsFoundMounts 117 | } else { 118 | checkMounts = strings.Split(*configMounts, ",") 119 | } 120 | for _, mount := range checkMounts { 121 | if SliceContains(gpfsMounts, mount) { 122 | ch <- prometheus.MustNewConstMetric(c.fs_mount_status, prometheus.GaugeValue, 1, mount) 123 | } else { 124 | ch <- prometheus.MustNewConstMetric(c.fs_mount_status, prometheus.GaugeValue, 0, mount) 125 | } 126 | } 127 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, 
time.Since(collectTime).Seconds(), "mount") 128 | return nil 129 | } 130 | 131 | func getGPFSMounts() ([]string, error) { 132 | var gpfsMounts []string 133 | mounts, err := linuxproc.ReadMounts(procMounts) 134 | if err != nil { 135 | return nil, err 136 | } 137 | for _, mount := range mounts.Mounts { 138 | if mount.FSType != "gpfs" { 139 | continue 140 | } 141 | gpfsMount := mount.MountPoint 142 | gpfsMounts = append(gpfsMounts, gpfsMount) 143 | } 144 | return gpfsMounts, err 145 | } 146 | 147 | func getGPFSMountsFSTab() ([]string, error) { 148 | var gpfsMounts []string 149 | if exists := FileExists(fstabPath); !exists { 150 | return nil, fmt.Errorf("%s does not exist", fstabPath) 151 | } 152 | mounts, err := fstab.ParseFile(fstabPath) 153 | if err != nil { 154 | return nil, err 155 | } 156 | for _, m := range mounts { 157 | if m.VfsType != "gpfs" { 158 | continue 159 | } 160 | gpfsMounts = append(gpfsMounts, m.File) 161 | } 162 | return gpfsMounts, nil 163 | } 164 | -------------------------------------------------------------------------------- /collectors/mount_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 

package collectors

import (
	"os"
	"strings"
	"testing"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// TestGetGPFSMounts points procMounts at a fixture file and checks that only
// the two gpfs-type entries are returned, in file order.
func TestGetGPFSMounts(t *testing.T) {
	tmpDir, err := os.MkdirTemp(os.TempDir(), "proc")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	procMounts = tmpDir + "/mounts"
	mockedProcMounts := `root.domain:/root_rhel76_1 / nfs rw,relatime,vers=3,rsize=65536,wsize=65536,namlen=255,acregmin=240,acregmax=240,acdirmin=240,acdirmax=240,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=10.27.2.2,mountvers=3,mountport=635,mountproto=tcp,fsc,local_lock=all,addr=10.27.2.2 0 0
/dev/mapper/vg0-lv_tmp /tmp xfs rw,relatime,attr2,inode64,noquota 0 0
scratch /fs/scratch gpfs rw,relatime 0 0
project /fs/project gpfs rw,relatime 0 0
10.11.200.17:/PZS0710 /users/PZS0710 nfs4 rw,relatime,vers=4.0,rsize=65536,wsize=65536,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.4.0.102,local_lock=none,addr=10.11.200.17 0 0
`
	if err := os.WriteFile(procMounts, []byte(mockedProcMounts), 0644); err != nil {
		t.Fatal(err)
	}
	gpfsMounts, err := getGPFSMounts()
	if err != nil {
		t.Errorf("Unexpected error: %s", err.Error())
	}
	if len(gpfsMounts) != 2 {
		t.Errorf("Incorrect number of GPFS mounts, expected 2, got %d", len(gpfsMounts))
		return
	}
	if val := gpfsMounts[0]; val != "/fs/scratch" {
		t.Errorf("Unexpected Path value %s", val)
	}
	if val := gpfsMounts[1]; val != "/fs/project" {
		t.Errorf("Unexpected Path value %s", val)
	}
}

// TestGetGPFSMountsFSTab points fstabPath at a fixture file and checks that
// only the gpfs-type fstab entries are returned.
func TestGetGPFSMountsFSTab(t *testing.T) {
	tmpDir, err := os.MkdirTemp(os.TempDir(), "proc")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	fstabPath = tmpDir + "/fstab"
	mockedFstab := `
LABEL=tmp /tmp xfs defaults 1 2
project /fs/project gpfs rw,mtime,atime,quota=userquota;groupquota;filesetquota;perfileset,dev=project,noauto 0 0
scratch /fs/scratch gpfs rw,mtime,atime,quota=userquota;groupquota;filesetquota;perfileset,dev=scratch,noauto 0 0
`
	if err := os.WriteFile(fstabPath, []byte(mockedFstab), 0644); err != nil {
		t.Fatal(err)
	}
	gpfsMounts, err := getGPFSMountsFSTab()
	if err != nil {
		t.Errorf("Unexpected error: %s", err.Error())
		return
	}
	if len(gpfsMounts) != 2 {
		t.Errorf("Incorrect number fo GPFS mounts, expected 2, got %d", len(gpfsMounts))
	}
	if val := gpfsMounts[0]; val != "/fs/project" {
		t.Errorf("Unexpected value %s", val)
	}
	if val := gpfsMounts[1]; val != "/fs/scratch" {
		t.Errorf("Unexpected value %s", val)
	}
}

// TestMountCollector is an end-to-end collector test: three mountpoints are
// configured, /proc/mounts shows two of them mounted, so /fs/ess must report
// 0 and the others 1. The expected count of 6 covers the 3 status metrics
// plus the collector meta-metrics.
func TestMountCollector(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	mounts := "/fs/project,/fs/scratch,/fs/ess"
	configMounts = &mounts
	tmpDir, err := os.MkdirTemp(os.TempDir(), "proc")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	procMounts = tmpDir + "/mounts"
	fstabPath = tmpDir + "/fstab"
	mockedProcMounts := `root.domain:/root_rhel76_1 / nfs rw,relatime,vers=3,rsize=65536,wsize=65536,namlen=255,acregmin=240,acregmax=240,acdirmin=240,acdirmax=240,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=10.27.2.2,mountvers=3,mountport=635,mountproto=tcp,fsc,local_lock=all,addr=10.27.2.2 0 0
/dev/mapper/vg0-lv_tmp /tmp xfs rw,relatime,attr2,inode64,noquota 0 0
scratch /fs/scratch gpfs rw,relatime 0 0
project /fs/project gpfs rw,relatime 0 0
10.11.200.17:/PZS0710 /users/PZS0710 nfs4 rw,relatime,vers=4.0,rsize=65536,wsize=65536,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.4.0.102,local_lock=none,addr=10.11.200.17 0 0
`
	mockedFstab := `
project /fs/project gpfs rw,mtime,atime,quota=userquota;groupquota;filesetquota;perfileset,dev=project,noauto 0 0
scratch /fs/scratch gpfs rw,mtime,atime,quota=userquota;groupquota;filesetquota;perfileset,dev=scratch,noauto 0 0
ess /fs/ess gpfs rw,mtime,relatime,dev=ess.domain:ess,ldev=ess,noauto 0 0
`
	if err := os.WriteFile(procMounts, []byte(mockedProcMounts), 0644); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(fstabPath, []byte(mockedFstab), 0644); err != nil {
		t.Fatal(err)
	}
	metadata := `
		# HELP gpfs_mount_status Status of GPFS filesystems, 1=mounted 0=not mounted
		# TYPE gpfs_mount_status gauge`
	expected := `
		gpfs_mount_status{mount="/fs/ess"} 0
		gpfs_mount_status{mount="/fs/project"} 1
		gpfs_mount_status{mount="/fs/scratch"} 1
	`
	collector := NewMountCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 6 {
		t.Errorf("Unexpected collection count %d, expected 6", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(metadata+expected), "gpfs_mount_status"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
--------------------------------------------------------------------------------
/collectors/verbs.go:
--------------------------------------------------------------------------------
// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "strings" 20 | "time" 21 | 22 | "github.com/alecthomas/kingpin/v2" 23 | "github.com/go-kit/log" 24 | "github.com/go-kit/log/level" 25 | "github.com/prometheus/client_golang/prometheus" 26 | ) 27 | 28 | var ( 29 | verbsTimeout = kingpin.Flag("collector.verbs.timeout", "Timeout for collecting verbs information").Default("5").Int() 30 | verbsExec = verbs 31 | ) 32 | 33 | type VerbsMetrics struct { 34 | Status string 35 | } 36 | 37 | type VerbsCollector struct { 38 | Status *prometheus.Desc 39 | logger log.Logger 40 | } 41 | 42 | func init() { 43 | registerCollector("verbs", false, NewVerbsCollector) 44 | } 45 | 46 | func NewVerbsCollector(logger log.Logger) Collector { 47 | return &VerbsCollector{ 48 | Status: prometheus.NewDesc(prometheus.BuildFQName(namespace, "verbs", "status"), 49 | "GPFS verbs status, 1=started 0=not started", nil, nil), 50 | logger: logger, 51 | } 52 | } 53 | 54 | func (c *VerbsCollector) Describe(ch chan<- *prometheus.Desc) { 55 | ch <- c.Status 56 | } 57 | 58 | func (c *VerbsCollector) Collect(ch chan<- prometheus.Metric) { 59 | level.Debug(c.logger).Log("msg", "Collecting verbs metrics") 60 | collectTime := time.Now() 61 | timeout := 0 62 | errorMetric := 0 63 | metric, err := c.collect() 64 | if err == context.DeadlineExceeded { 65 | timeout = 1 66 | level.Error(c.logger).Log("msg", "Timeout executing verbs check") 67 | } else if err != nil { 68 | level.Error(c.logger).Log("msg", err) 69 | errorMetric = 1 70 | } 71 
| if metric.Status == "started" { 72 | ch <- prometheus.MustNewConstMetric(c.Status, prometheus.GaugeValue, 1) 73 | } else if err == nil { 74 | ch <- prometheus.MustNewConstMetric(c.Status, prometheus.GaugeValue, 0) 75 | } 76 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "verbs") 77 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "verbs") 78 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "verbs") 79 | } 80 | 81 | func (c *VerbsCollector) collect() (VerbsMetrics, error) { 82 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*verbsTimeout)*time.Second) 83 | defer cancel() 84 | out, err := verbsExec(ctx) 85 | if err != nil { 86 | return VerbsMetrics{}, err 87 | } 88 | metric := verbs_parse(out) 89 | return metric, nil 90 | } 91 | 92 | func verbs(ctx context.Context) (string, error) { 93 | cmd := execCommand(ctx, *sudoCmd, "/usr/lpp/mmfs/bin/mmfsadm", "test", "verbs", "status") 94 | var out bytes.Buffer 95 | cmd.Stdout = &out 96 | err := cmd.Run() 97 | if ctx.Err() == context.DeadlineExceeded { 98 | return "", ctx.Err() 99 | } else if err != nil { 100 | return "", err 101 | } 102 | return out.String(), nil 103 | } 104 | 105 | func verbs_parse(out string) VerbsMetrics { 106 | metric := VerbsMetrics{} 107 | lines := strings.Split(out, "\n") 108 | for _, l := range lines { 109 | if !strings.HasPrefix(l, "VERBS") { 110 | continue 111 | } 112 | items := strings.Split(l, ": ") 113 | if len(items) == 2 { 114 | metric.Status = strings.TrimSuffix(items[1], "\n") 115 | break 116 | } 117 | } 118 | return metric 119 | } 120 | -------------------------------------------------------------------------------- /collectors/verbs_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 
2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os/exec" 20 | "strings" 21 | "testing" 22 | "time" 23 | 24 | "github.com/alecthomas/kingpin/v2" 25 | "github.com/go-kit/log" 26 | "github.com/prometheus/client_golang/prometheus/testutil" 27 | ) 28 | 29 | var ( 30 | verbsStdout = ` 31 | VERBS RDMA status: started 32 | ` 33 | ) 34 | 35 | func TestVerbs(t *testing.T) { 36 | execCommand = fakeExecCommand 37 | mockedExitStatus = 0 38 | mockedStdout = "foo" 39 | defer func() { execCommand = exec.CommandContext }() 40 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 41 | defer cancel() 42 | out, err := verbs(ctx) 43 | if err != nil { 44 | t.Errorf("Unexpected error: %s", err.Error()) 45 | } 46 | if out != mockedStdout { 47 | t.Errorf("Unexpected out: %s", out) 48 | } 49 | } 50 | 51 | func TestVerbsError(t *testing.T) { 52 | execCommand = fakeExecCommand 53 | mockedExitStatus = 1 54 | mockedStdout = "foo" 55 | defer func() { execCommand = exec.CommandContext }() 56 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 57 | defer cancel() 58 | out, err := verbs(ctx) 59 | if err == nil { 60 | t.Errorf("Expected error") 61 | } 62 | if out != "" { 63 | t.Errorf("Unexpected out: %s", out) 64 | } 65 | } 66 | 67 | func TestVerbsTimeout(t *testing.T) { 68 | execCommand = fakeExecCommand 69 | mockedExitStatus = 1 70 | mockedStdout = "foo" 71 | defer func() { execCommand 
= exec.CommandContext }() 72 | ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second) 73 | defer cancel() 74 | out, err := verbs(ctx) 75 | if err != context.DeadlineExceeded { 76 | t.Errorf("Expected DeadlineExceeded") 77 | } 78 | if out != "" { 79 | t.Errorf("Unexpected out: %s", out) 80 | } 81 | } 82 | 83 | func TestParseVerbsDisabled(t *testing.T) { 84 | stdout := ` 85 | VERBS RDMA status: disabled 86 | ` 87 | metric := verbs_parse(stdout) 88 | if metric.Status != "disabled" { 89 | t.Errorf("Unexpected value for status, expected disabled, got %s", metric.Status) 90 | } 91 | } 92 | 93 | func TestParseVerbsStarted(t *testing.T) { 94 | metric := verbs_parse(verbsStdout) 95 | if metric.Status != "started" { 96 | t.Errorf("Unexpected value for status, expected started, got %s", metric.Status) 97 | } 98 | } 99 | 100 | func TestVerbsCollector(t *testing.T) { 101 | if _, err := kingpin.CommandLine.Parse([]string{}); err != nil { 102 | t.Fatal(err) 103 | } 104 | verbsExec = func(ctx context.Context) (string, error) { 105 | return verbsStdout, nil 106 | } 107 | expected := ` 108 | # HELP gpfs_verbs_status GPFS verbs status, 1=started 0=not started 109 | # TYPE gpfs_verbs_status gauge 110 | gpfs_verbs_status 1 111 | ` 112 | collector := NewVerbsCollector(log.NewNopLogger()) 113 | gatherers := setupGatherer(collector) 114 | if val, err := testutil.GatherAndCount(gatherers); err != nil { 115 | t.Errorf("Unexpected error: %v", err) 116 | } else if val != 4 { 117 | t.Errorf("Unexpected collection count %d, expected 4", val) 118 | } 119 | if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_verbs_status"); err != nil { 120 | t.Errorf("unexpected collecting result:\n%s", err) 121 | } 122 | } 123 | 124 | func TestVerbsCollectorError(t *testing.T) { 125 | if _, err := kingpin.CommandLine.Parse([]string{}); err != nil { 126 | t.Fatal(err) 127 | } 128 | verbsExec = func(ctx context.Context) (string, error) { 129 | return "", 
fmt.Errorf("Error") 130 | } 131 | expected := ` 132 | # HELP gpfs_exporter_collect_error Indicates if error has occurred during collection 133 | # TYPE gpfs_exporter_collect_error gauge 134 | gpfs_exporter_collect_error{collector="verbs"} 1 135 | ` 136 | collector := NewVerbsCollector(log.NewNopLogger()) 137 | gatherers := setupGatherer(collector) 138 | if val, err := testutil.GatherAndCount(gatherers); err != nil { 139 | t.Errorf("Unexpected error: %v", err) 140 | } else if val != 3 { 141 | t.Errorf("Unexpected collection count %d, expected 3", val) 142 | } 143 | if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_error"); err != nil { 144 | t.Errorf("unexpected collecting result:\n%s", err) 145 | } 146 | } 147 | 148 | func TestVerbsCollectorTimeout(t *testing.T) { 149 | if _, err := kingpin.CommandLine.Parse([]string{}); err != nil { 150 | t.Fatal(err) 151 | } 152 | verbsExec = func(ctx context.Context) (string, error) { 153 | return "", context.DeadlineExceeded 154 | } 155 | expected := ` 156 | # HELP gpfs_exporter_collect_timeout Indicates the collector timed out 157 | # TYPE gpfs_exporter_collect_timeout gauge 158 | gpfs_exporter_collect_timeout{collector="verbs"} 1 159 | ` 160 | collector := NewVerbsCollector(log.NewNopLogger()) 161 | gatherers := setupGatherer(collector) 162 | if val, err := testutil.GatherAndCount(gatherers); err != nil { 163 | t.Errorf("Unexpected error: %v", err) 164 | } else if val != 3 { 165 | t.Errorf("Unexpected collection count %d, expected 3", val) 166 | } 167 | if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_timeout"); err != nil { 168 | t.Errorf("unexpected collecting result:\n%s", err) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /collectors/waiter.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // 
Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package collectors 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "math" 20 | "reflect" 21 | "regexp" 22 | "strconv" 23 | "strings" 24 | "time" 25 | 26 | "github.com/alecthomas/kingpin/v2" 27 | "github.com/go-kit/log" 28 | "github.com/go-kit/log/level" 29 | "github.com/prometheus/client_golang/prometheus" 30 | ) 31 | 32 | var ( 33 | defWaiterExclude = "(EventsExporterSenderThread|Fsck)" 34 | defWaiterBuckets = "1s,5s,15s,1m,5m,60m" 35 | waiterExclude = kingpin.Flag("collector.waiter.exclude", "Pattern to exclude for waiters").Default(defWaiterExclude).String() 36 | waiterBuckets = DurationBuckets(kingpin.Flag("collector.waiter.buckets", "Buckets for waiter metrics").Default(defWaiterBuckets)) 37 | waiterTimeout = kingpin.Flag("collector.waiter.timeout", "Timeout for mmdiag execution").Default("5").Int() 38 | waiterLogReason = kingpin.Flag("collector.waiter.log-reason", "Log the waiter reason").Default("false").Bool() 39 | waiterMap = map[string]string{ 40 | "threadName": "Name", 41 | "waitTime": "Seconds", 42 | "auxReason": "Reason", 43 | } 44 | ) 45 | 46 | type WaiterMetric struct { 47 | seconds []float64 48 | infoCounts map[string]float64 49 | } 50 | 51 | type Waiter struct { 52 | Name string 53 | Reason string 54 | Seconds float64 55 | } 56 | 57 | type WaiterCollector struct { 58 | Waiter prometheus.Histogram 59 | WaiterInfo *prometheus.Desc 60 | logger log.Logger 61 | 
} 62 | 63 | func init() { 64 | registerCollector("waiter", false, NewWaiterCollector) 65 | } 66 | 67 | func NewWaiterCollector(logger log.Logger) Collector { 68 | return &WaiterCollector{ 69 | Waiter: prometheus.NewHistogram(prometheus.HistogramOpts{ 70 | Namespace: namespace, 71 | Subsystem: "waiter", 72 | Name: "seconds", 73 | Help: "GPFS waiter in seconds", 74 | Buckets: *waiterBuckets, 75 | }), 76 | WaiterInfo: prometheus.NewDesc(prometheus.BuildFQName(namespace, "waiter", "info_count"), 77 | "GPFS waiter info", []string{"waiter"}, nil), 78 | logger: logger, 79 | } 80 | } 81 | 82 | func (c *WaiterCollector) Describe(ch chan<- *prometheus.Desc) { 83 | ch <- c.Waiter.Desc() 84 | ch <- c.WaiterInfo 85 | } 86 | 87 | func (c *WaiterCollector) Collect(ch chan<- prometheus.Metric) { 88 | level.Debug(c.logger).Log("msg", "Collecting waiter metrics") 89 | collectTime := time.Now() 90 | timeout := 0 91 | errorMetric := 0 92 | waiterMetric, err := c.collect() 93 | if err == context.DeadlineExceeded { 94 | level.Error(c.logger).Log("msg", "Timeout executing mmdiag") 95 | timeout = 1 96 | } else if err != nil { 97 | level.Error(c.logger).Log("msg", err) 98 | errorMetric = 1 99 | } 100 | for _, second := range waiterMetric.seconds { 101 | c.Waiter.Observe(second) 102 | } 103 | if err == nil { 104 | ch <- c.Waiter 105 | } 106 | for waiter, count := range waiterMetric.infoCounts { 107 | ch <- prometheus.MustNewConstMetric(c.WaiterInfo, prometheus.GaugeValue, count, waiter) 108 | } 109 | ch <- prometheus.MustNewConstMetric(collectError, prometheus.GaugeValue, float64(errorMetric), "waiter") 110 | ch <- prometheus.MustNewConstMetric(collecTimeout, prometheus.GaugeValue, float64(timeout), "waiter") 111 | ch <- prometheus.MustNewConstMetric(collectDuration, prometheus.GaugeValue, time.Since(collectTime).Seconds(), "waiter") 112 | } 113 | 114 | func (c *WaiterCollector) collect() (WaiterMetric, error) { 115 | var waiterMetric WaiterMetric 116 | ctx, cancel := 
context.WithTimeout(context.Background(), time.Duration(*waiterTimeout)*time.Second) 117 | defer cancel() 118 | out, err := MmdiagExec("--waiters", ctx) 119 | if err != nil { 120 | return waiterMetric, err 121 | } 122 | waiters := parse_mmdiag_waiters(out, c.logger) 123 | seconds := []float64{} 124 | infoCounts := make(map[string]float64) 125 | for _, waiter := range waiters { 126 | if !math.IsNaN(waiter.Seconds) { 127 | seconds = append(seconds, waiter.Seconds) 128 | } 129 | if waiter.Name == "" && waiter.Reason == "" { 130 | continue 131 | } 132 | if *waiterLogReason { 133 | level.Info(c.logger).Log("msg", "Waiter reason information", "waiter", waiter.Name, "reason", waiter.Reason, "seconds", waiter.Seconds) 134 | } 135 | infoCounts[waiter.Name] += 1 136 | } 137 | waiterMetric.seconds = seconds 138 | waiterMetric.infoCounts = infoCounts 139 | return waiterMetric, nil 140 | } 141 | 142 | func parse_mmdiag_waiters(out string, logger log.Logger) []Waiter { 143 | waiters := []Waiter{} 144 | lines := strings.Split(out, "\n") 145 | var headers []string 146 | excludePattern := regexp.MustCompile(*waiterExclude) 147 | for _, l := range lines { 148 | if !strings.HasPrefix(l, "mmdiag") { 149 | continue 150 | } 151 | items := strings.Split(l, ":") 152 | if len(items) < 3 { 153 | continue 154 | } 155 | if items[1] != "waiters" { 156 | continue 157 | } 158 | var values []string 159 | if items[2] == "HEADER" { 160 | headers = append(headers, items...) 161 | continue 162 | } else { 163 | values = append(values, items...) 
164 | } 165 | var metric Waiter 166 | ps := reflect.ValueOf(&metric) // pointer to struct - addressable 167 | s := ps.Elem() // struct 168 | for i, h := range headers { 169 | if field, ok := waiterMap[h]; ok { 170 | f := s.FieldByName(field) 171 | if f.Kind() == reflect.String { 172 | f.SetString(values[i]) 173 | } else if f.Kind() == reflect.Float64 { 174 | if val, err := strconv.ParseFloat(values[i], 64); err == nil { 175 | f.SetFloat(val) 176 | } else { 177 | level.Error(logger).Log("msg", fmt.Sprintf("Error parsing %s value %s: %s", h, values[i], err.Error())) 178 | f.SetFloat(math.NaN()) 179 | } 180 | } 181 | } 182 | } 183 | if excludePattern.MatchString(metric.Name) { 184 | level.Debug(logger).Log("msg", "Skipping waiter due to ignored pattern", "name", metric.Name) 185 | continue 186 | } 187 | waiters = append(waiters, metric) 188 | } 189 | return waiters 190 | } 191 | -------------------------------------------------------------------------------- /collectors/waiter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Trey Dockendorf 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 

package collectors

import (
	"context"
	"fmt"
	"os"
	"strings"
	"testing"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

var (
	// waitersStdout is captured mmdiag --waiters machine-readable output.
	// It includes excluded names (FsckClientReaperThread,
	// EventsExporterSenderThread), an unparseable waitTime ("foo"), and
	// malformed trailing lines to exercise the parser's skip paths.
	waitersStdout = `
mmdiag:waiters:HEADER:version:reserved:reserved:threadId:threadAddr:threadName:waitStartTime:waitTime:isMonitored:condVarAddr:condVarName:condVarReason:mutexAddr:mutexName:auxReason:delayTime:delayReason:
mmdiag:waiters:0:1:::101445:00000000F57FC500:FsckClientReaperThread:2021-09-23_15%3A31%3A33-0400:6861.7395:monitored::::::reason 'Waiting to reap fsck pointer:::
mmdiag:waiters:0:1:::101445:00000000F57FC500:EventsExporterSenderThread:2021-09-23_15%3A31%3A33-0400:44.3:monitored::::::for poll on sock 1379:::
mmdiag:waiters:0:1:::101445:00000000F57FC500:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:64.3:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101445:00000000F57FC500:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:44.3:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101445:00000000F57FC500:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:44.3:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101445:00000000F57FC500:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:0.3897:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::137940:000000001401AE50:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:0.2919:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101392:00000000F57EF6C0:RebuildWorkThread:2021-09-23_15%3A31%3A33-0400:0.2234:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::127780:00000000ED808950:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1872:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::61817:000000001C02CA80:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1592:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::47037:0000000088029CD0:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1491:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::64102:0000000020097320:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1428:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::47035:0000000088029490:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1336:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::128451:0000000064004E20:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.1053:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::131854:000000006C00B3E0:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0918:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::61815:000000001C02C240:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0890:monitored:00003FF9FFD6D8C0:VdiskPGDrainCondvar:waiting for PG drain::::::
mmdiag:waiters:0:1:::146753:00000000AC02DB90:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0696:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::47032:0000000088028830:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0547:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::128454:0000000040001C80:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0433:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101497:00000000F5808F20:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0348:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::131849:000000006001EC80:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0313:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::101482:00000000F5805980:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0298:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::47532:0000000088AFE710:NSDThread:2021-09-23_15%3A31%3A34-0400:0.0244:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::64149:00000000200A3500:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0196:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::48622:0000000088C16F10:NSDThread:2021-09-23_15%3A31%3A34-0400:0.0134:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::127779:000000003BFFFD40:RebuildWorkThread:2021-09-23_15%3A31%3A34-0400:0.0081:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::48081:0000000088B8BFB0:NSDThread:2021-09-23_15%3A31%3A34-0400:0.0037:monitored::::::for I/O completion:::
mmdiag:waiters:0:1:::48081:0000000088B8BFB0::2021-09-23_15%3A31%3A34-0400:foo:monitored:::::::::
foobar
mmdiag:waiters
mmdiag:foobar:0:1
`
)

// TestParseMmdiagWaiters verifies that the fixture parses to 26 waiters
// (default exclude pattern drops the Fsck and EventsExporter rows) and that
// the first kept waiter's fields are populated correctly.
func TestParseMmdiagWaiters(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	w := log.NewSyncWriter(os.Stderr)
	logger := log.NewLogfmtLogger(w)
	waiters := parse_mmdiag_waiters(waitersStdout, logger)
	if val := len(waiters); val != 26 {
		t.Errorf("Unexpected Waiters len got %v", val)
		return
	}
	if val := waiters[0].Name; val != "RebuildWorkThread" {
		t.Errorf("Unexpected name for waiter, got %s", val)
	}
	if val := waiters[0].Reason; val != "for I/O completion" {
		t.Errorf("Unexpected reason for waiter, got %s", val)
	}
	if val := waiters[0].Seconds; val != 64.3 {
		t.Errorf("Unexpected seconds for waiter, got %f", val)
	}
}

// TestWaiterCollector verifies histogram buckets/sum/count and per-waiter
// info counts against the fixture, with --collector.waiter.log-reason on.
// Two collector instances are used so the count check does not mutate the
// histogram compared by the second check.
func TestWaiterCollector(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{"--collector.waiter.log-reason"}); err != nil {
		t.Fatal(err)
	}
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return waitersStdout, nil
	}
	expected := `
# HELP gpfs_waiter_seconds GPFS waiter in seconds
# TYPE gpfs_waiter_seconds histogram
gpfs_waiter_seconds_bucket{le="1"} 22
gpfs_waiter_seconds_bucket{le="5"} 22
gpfs_waiter_seconds_bucket{le="15"} 22
gpfs_waiter_seconds_bucket{le="60"} 24
gpfs_waiter_seconds_bucket{le="300"} 25
gpfs_waiter_seconds_bucket{le="3600"} 25
gpfs_waiter_seconds_bucket{le="+Inf"} 25
gpfs_waiter_seconds_sum 155.19569999999996
gpfs_waiter_seconds_count 25
# HELP gpfs_waiter_info_count GPFS waiter info
# TYPE gpfs_waiter_info_count gauge
gpfs_waiter_info_count{waiter="NSDThread"} 3
gpfs_waiter_info_count{waiter="RebuildWorkThread"} 22
`
	w := log.NewSyncWriter(os.Stderr)
	logger := log.NewLogfmtLogger(w)
	collector1 := NewWaiterCollector(logger)
	collector2 := NewWaiterCollector(logger)
	gatherers1 := setupGatherer(collector1)
	gatherers2 := setupGatherer(collector2)
	if val, err := testutil.GatherAndCount(gatherers1); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 6 {
		t.Errorf("Unexpected collection count %d, expected 6", val)
	}
	if err := testutil.GatherAndCompare(gatherers2, strings.NewReader(expected),
		"gpfs_waiter_seconds", "gpfs_waiter_info_count"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestWaiterCollectorError verifies an mmdiag failure sets
// gpfs_exporter_collect_error and yields only the 3 bookkeeping metrics.
func TestWaiterCollectorError(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return "", fmt.Errorf("Error")
	}
	expected := `
# HELP gpfs_exporter_collect_error Indicates if error has occurred during collection
# TYPE gpfs_exporter_collect_error gauge
gpfs_exporter_collect_error{collector="waiter"} 1
`
	collector := NewWaiterCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_error"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}

// TestWaiterCollectorTimeout verifies DeadlineExceeded sets
// gpfs_exporter_collect_timeout and yields only the 3 bookkeeping metrics.
func TestWaiterCollectorTimeout(t *testing.T) {
	if _, err := kingpin.CommandLine.Parse([]string{}); err != nil {
		t.Fatal(err)
	}
	MmdiagExec = func(arg string, ctx context.Context) (string, error) {
		return "", context.DeadlineExceeded
	}
	expected := `
# HELP gpfs_exporter_collect_timeout Indicates the collector timed out
# TYPE gpfs_exporter_collect_timeout gauge
gpfs_exporter_collect_timeout{collector="waiter"} 1
`
	collector := NewWaiterCollector(log.NewNopLogger())
	gatherers := setupGatherer(collector)
	if val, err := testutil.GatherAndCount(gatherers); err != nil {
		t.Errorf("Unexpected error: %v", err)
	} else if val != 3 {
		t.Errorf("Unexpected collection count %d, expected 3", val)
	}
	if err := testutil.GatherAndCompare(gatherers, strings.NewReader(expected), "gpfs_exporter_collect_timeout"); err != nil {
		t.Errorf("unexpected collecting result:\n%s", err)
	}
}
module github.com/treydock/gpfs_exporter

go 1.20

require (
	github.com/alecthomas/kingpin/v2 v2.3.2
	github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8
	github.com/deniswernert/go-fstab v0.0.0-20141204152952-eb4090f26517
	github.com/go-kit/log v0.2.1
	github.com/gofrs/flock v0.8.1
	github.com/prometheus/client_golang v1.15.0
	github.com/prometheus/client_model v0.3.0
	github.com/prometheus/common v0.42.0
	github.com/prometheus/exporter-toolkit v0.10.0
)

require (
	github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect 21 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 22 | github.com/davecgh/go-spew v1.1.1 // indirect 23 | github.com/go-logfmt/logfmt v0.6.0 // indirect 24 | github.com/golang/protobuf v1.5.3 // indirect 25 | github.com/jpillora/backoff v1.0.0 // indirect 26 | github.com/kr/text v0.2.0 // indirect 27 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 28 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 29 | github.com/prometheus/procfs v0.9.0 // indirect 30 | github.com/rogpeppe/go-internal v1.10.0 // indirect 31 | github.com/xhit/go-str2duration/v2 v2.1.0 // indirect 32 | golang.org/x/crypto v0.8.0 // indirect 33 | golang.org/x/net v0.9.0 // indirect 34 | golang.org/x/oauth2 v0.6.0 // indirect 35 | golang.org/x/sync v0.1.0 // indirect 36 | golang.org/x/sys v0.7.0 // indirect 37 | golang.org/x/text v0.9.0 // indirect 38 | google.golang.org/appengine v1.6.7 // indirect 39 | google.golang.org/protobuf v1.30.0 // indirect 40 | gopkg.in/yaml.v2 v2.4.0 // indirect 41 | ) 42 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/kingpin/v2 v2.3.2 h1:H0aULhgmSzN8xQ3nX1uxtdlTHYoPLu5AhHxWrKI6ocU= 2 | github.com/alecthomas/kingpin/v2 v2.3.2/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= 3 | github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= 4 | github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= 5 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 6 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 7 | github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8 h1:SjZ2GvvOononHOpK84APFuMvxqsk3tEIaKH/z4Rpu3g= 8 | 
github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8/go.mod h1:uEyr4WpAH4hio6LFriaPkL938XnrvLpNPmQHBdrmbIE= 9 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 10 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 11 | github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= 12 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 13 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 14 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 16 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/deniswernert/go-fstab v0.0.0-20141204152952-eb4090f26517 h1:YMvaGdOIUowdD6ZybqLsUamGvWONZViUeW6T22U7fP0= 18 | github.com/deniswernert/go-fstab v0.0.0-20141204152952-eb4090f26517/go.mod h1:ixLGX4GUQg44igA/iJawr+KYZLyWOoAzAgTCQcJ/K9Y= 19 | github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= 20 | github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= 21 | github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= 22 | github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= 23 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 24 | github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= 25 | github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= 26 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 27 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 28 | github.com/golang/protobuf v1.3.5/go.mod 
h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= 29 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 30 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 31 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 32 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 33 | github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 34 | github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= 35 | github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 36 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 37 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 38 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 39 | github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= 40 | github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= 41 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= 42 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 43 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 44 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 45 | github.com/prometheus/client_golang v1.15.0 h1:5fCgGYogn0hFdhyhLbw7hEsWxufKtY9klyvdNfFlFhM= 46 | github.com/prometheus/client_golang v1.15.0/go.mod h1:e9yaBhRPU2pPNsZwE+JdQl0KEt1N9XgF6zxWmaC0xOk= 47 | github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= 48 | github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= 49 | 
github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= 50 | github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= 51 | github.com/prometheus/exporter-toolkit v0.10.0 h1:yOAzZTi4M22ZzVxD+fhy1URTuNRj/36uQJJ5S8IPza8= 52 | github.com/prometheus/exporter-toolkit v0.10.0/go.mod h1:+sVFzuvV5JDyw+Ih6p3zFxZNVnKQa3x5qPmDSiPu4ZY= 53 | github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= 54 | github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= 55 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 56 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 57 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 58 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 59 | github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= 60 | github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= 61 | github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= 62 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 63 | golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ= 64 | golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= 65 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 66 | golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= 67 | golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= 68 | golang.org/x/oauth2 v0.6.0 h1:Lh8GPgSKBfWSwFvtuWOfeI3aAAnbXTSutYxJiOJFgIw= 69 | golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= 70 | golang.org/x/sync 
v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 71 | golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= 72 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 73 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 74 | golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= 75 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 76 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 77 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 78 | golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= 79 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 80 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 81 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 82 | google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= 83 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 84 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 85 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 86 | google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= 87 | google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 88 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 89 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 90 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 91 | gopkg.in/yaml.v2 v2.4.0 
h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 92 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 93 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 94 | -------------------------------------------------------------------------------- /systemd/gpfs_exporter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Prometheus gpfs_exporter 3 | Wants=basic.target 4 | After=basic.target network.target 5 | 6 | [Service] 7 | User=gpfs_exporter 8 | Group=gpfs_exporter 9 | ExecStart=/usr/local/bin/gpfs_exporter 10 | ExecReload=/bin/kill -HUP $MAINPID 11 | KillMode=process 12 | Restart=always 13 | 14 | [Install] 15 | WantedBy=multi-user.target 16 | --------------------------------------------------------------------------------