├── CMakeLists.txt
├── LICENSE
├── README.md
├── dashboards
│   ├── README.md
│   └── grafana
│       ├── ZFS-pool-latency-heatmaps-influxdb.json
│       └── compressed-ARC.json
├── telegraf.d
│   └── zpool_influxdb.conf
└── zpool_influxdb.c

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required (VERSION 3.7)
project (zpool_influxdb C)

# By default, ZFSonLinux installs the necessary header files and
# libraries in /usr/local. If this is not the case for your system,
# set ZFS_INSTALL_BASE, e.g. for the Ubuntu zfs-on-linux library, use:
#   -D ZFS_INSTALL_BASE=/usr
# on the cmake command line.
set(ZFS_INSTALL_BASE /usr/local CACHE STRING "zfs installation base directory")

# To support unsigned 64-bit ints properly, uncomment the line below to
# set the SUPPORT_UINT64 flag at compile time.
# set(CMAKE_C_FLAGS "-DSUPPORT_UINT64")

include_directories(${ZFS_INSTALL_BASE}/include/libspl ${ZFS_INSTALL_BASE}/include/libzfs)
link_directories(${ZFS_INSTALL_BASE}/lib)
add_executable(zpool_influxdb zpool_influxdb.c)
target_link_libraries(zpool_influxdb zfs nvpair)
set_property(TARGET zpool_influxdb PROPERTY C_STANDARD 99)
install(TARGETS zpool_influxdb DESTINATION ${ZFS_INSTALL_BASE}/bin)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# InfluxDB Metrics for ZFS Pools
The _zpool_influxdb_ program produces
[InfluxDB](https://github.com/influxdata/influxdb) line protocol
compatible metrics from zpools. In the UNIX tradition, _zpool_influxdb_
does one thing: read statistics from a pool and print them to
stdout. In many ways, this is a metrics-friendly output of
statistics normally observed via the `zpool` command.
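For example, a single `zpool_stats` data point in line protocol form looks
like the following (hypothetical pool name and values, shown only to
illustrate the output format; the measurements, tags, and fields are
described below):
```
zpool_stats,name=tank,state=ONLINE alloc=1234567i,free=8765433i,size=10000000i,read_bytes=0i,read_errors=0i,read_ops=0i,write_bytes=0i,write_errors=0i,write_ops=0i 1590000000000000000
```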
## ZFS Versions
There are many implementations of ZFS on many OSes. The current
version is tested to work on:
* [ZFSonLinux](https://github.com/zfsonlinux/zfs) version 0.7 and later
* [cstor](https://github.com/openebs/cstor) for userland ZFS (uZFS)

This should compile and run on other ZFS versions, though many
do not have the latency histograms. Pull requests are welcome.

## Usage
When run without arguments, _zpool_influxdb_ runs once, reading data
from all imported pools, and prints to stdout.
```shell
zpool_influxdb [options] [poolname]
```
If no poolname is specified, then all pools are sampled.

| option | short option | description |
|---|---|---|
| --execd | -e | For use with telegraf's `execd` plugin. When [enter] is pressed, the pools are sampled. To exit, use [ctrl+D]. |
| --no-histogram | -n | Do not print histogram information. |
| --sum-histogram-buckets | -s | Sum histogram bucket values. |
| --help | -h | Print a short usage message. |

#### Histogram Bucket Values
The histogram data collected by ZFS is stored as independent bucket values.
This works well out-of-the-box with an InfluxDB data source and Grafana's
heatmap visualization. The InfluxDB query for a Grafana heatmap
visualization looks like:
```
field(disk_read) last() non_negative_derivative(1s)
```

Another method for storing histogram data sums the values of the lower
buckets into each bucket. For example, a latency bucket tagged "le=10"
includes the values in the bucket "le=1".
This method is often used for Prometheus histograms.
The `zpool_influxdb --sum-histogram-buckets` option presents the data from ZFS
as summed values.
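The summing is simply a running total over the buckets, from lowest to
highest. A minimal C sketch of the idea (made-up bucket counts, for
illustration only; this is not the actual zpool_influxdb implementation):
```c
/*
 * Illustrative only: convert independent histogram bucket counts, as
 * recorded by ZFS, into cumulative "le" counts like those emitted by
 * the --sum-histogram-buckets option. The counts below are made up.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t buckets[] = { 4, 11, 3, 0, 2 };	/* independent counts */
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i < sizeof (buckets) / sizeof (buckets[0]); i++) {
		sum += buckets[i];	/* each bucket includes all lower buckets */
		printf("bucket[%zu] cumulative count = %" PRIu64 "\n", i, sum);
	}
	return (0);
}
```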
## Measurements
The following measurements are collected:

| measurement | description | zpool equivalent |
|---|---|---|
| zpool_stats | general size and data | zpool list |
| zpool_scan_stats | scrub, rebuild, and resilver statistics (omitted if no scan has been requested) | zpool status |
| zpool_vdev_stats | per-vdev statistics | zpool iostat -q |
| zpool_io_size | per-vdev I/O size histogram | zpool iostat -r |
| zpool_latency | per-vdev I/O latency histogram | zpool iostat -w |
| zpool_vdev_queue | per-vdev instantaneous queue depth | zpool iostat -q |

### zpool_stats Description
zpool_stats contains top-level summary statistics for the pool.
Performance counters measure the I/Os to the pool's devices.

#### zpool_stats Tags

| label | description |
|---|---|
| name | pool name |
| state | pool state, as shown by _zpool status_ |

#### zpool_stats Fields

| field | units | description |
|---|---|---|
| alloc | bytes | allocated space |
| free | bytes | unallocated space |
| size | bytes | total pool size |
| read_bytes | bytes | bytes read since pool import |
| read_errors | count | number of read errors |
| read_ops | count | number of read operations |
| write_bytes | bytes | bytes written since pool import |
| write_errors | count | number of write errors |
| write_ops | count | number of write operations |

### zpool_scan_stats Description
Once a pool has been scrubbed, resilvered, or rebuilt, the zpool_scan_stats
contain information about the status and performance of the operation.
Otherwise, the zpool_scan_stats do not exist in the kernel, and therefore
cannot be reported by this collector.

#### zpool_scan_stats Tags

| label | description |
|---|---|
| name | pool name |
| function | name of the scan function running or recently completed |
| state | scan state, as shown by _zpool status_ |

#### zpool_scan_stats Fields

| field | units | description |
|---|---|---|
| errors | count | number of errors encountered by scan |
| examined | bytes | total data examined during scan |
| to_examine | bytes | prediction of total bytes to be scanned |
| pass_examined | bytes | data examined during current scan pass |
| processed | bytes | data reconstructed during scan |
| to_process | bytes | total bytes to be repaired |
| rate | bytes/sec | examination rate |
| start_ts | epoch timestamp | start timestamp for scan |
| pause_ts | epoch timestamp | timestamp for a scan pause request |
| end_ts | epoch timestamp | completion timestamp for scan |
| paused_t | seconds | elapsed time while paused |
| remaining_t | seconds | estimate of time remaining for scan |

### zpool_vdev_stats Description
The ZFS I/O (ZIO) scheduler uses five queues to schedule I/Os to each vdev.
These queues are further divided into active and pending states.
An I/O is pending prior to being issued to the vdev. An active
I/O has been issued to the vdev. The scheduler and its tunable
parameters are described at the
[ZFS on Linux wiki](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler).
The ZIO scheduler reports the queue depths as gauges, where the value
represents an instantaneous snapshot of the queue depth at
the sample time. Therefore, it is not unusual to see all zeroes
for an idle pool.

#### zpool_vdev_stats Tags

| label | description |
|---|---|
| name | pool name |
| vdev | vdev name (root = entire pool) |

#### zpool_vdev_stats Fields

| field | units | description |
|---|---|---|
| sync_r_active_queue | entries | synchronous read active queue depth |
| sync_w_active_queue | entries | synchronous write active queue depth |
| async_r_active_queue | entries | asynchronous read active queue depth |
| async_w_active_queue | entries | asynchronous write active queue depth |
| async_scrub_active_queue | entries | asynchronous scrub active queue depth |
| sync_r_pend_queue | entries | synchronous read pending queue depth |
| sync_w_pend_queue | entries | synchronous write pending queue depth |
| async_r_pend_queue | entries | asynchronous read pending queue depth |
| async_w_pend_queue | entries | asynchronous write pending queue depth |
| async_scrub_pend_queue | entries | asynchronous scrub pending queue depth |

### zpool_latency Histogram
ZFS tracks the latency of each I/O in the ZIO pipeline. This latency can
be useful for observing latency-related issues that are not easily observed
using the averaged latency statistics.

The histogram fields show cumulative values from lowest to highest.
The largest bucket is tagged "le=+Inf", representing the total count
of I/Os by type and vdev.
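A single latency data point for one bucket might look like the following
(the `le` value and all counts here are invented for illustration; the tags
and fields are defined in the tables below):
```
zpool_latency,le=0.000131072,name=tank,vdev=root total_read=42i,total_write=13i,disk_read=7i,disk_write=4i,sync_read=10i,sync_write=2i,async_read=25i,async_write=7i,scrub=7i,trim=0i 1590000000000000000
```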

#### zpool_latency Histogram Tags
| label | description |
|---|---|
| le | bucket for histogram, latency is less than or equal to bucket value in seconds |
| name | pool name |
| path | for leaf vdevs, the device path name, otherwise omitted |
| vdev | vdev name (root = entire pool) |

#### zpool_latency Histogram Fields
| field | units | description |
|---|---|---|
| total_read | operations | read operations of all types |
| total_write | operations | write operations of all types |
| disk_read | operations | disk read operations |
| disk_write | operations | disk write operations |
| sync_read | operations | ZIO sync reads |
| sync_write | operations | ZIO sync writes |
| async_read | operations | ZIO async reads |
| async_write | operations | ZIO async writes |
| scrub | operations | ZIO scrub/scan reads |
| trim | operations | ZIO trim (aka unmap) writes |

### zpool_io_size Histogram
ZFS tracks I/O throughout the ZIO pipeline. The size of each I/O is used
to create a histogram of the size by I/O type and vdev. For example, a
4KiB write to a mirrored pool will show a 4KiB write to the top-level vdev
(root) and a 4KiB write to each of the mirror leaf vdevs.

The ZIO pipeline can aggregate I/O operations. For example, a contiguous
series of writes can be aggregated into a single, larger I/O to the leaf
vdev. The independent I/O operations reflect the logical operations and
the aggregated I/O operations reflect the physical operations.

The histogram fields show cumulative values from lowest to highest.
The largest bucket is tagged "le=+Inf", representing the total count
of I/Os by type and vdev.

Note: trim I/Os can be larger than 16MiB, but the larger sizes are
accounted for in the 16MiB bucket.

#### zpool_io_size Histogram Tags
| label | description |
|---|---|
| le | bucket for histogram, I/O size is less than or equal to bucket value in bytes |
| name | pool name |
| path | for leaf vdevs, the device path name, otherwise omitted |
| vdev | vdev name (root = entire pool) |

#### zpool_io_size Histogram Fields
| field | units | description |
|---|---|---|
| sync_read_ind | blocks | independent sync reads |
| sync_write_ind | blocks | independent sync writes |
| async_read_ind | blocks | independent async reads |
| async_write_ind | blocks | independent async writes |
| scrub_read_ind | blocks | independent scrub/scan reads |
| trim_write_ind | blocks | independent trim (aka unmap) writes |
| sync_read_agg | blocks | aggregated sync reads |
| sync_write_agg | blocks | aggregated sync writes |
| async_read_agg | blocks | aggregated async reads |
| async_write_agg | blocks | aggregated async writes |
| scrub_read_agg | blocks | aggregated scrub/scan reads |
| trim_write_agg | blocks | aggregated trim (aka unmap) writes |

#### About unsigned integers
Telegraf v1.6.2 and later support unsigned 64-bit integers, which more
closely matches the uint64_t values used by ZFS. By default, zpool_influxdb
will mask ZFS' uint64_t values and use the InfluxDB line protocol integer
type. Eventually the monitoring world will catch up to the times and support
unsigned integers. To support unsigned integers, define SUPPORT_UINT64 and
compile as described in `CMakeLists.txt`.
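A minimal sketch of the difference (an assumption made for illustration,
not the actual zpool_influxdb source): with SUPPORT_UINT64 defined, values
are printed as line protocol unsigned integers (`u` suffix); otherwise they
are masked to fit the signed integer type (`i` suffix).
```c
/*
 * Sketch of the SUPPORT_UINT64 behavior described above; an
 * illustrative assumption, not the actual zpool_influxdb source.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void
print_field(const char *name, uint64_t value)
{
#ifdef SUPPORT_UINT64
	/* line protocol unsigned integer type */
	printf("%s=%" PRIu64 "u", name, value);
#else
	/* mask the value to fit the line protocol signed integer type */
	printf("%s=%" PRId64 "i", name, (int64_t)(value & INT64_MAX));
#endif
}

int
main(void)
{
	print_field("free", UINT64_MAX);
	printf("\n");
	return (0);
}
```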

## Building
Building is simplified by using cmake.
It is as simple as possible, but no simpler.
By default, [ZFSonLinux](https://github.com/zfsonlinux/zfs)
installs the necessary header and library files in _/usr/local_.
If you place those files elsewhere, either edit _CMakeLists.txt_ and
change _ZFS_INSTALL_BASE_, or pass it with `-D ZFS_INSTALL_BASE=/usr`
on the cmake command line:
```bash
cmake .
make
```
If successful, the _zpool_influxdb_ executable is created.

## Installing
Installation is left as an exercise for the reader because
there are many different methods that can be used.
Ultimately, the method depends on how the local metrics collection is
implemented and the local access policies.

To install the _zpool_influxdb_ executable in _ZFS_INSTALL_BASE/bin_, use
```bash
make install
```

The simplest method is to use the exec input plugin in telegraf. For
convenience, a sample config file, _zpool_influxdb.conf_, is provided and
can be placed in the telegraf config directory (often
/etc/telegraf/telegraf.d). Telegraf can then be restarted to read the
config-directory files.

## Caveat Emptor
* Like the _zpool_ command, _zpool_influxdb_ takes a reader
  lock on spa_config for each imported pool. If this lock blocks,
  then the command will also block indefinitely and might be
  unkillable. This is not a normal condition, but can occur if
  there are bugs in the kernel modules.
  For this reason, care should be taken:
  * avoid spawning many of these commands hoping that one might
    finish
  * avoid frequent updates or short sample time intervals,
    because the locks can interfere with the performance
    of other instances of _zpool_ or _zpool_influxdb_

## Other Collectors
There are a few other collectors for zpool statistics roaming around
the Internet. Many attempt to screen-scrape `zpool` output in various
ways. Screen-scraping works poorly for `zpool` output because of its
human-friendly nature, and those collectors suffer from the same caveats
as this implementation. This implementation collects the metrics directly
and is much more efficient than the screen-scrapers.

## Feedback Encouraged
Pull requests and issues are greatly appreciated. Visit
https://github.com/richardelling/zpool_influxdb

--------------------------------------------------------------------------------
/dashboards/README.md:
--------------------------------------------------------------------------------
### Dashboards for zpool_influxdb
This directory contains a collection of dashboards related to ZFS, with data
collected from the zpool_influxdb collector.
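To spot-check that the collector produces data these dashboards can consume,
one sample can be written directly to an InfluxDB 1.x server (hypothetical
server URL and database name; note that telegraf normally adds the `host`
tag these dashboards filter on, so this is only a quick syntax check):
```bash
# print one sample and POST it to the InfluxDB 1.x write endpoint
zpool_influxdb | curl -i -XPOST 'http://localhost:8086/write?db=telegraf' --data-binary @-
```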
4 | -------------------------------------------------------------------------------- /dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_MACBOOK-INFLUX", 5 | "label": "macbook-influx", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "influxdb", 9 | "pluginName": "InfluxDB" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "6.7.3" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "heatmap", 22 | "name": "Heatmap", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "influxdb", 28 | "name": "InfluxDB", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "jdbranham-diagram-panel", 34 | "name": "Diagram", 35 | "version": "1.4.5" 36 | }, 37 | { 38 | "type": "panel", 39 | "id": "text", 40 | "name": "Text", 41 | "version": "" 42 | } 43 | ], 44 | "annotations": { 45 | "list": [ 46 | { 47 | "$$hashKey": "object:1627", 48 | "builtIn": 1, 49 | "datasource": "-- Grafana --", 50 | "enable": true, 51 | "hide": true, 52 | "iconColor": "rgba(0, 211, 255, 1)", 53 | "name": "Annotations & Alerts", 54 | "type": "dashboard" 55 | } 56 | ] 57 | }, 58 | "description": "Top-level ZFS pool latency by ZIO type", 59 | "editable": true, 60 | "gnetId": null, 61 | "graphTooltip": 1, 62 | "id": null, 63 | "iteration": 1590445168391, 64 | "links": [], 65 | "panels": [ 66 | { 67 | "collapsed": false, 68 | "datasource": "${DS_MACBOOK-INFLUX}", 69 | "gridPos": { 70 | "h": 1, 71 | "w": 24, 72 | "x": 0, 73 | "y": 0 74 | }, 75 | "id": 5, 76 | "panels": [], 77 | "title": "Total Reads and Writes", 78 | "type": "row" 79 | }, 80 | { 81 | "cards": { 82 | "cardPadding": null, 83 | "cardRound": null 84 | }, 85 | "color": { 86 | "cardColor": "#b4ff00", 87 | "colorScale": "sqrt", 88 | "colorScheme": "interpolateOranges", 89 | "exponent": 0.5, 90 | "mode": "spectrum" 91 | }, 92 | "dataFormat": "tsbuckets", 93 | "datasource": "${DS_MACBOOK-INFLUX}", 94 | "description": "Latency histogram for the total reads of a ZFS pool", 95 | "fieldConfig": { 96 | "defaults": { 97 | "custom": {} 98 | }, 99 | "overrides": [] 100 | }, 101 | "gridPos": { 102 | "h": 9, 103 | "w": 12, 104 | "x": 0, 105 | "y": 1 106 | }, 107 | "heatmap": {}, 108 | "hideZeroBuckets": false, 109 | "highlightCards": true, 110 | "id": 2, 111 | "legend": { 112 | "show": true 113 | }, 114 | "reverseYBuckets": false, 115 | "targets": [ 116 | { 117 | "alias": "$tag_le", 118 | "groupBy": [ 119 | { 120 | "params": [ 121 | "$__interval" 122 | ], 123 | "type": "time" 124 | }, 125 | { 126 | "params": [ 127 | "le" 128 | ], 129 | "type": "tag" 130 | }, 131 | { 132 | "params": [ 133 | "null" 134 | ], 135 | "type": "fill" 136 | } 137 | ], 138 | "measurement": "zpool_latency", 139 | "orderByTime": "ASC", 140 | "policy": "default", 141 | "refId": "A", 142 | "resultFormat": "time_series", 143 | "select": [ 144 | [ 145 | { 146 | "params": [ 147 | "total_read" 148 | ], 149 | "type": "field" 150 | }, 151 | { 152 | "params": [], 153 | "type": "last" 154 | }, 155 | { 156 | "params": [ 157 | "1s" 158 | ], 159 | "type": "non_negative_derivative" 160 | } 161 | ] 162 | ], 163 | "tags": [ 164 | { 165 | "key": "host", 166 | "operator": "=~", 167 | "value": "/^$hostname$/" 168 | }, 169 | { 170 | "condition": "AND", 171 | "key": "name", 172 | "operator": "=~", 173 | "value": "/^$poolname$/" 174 | } 175 | ] 176 | } 177 | ], 178 | "timeFrom": 
null, 179 | "timeShift": null, 180 | "title": "Total Reads", 181 | "tooltip": { 182 | "show": true, 183 | "showHistogram": true 184 | }, 185 | "type": "heatmap", 186 | "xAxis": { 187 | "show": true 188 | }, 189 | "xBucketNumber": null, 190 | "xBucketSize": null, 191 | "yAxis": { 192 | "decimals": 0, 193 | "format": "s", 194 | "logBase": 1, 195 | "max": null, 196 | "min": null, 197 | "show": true, 198 | "splitFactor": null 199 | }, 200 | "yBucketBound": "auto", 201 | "yBucketNumber": null, 202 | "yBucketSize": null 203 | }, 204 | { 205 | "cards": { 206 | "cardPadding": null, 207 | "cardRound": null 208 | }, 209 | "color": { 210 | "cardColor": "#b4ff00", 211 | "colorScale": "sqrt", 212 | "colorScheme": "interpolateOranges", 213 | "exponent": 0.5, 214 | "mode": "spectrum" 215 | }, 216 | "dataFormat": "tsbuckets", 217 | "datasource": "${DS_MACBOOK-INFLUX}", 218 | "description": "Latency histogram for the total writes of a ZFS pool", 219 | "fieldConfig": { 220 | "defaults": { 221 | "custom": {} 222 | }, 223 | "overrides": [] 224 | }, 225 | "gridPos": { 226 | "h": 9, 227 | "w": 12, 228 | "x": 12, 229 | "y": 1 230 | }, 231 | "heatmap": {}, 232 | "hideZeroBuckets": false, 233 | "highlightCards": true, 234 | "id": 3, 235 | "legend": { 236 | "show": true 237 | }, 238 | "reverseYBuckets": false, 239 | "targets": [ 240 | { 241 | "alias": "$tag_le", 242 | "groupBy": [ 243 | { 244 | "params": [ 245 | "$__interval" 246 | ], 247 | "type": "time" 248 | }, 249 | { 250 | "params": [ 251 | "le" 252 | ], 253 | "type": "tag" 254 | }, 255 | { 256 | "params": [ 257 | "null" 258 | ], 259 | "type": "fill" 260 | } 261 | ], 262 | "measurement": "zpool_latency", 263 | "orderByTime": "ASC", 264 | "policy": "default", 265 | "refId": "A", 266 | "resultFormat": "time_series", 267 | "select": [ 268 | [ 269 | { 270 | "params": [ 271 | "total_write" 272 | ], 273 | "type": "field" 274 | }, 275 | { 276 | "params": [], 277 | "type": "last" 278 | }, 279 | { 280 | "params": [ 281 | "1s" 282 | ], 283 | "type": "non_negative_derivative" 284 | } 285 | ] 286 | ], 287 | "tags": [ 288 | { 289 | "key": "host", 290 | "operator": "=~", 291 | "value": "/^$hostname$/" 292 | }, 293 | { 294 | "condition": "AND", 295 | "key": "name", 296 | "operator": "=~", 297 | "value": "/^$poolname$/" 298 | } 299 | ] 300 | } 301 | ], 302 | "timeFrom": null, 303 | "timeShift": null, 304 | "title": "Total Writes", 305 | "tooltip": { 306 | "show": true, 307 | "showHistogram": true 308 | }, 309 | "type": "heatmap", 310 | "xAxis": { 311 | "show": true 312 | }, 313 | "xBucketNumber": null, 314 | "xBucketSize": null, 315 | "yAxis": { 316 | "decimals": 0, 317 | "format": "s", 318 | "logBase": 1, 319 | "max": null, 320 | "min": null, 321 | "show": true, 322 | "splitFactor": null 323 | }, 324 | "yBucketBound": "auto", 325 | "yBucketNumber": null, 326 | "yBucketSize": null 327 | }, 328 | { 329 | "collapsed": false, 330 | "datasource": "${DS_MACBOOK-INFLUX}", 331 | "gridPos": { 332 | "h": 1, 333 | "w": 24, 334 | "x": 0, 335 | "y": 10 336 | }, 337 | "id": 8, 338 | "panels": [], 339 | "title": "ZIO Scheduler Queues for Read Operations", 340 | "type": "row" 341 | }, 342 | { 343 | "cards": { 344 | "cardPadding": null, 345 | "cardRound": null 346 | }, 347 | "color": { 348 | "cardColor": "#b4ff00", 349 | "colorScale": "sqrt", 350 | "colorScheme": "interpolateOranges", 351 | "exponent": 0.5, 352 | "mode": "spectrum" 353 | }, 354 | "dataFormat": "tsbuckets", 355 | "datasource": "${DS_MACBOOK-INFLUX}", 356 | "description": "Latency histogram for the synchronous reads of a ZFS 
pool", 357 | "fieldConfig": { 358 | "defaults": { 359 | "custom": {} 360 | }, 361 | "overrides": [] 362 | }, 363 | "gridPos": { 364 | "h": 8, 365 | "w": 5, 366 | "x": 0, 367 | "y": 11 368 | }, 369 | "heatmap": {}, 370 | "hideZeroBuckets": false, 371 | "highlightCards": true, 372 | "id": 6, 373 | "legend": { 374 | "show": false 375 | }, 376 | "reverseYBuckets": false, 377 | "targets": [ 378 | { 379 | "alias": "$tag_le", 380 | "groupBy": [ 381 | { 382 | "params": [ 383 | "$__interval" 384 | ], 385 | "type": "time" 386 | }, 387 | { 388 | "params": [ 389 | "le" 390 | ], 391 | "type": "tag" 392 | }, 393 | { 394 | "params": [ 395 | "null" 396 | ], 397 | "type": "fill" 398 | } 399 | ], 400 | "measurement": "zpool_latency", 401 | "orderByTime": "ASC", 402 | "policy": "default", 403 | "refId": "A", 404 | "resultFormat": "time_series", 405 | "select": [ 406 | [ 407 | { 408 | "params": [ 409 | "sync_read" 410 | ], 411 | "type": "field" 412 | }, 413 | { 414 | "params": [], 415 | "type": "last" 416 | }, 417 | { 418 | "params": [ 419 | "1s" 420 | ], 421 | "type": "non_negative_derivative" 422 | } 423 | ] 424 | ], 425 | "tags": [ 426 | { 427 | "key": "host", 428 | "operator": "=~", 429 | "value": "/^$hostname$/" 430 | }, 431 | { 432 | "condition": "AND", 433 | "key": "name", 434 | "operator": "=~", 435 | "value": "/^$poolname$/" 436 | } 437 | ] 438 | } 439 | ], 440 | "timeFrom": null, 441 | "timeShift": null, 442 | "title": "Sync Read Queue", 443 | "tooltip": { 444 | "show": true, 445 | "showHistogram": true 446 | }, 447 | "type": "heatmap", 448 | "xAxis": { 449 | "show": true 450 | }, 451 | "xBucketNumber": null, 452 | "xBucketSize": null, 453 | "yAxis": { 454 | "decimals": 0, 455 | "format": "s", 456 | "logBase": 1, 457 | "max": null, 458 | "min": null, 459 | "show": true, 460 | "splitFactor": null 461 | }, 462 | "yBucketBound": "auto", 463 | "yBucketNumber": null, 464 | "yBucketSize": null 465 | }, 466 | { 467 | "cards": { 468 | "cardPadding": null, 469 | "cardRound": null 470 | }, 471 | "color": { 472 | "cardColor": "#b4ff00", 473 | "colorScale": "sqrt", 474 | "colorScheme": "interpolateOranges", 475 | "exponent": 0.5, 476 | "mode": "spectrum" 477 | }, 478 | "dataFormat": "tsbuckets", 479 | "datasource": "${DS_MACBOOK-INFLUX}", 480 | "description": "Latency histogram for the asynchronous reads of a ZFS pool", 481 | "fieldConfig": { 482 | "defaults": { 483 | "custom": {} 484 | }, 485 | "overrides": [] 486 | }, 487 | "gridPos": { 488 | "h": 8, 489 | "w": 5, 490 | "x": 5, 491 | "y": 11 492 | }, 493 | "heatmap": {}, 494 | "hideZeroBuckets": false, 495 | "highlightCards": true, 496 | "id": 9, 497 | "legend": { 498 | "show": false 499 | }, 500 | "reverseYBuckets": false, 501 | "targets": [ 502 | { 503 | "alias": "$tag_le", 504 | "groupBy": [ 505 | { 506 | "params": [ 507 | "$__interval" 508 | ], 509 | "type": "time" 510 | }, 511 | { 512 | "params": [ 513 | "le" 514 | ], 515 | "type": "tag" 516 | }, 517 | { 518 | "params": [ 519 | "null" 520 | ], 521 | "type": "fill" 522 | } 523 | ], 524 | "measurement": "zpool_latency", 525 | "orderByTime": "ASC", 526 | "policy": "default", 527 | "refId": "A", 528 | "resultFormat": "time_series", 529 | "select": [ 530 | [ 531 | { 532 | "params": [ 533 | "async_read" 534 | ], 535 | "type": "field" 536 | }, 537 | { 538 | "params": [], 539 | "type": "last" 540 | }, 541 | { 542 | "params": [ 543 | "1s" 544 | ], 545 | "type": "non_negative_derivative" 546 | } 547 | ] 548 | ], 549 | "tags": [ 550 | { 551 | "key": "host", 552 | "operator": "=~", 553 | "value": "/^$hostname$/" 554 
| }, 555 | { 556 | "condition": "AND", 557 | "key": "name", 558 | "operator": "=~", 559 | "value": "/^$poolname$/" 560 | } 561 | ] 562 | } 563 | ], 564 | "timeFrom": null, 565 | "timeShift": null, 566 | "title": "Async Read Queue", 567 | "tooltip": { 568 | "show": true, 569 | "showHistogram": true 570 | }, 571 | "type": "heatmap", 572 | "xAxis": { 573 | "show": true 574 | }, 575 | "xBucketNumber": null, 576 | "xBucketSize": null, 577 | "yAxis": { 578 | "decimals": 0, 579 | "format": "s", 580 | "logBase": 1, 581 | "max": null, 582 | "min": null, 583 | "show": true, 584 | "splitFactor": null 585 | }, 586 | "yBucketBound": "auto", 587 | "yBucketNumber": null, 588 | "yBucketSize": null 589 | }, 590 | { 591 | "cards": { 592 | "cardPadding": null, 593 | "cardRound": null 594 | }, 595 | "color": { 596 | "cardColor": "#b4ff00", 597 | "colorScale": "sqrt", 598 | "colorScheme": "interpolateOranges", 599 | "exponent": 0.5, 600 | "mode": "spectrum" 601 | }, 602 | "dataFormat": "tsbuckets", 603 | "datasource": "${DS_MACBOOK-INFLUX}", 604 | "description": "Latency histogram for the scrub or scan reads of a ZFS pool", 605 | "fieldConfig": { 606 | "defaults": { 607 | "custom": {} 608 | }, 609 | "overrides": [] 610 | }, 611 | "gridPos": { 612 | "h": 8, 613 | "w": 5, 614 | "x": 10, 615 | "y": 11 616 | }, 617 | "heatmap": {}, 618 | "hideZeroBuckets": false, 619 | "highlightCards": true, 620 | "id": 10, 621 | "legend": { 622 | "show": false 623 | }, 624 | "reverseYBuckets": false, 625 | "targets": [ 626 | { 627 | "alias": "$tag_le", 628 | "groupBy": [ 629 | { 630 | "params": [ 631 | "$__interval" 632 | ], 633 | "type": "time" 634 | }, 635 | { 636 | "params": [ 637 | "le" 638 | ], 639 | "type": "tag" 640 | }, 641 | { 642 | "params": [ 643 | "null" 644 | ], 645 | "type": "fill" 646 | } 647 | ], 648 | "measurement": "zpool_latency", 649 | "orderByTime": "ASC", 650 | "policy": "default", 651 | "refId": "A", 652 | "resultFormat": "time_series", 653 | "select": [ 654 | [ 655 | { 656 | "params": [ 657 | "scrub" 658 | ], 659 | "type": "field" 660 | }, 661 | { 662 | "params": [], 663 | "type": "last" 664 | }, 665 | { 666 | "params": [ 667 | "1s" 668 | ], 669 | "type": "non_negative_derivative" 670 | } 671 | ] 672 | ], 673 | "tags": [ 674 | { 675 | "key": "host", 676 | "operator": "=~", 677 | "value": "/^$hostname$/" 678 | }, 679 | { 680 | "condition": "AND", 681 | "key": "name", 682 | "operator": "=~", 683 | "value": "/^$poolname$/" 684 | } 685 | ] 686 | } 687 | ], 688 | "timeFrom": null, 689 | "timeShift": null, 690 | "title": "Scrub/Scan Read Queue", 691 | "tooltip": { 692 | "show": true, 693 | "showHistogram": true 694 | }, 695 | "type": "heatmap", 696 | "xAxis": { 697 | "show": true 698 | }, 699 | "xBucketNumber": null, 700 | "xBucketSize": null, 701 | "yAxis": { 702 | "decimals": 0, 703 | "format": "s", 704 | "logBase": 1, 705 | "max": null, 706 | "min": null, 707 | "show": true, 708 | "splitFactor": null 709 | }, 710 | "yBucketBound": "auto", 711 | "yBucketNumber": null, 712 | "yBucketSize": null 713 | }, 714 | { 715 | "cards": { 716 | "cardPadding": null, 717 | "cardRound": null 718 | }, 719 | "color": { 720 | "cardColor": "#b4ff00", 721 | "colorScale": "sqrt", 722 | "colorScheme": "interpolateOranges", 723 | "exponent": 0.5, 724 | "mode": "spectrum" 725 | }, 726 | "dataFormat": "tsbuckets", 727 | "datasource": "${DS_MACBOOK-INFLUX}", 728 | "description": "Latency histogram for the actual disk reads of a ZFS pool", 729 | "fieldConfig": { 730 | "defaults": { 731 | "custom": {} 732 | }, 733 | "overrides": [] 734 
| }, 735 | "gridPos": { 736 | "h": 8, 737 | "w": 9, 738 | "x": 15, 739 | "y": 11 740 | }, 741 | "heatmap": {}, 742 | "hideZeroBuckets": false, 743 | "highlightCards": true, 744 | "id": 11, 745 | "legend": { 746 | "show": false 747 | }, 748 | "reverseYBuckets": false, 749 | "targets": [ 750 | { 751 | "alias": "$tag_le", 752 | "groupBy": [ 753 | { 754 | "params": [ 755 | "$__interval" 756 | ], 757 | "type": "time" 758 | }, 759 | { 760 | "params": [ 761 | "le" 762 | ], 763 | "type": "tag" 764 | }, 765 | { 766 | "params": [ 767 | "null" 768 | ], 769 | "type": "fill" 770 | } 771 | ], 772 | "measurement": "zpool_latency", 773 | "orderByTime": "ASC", 774 | "policy": "default", 775 | "refId": "A", 776 | "resultFormat": "time_series", 777 | "select": [ 778 | [ 779 | { 780 | "params": [ 781 | "disk_read" 782 | ], 783 | "type": "field" 784 | }, 785 | { 786 | "params": [], 787 | "type": "last" 788 | }, 789 | { 790 | "params": [ 791 | "1s" 792 | ], 793 | "type": "non_negative_derivative" 794 | } 795 | ] 796 | ], 797 | "tags": [ 798 | { 799 | "key": "host", 800 | "operator": "=~", 801 | "value": "/^$hostname$/" 802 | }, 803 | { 804 | "condition": "AND", 805 | "key": "name", 806 | "operator": "=~", 807 | "value": "/^$poolname$/" 808 | } 809 | ] 810 | } 811 | ], 812 | "timeFrom": null, 813 | "timeShift": null, 814 | "title": "Disk Read Queue", 815 | "tooltip": { 816 | "show": true, 817 | "showHistogram": true 818 | }, 819 | "type": "heatmap", 820 | "xAxis": { 821 | "show": true 822 | }, 823 | "xBucketNumber": null, 824 | "xBucketSize": null, 825 | "yAxis": { 826 | "decimals": 0, 827 | "format": "s", 828 | "logBase": 1, 829 | "max": null, 830 | "min": null, 831 | "show": true, 832 | "splitFactor": null 833 | }, 834 | "yBucketBound": "auto", 835 | "yBucketNumber": null, 836 | "yBucketSize": null 837 | }, 838 | { 839 | "collapsed": false, 840 | "datasource": "${DS_MACBOOK-INFLUX}", 841 | "gridPos": { 842 | "h": 1, 843 | "w": 24, 844 | "x": 0, 845 | "y": 19 846 | }, 847 | "id": 13, 848 | "panels": [], 849 | "title": "ZIO Scheduler Queues for Write Operations", 850 | "type": "row" 851 | }, 852 | { 853 | "cards": { 854 | "cardPadding": null, 855 | "cardRound": null 856 | }, 857 | "color": { 858 | "cardColor": "#b4ff00", 859 | "colorScale": "sqrt", 860 | "colorScheme": "interpolateOranges", 861 | "exponent": 0.5, 862 | "mode": "spectrum" 863 | }, 864 | "dataFormat": "tsbuckets", 865 | "datasource": "${DS_MACBOOK-INFLUX}", 866 | "description": "Latency histogram for the synchronous writes of a ZFS pool", 867 | "fieldConfig": { 868 | "defaults": { 869 | "custom": {} 870 | }, 871 | "overrides": [] 872 | }, 873 | "gridPos": { 874 | "h": 8, 875 | "w": 5, 876 | "x": 0, 877 | "y": 20 878 | }, 879 | "heatmap": {}, 880 | "hideZeroBuckets": false, 881 | "highlightCards": true, 882 | "id": 14, 883 | "legend": { 884 | "show": false 885 | }, 886 | "reverseYBuckets": false, 887 | "targets": [ 888 | { 889 | "alias": "$tag_le", 890 | "groupBy": [ 891 | { 892 | "params": [ 893 | "$__interval" 894 | ], 895 | "type": "time" 896 | }, 897 | { 898 | "params": [ 899 | "le" 900 | ], 901 | "type": "tag" 902 | }, 903 | { 904 | "params": [ 905 | "null" 906 | ], 907 | "type": "fill" 908 | } 909 | ], 910 | "measurement": "zpool_latency", 911 | "orderByTime": "ASC", 912 | "policy": "default", 913 | "refId": "A", 914 | "resultFormat": "time_series", 915 | "select": [ 916 | [ 917 | { 918 | "params": [ 919 | "sync_write" 920 | ], 921 | "type": "field" 922 | }, 923 | { 924 | "params": [], 925 | "type": "last" 926 | }, 927 | { 928 | "params": [ 
929 | "1s" 930 | ], 931 | "type": "non_negative_derivative" 932 | } 933 | ] 934 | ], 935 | "tags": [ 936 | { 937 | "key": "host", 938 | "operator": "=~", 939 | "value": "/^$hostname$/" 940 | }, 941 | { 942 | "condition": "AND", 943 | "key": "name", 944 | "operator": "=~", 945 | "value": "/^$poolname$/" 946 | } 947 | ] 948 | } 949 | ], 950 | "timeFrom": null, 951 | "timeShift": null, 952 | "title": "Sync Write Queue", 953 | "tooltip": { 954 | "show": true, 955 | "showHistogram": true 956 | }, 957 | "type": "heatmap", 958 | "xAxis": { 959 | "show": true 960 | }, 961 | "xBucketNumber": null, 962 | "xBucketSize": null, 963 | "yAxis": { 964 | "decimals": 0, 965 | "format": "s", 966 | "logBase": 1, 967 | "max": null, 968 | "min": null, 969 | "show": true, 970 | "splitFactor": null 971 | }, 972 | "yBucketBound": "auto", 973 | "yBucketNumber": null, 974 | "yBucketSize": null 975 | }, 976 | { 977 | "cards": { 978 | "cardPadding": null, 979 | "cardRound": null 980 | }, 981 | "color": { 982 | "cardColor": "#b4ff00", 983 | "colorScale": "sqrt", 984 | "colorScheme": "interpolateOranges", 985 | "exponent": 0.5, 986 | "mode": "spectrum" 987 | }, 988 | "dataFormat": "tsbuckets", 989 | "datasource": "${DS_MACBOOK-INFLUX}", 990 | "description": "Latency histogram for the asynchronous writes of a ZFS pool", 991 | "fieldConfig": { 992 | "defaults": { 993 | "custom": {} 994 | }, 995 | "overrides": [] 996 | }, 997 | "gridPos": { 998 | "h": 8, 999 | "w": 5, 1000 | "x": 5, 1001 | "y": 20 1002 | }, 1003 | "heatmap": {}, 1004 | "hideZeroBuckets": false, 1005 | "highlightCards": true, 1006 | "id": 15, 1007 | "legend": { 1008 | "show": false 1009 | }, 1010 | "reverseYBuckets": false, 1011 | "targets": [ 1012 | { 1013 | "alias": "$tag_le", 1014 | "groupBy": [ 1015 | { 1016 | "params": [ 1017 | "$__interval" 1018 | ], 1019 | "type": "time" 1020 | }, 1021 | { 1022 | "params": [ 1023 | "le" 1024 | ], 1025 | "type": "tag" 1026 | }, 1027 | { 1028 | "params": [ 1029 | "null" 1030 | ], 1031 | "type": "fill" 1032 | } 1033 | ], 1034 | "measurement": "zpool_latency", 1035 | "orderByTime": "ASC", 1036 | "policy": "default", 1037 | "refId": "A", 1038 | "resultFormat": "time_series", 1039 | "select": [ 1040 | [ 1041 | { 1042 | "params": [ 1043 | "async_write" 1044 | ], 1045 | "type": "field" 1046 | }, 1047 | { 1048 | "params": [], 1049 | "type": "last" 1050 | }, 1051 | { 1052 | "params": [ 1053 | "1s" 1054 | ], 1055 | "type": "non_negative_derivative" 1056 | } 1057 | ] 1058 | ], 1059 | "tags": [ 1060 | { 1061 | "key": "host", 1062 | "operator": "=~", 1063 | "value": "/^$hostname$/" 1064 | }, 1065 | { 1066 | "condition": "AND", 1067 | "key": "name", 1068 | "operator": "=~", 1069 | "value": "/^$poolname$/" 1070 | } 1071 | ] 1072 | } 1073 | ], 1074 | "timeFrom": null, 1075 | "timeShift": null, 1076 | "title": "Async Write Queue", 1077 | "tooltip": { 1078 | "show": true, 1079 | "showHistogram": true 1080 | }, 1081 | "type": "heatmap", 1082 | "xAxis": { 1083 | "show": true 1084 | }, 1085 | "xBucketNumber": null, 1086 | "xBucketSize": null, 1087 | "yAxis": { 1088 | "decimals": 0, 1089 | "format": "s", 1090 | "logBase": 1, 1091 | "max": null, 1092 | "min": null, 1093 | "show": true, 1094 | "splitFactor": null 1095 | }, 1096 | "yBucketBound": "auto", 1097 | "yBucketNumber": null, 1098 | "yBucketSize": null 1099 | }, 1100 | { 1101 | "cards": { 1102 | "cardPadding": null, 1103 | "cardRound": null 1104 | }, 1105 | "color": { 1106 | "cardColor": "#b4ff00", 1107 | "colorScale": "sqrt", 1108 | "colorScheme": "interpolateOranges", 1109 | 
"exponent": 0.5, 1110 | "mode": "spectrum" 1111 | }, 1112 | "dataFormat": "tsbuckets", 1113 | "datasource": "${DS_MACBOOK-INFLUX}", 1114 | "description": "Latency histogram for the trim or unmap operations of a ZFS pool", 1115 | "fieldConfig": { 1116 | "defaults": { 1117 | "custom": {} 1118 | }, 1119 | "overrides": [] 1120 | }, 1121 | "gridPos": { 1122 | "h": 8, 1123 | "w": 5, 1124 | "x": 10, 1125 | "y": 20 1126 | }, 1127 | "heatmap": {}, 1128 | "hideZeroBuckets": false, 1129 | "highlightCards": true, 1130 | "id": 16, 1131 | "legend": { 1132 | "show": false 1133 | }, 1134 | "reverseYBuckets": false, 1135 | "targets": [ 1136 | { 1137 | "alias": "$tag_le", 1138 | "groupBy": [ 1139 | { 1140 | "params": [ 1141 | "$__interval" 1142 | ], 1143 | "type": "time" 1144 | }, 1145 | { 1146 | "params": [ 1147 | "le" 1148 | ], 1149 | "type": "tag" 1150 | }, 1151 | { 1152 | "params": [ 1153 | "null" 1154 | ], 1155 | "type": "fill" 1156 | } 1157 | ], 1158 | "measurement": "zpool_latency", 1159 | "orderByTime": "ASC", 1160 | "policy": "default", 1161 | "refId": "A", 1162 | "resultFormat": "time_series", 1163 | "select": [ 1164 | [ 1165 | { 1166 | "params": [ 1167 | "trim" 1168 | ], 1169 | "type": "field" 1170 | }, 1171 | { 1172 | "params": [], 1173 | "type": "last" 1174 | }, 1175 | { 1176 | "params": [ 1177 | "1s" 1178 | ], 1179 | "type": "non_negative_derivative" 1180 | } 1181 | ] 1182 | ], 1183 | "tags": [ 1184 | { 1185 | "key": "host", 1186 | "operator": "=~", 1187 | "value": "/^$hostname$/" 1188 | }, 1189 | { 1190 | "condition": "AND", 1191 | "key": "name", 1192 | "operator": "=~", 1193 | "value": "/^$poolname$/" 1194 | } 1195 | ] 1196 | } 1197 | ], 1198 | "timeFrom": null, 1199 | "timeShift": null, 1200 | "title": "Trim Write Queue", 1201 | "tooltip": { 1202 | "show": true, 1203 | "showHistogram": true 1204 | }, 1205 | "type": "heatmap", 1206 | "xAxis": { 1207 | "show": true 1208 | }, 1209 | "xBucketNumber": null, 1210 | "xBucketSize": null, 1211 | "yAxis": { 1212 | "decimals": 0, 1213 | "format": "s", 1214 | "logBase": 1, 1215 | "max": null, 1216 | "min": null, 1217 | "show": true, 1218 | "splitFactor": null 1219 | }, 1220 | "yBucketBound": "auto", 1221 | "yBucketNumber": null, 1222 | "yBucketSize": null 1223 | }, 1224 | { 1225 | "cards": { 1226 | "cardPadding": null, 1227 | "cardRound": null 1228 | }, 1229 | "color": { 1230 | "cardColor": "#b4ff00", 1231 | "colorScale": "sqrt", 1232 | "colorScheme": "interpolateOranges", 1233 | "exponent": 0.5, 1234 | "mode": "spectrum" 1235 | }, 1236 | "dataFormat": "tsbuckets", 1237 | "datasource": "${DS_MACBOOK-INFLUX}", 1238 | "description": "Latency histogram for the disk write operations of a ZFS pool", 1239 | "fieldConfig": { 1240 | "defaults": { 1241 | "custom": {} 1242 | }, 1243 | "overrides": [] 1244 | }, 1245 | "gridPos": { 1246 | "h": 8, 1247 | "w": 9, 1248 | "x": 15, 1249 | "y": 20 1250 | }, 1251 | "heatmap": {}, 1252 | "hideZeroBuckets": false, 1253 | "highlightCards": true, 1254 | "id": 17, 1255 | "legend": { 1256 | "show": false 1257 | }, 1258 | "reverseYBuckets": false, 1259 | "targets": [ 1260 | { 1261 | "alias": "$tag_le", 1262 | "groupBy": [ 1263 | { 1264 | "params": [ 1265 | "$__interval" 1266 | ], 1267 | "type": "time" 1268 | }, 1269 | { 1270 | "params": [ 1271 | "le" 1272 | ], 1273 | "type": "tag" 1274 | }, 1275 | { 1276 | "params": [ 1277 | "null" 1278 | ], 1279 | "type": "fill" 1280 | } 1281 | ], 1282 | "measurement": "zpool_latency", 1283 | "orderByTime": "ASC", 1284 | "policy": "default", 1285 | "refId": "A", 1286 | "resultFormat": 
"time_series", 1287 | "select": [ 1288 | [ 1289 | { 1290 | "params": [ 1291 | "disk_write" 1292 | ], 1293 | "type": "field" 1294 | }, 1295 | { 1296 | "params": [], 1297 | "type": "last" 1298 | }, 1299 | { 1300 | "params": [ 1301 | "1s" 1302 | ], 1303 | "type": "non_negative_derivative" 1304 | } 1305 | ] 1306 | ], 1307 | "tags": [ 1308 | { 1309 | "key": "host", 1310 | "operator": "=~", 1311 | "value": "/^$hostname$/" 1312 | }, 1313 | { 1314 | "condition": "AND", 1315 | "key": "name", 1316 | "operator": "=~", 1317 | "value": "/^$poolname$/" 1318 | } 1319 | ] 1320 | } 1321 | ], 1322 | "timeFrom": null, 1323 | "timeShift": null, 1324 | "title": "Disk Write Queue", 1325 | "tooltip": { 1326 | "show": true, 1327 | "showHistogram": true 1328 | }, 1329 | "type": "heatmap", 1330 | "xAxis": { 1331 | "show": true 1332 | }, 1333 | "xBucketNumber": null, 1334 | "xBucketSize": null, 1335 | "yAxis": { 1336 | "decimals": 0, 1337 | "format": "s", 1338 | "logBase": 1, 1339 | "max": null, 1340 | "min": null, 1341 | "show": true, 1342 | "splitFactor": null 1343 | }, 1344 | "yBucketBound": "auto", 1345 | "yBucketNumber": null, 1346 | "yBucketSize": null 1347 | }, 1348 | { 1349 | "collapsed": false, 1350 | "datasource": "${DS_MACBOOK-INFLUX}", 1351 | "gridPos": { 1352 | "h": 1, 1353 | "w": 24, 1354 | "x": 0, 1355 | "y": 28 1356 | }, 1357 | "id": 19, 1358 | "panels": [], 1359 | "title": "About", 1360 | "type": "row" 1361 | }, 1362 | { 1363 | "content": "I/O requests that are satisfied by accessing pool devices are managed by the ZIO scheduler.\nThe total latency is measured from the start of the I/O to completion by the disk.\nLatency through each queue is shown prior to its submission to the disk queue.\n\nThis view is useful for observing the effects of tuning the ZIO scheduler min and max values\n(see zfs-module-parameters(5) and [ZFS on Linux Module Parameters](https://openzfs.github.io/openzfs-docs/Performance%20and%20tuning/ZFS%20on%20Linux%20Module%20Parameters.html)):\n+ *zfs_vdev_max_active* controls the ZIO scheduler's disk queue depth (do not confuse with the block device's nr_requests)\n+ *zfs_vdev_sync_read_min_active* and *zfs_vdev_sync_read_max_active* control the synchronous queue for reads: most reads are sync\n+ *zfs_vdev_sync_write_min_active* and *zfs_vdev_sync_write_max_active* control the synchronous queue for writes: \nusually metadata or user data depending on the \"sync\" property setting or I/Os that are requested to be flushed\n+ *zfs_vdev_async_read_min_active* and *zfs_vdev_async_read_max_active* control the asynchronous queue for reads: usually prefetches\n+ *zfs_vdev_async_write_min_active* and *zfs_vdev_async_write_max_active* control the asynchronous queue for writes: \nusually the bulk of all writes at transaction group (txg) commit\n+ *zfs_vdev_scrub_min_active* and *zfs_vdev_scrub_max_active* controls the scan reads: usually scrub or resilver\n\n", 1364 | "datasource": "${DS_MACBOOK-INFLUX}", 1365 | "fieldConfig": { 1366 | "defaults": { 1367 | "custom": {} 1368 | }, 1369 | "overrides": [] 1370 | }, 1371 | "gridPos": { 1372 | "h": 15, 1373 | "w": 16, 1374 | "x": 0, 1375 | "y": 29 1376 | }, 1377 | "id": 21, 1378 | "mode": "markdown", 1379 | "targets": [ 1380 | { 1381 | "groupBy": [ 1382 | { 1383 | "params": [ 1384 | "$__interval" 1385 | ], 1386 | "type": "time" 1387 | }, 1388 | { 1389 | "params": [ 1390 | "null" 1391 | ], 1392 | "type": "fill" 1393 | } 1394 | ], 1395 | "orderByTime": "ASC", 1396 | "policy": "default", 1397 | "refId": "A", 1398 | "resultFormat": "time_series", 
1399 | "select": [ 1400 | [ 1401 | { 1402 | "params": [ 1403 | "value" 1404 | ], 1405 | "type": "field" 1406 | }, 1407 | { 1408 | "params": [], 1409 | "type": "mean" 1410 | } 1411 | ] 1412 | ], 1413 | "tags": [] 1414 | } 1415 | ], 1416 | "timeFrom": null, 1417 | "timeShift": null, 1418 | "title": "About ZFS Pool All Queues Read/Write Latency Histograms", 1419 | "type": "text" 1420 | }, 1421 | { 1422 | "colors": [ 1423 | "rgba(50, 172, 45, 0.97)", 1424 | "rgba(237, 129, 40, 0.89)", 1425 | "rgba(245, 54, 54, 0.9)" 1426 | ], 1427 | "composites": [], 1428 | "content": "graph LR\nIO((I/O request)) --> SR(sync read queue)\nIO --> SW(sync write queue)\nIO --> AR(async read queue)\nIO --> AW(async write queue)\nIO --> SCRUB(scrub queue)\nIO --> TRIM(trim queue)\nSR --> DISKQ(disk queue)\nSW --> DISKQ\nAR --> DISKQ\nAW --> DISKQ\nSCRUB --> DISKQ\nTRIM --> DISKQ\nDISKQ --> DISK((disk))\n", 1429 | "datasource": "${DS_MACBOOK-INFLUX}", 1430 | "decimals": 2, 1431 | "fieldConfig": { 1432 | "defaults": { 1433 | "custom": {} 1434 | }, 1435 | "overrides": [] 1436 | }, 1437 | "format": "none", 1438 | "graphId": "diagram_23", 1439 | "gridPos": { 1440 | "h": 15, 1441 | "w": 7, 1442 | "x": 16, 1443 | "y": 29 1444 | }, 1445 | "id": 23, 1446 | "init": { 1447 | "arrowMarkerAbsolute": true, 1448 | "cloneCssStyles": true, 1449 | "flowchart": { 1450 | "htmlLabels": true, 1451 | "useMaxWidth": true 1452 | }, 1453 | "gantt": { 1454 | "barGap": 4, 1455 | "barHeight": 20, 1456 | "fontFamily": "\"Open-Sans\", \"sans-serif\"", 1457 | "fontSize": 11, 1458 | "gridLineStartPadding": 35, 1459 | "leftPadding": 75, 1460 | "numberSectionStyles": 3, 1461 | "titleTopMargin": 25, 1462 | "topPadding": 50 1463 | }, 1464 | "logLevel": 3, 1465 | "securityLevel": "loose", 1466 | "sequence": { 1467 | "actorMargin": 50, 1468 | "bottomMarginAdj": 1, 1469 | "boxMargin": 10, 1470 | "boxTextMargin": 5, 1471 | "diagramMarginX": 50, 1472 | "diagramMarginY": 10, 1473 | "height": 65, 1474 | "messageMargin": 35, 1475 | "mirrorActors": true, 1476 | "noteMargin": 10, 1477 | "useMaxWidth": true, 1478 | "width": 150 1479 | }, 1480 | "startOnLoad": false, 1481 | "theme": "dark" 1482 | }, 1483 | "legend": { 1484 | "avg": true, 1485 | "current": true, 1486 | "gradient": { 1487 | "enabled": true, 1488 | "show": true 1489 | }, 1490 | "max": true, 1491 | "min": true, 1492 | "show": false, 1493 | "total": true 1494 | }, 1495 | "mappingType": 1, 1496 | "mappingTypes": [ 1497 | { 1498 | "$$hashKey": "object:155", 1499 | "name": "value to text", 1500 | "value": 1 1501 | }, 1502 | { 1503 | "$$hashKey": "object:156", 1504 | "name": "range to text", 1505 | "value": 2 1506 | } 1507 | ], 1508 | "maxDataPoints": 100, 1509 | "maxWidth": false, 1510 | "mermaidServiceUrl": "", 1511 | "metricCharacterReplacements": [], 1512 | "moddedSeriesVal": 0, 1513 | "mode": "content", 1514 | "nullPointMode": "connected", 1515 | "seriesOverrides": [], 1516 | "style": "", 1517 | "styleValues": {}, 1518 | "targets": [ 1519 | { 1520 | "groupBy": [ 1521 | { 1522 | "params": [ 1523 | "$__interval" 1524 | ], 1525 | "type": "time" 1526 | }, 1527 | { 1528 | "params": [ 1529 | "null" 1530 | ], 1531 | "type": "fill" 1532 | } 1533 | ], 1534 | "hide": true, 1535 | "orderByTime": "ASC", 1536 | "policy": "default", 1537 | "refId": "A", 1538 | "resultFormat": "time_series", 1539 | "select": [ 1540 | [ 1541 | { 1542 | "params": [ 1543 | "value" 1544 | ], 1545 | "type": "field" 1546 | }, 1547 | { 1548 | "params": [], 1549 | "type": "mean" 1550 | } 1551 | ] 1552 | ], 1553 | "tags": [] 1554 | } 1555 | 
], 1556 | "themes": [ 1557 | "default", 1558 | "dark", 1559 | "forest", 1560 | "neutral" 1561 | ], 1562 | "thresholds": "0,10", 1563 | "timeFrom": null, 1564 | "timeShift": null, 1565 | "title": "Panel Title", 1566 | "type": "jdbranham-diagram-panel", 1567 | "valueMaps": [ 1568 | { 1569 | "$$hashKey": "object:151", 1570 | "op": "=", 1571 | "text": "N/A", 1572 | "value": "null" 1573 | } 1574 | ], 1575 | "valueName": "avg", 1576 | "valueOptions": [ 1577 | "avg", 1578 | "min", 1579 | "max", 1580 | "total", 1581 | "current" 1582 | ] 1583 | } 1584 | ], 1585 | "refresh": false, 1586 | "schemaVersion": 22, 1587 | "style": "dark", 1588 | "tags": [ 1589 | "ZFS", 1590 | "Latency", 1591 | "Histogram" 1592 | ], 1593 | "templating": { 1594 | "list": [ 1595 | { 1596 | "allValue": null, 1597 | "current": {}, 1598 | "datasource": "${DS_MACBOOK-INFLUX}", 1599 | "definition": "show tag values from \"zpool_latency\" with key = \"host\"", 1600 | "hide": 0, 1601 | "includeAll": false, 1602 | "index": -1, 1603 | "label": null, 1604 | "multi": false, 1605 | "name": "hostname", 1606 | "options": [], 1607 | "query": "show tag values from \"zpool_latency\" with key = \"host\"", 1608 | "refresh": 1, 1609 | "regex": "/([-a-zA-Z-0-9]+)/", 1610 | "skipUrlSync": false, 1611 | "sort": 5, 1612 | "tagValuesQuery": "", 1613 | "tags": [], 1614 | "tagsQuery": "", 1615 | "type": "query", 1616 | "useTags": false 1617 | }, 1618 | { 1619 | "allValue": null, 1620 | "current": {}, 1621 | "datasource": "${DS_MACBOOK-INFLUX}", 1622 | "definition": "show tag values from \"zpool_latency\" with key = \"name\" where \"host\" =~ /^$hostname/", 1623 | "hide": 0, 1624 | "includeAll": false, 1625 | "index": -1, 1626 | "label": null, 1627 | "multi": false, 1628 | "name": "poolname", 1629 | "options": [], 1630 | "query": "show tag values from \"zpool_latency\" with key = \"name\" where \"host\" =~ /^$hostname/", 1631 | "refresh": 1, 1632 | "regex": "", 1633 | "skipUrlSync": false, 1634 | "sort": 5, 1635 | "tagValuesQuery": "", 1636 | "tags": [], 1637 | "tagsQuery": "", 1638 | "type": "query", 1639 | "useTags": false 1640 | } 1641 | ] 1642 | }, 1643 | "time": { 1644 | "from": "2020-05-25T21:34:30.137Z", 1645 | "to": "2020-05-25T21:39:54.445Z" 1646 | }, 1647 | "timepicker": { 1648 | "refresh_intervals": [ 1649 | "10s", 1650 | "30s", 1651 | "1m", 1652 | "5m", 1653 | "15m", 1654 | "30m", 1655 | "1h", 1656 | "2h", 1657 | "1d" 1658 | ] 1659 | }, 1660 | "timezone": "", 1661 | "title": "ZFS Pool Latency Heatmaps Influxdb", 1662 | "uid": "TbB4-DkGz", 1663 | "variables": { 1664 | "list": [] 1665 | }, 1666 | "version": 2 1667 | } -------------------------------------------------------------------------------- /dashboards/grafana/compressed-ARC.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_INFLUXDB", 5 | "label": "InfluxDB", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "influxdb", 9 | "pluginName": "InfluxDB" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "7.0.0" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "influxdb", 28 | "name": "InfluxDB", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "text", 34 | "name": "Text", 35 | "version": "" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [ 40 | { 41 | "builtIn": 1, 42 | "datasource": "-- 
Grafana --", 43 | "enable": true, 44 | "hide": true, 45 | "iconColor": "rgba(0, 211, 255, 1)", 46 | "name": "Annotations & Alerts", 47 | "type": "dashboard" 48 | } 49 | ] 50 | }, 51 | "editable": true, 52 | "gnetId": null, 53 | "graphTooltip": 1, 54 | "id": null, 55 | "iteration": 1590445927804, 56 | "links": [], 57 | "panels": [ 58 | { 59 | "aliasColors": {}, 60 | "bars": false, 61 | "dashLength": 10, 62 | "dashes": false, 63 | "datasource": "${DS_INFLUXDB}", 64 | "fieldConfig": { 65 | "defaults": { 66 | "custom": {} 67 | }, 68 | "overrides": [] 69 | }, 70 | "fill": 1, 71 | "fillGradient": 0, 72 | "gridPos": { 73 | "h": 9, 74 | "w": 12, 75 | "x": 0, 76 | "y": 0 77 | }, 78 | "hiddenSeries": false, 79 | "id": 4, 80 | "interval": "10s", 81 | "legend": { 82 | "avg": false, 83 | "current": false, 84 | "max": false, 85 | "min": false, 86 | "show": true, 87 | "total": false, 88 | "values": false 89 | }, 90 | "lines": true, 91 | "linewidth": 1, 92 | "links": [], 93 | "nullPointMode": "null", 94 | "options": { 95 | "dataLinks": [] 96 | }, 97 | "percentage": false, 98 | "pointradius": 5, 99 | "points": false, 100 | "renderer": "flot", 101 | "seriesOverrides": [], 102 | "spaceLength": 10, 103 | "stack": false, 104 | "steppedLine": false, 105 | "targets": [ 106 | { 107 | "alias": "space efficiency improvement from compression", 108 | "groupBy": [ 109 | { 110 | "params": [ 111 | "$__interval" 112 | ], 113 | "type": "time" 114 | }, 115 | { 116 | "params": [ 117 | "null" 118 | ], 119 | "type": "fill" 120 | } 121 | ], 122 | "measurement": "zfs", 123 | "orderByTime": "ASC", 124 | "policy": "default", 125 | "query": "SELECT (last(\"arcstats_uncompressed_size\")/(last(\"arcstats_compressed_size\")+1) -1) FROM \"zfs\" WHERE (\"host\" =~ /^$hostname$/) AND $timeFilter GROUP BY time($__interval) fill(null)", 126 | "rawQuery": true, 127 | "refId": "A", 128 | "resultFormat": "time_series", 129 | "select": [ 130 | [ 131 | { 132 | "params": [ 133 | "arcstats_uncompressed_size" 134 | ], 135 | "type": "field" 136 | }, 137 | { 138 | "params": [], 139 | "type": "last" 140 | } 141 | ] 142 | ], 143 | "tags": [ 144 | { 145 | "key": "host", 146 | "operator": "=", 147 | "value": "elvis" 148 | } 149 | ] 150 | } 151 | ], 152 | "thresholds": [], 153 | "timeFrom": null, 154 | "timeRegions": [], 155 | "timeShift": null, 156 | "title": "ARC Space Efficiency Improvement from Compression", 157 | "tooltip": { 158 | "shared": true, 159 | "sort": 0, 160 | "value_type": "individual" 161 | }, 162 | "type": "graph", 163 | "xaxis": { 164 | "buckets": null, 165 | "mode": "time", 166 | "name": null, 167 | "show": true, 168 | "values": [] 169 | }, 170 | "yaxes": [ 171 | { 172 | "decimals": 2, 173 | "format": "percentunit", 174 | "label": null, 175 | "logBase": 1, 176 | "max": null, 177 | "min": null, 178 | "show": true 179 | }, 180 | { 181 | "format": "short", 182 | "label": null, 183 | "logBase": 1, 184 | "max": null, 185 | "min": null, 186 | "show": true 187 | } 188 | ], 189 | "yaxis": { 190 | "align": false, 191 | "alignLevel": null 192 | } 193 | }, 194 | { 195 | "aliasColors": {}, 196 | "bars": false, 197 | "dashLength": 10, 198 | "dashes": false, 199 | "datasource": "${DS_INFLUXDB}", 200 | "fieldConfig": { 201 | "defaults": { 202 | "custom": {} 203 | }, 204 | "overrides": [] 205 | }, 206 | "fill": 1, 207 | "fillGradient": 0, 208 | "gridPos": { 209 | "h": 9, 210 | "w": 12, 211 | "x": 12, 212 | "y": 0 213 | }, 214 | "hiddenSeries": false, 215 | "id": 2, 216 | "interval": ">10s", 217 | "legend": { 218 | "avg": false, 219 | "current": 
false, 220 | "max": false, 221 | "min": false, 222 | "show": true, 223 | "total": false, 224 | "values": false 225 | }, 226 | "lines": true, 227 | "linewidth": 1, 228 | "links": [], 229 | "nullPointMode": "null", 230 | "options": { 231 | "dataLinks": [] 232 | }, 233 | "percentage": false, 234 | "pointradius": 5, 235 | "points": false, 236 | "renderer": "flot", 237 | "seriesOverrides": [ 238 | { 239 | "alias": "active uncompressed", 240 | "stack": "A" 241 | }, 242 | { 243 | "alias": "compressed data size", 244 | "stack": "A" 245 | }, 246 | { 247 | "alias": "total ARC size", 248 | "stack": false 249 | }, 250 | { 251 | "alias": "uncompressed data size", 252 | "dashLength": 20, 253 | "dashes": true, 254 | "fill": 0, 255 | "spaceLength": 5, 256 | "stack": false 257 | } 258 | ], 259 | "spaceLength": 10, 260 | "stack": false, 261 | "steppedLine": false, 262 | "targets": [ 263 | { 264 | "alias": "$col", 265 | "groupBy": [ 266 | { 267 | "params": [ 268 | "$__interval" 269 | ], 270 | "type": "time" 271 | }, 272 | { 273 | "params": [ 274 | "null" 275 | ], 276 | "type": "fill" 277 | } 278 | ], 279 | "measurement": "zfs", 280 | "orderByTime": "ASC", 281 | "policy": "default", 282 | "query": "SELECT last(\"arcstats_overhead_size\") AS \"active uncompressed\", last(\"arcstats_compressed_size\") AS \"compressed data size\", last(\"arcstats_uncompressed_size\") AS \"uncompressed data size\", last(\"arcstats_size\") AS \"total ARC size\" FROM \"zfs\" WHERE (\"host\" =~ /^$hostname$/) AND $timeFilter GROUP BY time($__interval) fill(null)", 283 | "rawQuery": true, 284 | "refId": "A", 285 | "resultFormat": "time_series", 286 | "select": [ 287 | [ 288 | { 289 | "params": [ 290 | "arcstats_overhead_size" 291 | ], 292 | "type": "field" 293 | }, 294 | { 295 | "params": [], 296 | "type": "last" 297 | }, 298 | { 299 | "params": [ 300 | "active uncompressed" 301 | ], 302 | "type": "alias" 303 | } 304 | ], 305 | [ 306 | { 307 | "params": [ 308 | "arcstats_compressed_size" 309 | ], 310 | "type": "field" 311 | }, 312 | { 313 | "params": [], 314 | "type": "last" 315 | }, 316 | { 317 | "params": [ 318 | "compressed data size" 319 | ], 320 | "type": "alias" 321 | } 322 | ], 323 | [ 324 | { 325 | "params": [ 326 | "arcstats_uncompressed_size" 327 | ], 328 | "type": "field" 329 | }, 330 | { 331 | "params": [], 332 | "type": "last" 333 | }, 334 | { 335 | "params": [ 336 | "uncompressed data size" 337 | ], 338 | "type": "alias" 339 | } 340 | ], 341 | [ 342 | { 343 | "params": [ 344 | "arcstats_size" 345 | ], 346 | "type": "field" 347 | }, 348 | { 349 | "params": [], 350 | "type": "last" 351 | }, 352 | { 353 | "params": [ 354 | "total ARC size" 355 | ], 356 | "type": "alias" 357 | } 358 | ] 359 | ], 360 | "tags": [ 361 | { 362 | "key": "host", 363 | "operator": "=~", 364 | "value": "/^$hostname$/" 365 | } 366 | ] 367 | } 368 | ], 369 | "thresholds": [], 370 | "timeFrom": null, 371 | "timeRegions": [], 372 | "timeShift": null, 373 | "title": "Compressed ARC Size", 374 | "tooltip": { 375 | "shared": true, 376 | "sort": 0, 377 | "value_type": "individual" 378 | }, 379 | "type": "graph", 380 | "xaxis": { 381 | "buckets": null, 382 | "mode": "time", 383 | "name": null, 384 | "show": true, 385 | "values": [] 386 | }, 387 | "yaxes": [ 388 | { 389 | "format": "bytes", 390 | "label": null, 391 | "logBase": 1, 392 | "max": null, 393 | "min": null, 394 | "show": true 395 | }, 396 | { 397 | "format": "short", 398 | "label": null, 399 | "logBase": 1, 400 | "max": null, 401 | "min": null, 402 | "show": true 403 | } 404 | ], 405 | "yaxis": { 
406 | "align": false,
407 | "alignLevel": null
408 | }
409 | },
410 | {
411 | "collapsed": false,
412 | "datasource": "${DS_INFLUXDB}",
413 | "gridPos": {
414 | "h": 1,
415 | "w": 24,
416 | "x": 0,
417 | "y": 9
418 | },
419 | "id": 8,
420 | "panels": [],
421 | "title": "About",
422 | "type": "row"
423 | },
424 | {
425 | "content": "The Compressed ARC dashboard shows the space efficiency of the ZFS Adjustable Replacement Cache (ARC).\n\nWhen both the compressed ARC feature and data compression are enabled, the ARC keeps the data read from\ndisk in compressed form. When the data is actively being read, it is uncompressed. This is\nexpected to improve ARC efficiency because the ARC can then cache data whose uncompressed size is much\nlarger than physical RAM.\n\n+ _ARC Space Efficiency Improvement from Compression_ shows the ratio of uncompressed to compressed size\nas a percentage. A value of 0% indicates that the data is not compressed. A value of 100% indicates the\nuncompressed size is 2x the compressed size. Note: if there are no pools imported, then the ratio can\nappear to be overly optimistic because the values are close to zero.\n\n+ _Compressed ARC Size_ shows the sizes in bytes of:\n + _active uncompressed_ is the size of uncompressed data. In arcstats this is the *overhead_size*.\n This is stacked with _compressed data size_\n + _compressed data size_ is the size of the compressed data. In arcstats this is the *compressed_size*.\n This is stacked with _active uncompressed_\n + _uncompressed data size_ is the size of the data if it were all uncompressed. In arcstats this is the\n *uncompressed_size*. This value can be much larger than the available RAM.\n + _total ARC size_ is the current total size of all items in the ARC, including all data and internal\n data structures.\n",
426 | "datasource": "${DS_INFLUXDB}",
427 | "fieldConfig": {
428 | "defaults": {
429 | "custom": {}
430 | },
431 | "overrides": []
432 | },
433 | "gridPos": {
434 | "h": 7,
435 | "w": 24,
436 | "x": 0,
437 | "y": 10
438 | },
439 | "id": 6,
440 | "links": [],
441 | "mode": "markdown",
442 | "title": "About Compressed ARC Dashboard",
443 | "type": "text"
444 | }
445 | ],
446 | "refresh": false,
447 | "schemaVersion": 25,
448 | "style": "dark",
449 | "tags": [],
450 | "templating": {
451 | "list": [
452 | {
453 | "allValue": null,
454 | "current": {},
455 | "datasource": "${DS_INFLUXDB}",
456 | "definition": "",
457 | "hide": 0,
458 | "includeAll": false,
459 | "label": null,
460 | "multi": false,
461 | "name": "hostname",
462 | "options": [],
463 | "query": "SHOW TAG VALUES FROM \"zfs\" WITH KEY = \"host\"",
464 | "refresh": 1,
465 | "regex": "/([-a-zA-Z-0-9]+)/",
466 | "skipUrlSync": false,
467 | "sort": 5,
468 | "tagValuesQuery": "",
469 | "tags": [],
470 | "tagsQuery": "",
471 | "type": "query",
472 | "useTags": false
473 | }
474 | ]
475 | },
476 | "time": {
477 | "from": "2019-05-11T22:14:53.738Z",
478 | "to": "2019-05-11T22:39:31.301Z"
479 | },
480 | "timepicker": {
481 | "refresh_intervals": [
482 | "5s",
483 | "10s",
484 | "30s",
485 | "1m",
486 | "5m",
487 | "15m",
488 | "30m",
489 | "1h",
490 | "2h",
491 | "1d"
492 | ],
493 | "time_options": [
494 | "5m",
495 | "15m",
496 | "1h",
497 | "6h",
498 | "12h",
499 | "24h",
500 | "2d",
501 | "7d",
502 | "30d"
503 | ]
504 | },
505 | "timezone": "",
506 | "title": "Compressed ARC",
507 | "uid": "97LnNsmZk",
508 | "version": 1
509 | }
--------------------------------------------------------------------------------
/telegraf.d/zpool_influxdb.conf:
-------------------------------------------------------------------------------- 1 | # # Read metrics from zpool_influxdb 2 | [[inputs.exec]] 3 | # ## default installation location for zpool_influxdb command 4 | commands = ["/usr/local/bin/zpool_influxdb"] 5 | # ## Timeout for each command to complete. 6 | # timeout = "5s" 7 | # 8 | # ## measurement name suffix (for separating different commands) 9 | # name_suffix = "_mycollector" 10 | # 11 | # ## Data format to consume. 12 | # ## Each data format has its own unique set of configuration options, read 13 | # ## more about them here: 14 | # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md 15 | data_format = "influx" 16 | -------------------------------------------------------------------------------- /zpool_influxdb.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Gather top-level ZFS pool and resilver/scan statistics and print using 3 | * influxdb line protocol 4 | * usage: [options] [pool_name] 5 | * where options are: 6 | * --execd, -e run in telegraf execd input plugin mode, [CR] on 7 | * stdin causes a sample to be printed and wait for 8 | * the next [CR] 9 | * --no-histograms, -n don't print histogram data (reduces cardinality 10 | * if you don't care about histograms) 11 | * --sum-histogram-buckets, -s sum histogram bucket values 12 | * 13 | * To integrate into telegraf use one of: 14 | * 1. the `inputs.execd` plugin with the `--execd` option 15 | * 2. the `inputs.exec` plugin to simply run with no options 16 | * 17 | * NOTE: libzfs is an unstable interface. YMMV. 18 | * For Linux compile with: 19 | * cmake . && make && make install 20 | * 21 | * The design goals of this software include: 22 | * + be as lightweight as possible 23 | * + reduce the number of external dependencies as far as possible, hence 24 | * there is no dependency on a client library for managing the metric 25 | * collection -- info is printed, KISS 26 | * + broken pools or kernel bugs can cause this process to hang in an 27 | * unkillable state. For this reason, it is best to keep the damage limited 28 | * to a small process like zpool_influxdb rather than a larger collector. 29 | * 30 | * Copyright 2018-2020 Richard Elling 31 | * 32 | * The MIT License (MIT) 33 | * 34 | * Permission is hereby granted, free of charge, to any person obtaining a copy 35 | * of this software and associated documentation files (the "Software"), to deal 36 | * in the Software without restriction, including without limitation the rights 37 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 38 | * copies of the Software, and to permit persons to whom the Software is 39 | * furnished to do so, subject to the following conditions: 40 | * 41 | * The above copyright notice and this permission notice shall be included in 42 | * all copies or substantial portions of the Software. 43 | * 44 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 47 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 50 | * SOFTWARE. 
51 | */
52 |
53 | #include <stdio.h>
54 | #include <stdlib.h>
55 | #include <stdint.h>
56 | #include <string.h>
57 | #include <getopt.h>
58 | #include <time.h>
59 | #include <libzfs.h>
60 |
61 | #define POOL_MEASUREMENT "zpool_stats"
62 | #define SCAN_MEASUREMENT "zpool_scan_stats"
63 | #define VDEV_MEASUREMENT "zpool_vdev_stats"
64 | #define POOL_LATENCY_MEASUREMENT "zpool_latency"
65 | #define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue"
66 | #define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */
67 | #define POOL_IO_SIZE_MEASUREMENT "zpool_io_size"
68 | #define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */
69 |
70 | /*
71 |  * telegraf 1.6.4 can handle uint64, which is the native ZFS type
72 |  * telegraf also handles the input of uint64 and will convert to match
73 |  * influxdb via the outputs.influxdb plugin. This is the easiest method
74 |  * for future compatibility. If it is not possible to use telegraf as a
75 |  * metrics broker and your influxdb can accept unsigned 64-bit values,
76 |  * then consider defining SUPPORT_UINT64 in the CMakeLists.txt or Makefile.
77 |  *
78 |  * influxdb 1.x requires an option to enable uint64
79 |  * influxdb 2.x supports uint64
80 |  */
81 | #ifdef SUPPORT_UINT64
82 | #define IFMT "%luu"
83 | #define MASK_UINT64(x) (x)
84 | #else
85 | #define IFMT "%lui"
86 | #define MASK_UINT64(x) ((x) & INT64_MAX)
87 | #endif
88 |
89 | /* global options */
90 | int execd_mode = 0;
91 | int no_histograms = 0;
92 | int sum_histogram_buckets = 0;
93 | uint64_t timestamp = 0;
94 | int complained_about_sync = 0;
95 |
96 | /*
97 |  * in cases where ZFS is installed, but not the ZFS dev environment, copy in
98 |  * the needed definitions from libzfs_impl.h
99 |  */
100 | #ifndef _LIBZFS_IMPL_H
101 | struct zpool_handle {
102 |     libzfs_handle_t *zpool_hdl;
103 |     zpool_handle_t *zpool_next;
104 |     char zpool_name[ZFS_MAX_DATASET_NAME_LEN];
105 |     int zpool_state;
106 |     size_t zpool_config_size;
107 |     nvlist_t *zpool_config;
108 |     nvlist_t *zpool_old_config;
109 |     nvlist_t *zpool_props;
110 |     diskaddr_t zpool_start_block;
111 | };
112 | #endif
113 |
114 | /*
115 |  * influxdb line protocol rules for escaping are important because the
116 |  * zpool name can include characters that need to be escaped
117 |  *
118 |  * caller is responsible for freeing result
119 |  */
120 | char *
121 | escape_string(char *s) {
122 |     char *c, *d;
123 |     char *t = (char *) malloc(strlen(s) * 2 + 1); /* worst case: every char escaped */
124 |     if (t == NULL) {
125 |         fprintf(stderr, "error: cannot allocate memory\n");
126 |         exit(1);
127 |     }
128 |
129 |     for (c = s, d = t; *c != '\0'; c++, d++) {
130 |         switch (*c) {
131 |         case ' ':
132 |         case ',':
133 |         case '=':
134 |         case '\\':
135 |             *d++ = '\\'; /* FALLTHROUGH */
136 |         default:
137 |             *d = *c;
138 |         }
139 |     }
140 |     *d = '\0';
141 |     return (t);
142 | }
143 |
144 | /*
145 |  * print_scan_status() prints the details as often seen in the "zpool status"
146 |  * output. However, unlike the zpool command, which is intended for humans,
147 |  * this output is suitable for long-term tracking in influxdb.
148 |  * TODO: update to include issued scan data
149 |  */
150 | int
151 | print_scan_status(nvlist_t *nvroot, const char *pool_name) {
152 |     uint_t c;
153 |     int64_t elapsed;
154 |     uint64_t examined, pass_exam, paused_time, paused_ts, rate;
155 |     uint64_t remaining_time;
156 |     pool_scan_stat_t *ps = NULL;
157 |     double pct_done;
158 |     char *state[DSS_NUM_STATES] = {"none", "scanning", "finished",
159 |         "canceled"};
160 |     char *func;
161 |
162 |     (void) nvlist_lookup_uint64_array(nvroot,
163 |         ZPOOL_CONFIG_SCAN_STATS,
164 |         (uint64_t **) &ps, &c);
165 |
166 |     /*
167 |      * ignore if there are no stats
168 |      */
169 |     if (ps == NULL)
170 |         return (0);
171 |
172 |     /*
173 |      * return error if state is bogus
174 |      */
175 |     if (ps->pss_state >= DSS_NUM_STATES ||
176 |         ps->pss_func >= POOL_SCAN_FUNCS) {
177 |         if (complained_about_sync % 1000 == 0) {
178 |             fprintf(stderr, "error: cannot decode scan stats: ZFS is "
179 |                 "out of sync with compiled zpool_influxdb\n");
180 |         }
181 |         complained_about_sync++;
182 |         return (1);
183 |     }
184 |
185 |     switch (ps->pss_func) {
186 |     case POOL_SCAN_NONE:
187 |         func = "none_requested";
188 |         break;
189 |     case POOL_SCAN_SCRUB:
190 |         func = "scrub";
191 |         break;
192 |     case POOL_SCAN_RESILVER:
193 |         func = "resilver";
194 |         break;
195 | #ifdef POOL_SCAN_REBUILD
196 |     case POOL_SCAN_REBUILD:
197 |         func = "rebuild";
198 |         break;
199 | #endif
200 |     default:
201 |         func = "scan";
202 |     }
203 |
204 |     /* overall progress */
205 |     examined = ps->pss_examined ? ps->pss_examined : 1;
206 |     pct_done = 0.0;
207 |     if (ps->pss_to_examine > 0)
208 |         pct_done = 100.0 * examined / ps->pss_to_examine;
209 |
210 | #ifdef EZFS_SCRUB_PAUSED
211 |     paused_ts = ps->pss_pass_scrub_pause;
212 |     paused_time = ps->pss_pass_scrub_spent_paused;
213 | #else
214 |     paused_ts = 0;
215 |     paused_time = 0;
216 | #endif
217 |
218 |     /* calculations for this pass */
219 |     if (ps->pss_state == DSS_SCANNING) {
220 |         elapsed = (int64_t) time(NULL) - (int64_t) ps->pss_pass_start -
221 |             (int64_t) paused_time;
222 |         elapsed = (elapsed > 0) ? elapsed : 1;
223 |         pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
224 |         rate = pass_exam / elapsed;
225 |         rate = (rate > 0) ? rate : 1;
226 |         remaining_time = (ps->pss_to_examine - examined) / rate;
227 |     } else {
228 |         elapsed =
229 |             (int64_t) ps->pss_end_time - (int64_t) ps->pss_pass_start -
230 |             (int64_t) paused_time;
231 |         elapsed = (elapsed > 0) ? elapsed : 1;
232 |         pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
233 |         rate = pass_exam / elapsed;
234 |         remaining_time = 0;
235 |     }
236 |     rate = rate ?
rate : 1; 237 | 238 | /* influxdb line protocol format: "tags metrics timestamp" */ 239 | (void) printf("%s,function=%s,name=%s,state=%s ", 240 | SCAN_MEASUREMENT, func, pool_name, state[ps->pss_state]); 241 | (void) printf("end_ts="IFMT",errors="IFMT",examined="IFMT"," 242 | "pass_examined="IFMT",pause_ts="IFMT",paused_t="IFMT"," 243 | "pct_done=%.2f,processed="IFMT",rate="IFMT"," 244 | "remaining_t="IFMT",start_ts="IFMT"," 245 | "to_examine="IFMT",to_process="IFMT" ", 246 | MASK_UINT64(ps->pss_end_time), 247 | MASK_UINT64(ps->pss_errors), 248 | MASK_UINT64(examined), 249 | MASK_UINT64(pass_exam), 250 | MASK_UINT64(paused_ts), 251 | MASK_UINT64(paused_time), 252 | pct_done, 253 | MASK_UINT64(ps->pss_processed), 254 | MASK_UINT64(rate), 255 | MASK_UINT64(remaining_time), 256 | MASK_UINT64(ps->pss_start_time), 257 | MASK_UINT64(ps->pss_to_examine), 258 | MASK_UINT64(ps->pss_to_process) 259 | ); 260 | (void) printf("%lu\n", timestamp); 261 | return (0); 262 | } 263 | 264 | /* 265 | * get a vdev name that corresponds to the top-level vdev names 266 | * printed by `zpool status` 267 | */ 268 | char * 269 | get_vdev_name(nvlist_t *nvroot, const char *parent_name) { 270 | static char vdev_name[256]; 271 | char *vdev_type = NULL; 272 | uint64_t vdev_id = 0; 273 | 274 | if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, 275 | &vdev_type) != 0) { 276 | vdev_type = "unknown"; 277 | } 278 | if (nvlist_lookup_uint64( 279 | nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 280 | vdev_id = UINT64_MAX; 281 | } 282 | if (parent_name == NULL) { 283 | (void) snprintf(vdev_name, sizeof(vdev_name), "%s", 284 | vdev_type); 285 | } else { 286 | (void) snprintf(vdev_name, sizeof(vdev_name), 287 | "%s/%s-%lu", 288 | parent_name, vdev_type, vdev_id); 289 | } 290 | return (vdev_name); 291 | } 292 | 293 | /* 294 | * get a string suitable for an influxdb tag that describes this vdev 295 | * 296 | * By default only the vdev hierarchical name is shown, separated by '/' 297 | * If the vdev has an associated path, which is typical of leaf vdevs, 298 | * then the path is added. 299 | * It would be nice to have the devid instead of the path, but under 300 | * Linux we cannot be sure a devid will exist and we'd rather have 301 | * something than nothing, so we'll use path instead. 
302 | */ 303 | char * 304 | get_vdev_desc(nvlist_t *nvroot, const char *parent_name) { 305 | static char vdev_desc[2 * MAXPATHLEN]; 306 | char *vdev_type = NULL; 307 | uint64_t vdev_id = 0; 308 | char vdev_value[MAXPATHLEN]; 309 | char *vdev_path = NULL; 310 | char *s, *t; 311 | 312 | if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type) != 0) { 313 | vdev_type = "unknown"; 314 | } 315 | if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0) { 316 | vdev_id = UINT64_MAX; 317 | } 318 | if (nvlist_lookup_string( 319 | nvroot, ZPOOL_CONFIG_PATH, &vdev_path) != 0) { 320 | vdev_path = NULL; 321 | } 322 | 323 | if (parent_name == NULL) { 324 | s = escape_string(vdev_type); 325 | (void) snprintf(vdev_value, sizeof(vdev_value), "vdev=%s", s); 326 | free(s); 327 | } else { 328 | s = escape_string((char *)parent_name); 329 | t = escape_string(vdev_type); 330 | (void) snprintf(vdev_value, sizeof(vdev_value), 331 | "vdev=%s/%s-%lu", s, t, vdev_id); 332 | free(s); 333 | free(t); 334 | } 335 | if (vdev_path == NULL) { 336 | (void) snprintf(vdev_desc, sizeof(vdev_desc), "%s", 337 | vdev_value); 338 | } else { 339 | s = escape_string(vdev_path); 340 | (void) snprintf(vdev_desc, sizeof(vdev_desc), "path=%s,%s", 341 | s, vdev_value); 342 | free(s); 343 | } 344 | return (vdev_desc); 345 | } 346 | 347 | /* 348 | * vdev summary stats are a combination of the data shown by 349 | * `zpool status` and `zpool list -v` 350 | */ 351 | int 352 | print_summary_stats(nvlist_t *nvroot, const char *pool_name, 353 | const char *parent_name) { 354 | uint_t c; 355 | vdev_stat_t *vs; 356 | char *vdev_desc = NULL; 357 | vdev_desc = get_vdev_desc(nvroot, parent_name); 358 | 359 | if (nvlist_lookup_uint64_array(nvroot, 360 | ZPOOL_CONFIG_VDEV_STATS, 361 | (uint64_t **) &vs, &c) != 0) { 362 | return (1); 363 | } 364 | (void) printf("%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, pool_name, 365 | zpool_state_to_name((vdev_state_t) vs->vs_state, 366 | (vdev_aux_t) vs->vs_aux), 367 | vdev_desc); 368 | (void) printf("alloc="IFMT",free="IFMT",size="IFMT"," 369 | "read_bytes="IFMT",read_errors="IFMT",read_ops="IFMT"," 370 | "write_bytes="IFMT",write_errors="IFMT",write_ops="IFMT"," 371 | "checksum_errors="IFMT",fragmentation="IFMT"", 372 | MASK_UINT64(vs->vs_alloc), 373 | MASK_UINT64(vs->vs_space - vs->vs_alloc), 374 | MASK_UINT64(vs->vs_space), 375 | MASK_UINT64(vs->vs_bytes[ZIO_TYPE_READ]), 376 | MASK_UINT64(vs->vs_read_errors), 377 | MASK_UINT64(vs->vs_ops[ZIO_TYPE_READ]), 378 | MASK_UINT64(vs->vs_bytes[ZIO_TYPE_WRITE]), 379 | MASK_UINT64(vs->vs_write_errors), 380 | MASK_UINT64(vs->vs_ops[ZIO_TYPE_WRITE]), 381 | MASK_UINT64(vs->vs_checksum_errors), 382 | MASK_UINT64(vs->vs_fragmentation)); 383 | (void) printf(" %lu\n", timestamp); 384 | return (0); 385 | } 386 | 387 | /* 388 | * vdev latency stats are histograms stored as nvlist arrays of uint64. 389 | * Latency stats include the ZIO scheduler classes plus lower-level 390 | * vdev latencies. 391 | * 392 | * In many cases, the top-level "root" view obscures the underlying 393 | * top-level vdev operations. For example, if a pool has a log, special, 394 | * or cache device, then each can behave very differently. It is useful 395 | * to see how each is responding. 
396 | */
397 | int
398 | print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
399 |     const char *parent_name) {
400 |     uint_t c, end = 0;
401 |     nvlist_t *nv_ex;
402 |     char *vdev_desc = NULL;
403 |
404 |     /* short_names become part of the metric name and are influxdb-ready */
405 |     struct lat_lookup {
406 |         char *name;
407 |         char *short_name;
408 |         uint64_t sum;
409 |         uint64_t *array;
410 |     };
411 |     struct lat_lookup lat_type[] = {
412 |         {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
413 |         {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
414 |         {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
415 |         {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
416 |         {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
417 |         {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
418 |         {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
419 |         {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
420 |         {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
421 | #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
422 |         {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
423 | #endif
424 |         {NULL, NULL}
425 |     };
426 |
427 |     if (nvlist_lookup_nvlist(nvroot,
428 |         ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
429 |         return (6);
430 |     }
431 |
432 |     vdev_desc = get_vdev_desc(nvroot, parent_name);
433 |
434 |     for (int i = 0; lat_type[i].name; i++) {
435 |         if (nvlist_lookup_uint64_array(nv_ex,
436 |             lat_type[i].name,
437 |             &lat_type[i].array,
438 |             &c) != 0) {
439 |             fprintf(stderr, "error: can't get %s\n", lat_type[i].name);
440 |             return (3);
441 |         }
442 |         /* end count; all of the arrays are the same size */
443 |         end = c - 1;
444 |     }
445 |
446 |     for (int bucket = 0; bucket <= end; bucket++) {
447 |         if (bucket < MIN_LAT_INDEX) {
448 |             /* don't print, but collect the sum */
449 |             for (int i = 0; lat_type[i].name; i++) {
450 |                 lat_type[i].sum += lat_type[i].array[bucket];
451 |             }
452 |             continue;
453 |         }
454 |         if (bucket < end) {
455 |             printf("%s,le=%0.6f,name=%s,%s ",
456 |                 POOL_LATENCY_MEASUREMENT, (float) (1ULL << bucket) * 1e-9,
457 |                 pool_name, vdev_desc);
458 |         } else {
459 |             printf("%s,le=+Inf,name=%s,%s ",
460 |                 POOL_LATENCY_MEASUREMENT, pool_name, vdev_desc);
461 |         }
462 |         for (int i = 0; lat_type[i].name; i++) {
463 |             if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) { /* fold in lower buckets */
464 |                 lat_type[i].sum += lat_type[i].array[bucket];
465 |             } else {
466 |                 lat_type[i].sum = lat_type[i].array[bucket];
467 |             }
468 |             printf("%s="IFMT, lat_type[i].short_name, MASK_UINT64(lat_type[i].sum));
469 |             if (lat_type[i + 1].name != NULL) {
470 |                 printf(",");
471 |             }
472 |         }
473 |         printf(" %lu\n", timestamp);
474 |     }
475 |     return (0);
476 | }
477 |
478 | /*
479 |  * vdev request size stats are histograms stored as nvlist arrays of uint64.
480 |  * Request size stats include the ZIO scheduler classes plus lower-level
481 |  * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
482 |  *
483 |  * In many cases, the top-level "root" view obscures the underlying
484 |  * top-level vdev operations. For example, if a pool has a log, special,
485 |  * or cache device, then each can behave very differently. It is useful
486 |  * to see how each is responding.
487 | */
488 | int
489 | print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
490 |     const char *parent_name) {
491 |     uint_t c, end = 0;
492 |     nvlist_t *nv_ex;
493 |     char *vdev_desc = NULL;
494 |
495 |     /* short_names become the field name */
496 |     struct size_lookup {
497 |         char *name;
498 |         char *short_name;
499 |         uint64_t sum;
500 |         uint64_t *array;
501 |     };
502 |     struct size_lookup size_type[] = {
503 |         {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
504 |         {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
505 |         {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
506 |         {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
507 |         {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
508 |         {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
509 |         {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
510 |         {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
511 |         {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
512 |         {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
513 | #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
514 |         {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
515 |         {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
516 | #endif
517 |         {NULL, NULL}
518 |     };
519 |
520 |     if (nvlist_lookup_nvlist(nvroot,
521 |         ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
522 |         return (6);
523 |     }
524 |
525 |     vdev_desc = get_vdev_desc(nvroot, parent_name);
526 |
527 |     for (int i = 0; size_type[i].name; i++) {
528 |         if (nvlist_lookup_uint64_array(nv_ex,
529 |             size_type[i].name,
530 |             &size_type[i].array,
531 |             &c) != 0) {
532 |             fprintf(stderr, "error: can't get %s\n", size_type[i].name);
533 |             return (3);
534 |         }
535 |         /* end count; all of the arrays are the same size */
536 |         end = c - 1;
537 |     }
538 |
539 |     for (int bucket = 0; bucket <= end; bucket++) {
540 |         if (bucket < MIN_SIZE_INDEX) {
541 |             /* don't print, but collect the sum */
542 |             for (int i = 0; size_type[i].name; i++) {
543 |                 size_type[i].sum += size_type[i].array[bucket];
544 |             }
545 |             continue;
546 |         }
547 |
548 |         if (bucket < end) {
549 |             printf("%s,le=%llu,name=%s,%s ",
550 |                 POOL_IO_SIZE_MEASUREMENT, 1ULL << bucket,
551 |                 pool_name, vdev_desc);
552 |         } else {
553 |             printf("%s,le=+Inf,name=%s,%s ",
554 |                 POOL_IO_SIZE_MEASUREMENT, pool_name, vdev_desc);
555 |         }
556 |         for (int i = 0; size_type[i].name; i++) {
557 |             if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) { /* fold in lower buckets */
558 |                 size_type[i].sum += size_type[i].array[bucket];
559 |             } else {
560 |                 size_type[i].sum = size_type[i].array[bucket];
561 |             }
562 |             printf("%s="IFMT, size_type[i].short_name, MASK_UINT64(size_type[i].sum));
563 |             if (size_type[i + 1].name != NULL) {
564 |                 printf(",");
565 |             }
566 |         }
567 |         printf(" %lu\n", timestamp);
568 |     }
569 |     return (0);
570 | }
571 |
572 | /*
573 |  * ZIO scheduler queue stats are stored as gauges. This is unfortunate
574 |  * because the values can change very rapidly and any point-in-time
575 |  * value will quickly be obsoleted. It is also not easy to downsample.
576 |  * Thus only the top-level queue stats might be beneficial... maybe.
577 | */
578 | int
579 | print_queue_stats(nvlist_t *nvroot, const char *pool_name,
580 |     const char *parent_name) {
581 |     nvlist_t *nv_ex;
582 |     uint64_t value;
583 |
584 |     /* short_names are used for the field name */
585 |     struct queue_lookup {
586 |         char *name;
587 |         char *short_name;
588 |     };
589 |     struct queue_lookup queue_type[] = {
590 |         {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
591 |         {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
592 |         {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
593 |         {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
594 |         {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
595 |         {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
596 |         {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
597 |         {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
598 |         {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
599 |         {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
600 |         {NULL, NULL}
601 |     };
602 |
603 |     if (nvlist_lookup_nvlist(nvroot,
604 |         ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
605 |         return (6);
606 |     }
607 |
608 |     printf("%s,name=%s,%s ",
609 |         POOL_QUEUE_MEASUREMENT, pool_name,
610 |         get_vdev_desc(nvroot, parent_name));
611 |     for (int i = 0; queue_type[i].name; i++) {
612 |         if (nvlist_lookup_uint64(nv_ex,
613 |             queue_type[i].name, &value) != 0) {
614 |             fprintf(stderr, "error: can't get %s\n",
615 |                 queue_type[i].name);
616 |             return (3);
617 |         }
618 |         printf("%s="IFMT, queue_type[i].short_name, MASK_UINT64(value));
619 |         if (queue_type[i + 1].name != NULL) {
620 |             printf(",");
621 |         }
622 |     }
623 |     printf(" %lu\n", timestamp);
624 |     return (0);
625 | }
626 |
627 | /*
628 |  * top-level vdev stats are at the pool level
629 |  */
630 | int
631 | print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name) {
632 |     nvlist_t *nv_ex;
633 |     uint64_t value;
634 |
635 |     /* short_names become part of the metric name */
636 |     struct queue_lookup {
637 |         char *name;
638 |         char *short_name;
639 |     };
640 |     struct queue_lookup queue_type[] = {
641 |         {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
642 |         {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
643 |         {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
644 |         {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
645 |         {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
646 |         {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
647 |         {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
648 |         {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
649 |         {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
650 |         {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
651 |         {NULL, NULL}
652 |     };
653 |
654 |     if (nvlist_lookup_nvlist(nvroot,
655 |         ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
656 |         return (6);
657 |     }
658 |
659 |     (void) printf("%s,name=%s,vdev=root ", VDEV_MEASUREMENT, pool_name);
660 |     for (int i = 0; queue_type[i].name; i++) {
661 |         if (nvlist_lookup_uint64(nv_ex,
662 |             queue_type[i].name, &value) != 0) {
663 |             fprintf(stderr, "error: can't get %s\n",
664 |                 queue_type[i].name);
665 |             return (3);
666 |         }
667 |         if (i > 0)
668 |             printf(",");
669 |         printf("%s="IFMT, queue_type[i].short_name, MASK_UINT64(value));
670 |     }
671 |
672 |     (void) printf(" %lu\n", timestamp);
673 |     return (0);
674 | }
675 |
676 | /*
677 |  * recursive stats printer
678 |  */
679 | typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);
680 |
681 |
int 682 | print_recursive_stats(stat_printer_f func, nvlist_t *nvroot, 683 | const char *pool_name, const char *parent_name, 684 | int descend) { 685 | uint_t c, children; 686 | nvlist_t **child; 687 | char vdev_name[256]; 688 | int err; 689 | 690 | err = func(nvroot, pool_name, parent_name); 691 | if (err) 692 | return (err); 693 | 694 | if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 695 | &child, &children) == 0) { 696 | (void) strncpy(vdev_name, get_vdev_name(nvroot, parent_name), 697 | sizeof(vdev_name)); 698 | vdev_name[sizeof(vdev_name) - 1] = '\0'; 699 | 700 | for (c = 0; c < children; c++) { 701 | print_recursive_stats(func, child[c], pool_name, 702 | vdev_name, descend); 703 | } 704 | } 705 | return (0); 706 | } 707 | 708 | /* 709 | * call-back to print the stats from the pool config 710 | * 711 | * Note: if the pool is broken, this can hang indefinitely and perhaps in an 712 | * unkillable state. 713 | */ 714 | int 715 | print_stats(zpool_handle_t *zhp, void *data) { 716 | uint_t c; 717 | int err; 718 | boolean_t missing; 719 | nvlist_t *config, *nvroot; 720 | vdev_stat_t *vs; 721 | struct timespec tv; 722 | char *pool_name; 723 | 724 | /* if not this pool return quickly */ 725 | if (data && 726 | strncmp(data, zhp->zpool_name, ZFS_MAX_DATASET_NAME_LEN) != 0) { 727 | zpool_close(zhp); 728 | return (0); 729 | } 730 | 731 | if (zpool_refresh_stats(zhp, &missing) != 0) { 732 | zpool_close(zhp); 733 | return (1); 734 | } 735 | 736 | config = zpool_get_config(zhp, NULL); 737 | if (clock_gettime(CLOCK_REALTIME, &tv) != 0) 738 | timestamp = (uint64_t) time(NULL) * 1000000000; 739 | else 740 | timestamp = 741 | ((uint64_t) tv.tv_sec * 1000000000) + (uint64_t) tv.tv_nsec; 742 | 743 | if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) { 744 | zpool_close(zhp); 745 | return (2); 746 | } 747 | if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 748 | (uint64_t **) &vs, &c) != 0) { 749 | zpool_close(zhp); 750 | return (3); 751 | } 752 | 753 | pool_name = escape_string(zhp->zpool_name); 754 | err = print_recursive_stats(print_summary_stats, nvroot, 755 | pool_name, NULL, 1); 756 | /* if any of these return an error, skip the rest */ 757 | if (err == 0) 758 | err = print_top_level_vdev_stats(nvroot, pool_name); 759 | 760 | if (no_histograms == 0) { 761 | if (err == 0) 762 | err = print_recursive_stats(print_vdev_latency_stats, nvroot, 763 | pool_name, NULL, 1); 764 | if (err == 0) 765 | err = print_recursive_stats(print_vdev_size_stats, nvroot, 766 | pool_name, NULL, 1); 767 | if (err == 0) 768 | err = print_recursive_stats(print_queue_stats, nvroot, 769 | pool_name, NULL, 0); 770 | } 771 | if (err == 0) 772 | err = print_scan_status(nvroot, pool_name); 773 | 774 | free(pool_name); 775 | zpool_close(zhp); 776 | return (err); 777 | } 778 | 779 | 780 | void 781 | usage(char* name) { 782 | fprintf(stderr, "usage: %s [--execd][--no-histograms]" 783 | "[--sum-histogram-buckets] [poolname]\n", name); 784 | exit(EXIT_FAILURE); 785 | } 786 | 787 | int 788 | main(int argc, char *argv[]) { 789 | int opt; 790 | int ret = 8; 791 | char *line = NULL; 792 | size_t len = 0; 793 | struct option long_options[] = { 794 | {"execd", no_argument, NULL, 'e'}, 795 | {"help", no_argument, NULL, 'h'}, 796 | {"no-histograms", no_argument, NULL, 'n'}, 797 | {"sum-histogram-buckets", no_argument, NULL, 's'}, 798 | {0, 0, 0, 0} 799 | }; 800 | while ((opt = getopt_long(argc, argv, "ehns", long_options, NULL)) != -1) { 801 | switch (opt) { 802 | case 'e': 803 | 
execd_mode = 1;
804 |         break;
805 |     case 'n':
806 |         no_histograms = 1;
807 |         break;
808 |     case 's':
809 |         sum_histogram_buckets = 1;
810 |         break;
811 |     default:
812 |         usage(argv[0]);
813 |     }
814 | }
815 |
816 | libzfs_handle_t *g_zfs;
817 | if ((g_zfs = libzfs_init()) == NULL) {
818 |     fprintf(stderr,
819 |         "error: cannot initialize libzfs. "
820 |         "Is the zfs module loaded or zrepl running?\n");
821 |     exit(EXIT_FAILURE);
822 | }
823 | if (execd_mode == 0) {
824 |     ret = zpool_iter(g_zfs, print_stats, argv[optind]);
825 |     return (ret);
826 | }
827 | while (getline(&line, &len, stdin) != -1) {
828 |     ret = zpool_iter(g_zfs, print_stats, argv[optind]);
829 |     fflush(stdout);
830 | }
831 | free(line); /* release the buffer getline() allocated */
832 | return (ret);
833 | }
834 |
--------------------------------------------------------------------------------
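The shipped `telegraf.d/zpool_influxdb.conf` above uses telegraf's `inputs.exec` plugin, which forks the collector on every collection interval. The source header also mentions the `inputs.execd` plugin together with the `--execd` option, but the repository does not ship a config for that mode. A minimal sketch of such a stanza, assuming telegraf 1.14 or later (where `inputs.execd` was introduced) and the default `/usr/local/bin` install path:
```
# Run zpool_influxdb as a single long-lived process managed by telegraf
[[inputs.execd]]
  ## --execd makes zpool_influxdb print one sample per newline received on stdin
  command = ["/usr/local/bin/zpool_influxdb", "--execd"]
  ## "STDIN" tells telegraf to write a newline to the process at each
  ## collection interval, triggering one sample per interval
  signal = "STDIN"
  data_format = "influx"
```
Compared to `inputs.exec`, this avoids a fork/exec and a fresh `libzfs_init()` on every collection interval.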