├── DTrace_notes.txt
├── README.md
├── arc_adjust.v2.d
├── collectd
│   ├── arcstat_collectd.pl
│   ├── kmem_reap_100ms.sh
│   ├── nfssvrtop_collectd
│   ├── nfsutil.sh
│   ├── openzfs_txg.d
│   ├── sol-iostat.pl
│   ├── sol-kstat.pl
│   ├── sol-stmf.pl
│   ├── stmf_iops.d
│   └── zfsio.sh
├── emlxs_reset.d
├── ixgbe_debug.d
├── kd_collect.sh
├── kmem_oversize.d
├── kmem_reap_100ms.d
├── modparams
├── nfsio.d
├── nfsio30sec.d
├── nfsrpclat.d
├── nfsutil.d
├── parse_zfsio.py
├── resconflict.d
├── resilver.d
├── resilver_v4.d
├── rpc.d
├── rpcbind.d
├── scrub.d
├── scrub_v4.d
├── smb_session.d
├── smb_trace.d
├── svc_flowcontrol.d
├── tcp_input.d
├── trace_destroy.sh
├── txg_delay.d
├── txg_monitor.d
├── txg_monitor.v3.d
├── txg_realtime.d
├── watch_destroy.sh
├── whoalloc.d
├── zfsio.d
├── zfsio_1fs.d
├── zfsio_plot_example.r
└── ziolatency.d

/DTrace_notes.txt:
--------------------------------------------------------------------------------
1 | Notes on non-trivial things I had to dig around to find.
2 | 
3 | 1. To print a global kernel symbol value in a DTrace script, put a ` (backquote) in front of it.
4 | 
5 | For instance,
6 | 
7 | dtrace -qn 'dnlc_lookup:return{@[(arg1)?"Hit":"Miss"]=count()} tick-10sec{printf("%Y ",walltimestamp);printa("%s: %@d ",@);printf("DNLC entries: %d\n",`dnlc_nentries);trunc(@)}'
8 | 
9 | One-liners
10 | 
11 | Detect zeroed blocks being written to disk (such as sdelete or dd if=/dev/zero from the client):
12 | 
13 | dtrace -qn 'zio_compress_data:return/args[1]==0/{@["zero blocks"]=count()} tick-1sec{printf("%Y",walltimestamp);printa(@);clear(@);}'
14 | 
15 | Find out how much time a userland process spends on-CPU, to determine how effective its threading model is (using rpcbind as an example) - credit to Saso Kiselkov:
16 | 
17 | dtrace -n 'BEGIN{total=0}sched:::on-cpu/pid=='$(pgrep rpcbind)'/{self->start=walltimestamp} sched:::off-cpu/pid=='$(pgrep rpcbind)'/{total += walltimestamp - self->start} tick-1s{printf("total: %lli.%09lli", (long long) total/1000000000, (long long) total % 1000000000); total=0}'
18 | 
19 | Detect whether ixgbe is tripping on an unsupported SFP:
20 | 
21 | dtrace -n 'ixgbe_identify_sfp_module_generic:return/arg1==-19/{trace("IXGBE_ERR_SFP_NOT_SUPPORTED")}'
22 | 
23 | Find out who is unmounting ZFS snapshots (stolen from https://www.illumos.org/issues/5273):
24 | 
25 | dtrace -n 'fbt:zfs:zfsvfs_teardown:entry /strstr(stringof(((struct zfsvfs *)arg0)->z_vfs->vfs_mntpt->rs_string), "snapshot") != 0/ { printf("pid %d, execname %s, unmounting %d, %s, %Y", pid, execname != 0 ? execname : "NULL", arg1, stringof(((struct zfsvfs *)arg0)->z_vfs->vfs_mntpt->rs_string), walltimestamp ); stack(); ustack(); }'
26 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | dtrace
2 | ======
3 | This is a collection of useful DTrace scripts written while troubleshooting performance issues at Nexenta Systems.
4 | For comments, feel free to e-mail Kirill.Davydychev@nexenta.com, but I cannot promise a timely reply. If you find
5 | a bug, please file a GitHub issue, and maybe I'll get around to fixing it eventually.
6 | 
7 | Unless a different license is mentioned in any of the scripts, assume that they are under CDDL, MIT, BSD or whatever
8 | license suits you best; I simply have not gotten around to adding the license information yet. Just keep
9 | the copyright notice and remember that I'm not liable for anything these scripts may do.
10 | 
11 | Most of the scripts are works in progress, and may change at any time.
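
A quick note on running these: the standalone .d scripts are executed straight through dtrace(1M) by their
shebang lines, and most of the wrappers under collectd/ print collectd's plain-text protocol ("PUTVAL ..."
lines) on stdout, so you can run them by hand (as root) to sanity-check their output before wiring them
into a collector. Rough examples - the pool name "tank" and the hostname "myhost" below are just placeholders:

    # watch TXG syncs / dirty data for one pool; the script takes the pool name as its first argument
    ./collectd/openzfs_txg.d tank

    # report NFS server thread-pool utilization as PUTVAL lines; optional args are hostname and interval
    ./collectd/nfsutil.sh myhost 10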
12 | -------------------------------------------------------------------------------- /arc_adjust.v2.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | #pragma D option destructive 4 | 5 | arc_adjust:entry 6 | { 7 | self->ts = walltimestamp; 8 | printf("%Y 1836: adjustment = MIN( %d, %d)\n", walltimestamp, 9 | (int64_t)(`arc_stats.arcstat_size.value.ui64 - `arc_stats.arcstat_c.value.ui64), 10 | (int64_t)(`arc_anon->arcs_size + `arc_mru->arcs_size + `arc_meta_used - `arc_stats.arcstat_p.value.ui64)); 11 | printf("arc_size = %d, arc_c = %d, arc_anon.size = %d, arc_mru.size = %d, arc_meta_used = %d, arc_p = %d\n", 12 | `arc_stats.arcstat_size.value.ui64, 13 | `arc_stats.arcstat_c.value.ui64, 14 | `arc_anon->arcs_size, 15 | `arc_mru->arcs_size, 16 | `arc_meta_used, 17 | `arc_stats.arcstat_p.value.ui64); 18 | printf("arc_mfu.size = %d, arc_mfu_ghost.size = %d, arc_mru_ghost.size = %d, arc_l2c_only.size = %d\n", 19 | `arc_mfu->arcs_size, 20 | `arc_mfu_ghost->arcs_size, 21 | `arc_mru_ghost->arcs_size, 22 | `arc_l2c_only->arcs_size); 23 | } 24 | 25 | arc_shrink:entry 26 | { 27 | printf("%Y 2085: to_free = MAX( %d, %d)\n", walltimestamp, 28 | `arc_stats.arcstat_c.value.ui64 >> `arc_shrink_shift, `needfree*4096); 29 | } 30 | 31 | arc_adjust:return 32 | { 33 | printf("Returned from arc_adjust started at %Y %d ms later.\n", self->ts, (walltimestamp - self->ts)/1000000); 34 | } 35 | -------------------------------------------------------------------------------- /collectd/arcstat_collectd.pl: -------------------------------------------------------------------------------- 1 | #!/usr/perl5/bin/perl -w 2 | # 3 | # Print out ZFS ARC Statistics exported via kstat(1) 4 | # For a definition of fields, or usage, use arctstat.pl -v 5 | # 6 | # This script is a fork of the original arcstat.pl (0.1) by 7 | # Neelakanth Nadgir, originally published on his Sun blog on 8 | # 09/18/2007 9 | # http://blogs.sun.com/realneel/entry/zfs_arc_statistics 10 | # 11 | # This version aims to improve upon the original by adding features 12 | # and fixing bugs as needed. This version is maintained by 13 | # Mike Harsch and is hosted in a public open source repository: 14 | # http://github.com/mharsch/arcstat 15 | # 16 | # Comments, Questions, or Suggestions are always welcome. 17 | # Contact the maintainer at ( mike at harschsystems dot com ) 18 | # 19 | # CDDL HEADER START 20 | # 21 | # The contents of this file are subject to the terms of the 22 | # Common Development and Distribution License, Version 1.0 only 23 | # (the "License"). You may not use this file except in compliance 24 | # with the License. 25 | # 26 | # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 27 | # or http://www.opensolaris.org/os/licensing. 28 | # See the License for the specific language governing permissions 29 | # and limitations under the License. 30 | # 31 | # When distributing Covered Code, include this CDDL HEADER in each 32 | # file and include the License file at usr/src/OPENSOLARIS.LICENSE. 33 | # If applicable, add the following below this CDDL HEADER, with the 34 | # fields enclosed by brackets "[]" replaced with your own identifying 35 | # information: Portions Copyright [yyyy] [name of copyright owner] 36 | # 37 | # CDDL HEADER END 38 | # 39 | # 40 | # Fields have a fixed width. Every interval, we fill the "v" 41 | # hash with its corresponding value (v[field]=value) using calculate(). 
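# (For example, calculate() derives $v{"read"} as $v{"hits"} + $v{"miss"}, with both already
# normalized to per-second rates over the sampling interval.)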
42 | # @hdr is the array of fields that needs to be printed, so we 43 | # just iterate over this array and print the values using our pretty printer. 44 | 45 | use strict; 46 | use POSIX qw(strftime); 47 | use Sys::Hostname; 48 | use Sun::Solaris::Kstat; 49 | use Getopt::Long; 50 | use IO::Handle; 51 | 52 | my %cols = (# HDR => [Size, Scale, Description] 53 | "time" =>[17, -1, "Time"], 54 | "hits" =>[4, 1000, "ARC reads per second"], 55 | "miss" =>[4, 1000, "ARC misses per second"], 56 | "read" =>[4, 1000, "Total ARC accesses per second"], 57 | "hit%" =>[4, 100, "ARC Hit percentage"], 58 | "miss%" =>[5, 100, "ARC miss percentage"], 59 | "dhit" =>[4, 1000, "Demand Data hits per second"], 60 | "dmis" =>[4, 1000, "Demand Data misses per second"], 61 | "dh%" =>[3, 100, "Demand Data hit percentage"], 62 | "dm%" =>[3, 100, "Demand Data miss percentage"], 63 | "phit" =>[4, 1000, "Prefetch hits per second"], 64 | "pmis" =>[4, 1000, "Prefetch misses per second"], 65 | "ph%" =>[3, 100, "Prefetch hits percentage"], 66 | "pm%" =>[3, 100, "Prefetch miss percentage"], 67 | "mhit" =>[4, 1000, "Metadata hits per second"], 68 | "mmis" =>[4, 1000, "Metadata misses per second"], 69 | "mread" =>[4, 1000, "Metadata accesses per second"], 70 | "mh%" =>[3, 100, "Metadata hit percentage"], 71 | "mm%" =>[3, 100, "Metadata miss percentage"], 72 | "arcsz" =>[5, 1024, "ARC Size"], 73 | "c" =>[4, 1024, "ARC Target Size"], 74 | "mfu" =>[4, 1000, "MFU List hits per second"], 75 | "mru" =>[4, 1000, "MRU List hits per second"], 76 | "mfug" =>[4, 1000, "MFU Ghost List hits per second"], 77 | "mrug" =>[4, 1000, "MRU Ghost List hits per second"], 78 | "eskip" =>[5, 1000, "evict_skip per second"], 79 | "mtxmis" =>[6, 1000, "mutex_miss per second"], 80 | "rmis" =>[4, 1000, "recycle_miss per second"], 81 | "dread" =>[5, 1000, "Demand data accesses per second"], 82 | "pread" =>[5, 1000, "Prefetch accesses per second"], 83 | "l2hits" =>[6, 1000, "L2ARC hits per second"], 84 | "l2miss" =>[6, 1000, "L2ARC misses per second"], 85 | "l2read" =>[6, 1000, "Total L2ARC accesses per second"], 86 | "l2hit%" =>[6, 100, "L2ARC access hit percentage"], 87 | "l2miss%" =>[7, 100, "L2ARC access miss percentage"], 88 | "l2size" =>[6, 1024, "Size of the L2ARC"], 89 | "l2bytes" =>[7, 1024, "bytes read per second from the L2ARC"], 90 | ); 91 | my %v=(); 92 | my @hdr = qw(read l2read dread mread miss dmis pmis mmis l2miss arcsz l2size c); 93 | my @xhdr = qw(time mfu mru mfug mrug eskip mtxmis rmis dread pread read); 94 | my $int = 1; # Default interval is 1 second 95 | my $count = 1; # Default count is 1 96 | my $hdr_intr = 20; # Print header every 20 lines of output 97 | my $opfile = ""; 98 | my $sep = " "; # Default separator is 2 spaces 99 | my $raw_output = 1; 100 | my $version = "0.4"; 101 | my $l2exist = 0; 102 | my $host = hostname; 103 | my $cmd = "Usage: arcstat [-hvxr] [-f fields] [-o file] [-s string] " . 104 | "[interval [count]]\n"; 105 | my %cur; 106 | my %d; 107 | my $out; 108 | my $kstat = Sun::Solaris::Kstat->new(); 109 | STDOUT->autoflush; 110 | 111 | sub detailed_usage { 112 | print STDERR "$cmd\n"; 113 | print STDERR "Field definitions are as follows:\n"; 114 | foreach my $hdr (keys %cols) { 115 | print STDERR sprintf("%11s : %s\n", $hdr, $cols{$hdr}[2]); 116 | } 117 | exit(1); 118 | } 119 | 120 | sub usage { 121 | print STDERR "$cmd\n"; 122 | print STDERR "\t -h : Print this help message\n"; 123 | print STDERR "\t -n : Override hostname\n"; 124 | print STDERR "\t -v : List all possible field headers " . 
125 | "and definitions\n"; 126 | print STDERR "\t -x : Print extended stats\n"; 127 | print STDERR "\t -r : Raw output mode (values not scaled)\n"; 128 | print STDERR "\t -f : Specify specific fields to print (see -v)\n"; 129 | print STDERR "\t -o : Redirect output to the specified file\n"; 130 | print STDERR "\t -s : Override default field separator with custom " . 131 | "character or string\n"; 132 | print STDERR "\nExamples:\n"; 133 | print STDERR "\tarcstat -o /tmp/a.log 2 10\n"; 134 | print STDERR "\tarcstat -s \",\" -o /tmp/a.log 2 10\n"; 135 | print STDERR "\tarcstat -v\n"; 136 | print STDERR "\tarcstat -f time,hit%,dh%,ph%,mh% 1\n"; 137 | exit(1); 138 | } 139 | 140 | sub init { 141 | my $desired_cols; 142 | my $xflag = ''; 143 | my $hflag = ''; 144 | my $vflag; 145 | my $res = GetOptions('x' => \$xflag, 146 | 'o=s' => \$opfile, 147 | 'help|h|?' => \$hflag, 148 | 'v' => \$vflag, 149 | 's=s' => \$sep, 150 | 'f=s' => \$desired_cols, 151 | 'n=s' => \$host, 152 | 'r' => \$raw_output); 153 | $int = $ARGV[0] || $int; 154 | $count = $ARGV[1] || $count; 155 | 156 | if (defined $ARGV[0] && defined $ARGV[1]) { 157 | $int = $ARGV[0]; 158 | $count = $ARGV[1]; 159 | } elsif (defined $ARGV[0]) { 160 | $int = $ARGV[0]; 161 | $count = 0; 162 | } 163 | 164 | usage() if !$res or $hflag or ($xflag and $desired_cols); 165 | detailed_usage() if $vflag; 166 | @hdr = @xhdr if $xflag; #reset headers to xhdr 167 | 168 | # check if L2ARC exists 169 | snap_stats(); 170 | if (defined $cur{"l2_size"}) { 171 | $l2exist = 1; 172 | } 173 | 174 | if ($desired_cols) { 175 | @hdr = split(/[ ,]+/, $desired_cols); 176 | # Now check if they are valid fields 177 | my @invalid = (); 178 | my @incompat = (); 179 | foreach my $ele (@hdr) { 180 | if (not exists($cols{$ele})) { 181 | push(@invalid, $ele); 182 | } elsif (($l2exist == 0) && ($ele =~ /^l2/)) { 183 | printf("No L2ARC here\n", $ele); 184 | push(@incompat, $ele); 185 | } 186 | } 187 | if (scalar @invalid > 0) { 188 | print STDERR "Invalid column definition! -- " 189 | . "@invalid\n\n"; 190 | usage(); 191 | } 192 | 193 | if (scalar @incompat > 0) { 194 | print STDERR "Incompatible field specified -- " 195 | . "@incompat\n\n"; 196 | usage(); 197 | } 198 | } 199 | 200 | if ($opfile) { 201 | open($out, ">$opfile") ||die "Cannot open $opfile for writing"; 202 | $out->autoflush; 203 | select $out; 204 | } 205 | } 206 | 207 | # Capture kstat statistics. We maintain 3 hashes, prev, cur, and 208 | # d (delta). As their names imply they maintain the previous, current, 209 | # and delta (cur - prev) statistics. 210 | sub snap_stats { 211 | my %prev = %cur; 212 | #if ($kstat->update()) { 213 | # printf("\n"); 214 | #} 215 | my $hashref_cur = $kstat->{"zfs"}{0}{"arcstats"}; 216 | %cur = %$hashref_cur; 217 | foreach my $key (keys %cur) { 218 | next if $key =~ /class/; 219 | if (defined $prev{$key}) { 220 | $d{$key} = $cur{$key} - $prev{$key}; 221 | } else { 222 | $d{$key} = $cur{$key}; 223 | } 224 | } 225 | } 226 | 227 | # Pretty print num. Arguments are width, scale, and num 228 | sub prettynum { 229 | my @suffix = (' ', 'K', 'M', 'G', 'T'); 230 | my $num = $_[2] || 0; 231 | my $scale = $_[1]; 232 | my $sz = $_[0]; 233 | my $index = 0; 234 | my $save = 0; 235 | 236 | if ($scale == -1) { #special case for date field 237 | return sprintf("%s", $num); 238 | } elsif (($num > 0) && ($num < 1)) { #rounding error. 
return 0 239 | $num = 0; 240 | } 241 | 242 | while ($num > $scale and $index < 5) { 243 | $save = $num; 244 | $num = $num/$scale; 245 | $index++; 246 | } 247 | 248 | return sprintf("%*d", $sz, $num) if ($index == 0); 249 | if (($save / $scale) < 10) { 250 | return sprintf("%*.1f%s", $sz - 1, $num,$suffix[$index]); 251 | } else { 252 | return sprintf("%*d%s", $sz - 1, $num,$suffix[$index]); 253 | } 254 | } 255 | 256 | sub print_values { 257 | foreach my $col (@hdr) { 258 | if (not $raw_output) { 259 | printf("%s%s", prettynum($cols{$col}[0], $cols{$col}[1], 260 | $v{$col}), $sep); 261 | } else { 262 | printf("%d %s.%s%s %d%s\n", $v{"time"},$host,"zfs.arcstat/req/gauge-", $col, $v{$col} || 0, $sep); 263 | } 264 | } 265 | } 266 | 267 | sub print_header { 268 | if (not $raw_output) { 269 | foreach my $col (@hdr) { 270 | printf("%*s%s", $cols{$col}[0], $col, $sep); 271 | } 272 | } else { 273 | # Don't try to align headers in raw mode 274 | #foreach my $col (@hdr) { 275 | # printf("%s%s", $col, $sep); 276 | #} 277 | } 278 | printf("\n"); 279 | } 280 | 281 | sub calculate { 282 | %v = (); 283 | 284 | if ($raw_output) { 285 | $v{"time"} = strftime("%s", localtime); 286 | } else { 287 | $v{"time"} = strftime("%D %H:%M:%S", localtime); 288 | } 289 | 290 | $v{"hits"} = $d{"hits"}/$int; 291 | $v{"miss"} = $d{"misses"}/$int; 292 | $v{"read"} = $v{"hits"} + $v{"miss"}; 293 | $v{"hit%"} = 100 * ($v{"hits"} / $v{"read"}) if $v{"read"} > 0; 294 | $v{"miss%"} = 100 - $v{"hit%"} if $v{"read"} > 0; 295 | 296 | $v{"dhit"} = ($d{"demand_data_hits"} + 297 | $d{"demand_metadata_hits"})/$int; 298 | $v{"dmis"} = ($d{"demand_data_misses"} + 299 | $d{"demand_metadata_misses"})/$int; 300 | 301 | $v{"dread"} = $v{"dhit"} + $v{"dmis"}; 302 | $v{"dh%"} = 100 * ($v{"dhit"} / $v{"dread"}) if $v{"dread"} > 0; 303 | $v{"dm%"} = 100 - $v{"dh%"} if $v{"dread"} > 0; 304 | 305 | $v{"phit"} = ($d{"prefetch_data_hits"} + 306 | $d{"prefetch_metadata_hits"})/$int; 307 | $v{"pmis"} = ($d{"prefetch_data_misses"} + 308 | $d{"prefetch_metadata_misses"})/$int; 309 | 310 | $v{"pread"} = $v{"phit"} + $v{"pmis"}; 311 | $v{"ph%"} = 100 * ($v{"phit"} / $v{"pread"}) if $v{"pread"} > 0; 312 | $v{"pm%"} = 100 - $v{"ph%"} if $v{"pread"} > 0; 313 | 314 | $v{"mhit"} = ($d{"prefetch_metadata_hits"} + 315 | $d{"demand_metadata_hits"})/$int; 316 | $v{"mmis"} = ($d{"prefetch_metadata_misses"} + 317 | $d{"demand_metadata_misses"})/$int; 318 | 319 | $v{"mread"} = $v{"mhit"} + $v{"mmis"}; 320 | $v{"mh%"} = 100 * ($v{"mhit"} / $v{"mread"}) if $v{"mread"} > 0; 321 | $v{"mm%"} = 100 - $v{"mh%"} if $v{"mread"} > 0; 322 | 323 | $v{"arcsz"} = $cur{"size"}; 324 | $v{"c"} = $cur{"c"}; 325 | $v{"mfu"} = $d{"mfu_hits"}/$int; 326 | $v{"mru"} = $d{"mru_hits"}/$int; 327 | $v{"mrug"} = $d{"mru_ghost_hits"}/$int; 328 | $v{"mfug"} = $d{"mfu_ghost_hits"}/$int; 329 | $v{"eskip"} = $d{"evict_skip"}/$int; 330 | $v{"rmiss"} = $d{"recycle_miss"}/$int; 331 | $v{"mtxmis"} = $d{"mutex_miss"}/$int; 332 | 333 | if ($l2exist) { 334 | $v{"l2hits"} = $d{"l2_hits"}/$int; 335 | $v{"l2miss"} = $d{"l2_misses"}/$int; 336 | $v{"l2read"} = $v{"l2hits"} + $v{"l2miss"}; 337 | $v{"l2hit%"} = 100 * ($v{"l2hits"} / $v{"l2read"}) 338 | if $v{"l2read"} > 0; 339 | 340 | $v{"l2miss%"} = 100 - $v{"l2hit%"} if $v{"l2read"} > 0; 341 | $v{"l2size"} = $cur{"l2_size"}; 342 | $v{"l2bytes"} = $d{"l2_read_bytes"}/$int; 343 | } 344 | } 345 | 346 | sub main { 347 | my $i = 0; 348 | my $count_flag = 0; 349 | 350 | init(); 351 | if ($count > 0) { $count_flag = 1; } 352 | while (1) { 353 | print_header() if ($i == 
0); 354 | snap_stats(); 355 | calculate(); 356 | print_values(); 357 | last if ($count_flag == 1 && $count-- <= 1); 358 | $i = (($i == $hdr_intr) && (not $raw_output)) ? 0 : $i+1; 359 | sleep($int); 360 | } 361 | close($out) if defined $out; 362 | } 363 | 364 | &main; 365 | -------------------------------------------------------------------------------- /collectd/kmem_reap_100ms.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then 5 | export HOSTNAME=`hostname` 6 | else 7 | export HOSTNAME=$1 8 | fi 9 | 10 | /usr/sbin/dtrace -Cn ' 11 | 12 | #pragma D option destructive 13 | #pragma D option quiet 14 | 15 | fbt::arc_kmem_reap_now:entry 16 | { 17 | self->start[probefunc] = timestamp; 18 | self->strategy = args[0]; 19 | self->in_kmem = 1; 20 | } 21 | 22 | fbt::arc_adjust:entry, 23 | fbt::arc_shrink:entry, 24 | fbt::arc_do_user_evicts:entry, 25 | fbt::dnlc_reduce_cache:entry, 26 | fbt::kmem_reap:entry 27 | /self->in_kmem/ 28 | { 29 | self->start[probefunc] = timestamp; 30 | } 31 | 32 | kmem_depot_ws_reap:entry 33 | { 34 | self->i = 1; 35 | self->start[probefunc] = timestamp; 36 | self->kct = args[0]; 37 | self->magcount = 0; 38 | self->slabcount = 0; 39 | } 40 | 41 | kmem_magazine_destroy:entry 42 | /self->i/ 43 | { 44 | self->magcount += 1; 45 | } 46 | 47 | kmem_slab_free:entry 48 | /self->i/ 49 | { 50 | self->slabcount += 1; 51 | } 52 | 53 | fbt::arc_adjust:return, 54 | fbt::arc_shrink:return, 55 | fbt::arc_do_user_evicts:return, 56 | fbt::dnlc_reduce_cache:return, 57 | fbt::kmem_reap:return 58 | /self->start[probefunc] && self->in_kmem && ((self->end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 59 | { 60 | /* printf("%Y %d ms", walltimestamp, 61 | (timestamp - self->start[probefunc]) / 1000000); */ 62 | self->start[probefunc] = NULL; 63 | } 64 | 65 | fbt::arc_adjust:return, 66 | fbt::arc_shrink:return, 67 | fbt::arc_do_user_evicts:return, 68 | fbt::dnlc_reduce_cache:return, 69 | fbt::kmem_reap:return 70 | /self->start[probefunc] && self->in_kmem && ((self->end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 71 | { 72 | self->start[probefunc] = NULL; 73 | } 74 | 75 | 76 | kmem_depot_ws_reap:return 77 | /self->i && ((self->ts_end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 78 | { 79 | self->i = NULL; 80 | self->wts_sec = walltimestamp / 1000000000; 81 | /* printf("%Y %s %d ms %d mags %d slabs", walltimestamp, self->kct->cache_name, (self->ts_end[probefunc])/1000000, self->magcount, self->slabcount); 82 | */ 83 | 84 | printf("PUTVAL '$HOSTNAME'.arc_kmem/%s/gauge-reap_ms interval=180 %d:%d\n",self->kct->cache_name, self->wts_sec, (self->ts_end[probefunc])/1000000); 85 | printf("PUTVAL '$HOSTNAME'.arc_kmem/%s/gauge-reap_magazines interval=180 %d:%d\n",self->kct->cache_name, self->wts_sec, self->magcount); 86 | printf("PUTVAL '$HOSTNAME'.arc_kmem/%s/gauge-reap_slabs interval=180 %d:%d\n",self->kct->cache_name, self->wts_sec, self->slabcount); 87 | 88 | self->start[probefunc] = NULL; 89 | 90 | } 91 | 92 | kmem_depot_ws_reap:return 93 | /self->i && ((self->ts_end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 94 | { 95 | self->i = NULL; 96 | self->start[probefunc] = NULL; 97 | } 98 | 99 | 100 | fbt::arc_kmem_reap_now:return 101 | /self->start[probefunc] && ((self->end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 102 | { 103 | self->wts_sec = walltimestamp / 1000000000; 104 | printf("PUTVAL 
'$HOSTNAME'.arc_kmem/arc_kmem_reap_now/gauge-reap_ms interval=180 %d:%d\n", self->wts_sec, 105 | (timestamp - self->start[probefunc]) / 1000000); 106 | self->start[probefunc] = NULL; 107 | self->in_kmem = NULL; 108 | } 109 | 110 | fbt::arc_kmem_reap_now:return 111 | /self->start[probefunc] && ((self->end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 112 | { 113 | self->start[probefunc] = NULL; 114 | self->in_kmem = NULL; 115 | } 116 | ' 117 | -------------------------------------------------------------------------------- /collectd/nfssvrtop_collectd: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ksh 2 | # 3 | # nfsvsvrtop - display top NFS v3 & v4 I/O events on a server. 4 | # 5 | # This is measuring the response time between an incoming NFS operation 6 | # and its response. In general, this measures the server's view of how 7 | # quickly it can respond to requests. By default, the list shows responses 8 | # to each client. 9 | # 10 | # Global fields: 11 | # swrite total KB sync writes during sample 12 | # awrite total KB async writes during sample 13 | # 14 | # The following per-client and "all" clients fields are shown 15 | # nfsv3/nfsv4 NFS version (3 or 4) 16 | # client IP addr of client 17 | # nfsops NFS operations per second 18 | # reads Read operations per second 19 | # swrites Sync write operations per second 20 | # awrites Async write operations per second 21 | # commits Commits per second 22 | # read_bytes Read KB/sec 23 | # swrite_bytes Sync write KB/sec 24 | # awrite_bytes Async write KB/sec 25 | # read_latency Average read time in microseconds 26 | # swrite_latency Average sync write time in microseconds 27 | # awrite_latency Average async write time in microseconds 28 | # com_t Average commit time in microseconds 29 | # 30 | # Note: NFSv4 compound operations are not measured, per se, but they are 31 | # counted in the total operations count. 32 | # 33 | # Note: dtrace doesn't do floating point. A seemingly zero response or 34 | # count can result due to integer division. 35 | # 36 | # 37 | # INSPIRATION: top(1) by William LeFebvre and iotop by Brendan Gregg 38 | # 39 | # Copyright 2011, 2014 Nexenta Systems, Inc. All rights reserved. 40 | # 41 | # CDDL HEADER START 42 | # 43 | # The contents of this file are subject to the terms of the 44 | # Common Development and Distribution License, Version 1.0 only 45 | # (the "License"). You may not use this file except in compliance 46 | # with the License. 47 | # 48 | # You can obtain a copy of the license at Docs/cddl1.txt 49 | # or http://www.opensolaris.org/os/licensing. 50 | # See the License for the specific language governing permissions 51 | # and limitations under the License. 52 | # 53 | # CDDL HEADER END 54 | # 55 | # Author: Richard.Elling@RichardElling.com, Kirill.Davydychev@nexenta.com 56 | # 57 | # Revision: 58 | # 1.8.collectd Nov 2014 59 | # 60 | 61 | PATH=/usr/sbin:/usr/bin 62 | 63 | 64 | ############################## 65 | # check to see if the NFS server module is loaded 66 | # if not, then the dtrace probes will fail ungracefully 67 | if [ "$(uname -s)" = "SunOS" ]; then 68 | modinfo | awk '{print $6}' | grep -q nfssrv 69 | if [ $? != 0 ]; then 70 | echo "error: NFS server module is not loaded, are you serving NFS?" 
71 | exit 1
72 | fi
73 | fi
74 | 
75 | ##############################
76 | # --- Process Arguments ---
77 | #
78 | 
79 | ### default variables
80 | opt_blocksize=4096 # blocksize for alignment measurements
81 | opt_client=0 # set if -c option set
82 | opt_clear=1 # set if screen to be cleared
83 | opt_json=1 # set if output is JSON
84 | opt_top=0 # set if list trimmed to top
85 | top=0 # number of lines trimmed
86 | opt_vers=0 # set if NFS version restricted
87 | vers=3 # version of NFS to restrict
88 | interval=10 # default interval
89 | count=-1 # number of intervals to show
90 | 
91 | ### process options
92 | while getopts b:c:Cjn:t: name
93 | do
94 | case $name in
95 | b) opt_blocksize=$OPTARG ;;
96 | c) opt_client=1; client_IP=$OPTARG ;;
97 | C) opt_clear=0 ;;
98 | j) opt_json=1 ;;
99 | n) opt_vers=1; vers=$OPTARG ;;
100 | t) opt_top=1; top=$OPTARG ;;
101 | h|?) cat <<END >&2
102 | USAGE: nfssvrtop [-Cj] [-b blocksize] [-c client_IP] [-n vers] [-t top]
103 | [interval [count]]
104 | -b blocksize # alignment blocksize (default=4096)
105 | -c client_IP # trace for this client only
106 | -C # don't clear the screen
107 | -j # print output in JSON format
108 | -n vers # show only NFS version
109 | -t top # print top number of entries only
110 | examples:
111 | nfssvrtop # default output, 10 second samples
112 | nfssvrtop -b 1024 # check alignment on 1KB boundary
113 | nfssvrtop 1 # 1 second samples
114 | nfssvrtop -n 4 # only show NFSv4 traffic
115 | nfssvrtop -C 60 # 60 second samples, do not clear screen
116 | nfssvrtop -t 20 # print top 20 lines only
117 | nfssvrtop 5 12 # print 12 x 5 second samples
118 | END
119 | exit 1
120 | esac
121 | done
122 | 
123 | shift $(($OPTIND - 1))
124 | 
125 | ### option logic
126 | 
127 | if [ ! -z "$1" ]
128 | then
129 | export HOSTNAME=$1; shift
130 | else
131 | export HOSTNAME=`hostname`
132 | fi
133 | 
134 | if [ ! -z "$1" ]; then
135 | interval=$1; shift
136 | fi
137 | if [ ! -z "$1" ]; then
138 | count=$1; shift
139 | fi
140 | if [ $opt_clear = 1 ]; then
141 | clearstr=$(clear)
142 | else
143 | clearstr=""
144 | fi
145 | 
146 | #################################
147 | # --- Main Program, DTrace ---
148 | #
149 | /usr/sbin/dtrace -Cn '
150 | /*
151 | * Command line arguments
152 | */
153 | inline int OPT_blocksize = '$opt_blocksize';
154 | inline int OPT_client = '$opt_client';
155 | inline int OPT_clear = '$opt_clear';
156 | inline int OPT_top = '$opt_top';
157 | inline int OPT_json = '$opt_json';
158 | inline int OPT_vers = '$opt_vers';
159 | inline int INTERVAL = '$interval';
160 | inline int COUNTER = '$count';
161 | inline int TOP = '$top';
162 | inline string CLIENT = "'$client_IP'";
163 | inline int VERS = '$vers';
164 | inline string CLEAR = "'$clearstr'";
165 | 
166 | #pragma D option quiet
167 | 
168 | /*
169 | * increase dynvarsize if you get "dynamic variable drops"
170 | */
171 | #pragma D option dynvarsize=12m
172 | 
173 | /*
174 | * Print header
175 | */
176 | dtrace:::BEGIN
177 | {
178 | /* starting values */
179 | counts = COUNTER;
180 | secs = INTERVAL;
181 | total_read_b = 0;
182 | total_swrite_b = 0;
183 | total_awrite_b = 0;
184 | 
185 | OPT_json ? 1 : printf("Tracing...
Please wait.\n"); 186 | } 187 | 188 | /* 189 | * Filter as needed, based on starts 190 | */ 191 | nfsv3:nfssrv::op-access-start, 192 | nfsv3:nfssrv::op-create-start, 193 | nfsv3:nfssrv::op-commit-start, 194 | nfsv3:nfssrv::op-fsinfo-start, 195 | nfsv3:nfssrv::op-fsstat-start, 196 | nfsv3:nfssrv::op-getattr-start, 197 | nfsv3:nfssrv::op-link-start, 198 | nfsv3:nfssrv::op-lookup-start, 199 | nfsv3:nfssrv::op-mkdir-start, 200 | nfsv3:nfssrv::op-mknod-start, 201 | nfsv3:nfssrv::op-null-start, 202 | nfsv3:nfssrv::op-pathconf-start, 203 | nfsv3:nfssrv::op-read-start, 204 | nfsv3:nfssrv::op-readdir-start, 205 | nfsv3:nfssrv::op-readdirplus-start, 206 | nfsv3:nfssrv::op-readlink-start, 207 | nfsv3:nfssrv::op-remove-start, 208 | nfsv3:nfssrv::op-rename-start, 209 | nfsv3:nfssrv::op-rmdir-start, 210 | nfsv3:nfssrv::op-setattr-start, 211 | nfsv3:nfssrv::op-symlink-start, 212 | nfsv3:nfssrv::op-write-start 213 | /OPT_client == 0 || CLIENT == args[0]->ci_remote/ 214 | { 215 | self->vers = "3"; 216 | @c_nfsops[self->vers, args[0]->ci_remote] = count(); 217 | OPT_client == 0 ? @c_nfsops[self->vers, "all"] = count() : 1; 218 | @wts_sec[self->vers, args[0]->ci_remote] = max(walltimestamp / 1000000000); 219 | @wts_sec[self->vers, "all"] = max(walltimestamp / 1000000000); 220 | } 221 | 222 | nfsv4:nfssrv::cb-recall-start, 223 | nfsv4:nfssrv::compound-start, 224 | nfsv4:nfssrv::null-start, 225 | nfsv4:nfssrv::op-access-start, 226 | nfsv4:nfssrv::op-close-start, 227 | nfsv4:nfssrv::op-commit-start, 228 | nfsv4:nfssrv::op-create-start, 229 | nfsv4:nfssrv::op-delegpurge-start, 230 | nfsv4:nfssrv::op-delegreturn-start, 231 | nfsv4:nfssrv::op-getattr-start, 232 | nfsv4:nfssrv::op-getfh-start, 233 | nfsv4:nfssrv::op-link-start, 234 | nfsv4:nfssrv::op-lock-start, 235 | nfsv4:nfssrv::op-lockt-start, 236 | nfsv4:nfssrv::op-locku-start, 237 | nfsv4:nfssrv::op-lookup-start, 238 | nfsv4:nfssrv::op-lookupp-start, 239 | nfsv4:nfssrv::op-nverify-start, 240 | nfsv4:nfssrv::op-open-confirm-start, 241 | nfsv4:nfssrv::op-open-downgrade-start, 242 | nfsv4:nfssrv::op-open-start, 243 | nfsv4:nfssrv::op-openattr-start, 244 | nfsv4:nfssrv::op-putfh-start, 245 | nfsv4:nfssrv::op-putpubfh-start, 246 | nfsv4:nfssrv::op-putrootfh-start, 247 | nfsv4:nfssrv::op-read-start, 248 | nfsv4:nfssrv::op-readdir-start, 249 | nfsv4:nfssrv::op-readlink-start, 250 | nfsv4:nfssrv::op-release-lockowner-start, 251 | nfsv4:nfssrv::op-remove-start, 252 | nfsv4:nfssrv::op-rename-start, 253 | nfsv4:nfssrv::op-renew-start, 254 | nfsv4:nfssrv::op-restorefh-start, 255 | nfsv4:nfssrv::op-savefh-start, 256 | nfsv4:nfssrv::op-secinfo-start, 257 | nfsv4:nfssrv::op-setattr-start, 258 | nfsv4:nfssrv::op-setclientid-confirm-start, 259 | nfsv4:nfssrv::op-setclientid-start, 260 | nfsv4:nfssrv::op-verify-start, 261 | nfsv4:nfssrv::op-write-start 262 | /OPT_client == 0 || CLIENT == args[0]->ci_remote/ 263 | { 264 | self->vers = "4"; 265 | @c_nfsops[self->vers, args[0]->ci_remote] = count(); 266 | OPT_client == 0 ? 
@c_nfsops[self->vers, "all"] = count() : 1; 267 | @wts_sec[self->vers, args[0]->ci_remote] = max(walltimestamp / 1000000000); 268 | @wts_sec[self->vers, "all"] = max(walltimestamp / 1000000000); 269 | } 270 | 271 | /* measure response time for commits, reads, and writes */ 272 | nfsv3:nfssrv::op-commit-start, 273 | nfsv3:nfssrv::op-read-start, 274 | nfsv3:nfssrv::op-write-start, 275 | nfsv4:nfssrv::op-commit-start, 276 | nfsv4:nfssrv::op-read-start, 277 | nfsv4:nfssrv::op-write-start 278 | /OPT_client == 0 || CLIENT == args[0]->ci_remote/ 279 | { 280 | self->startts = timestamp; 281 | } 282 | 283 | 284 | /* 285 | * commit 286 | */ 287 | nfsv3:nfssrv::op-commit-start, 288 | nfsv4:nfssrv::op-commit-start 289 | /self->startts/ 290 | { 291 | @c_commit_client[self->vers, args[0]->ci_remote] = count(); 292 | OPT_client == 0 ? @c_commit_client[self->vers, "all"] = count() : 1; 293 | } 294 | 295 | nfsv3:nfssrv::op-commit-done, 296 | nfsv4:nfssrv::op-commit-done 297 | /self->startts/ 298 | { 299 | t = timestamp - self->startts; 300 | @avgtime_commit[self->vers, args[0]->ci_remote] = avg(t); 301 | OPT_client == 0 ? @avgtime_commit[self->vers, "all"] = avg(t) : 1; 302 | self->startts = 0; 303 | } 304 | 305 | /* 306 | * read 307 | */ 308 | nfsv3:nfssrv::op-read-start, 309 | nfsv4:nfssrv::op-read-start 310 | /self->startts/ 311 | { 312 | @c_read_client[self->vers, args[0]->ci_remote] = count(); 313 | OPT_client == 0 ? @c_read_client[self->vers, "all"] = count() : 1; 314 | @read_b[self->vers, args[0]->ci_remote] = sum(args[2]->count); 315 | OPT_client == 0 ? @read_b[self->vers, "all"] = sum(args[2]->count) : 1; 316 | total_read_b += args[2]->count; 317 | @avg_aligned[self->vers, args[0]->ci_remote] = 318 | avg((args[2]->offset % OPT_blocksize) ? 0 : 100); 319 | @avg_aligned[self->vers, "all"] = 320 | avg((args[2]->offset % OPT_blocksize) ? 0 : 100); 321 | } 322 | 323 | nfsv3:nfssrv::op-read-done, 324 | nfsv4:nfssrv::op-read-done 325 | /self->startts/ 326 | { 327 | t = timestamp - self->startts; 328 | @avgtime_read[self->vers, args[0]->ci_remote] = avg(t); 329 | OPT_client == 0 ? @avgtime_read[self->vers, "all"] = avg(t) : 1; 330 | self->startts = 0; 331 | } 332 | 333 | /* 334 | * write (sync) 335 | */ 336 | nfsv3:nfssrv::op-write-start, 337 | nfsv4:nfssrv::op-write-start 338 | /self->startts/ 339 | { 340 | @avg_aligned[self->vers, args[0]->ci_remote] = 341 | avg((args[2]->offset % OPT_blocksize) ? 0 : 100); 342 | @avg_aligned[self->vers, "all"] = 343 | avg((args[2]->offset % OPT_blocksize) ? 0 : 100); 344 | } 345 | 346 | nfsv3:nfssrv::op-write-start 347 | /self->startts && args[2]->stable/ 348 | { 349 | self->issync = 1; 350 | data_len = args[2]->data.data_len; 351 | @c_swrite_client[self->vers, args[0]->ci_remote] = count(); 352 | OPT_client == 0 ? @c_swrite_client[self->vers, "all"] = count() : 1; 353 | @swrite_b[self->vers, args[0]->ci_remote] = sum(data_len); 354 | OPT_client == 0 ? @swrite_b[self->vers, "all"] = sum(data_len) : 1; 355 | total_swrite_b += data_len; 356 | } 357 | 358 | nfsv4:nfssrv::op-write-start 359 | /self->startts && args[2]->stable/ 360 | { 361 | self->issync = 1; 362 | data_len = args[2]->data_len; 363 | @c_swrite_client[self->vers, args[0]->ci_remote] = count(); 364 | OPT_client == 0 ? @c_swrite_client[self->vers, "all"] = count() : 1; 365 | @swrite_b[self->vers, args[0]->ci_remote] = sum(data_len); 366 | OPT_client == 0 ? 
@swrite_b[self->vers, "all"] = sum(data_len) : 1; 367 | total_swrite_b += data_len; 368 | } 369 | 370 | nfsv3:nfssrv::op-write-done, 371 | nfsv4:nfssrv::op-write-done 372 | /self->startts && self->issync/ 373 | { 374 | t = timestamp - self->startts; 375 | @avgtime_swrite[self->vers, args[0]->ci_remote] = avg(t); 376 | OPT_client == 0 ? @avgtime_swrite[self->vers, "all"] = avg(t) : 1; 377 | self->startts = 0; 378 | } 379 | 380 | /* 381 | * write (async) 382 | */ 383 | nfsv3:nfssrv::op-write-start 384 | /self->startts && !args[2]->stable/ 385 | { 386 | self->issync = 0; 387 | data_len = args[2]->data.data_len; 388 | @c_awrite_client[self->vers, args[0]->ci_remote] = count(); 389 | OPT_client == 0 ? @c_awrite_client[self->vers, "all"] = count() : 1; 390 | @awrite_b[self->vers, args[0]->ci_remote] = sum(data_len); 391 | OPT_client == 0 ? @awrite_b[self->vers, "all"] = sum(data_len) : 1; 392 | total_awrite_b += data_len; 393 | } 394 | 395 | nfsv4:nfssrv::op-write-start 396 | /self->startts && !args[2]->stable/ 397 | { 398 | self->issync = 0; 399 | data_len = args[2]->data_len; 400 | @c_awrite_client[self->vers, args[0]->ci_remote] = count(); 401 | OPT_client == 0 ? @c_awrite_client[self->vers, "all"] = count() : 1; 402 | @awrite_b[self->vers, args[0]->ci_remote] = sum(data_len); 403 | OPT_client == 0 ? @awrite_b[self->vers, "all"] = sum(data_len) : 1; 404 | total_awrite_b += data_len; 405 | } 406 | 407 | nfsv3:nfssrv::op-write-done, 408 | nfsv4:nfssrv::op-write-done 409 | /self->startts && !self->issync/ 410 | { 411 | t = timestamp - self->startts; 412 | @avgtime_awrite[self->vers, args[0]->ci_remote] = avg(t); 413 | OPT_client == 0 ? @avgtime_awrite[self->vers, "all"] = avg(t) : 1; 414 | self->startts = 0; 415 | } 416 | 417 | /* 418 | * timer 419 | */ 420 | profile:::tick-1sec 421 | { 422 | secs--; 423 | } 424 | 425 | /* 426 | * Print report 427 | */ 428 | profile:::tick-1sec 429 | /secs == 0/ 430 | { 431 | /* fetch 1 min load average */ 432 | self->load1a = `hp_avenrun[0] / 65536; 433 | self->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536; 434 | 435 | /* convert counters to Kbytes */ 436 | 437 | total_read_b /= 1024; 438 | total_swrite_b /= 1024; 439 | total_awrite_b /= 1024; 440 | 441 | /* normalize to seconds giving a rate */ 442 | 443 | normalize(@c_nfsops, INTERVAL); 444 | normalize(@c_read_client, INTERVAL); 445 | normalize(@c_swrite_client, INTERVAL); 446 | normalize(@c_awrite_client, INTERVAL); 447 | normalize(@c_commit_client, INTERVAL); 448 | 449 | /* normalize to KB per second */ 450 | 451 | normalize(@read_b, 1024 * INTERVAL); 452 | normalize(@awrite_b, 1024 * INTERVAL); 453 | normalize(@swrite_b, 1024 * INTERVAL); 454 | 455 | /* normalize average to microseconds */ 456 | 457 | normalize(@avgtime_read, 1000); 458 | normalize(@avgtime_swrite, 1000); 459 | normalize(@avgtime_awrite, 1000); 460 | normalize(@avgtime_commit, 1000); 461 | 462 | /* print status */ 463 | OPT_clear && !OPT_json ? printf("%s", CLEAR) : 1; 464 | wts_sec = (walltimestamp / 1000000000); 465 | 466 | OPT_json ? 467 | printf("PUTVAL '$HOSTNAME'.nfs/total_read_b %d:%d\nPUTVAL '$HOSTNAME'.nfs/total_swrite_b %d:%d\nPUTVAL '$HOSTNAME'.nfs/total_awrite_b %d:%d\n", 468 | wts_sec, total_read_b, wts_sec, total_swrite_b, wts_sec, total_awrite_b) 469 | : 470 | printf("%Y, load: %d.%02d, read: %-8d KB, swrite: %-8d KB, awrite: %-8d KB\n", 471 | walltimestamp, self->load1a, self->load1b, 472 | total_read_b, total_swrite_b, total_awrite_b); 473 | 474 | /* print headers */ 475 | OPT_json ? 
1 : 476 | printf("%s\t%-15s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\n", 477 | "Ver", "Client", "NFSOPS", 478 | "Reads", "SWrites", "AWrites", "Commits", 479 | "Rd_bw", "SWr_bw", "AWr_bw", 480 | "Rd_t", "SWr_t", "AWr_t", "Com_t", "Align%"); 481 | 482 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-nfsops %@d:%@d\n",@wts_sec, @c_nfsops); 483 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-reads %@d:%@d\n",@wts_sec, @c_read_client); 484 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-swrites %@d:%@d\n",@wts_sec, @c_swrite_client); 485 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-awrites %@d:%@d\n",@wts_sec, @c_awrite_client); 486 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-commits %@d:%@d\n",@wts_sec, @c_commit_client); 487 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-read_bytes %@d:%@d\n",@wts_sec, @read_b); 488 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-swrite_bytes %@d:%@d\n",@wts_sec, @swrite_b); 489 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-awrite_bytes %@d:%@d\n",@wts_sec, @awrite_b); 490 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-read_latency %@d:%@d\n",@wts_sec, @avgtime_read); 491 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-swrite_latency %@d:%@d\n",@wts_sec, @avgtime_swrite); 492 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-awrite_latency %@d:%@d\n",@wts_sec, @avgtime_awrite); 493 | printa("PUTVAL '$HOSTNAME'.nfs.clients.nfsv%s/%s/gauge-commit_latency %@d:%@d\n",@wts_sec, @avgtime_commit); 494 | 495 | /* clear data */ 496 | trunc(@c_nfsops); trunc(@c_read_client); trunc(@c_swrite_client); 497 | trunc(@c_awrite_client); trunc(@c_commit_client); 498 | trunc(@read_b); trunc(@awrite_b); trunc(@swrite_b); 499 | trunc(@avgtime_read); trunc(@avgtime_swrite); trunc(@avgtime_awrite); 500 | trunc(@avgtime_commit); trunc(@avg_aligned); trunc(@wts_sec); 501 | total_read_b = 0; 502 | total_swrite_b = 0; 503 | total_awrite_b = 0; 504 | secs = INTERVAL; 505 | counts--; 506 | } 507 | 508 | /* 509 | * end of program 510 | */ 511 | profile:::tick-1sec 512 | /counts == 0/ 513 | { 514 | exit(0); 515 | } 516 | 517 | /* 518 | * clean up when interrupted 519 | */ 520 | dtrace:::END 521 | { 522 | trunc(@c_nfsops); trunc(@c_read_client); trunc(@c_swrite_client); 523 | trunc(@c_awrite_client); trunc(@c_commit_client); 524 | trunc(@read_b); trunc(@awrite_b); trunc(@swrite_b); 525 | trunc(@avgtime_read); trunc(@avgtime_swrite); trunc(@avgtime_awrite); 526 | trunc(@avgtime_commit); trunc(@avg_aligned); trunc(@wts_sec); 527 | } 528 | ' 529 | -------------------------------------------------------------------------------- /collectd/nfsutil.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then 5 | export HOSTNAME=`hostname` 6 | else 7 | export HOSTNAME=$1 8 | fi 9 | 10 | if [ -z "$2" ] 11 | then 12 | export INTERVAL=10 13 | else 14 | export INTERVAL=$2 15 | fi 16 | 17 | /usr/sbin/dtrace -Cn ' 18 | 19 | #pragma D option quiet 20 | 21 | svc_xprt_qput:entry 22 | { 23 | @pending_reqs = max(args[0]->p_reqs); 24 | @act_threads = max(args[0]->p_threads - args[0]->p_asleep); 25 | @pool_pct_util = max(100 * (args[0]->p_threads - args[0]->p_asleep) / args[0]->p_maxthreads); 26 | } 27 | 28 | tick-'$INTERVAL'sec 29 | { 30 | @wts_sec = max(walltimestamp / 1000000000); 31 | 32 | printa("PUTVAL '$HOSTNAME'.nfs/req/gauge-maxpending %@d:%@d\n", @wts_sec, @pending_reqs); 33 | printa("PUTVAL 
'$HOSTNAME'.nfs/req/gauge-maxactive %@d:%@d\n", @wts_sec, @act_threads); 34 | printa("PUTVAL '$HOSTNAME'.nfs/req/gauge-pct_util %@d:%@d\n", @wts_sec,@pool_pct_util); 35 | 36 | trunc(@pending_reqs); 37 | trunc(@act_threads); 38 | trunc(@pool_pct_util); 39 | } 40 | ' 41 | -------------------------------------------------------------------------------- /collectd/openzfs_txg.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | 3 | /* 4 | * Script to observe the amount of dirty data being written out (async) 5 | * per sync event and also to see the dirty data max, so we can see how 6 | * close we are to the limit. 7 | * 8 | * Author: Adam Leventhal 9 | * Copyright 2014 Joyent 10 | */ 11 | 12 | #pragma D option quiet 13 | 14 | BEGIN 15 | { 16 | printf("Monitoring TXG syncs (dirty data) for %s\n", $$1) 17 | } 18 | 19 | txg-syncing 20 | /arg0 && ((dsl_pool_t *)arg0)->dp_spa->spa_name == $$1/ 21 | { 22 | this->dp = (dsl_pool_t *)arg0; 23 | start = timestamp; 24 | } 25 | 26 | txg-synced 27 | /start && ((dsl_pool_t *)arg0)->dp_spa->spa_name == $$1/ 28 | { 29 | this->d = timestamp - start; 30 | } 31 | 32 | 33 | fbt::dsl_pool_need_dirty_delay:return 34 | /args[1] == 1/ 35 | { 36 | this->delay++; 37 | } 38 | 39 | fbt::dsl_pool_need_dirty_delay:return 40 | /args[1] == 0/ 41 | { 42 | this->no_delay++; 43 | } 44 | 45 | txg-syncing 46 | /this->d && this->dp->dp_spa->spa_name == $$1 && (this->dp->dp_dirty_total / 1024) > 1/ 47 | { 48 | printf("%Y %s %4dMB of %4dMB used, synced in %dms, delays = %d, no_delays = %d\n", walltimestamp, stringof($$1), this->dp->dp_dirty_total / 1024 / 1024, `zfs_dirty_data_max / 1024 / 1024, this->d / 1000000, this->delay, this->no_delay); 49 | this->delay = 0; 50 | this->no_delay = 0; 51 | } 52 | 53 | -------------------------------------------------------------------------------- /collectd/sol-iostat.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # 3 | use POSIX; 4 | use IO::Socket::INET; 5 | use Data::Dumper; 6 | use strict; 7 | # where iostat lives 8 | $ENV{'PATH'} = "/usr/bin"; 9 | $|=0; 10 | 11 | my(undef, $uname, undef) = uname(); 12 | # later perhaps sort per jbod ? :) 13 | #my $graphite_host = "10.200.10.23"; 14 | my $graphite_host = "192.168.0.102"; 15 | my $graphite_port = 2003; 16 | my $output = shift || "carbon"; 17 | my $hostname = shift || $uname; 18 | my $interval = shift || 10; 19 | $interval = $ENV{COLLECTD_INTERVAL} if defined($ENV{COLLECTD_INTERVAL}); 20 | my $sock = undef; 21 | my $base; 22 | 23 | sub conCarbon { 24 | return $sock if (defined($sock)); 25 | $sock = IO::Socket::INET->new( 26 | Proto => "tcp", 27 | PeerPort => $graphite_port, 28 | PeerAddr => $graphite_host, 29 | ) || die "Unable to create socket: $!\n"; 30 | print "connected to $graphite_host on $graphite_port\n"; 31 | return $sock; 32 | } 33 | 34 | sub toOut { 35 | my $sock = shift; 36 | my $msg = shift || return undef; 37 | if ($sock) { 38 | $msg =~ s/\//\./g; 39 | print $msg; 40 | my $r = $sock->send($msg); 41 | if( ! defined $r ) { 42 | die "can't sent: " . 
$sock->error;
43 | }
44 | print "sent $r bytes\n";
45 | 
46 | } elsif ($msg =~ /(.*)\.(.*) \s+ ([\d\.]+) \s+ (\d+)/xi) {
47 | my $p = $1;
48 | my $t = $2;
49 | my $v = $3;
50 | my $time = $4;
51 | print "$p/gauge-$t $time:$v\n";
52 | }
53 | 
54 | }
55 | 
56 | if ($output eq "carbon") {
57 | $base="nexenta.$hostname.disks";
58 | } elsif ($output eq "collectd") {
59 | $base="PUTVAL $hostname.disks";
60 | }
61 | 
62 | my @header;
63 | die if ($interval !~ /\d+/);
64 | open(FH, "iostat -exn 10 | ") || die "unable to iostat: $!";
65 | while (<FH>) {
66 | chomp;
67 | $sock = conCarbon($sock) if ($output eq "carbon");
68 | if ($_ =~ /r\/s/) {
69 | $_ =~ s/\//_/g;
70 | $_ =~ s/\%(\w+)/pct_$1/g;
71 | @header = reverse(split(/\s+/, $_));
72 | shift @header;
73 | # print Dumper @header
74 | } elsif ($_ !~ /extended/) {
75 | my $time = time();
76 | my @m = reverse(split(/\s+/, $_));
77 | my $device = shift @m;
78 | for(0..$#m) {
79 | next if (!$header[$_]);
80 | toOut($sock, $base."/$device.".$header[$_]." ".$m[$_]." $time\n");
81 | # if ($m[$_] != 0.0 || $m[$_] != 0);
82 | }
83 | }
84 | }
85 | close(FH);
86 | 
--------------------------------------------------------------------------------
/collectd/sol-kstat.pl:
--------------------------------------------------------------------------------
1 | #!/usr/perl5/bin/perl -w
2 | #
3 | use Sun::Solaris::Kstat;
4 | use POSIX;
5 | use IO::Socket::INET;
6 | use Data::Dumper;
7 | use strict;
8 | # where pagesize lives
9 | $ENV{'PATH'} = "/usr/bin";
10 | $|=0;
11 | 
12 | my(undef, $hostname, undef) = uname();
13 | #my $graphite_host = "10.200.10.23";
14 | my $graphite_host = "192.168.0.102";
15 | my $graphite_port = 2003;
16 | my $output = shift || "carbon";
17 | my $interval = shift || 10;
18 | my $sock = undef;
19 | my $base;
20 | 
21 | sub conCarbon {
22 | return $sock if (defined($sock));
23 | $sock = IO::Socket::INET->new(
24 | Proto => "tcp",
25 | PeerPort => $graphite_port,
26 | PeerAddr => $graphite_host,
27 | ) || die "Unable to create socket: $!\n";
28 | print "connected to $graphite_host on $graphite_port\n";
29 | return $sock;
30 | }
31 | 
32 | sub toOut {
33 | my $sock = shift;
34 | my $msg = shift || return undef;
35 | if ($sock) {
36 | print $msg;
37 | $sock->send($msg);
38 | } elsif ($msg =~ /(.*)\.(.*) \s+ ([\d\.]+) \s+ (\d+)/xi) {
39 | my $p = $1;
40 | my $k = $2;
41 | my $v = $3;
42 | my $t = $4;
43 | print "$p/gauge-$k $t:$v\n";
44 | }
45 | }
46 | 
47 | if ($output eq "carbon") {
48 | $base="nexenta.$hostname.kstat";
49 | } elsif ($output eq "collectd") {
50 | $base="PUTVAL $hostname.kstat"
51 | }
52 | 
53 | my $kstat = Sun::Solaris::Kstat->new();
54 | my $phys_pages = ${kstat}->{unix}->{0}->{system_pages}->{physmem};
55 | my $pagesize = `pagesize`;
56 | my $phys_memory = ($phys_pages * $pagesize);
57 | 
58 | while (1) {
59 | $sock = conCarbon($sock) if ($output eq "carbon");
60 | my $time=time();
61 | $kstat->update();
62 | # memory
63 | # print Dumper $kstat;
64 | my $free_pages = ${kstat}->{unix}->{0}->{system_pages}->{freemem};
65 | my $lotsfree_pages = ${kstat}->{unix}->{0}->{system_pages}->{lotsfree};
66 | my $free_memory = ($free_pages * $pagesize);
67 | my $lotsfree_memory = ($lotsfree_pages * $pagesize);
68 | 
69 | toOut($sock, sprintf("$base/memory.total %d $time\n", $phys_memory / 1024 / 1024));
70 | toOut($sock, sprintf("$base/memory.free %d $time\n", $free_memory / 1024 / 1024));
71 | toOut($sock, sprintf("$base/memory.lotsfree %d $time\n", $lotsfree_memory / 1024 / 1024));
72 | 
73 | # gather more stats
74 | my $mru_size =
${kstat}->{zfs}->{0}->{arcstats}->{p}; 75 | my $target_size = ${kstat}->{zfs}->{0}->{arcstats}->{c}; 76 | my $arc_min_size = ${kstat}->{zfs}->{0}->{arcstats}->{c_min}; 77 | my $arc_max_size = ${kstat}->{zfs}->{0}->{arcstats}->{c_max}; 78 | 79 | my $arc_size = ${kstat}->{zfs}->{0}->{arcstats}->{size}; 80 | my $mfu_size = ${target_size} - $mru_size; 81 | my $mru_perc = 100*($mru_size / $target_size); 82 | my $mfu_perc = 100*($mfu_size / $target_size); 83 | my $l2_arc_size = ${kstat}->{zfs}->{0}->{arcstats}->{l2_size}; 84 | 85 | # output arc 86 | toOut($sock, sprintf("$base/arc.size %d $time\n", $arc_size / 1024 / 1024)); 87 | toOut($sock, sprintf("$base/arc.target_c %d $time\n", $target_size / 1024 / 1024)); 88 | toOut($sock, sprintf("$base/arc.min_size %d $time\n", $arc_min_size / 1024 / 1024)); 89 | toOut($sock, sprintf("$base/arc.max_size %d $time\n", $arc_max_size / 1024 / 1024)); 90 | 91 | # breakdown arc 92 | toOut($sock, sprintf("$base/arc.mru_pct %2d $time\n", $mru_perc)); 93 | toOut($sock, sprintf("$base/arc.mru_p %d $time\n", $mru_size / 1024 / 1024)); 94 | toOut($sock, sprintf("$base/arc.mfu_pct %2d $time\n", $mfu_perc)); 95 | toOut($sock, sprintf("$base/arc.mfu_c-p %d $time\n", $mfu_size / 1024 / 1024)); 96 | 97 | # L2ARC 98 | toOut($sock, sprintf("$base/l2arc.size %d $time\n", $l2_arc_size / 1024 / 1024)); 99 | 100 | # efficiency statistics... 101 | my $arc_hits = ${kstat}->{zfs}->{0}->{arcstats}->{hits}; 102 | my $arc_misses = ${kstat}->{zfs}->{0}->{arcstats}->{misses}; 103 | my $arc_accesses_total = ($arc_hits + $arc_misses); 104 | my $l2_arc_hits = ${kstat}->{zfs}->{0}->{arcstats}->{l2_hits}; 105 | my $l2_arc_misses = ${kstat}->{zfs}->{0}->{arcstats}->{l2_misses}; 106 | my $l2_arc_accesses_total = ($l2_arc_hits + $l2_arc_misses); 107 | 108 | my $arc_hit_perc = 100*($arc_hits / $arc_accesses_total); 109 | my $arc_miss_perc = 100*($arc_misses / $arc_accesses_total); 110 | my $l2_arc_hit_perc = 100*($l2_arc_hits / $l2_arc_accesses_total); 111 | my $l2_arc_miss_perc = 100*($l2_arc_misses / $l2_arc_accesses_total); 112 | 113 | my $mfu_hits = ${kstat}->{zfs}->{0}->{arcstats}->{mfu_hits}; 114 | my $mru_hits = ${kstat}->{zfs}->{0}->{arcstats}->{mru_hits}; 115 | my $mfu_ghost_hits = ${kstat}->{zfs}->{0}->{arcstats}->{mfu_ghost_hits}; 116 | my $mru_ghost_hits = ${kstat}->{zfs}->{0}->{arcstats}->{mru_ghost_hits}; 117 | my $anon_hits = $arc_hits - ($mfu_hits + $mru_hits + $mfu_ghost_hits + $mru_ghost_hits); 118 | 119 | my $real_hits = ($mfu_hits + $mru_hits); 120 | my $real_hits_perc = 100*($real_hits / $arc_accesses_total); 121 | 122 | # should be based on TOTAL HITS ($arc_hits) 123 | my $anon_hits_perc = 100*($anon_hits / $arc_hits); 124 | my $mfu_hits_perc = 100*($mfu_hits / $arc_hits); 125 | my $mru_hits_perc = 100*($mru_hits / $arc_hits); 126 | my $mfu_ghost_hits_perc = 100*($mfu_ghost_hits / $arc_hits); 127 | my $mru_ghost_hits_perc = 100*($mru_ghost_hits / $arc_hits); 128 | 129 | my $demand_data_hits = ${kstat}->{zfs}->{0}->{arcstats}->{demand_data_hits}; 130 | my $demand_metadata_hits = ${kstat}->{zfs}->{0}->{arcstats}->{demand_metadata_hits}; 131 | my $prefetch_data_hits = ${kstat}->{zfs}->{0}->{arcstats}->{prefetch_data_hits}; 132 | my $prefetch_metadata_hits = ${kstat}->{zfs}->{0}->{arcstats}->{prefetch_metadata_hits}; 133 | 134 | my $demand_data_misses = ${kstat}->{zfs}->{0}->{arcstats}->{demand_data_misses}; 135 | my $demand_metadata_misses = ${kstat}->{zfs}->{0}->{arcstats}->{demand_metadata_misses}; 136 | my $prefetch_data_misses = 
${kstat}->{zfs}->{0}->{arcstats}->{prefetch_data_misses}; 137 | my $prefetch_metadata_misses = ${kstat}->{zfs}->{0}->{arcstats}->{prefetch_metadata_misses}; 138 | 139 | my $demand_data_hits_perc = 100*($demand_data_hits / $arc_hits); 140 | my $demand_metadata_hits_perc = 100*($demand_metadata_hits / $arc_hits); 141 | my $prefetch_data_hits_perc = 100*($prefetch_data_hits / $arc_hits); 142 | my $prefetch_metadata_hits_perc = 100*($prefetch_metadata_hits / $arc_hits); 143 | 144 | my $demand_data_misses_perc = 100*($demand_data_misses / $arc_misses); 145 | my $demand_metadata_misses_perc = 100*($demand_metadata_misses / $arc_misses); 146 | my $prefetch_data_misses_perc = 100*($prefetch_data_misses / $arc_misses); 147 | my $prefetch_metadata_misses_perc = 100*($prefetch_metadata_misses / $arc_misses); 148 | 149 | my $prefetch_data_total = ($prefetch_data_hits + $prefetch_data_misses); 150 | my $prefetch_data_perc = "00"; 151 | if ($prefetch_data_total > 0 ) { 152 | $prefetch_data_perc = 100*($prefetch_data_hits / $prefetch_data_total); 153 | } 154 | 155 | my $demand_data_total = ($demand_data_hits + $demand_data_misses); 156 | my $demand_data_perc = 100*($demand_data_hits / $demand_data_total); 157 | 158 | # arc effciency 159 | toOut($sock, sprintf("$base/arc.cache.access_total %s $time\n", $arc_accesses_total)); 160 | toOut($sock, sprintf("$base/arc.cache.hit_pct %2d $time\n", $arc_hit_perc)); 161 | toOut($sock, sprintf("$base/arc.cache.hits %s $time\n", $arc_hits)); 162 | toOut($sock, sprintf("$base/arc.cache.mis_pct %2d $time\n", $arc_miss_perc)); 163 | toOut($sock, sprintf("$base/arc.cache.misses %s $time\n", $arc_misses)); 164 | toOut($sock, sprintf("$base/arc.cache.real_pct %2d $time\n", $real_hits_perc)); 165 | toOut($sock, sprintf("$base/arc.cache.real_hits %s $time\n", $real_hits)); 166 | 167 | # prefetch / demand 168 | toOut($sock, sprintf("$base/demand.efficiency %2d $time\n", $demand_data_perc)); 169 | toOut($sock, sprintf("$base/demand.data_hits %s $time\n", $demand_data_hits)); 170 | toOut($sock, sprintf("$base/demand.metadata_hits %s $time\n", $demand_metadata_hits)); 171 | toOut($sock, sprintf("$base/demand.data_misses %s $time\n", $demand_data_misses)); 172 | toOut($sock, sprintf("$base/demand.metadata_misses %s $time\n", $demand_metadata_misses)); 173 | toOut($sock, sprintf("$base/demand.data_hits_pct %2d $time\n", $demand_data_hits_perc)); 174 | toOut($sock, sprintf("$base/demand.metadata_hits_pct %2d $time\n", $demand_metadata_hits_perc)); 175 | toOut($sock, sprintf("$base/demand.data_misses_pct %2d $time\n", $demand_data_misses_perc)); 176 | toOut($sock, sprintf("$base/demand.metadata_misses_pct %2d $time\n", $demand_metadata_misses_perc)); 177 | toOut($sock, sprintf("$base/prefetch.efficiency %2d $time\n", $prefetch_data_perc)); 178 | toOut($sock, sprintf("$base/prefetch.data_hits %s $time\n", $prefetch_data_hits)); 179 | toOut($sock, sprintf("$base/prefetch.metadata_hits %s $time\n", $prefetch_metadata_hits)); 180 | toOut($sock, sprintf("$base/prefetch.data_misses %s $time\n", $prefetch_data_misses)); 181 | toOut($sock, sprintf("$base/prefetch.metadata_misses %s $time\n", $prefetch_metadata_misses)); 182 | toOut($sock, sprintf("$base/prefetch.data_hits_pct %2d $time\n", $prefetch_data_hits_perc)); 183 | toOut($sock, sprintf("$base/prefetch.metadata_hits_pct %2d $time\n", $prefetch_metadata_hits_perc)); 184 | toOut($sock, sprintf("$base/prefetch.data_misses_pct %2d $time\n", $prefetch_data_misses_perc)); 185 | toOut($sock, sprintf("$base/prefetch.metadata.misses_pct %2d 
$time\n", $prefetch_metadata_misses_perc)); 186 | 187 | # MRU/MFU 188 | toOut($sock, sprintf("$base/cache.mru_pct %2d $time\n", $mru_hits_perc)); 189 | toOut($sock, sprintf("$base/cache.mru_hits %s $time\n", $mru_hits)); 190 | toOut($sock, sprintf("$base/cache.mfu_pct %2d $time\n", $mfu_hits_perc)); 191 | toOut($sock, sprintf("$base/cache.mfu_hits %s $time\n", $mfu_hits)); 192 | toOut($sock, sprintf("$base/cache.mru_ghost_pct %2d $time\n", $mru_ghost_hits_perc)); 193 | toOut($sock, sprintf("$base/cache.mru_ghost %s $time\n", $mru_ghost_hits)); 194 | toOut($sock, sprintf("$base/cache.mfu_ghost_pct %2d $time\n", $mfu_ghost_hits_perc)); 195 | toOut($sock, sprintf("$base/cache.mfu_ghost %s $time\n", $mfu_ghost_hits)); 196 | 197 | # L2ARC 198 | toOut($sock, sprintf("$base/l2arc.access %d $time\n", $l2_arc_accesses_total)); 199 | toOut($sock, sprintf("$base/l2arc.arc_hit_pct %2d $time\n", $l2_arc_hit_perc)); 200 | toOut($sock, sprintf("$base/l2arc.arc_hits %s $time\n", $l2_arc_hits)); 201 | toOut($sock, sprintf("$base/l2arc.arc_misses_pct %2d $time\n", $l2_arc_miss_perc)); 202 | toOut($sock, sprintf("$base/l2arc.arc_misses %s $time\n", $l2_arc_misses)); 203 | 204 | # toOut($sock, sprintf("$base. 205 | # toOut($sock, sprintf("$base. 206 | select(undef,undef,undef,$interval); 207 | } 208 | -------------------------------------------------------------------------------- /collectd/sol-stmf.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use Getopt::Std; 4 | use POSIX; 5 | use Data::Dumper; 6 | use IO::Socket::INET; 7 | use strict; 8 | $| = 1; 9 | 10 | my %opt; 11 | my(undef, $hostname, undef) = uname(); 12 | my $interval; 13 | my $base; 14 | my $debug = 0; 15 | 16 | # 17 | # Only if output option is "carbon" 18 | # 19 | my $graphite_host = "192.168.0.102"; 20 | my $graphite_port = 2003; 21 | my $sock = undef; 22 | 23 | # Override PATH variable so we get run setuid 24 | $ENV{'PATH'} = "/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/usr/gnu/bin"; 25 | 26 | 27 | sub usage() 28 | { 29 | print STDERR "usage: $0 [-hd] -o output \n"; 30 | print STDERR "-h : this (help) message\n"; 31 | print STDERR "-d : debug\n"; 32 | print STDERR "-H : Override Hostname\n"; 33 | print STDERR "example: $0 -d -o stdout\n"; 34 | exit; 35 | } 36 | 37 | # 38 | # Creat socket. Use UDP so we don't need to handle dropped connections. 
39 | # 40 | sub conCarbon { 41 | return $sock if (defined($sock)); 42 | $sock = IO::Socket::INET->new( 43 | Proto => "udp", 44 | PeerPort => $graphite_port, 45 | PeerAddr => $graphite_host, 46 | ) || die "Unable to create socket: $!\n"; 47 | print "connected to $graphite_host on $graphite_port\n"; 48 | return $sock; 49 | } 50 | 51 | sub toOut { 52 | local $| = 1; 53 | my $msg = shift || return undef; 54 | # Send over the socket and ignore transport errors 55 | if (($opt{o} eq "carbon") && ($sock)) { 56 | print "carbon $msg" if ($debug == 1); 57 | $sock->send($msg); 58 | } 59 | elsif (($opt{o} eq "stdout") || ($opt{o} eq "collectd")) { 60 | print "$opt{o} $msg" if ($debug == 1); 61 | print "$msg"; 62 | } 63 | } 64 | 65 | # 66 | # Main 67 | # 68 | 69 | my $opt_string = 'hdo:n:'; 70 | getopts( "$opt_string", \%opt ) or usage(); 71 | usage() if $opt{h}; 72 | 73 | $debug = 1 if $opt{d}; 74 | print "$0 $opt{o} \n" if ($debug == 1); 75 | 76 | if ($opt{n}) { 77 | $hostname = $opt{n}; 78 | } 79 | 80 | if ($opt{o} eq "carbon") { 81 | $sock = conCarbon($sock); 82 | $base="nexenta.ns3.$hostname.comstar.io"; 83 | } elsif ($opt{o} eq "collectd") { 84 | # prefix is set in collectd.conf 85 | $base="PUTVAL $hostname.comstar/io"; 86 | } elsif ($opt{o} eq "stdout") { 87 | $base="$hostname.comstar io:"; 88 | } 89 | 90 | 91 | open(STATS, "/usr/local/collectd/bin/stmf_iops.d |") || die "can't fork: $!"; 92 | while () { 93 | chomp; 94 | if ($_ !~ /^\d+$/) { 95 | # if we suspect epoch is missing... 96 | if ($_ =~ /(.*) \s+ ([\d\.]+) \s+ (\d+)$/xi) { 97 | toOut("$base.$1 $2 $3\n") if ($opt{o} eq "carbon"); 98 | toOut("$base/gauge-$1 $3:$2\n") if ($opt{o} eq "collectd"); 99 | #print "$base/gauge-$1 $3:$2\n" if ($opt{o} eq "collectd"); 100 | toOut("$base $1 = $2 at $3\n") if ($opt{o} eq "stdout"); 101 | } elsif ($_ =~ /(.*) \s+ ([\d\.]+)/) { 102 | toOut("$base.$1 $2 ".time()."\n") if ($opt{o} eq "carbon"); 103 | toOut("$base/gauge-$1 ".time().":$2\n") if ($opt{o} eq "collectd"); 104 | toOut("$base $1 = $2 at ".time()."\n") if ($opt{o} eq "stdout"); 105 | } 106 | } 107 | } 108 | close(STATS) || die "bad stat: $! 
$?"; 109 | 110 | -------------------------------------------------------------------------------- /collectd/stmf_iops.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -qs 2 | 3 | dtrace:::BEGIN 4 | { 5 | r_iops = 1; 6 | rtask = 0; 7 | rqtime = 0; 8 | r_lu_xfer = 0; 9 | r_lport_xfer = 0; 10 | 11 | w_iops = 1; 12 | wtask = 0; 13 | wqtime = 0; 14 | w_lu_xfer = 0; 15 | w_lport_xfer = 0; 16 | } 17 | 18 | /* 19 | * read task completed 20 | */ 21 | sdt:stmf:stmf_task_free:stmf-task-end 22 | /((scsi_task_t *) arg0)->task_flags & 0x40/ 23 | { 24 | this->task = (scsi_task_t *) arg0; 25 | this->lu = (stmf_lu_t *) this->task->task_lu; 26 | this->itask = (stmf_i_scsi_task_t *) this->task->task_stmf_private; 27 | this->lport = this->task->task_lport; 28 | 29 | r_iops = r_iops + 1; 30 | 31 | rtask = rtask + (arg1 / 1000); 32 | rqtime = rqtime + (this->itask->itask_waitq_time / 1000); 33 | r_lu_xfer = r_lu_xfer + (this->itask->itask_lu_read_time / 1000); 34 | r_lport_xfer = r_lport_xfer + (this->itask->itask_lport_read_time / 1000); 35 | } 36 | 37 | /* 38 | * write task completed 39 | */ 40 | sdt:stmf:stmf_task_free:stmf-task-end 41 | /((scsi_task_t *) arg0)->task_flags & 0x20/ 42 | { 43 | this->task = (scsi_task_t *) arg0; 44 | this->lu = (stmf_lu_t *) this->task->task_lu; 45 | this->itask = (stmf_i_scsi_task_t *) this->task->task_stmf_private; 46 | this->lport = this->task->task_lport; 47 | 48 | w_iops = w_iops + 1; 49 | 50 | /* Save total time in usecs */ 51 | wtask = wtask + (arg1 / 1000); 52 | wqtime = wqtime + (this->itask->itask_waitq_time / 1000); 53 | w_lu_xfer = w_lu_xfer + (this->itask->itask_lu_write_time / 1000); 54 | w_lport_xfer = w_lport_xfer + (this->itask->itask_lport_write_time / 1000); 55 | } 56 | 57 | profile:::tick-5sec 58 | /r_iops || w_iops/ 59 | { 60 | 61 | timer = (walltimestamp / 1000000000); 62 | 63 | avg_task = rtask / r_iops; 64 | avg_qtime = rqtime / r_iops; 65 | avg_lu_xfer = r_lu_xfer / r_iops; 66 | avg_lport_xfer = r_lport_xfer / r_iops; 67 | 68 | printf("r_s %d %d\n", r_iops, timer); 69 | printf("r_lu_xfer_us %d %d\n", avg_lu_xfer, timer); 70 | printf("r_lport_xfer_us %d %d\n", avg_lport_xfer, timer); 71 | printf("r_qtime_us %d %d\n", avg_qtime, timer); 72 | printf("r_tasktime_us %d %d\n", avg_task, timer); 73 | 74 | avg_task = wtask / w_iops; 75 | avg_qtime = wqtime / w_iops; 76 | avg_lu_xfer = w_lu_xfer / w_iops; 77 | avg_lport_xfer = w_lport_xfer / w_iops; 78 | 79 | printf("w_s %d %d\n", w_iops, timer); 80 | printf("w_lu_xfer_us %d %d\n", avg_lu_xfer, timer); 81 | printf("w_lport_xfer_us %d %d\n", avg_lport_xfer, timer); 82 | printf("w_qtime_us %d %d\n", avg_qtime, timer); 83 | printf("w_tasktime_us %d %d\n", avg_task, timer); 84 | 85 | /* Resetting globals */ 86 | r_iops = 1; 87 | rtask = 0; 88 | rqtime = 0; 89 | r_lu_xfer = 0; 90 | r_lport_xfer = 0; 91 | 92 | w_iops = 1; 93 | wtask = 0; 94 | wqtime = 0; 95 | w_lu_xfer = 0; 96 | w_lport_xfer = 0; 97 | } 98 | -------------------------------------------------------------------------------- /collectd/zfsio.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then 5 | export HOSTNAME=`hostname` 6 | else 7 | export HOSTNAME=$1 8 | fi 9 | 10 | if [ -z "$2" ] 11 | then 12 | export INTERVAL=10 13 | else 14 | export INTERVAL=$2 15 | fi 16 | 17 | /usr/sbin/dtrace -Cn ' 18 | 19 | #pragma D option quiet 20 | 21 | /* Description: This script will show read/write IOPs and throughput for ZFS 22 | 
* filesystems and zvols on a per-dataset basis. It can be used to estimate 23 | * which dataset is causing the most I/O load on the current system. It should 24 | * only be used for comparative analysis. */ 25 | /* Author: Kirill.Davydychev@Nexenta.com */ 26 | /* Copyright 2012, 2014 Nexenta Systems, Inc. All rights reserved. */ 27 | /* Version: 0.5b */ 28 | 29 | dmu_buf_hold_array_by_dnode:entry 30 | /args[0]->dn_objset->os_dsl_dataset && args[3]/ /* Reads */ 31 | { 32 | this->d = args[0]->dn_objset->os_dsl_dataset->ds_dir; 33 | this->path = stringof(this->d->dd_myname); 34 | this->p = this->d->dd_parent; 35 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 36 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 37 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 38 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 39 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 40 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 41 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 42 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 43 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 44 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 45 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 46 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 47 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 48 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 49 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 50 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 51 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 52 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 53 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 54 | 55 | @ior[this->path] = count(); 56 | @tpr[this->path] = sum(args[2]); 57 | @bsr[this->path] = avg(args[2]); 58 | @wts_sec[this->path] = max(walltimestamp / 1000000000); 59 | @distr[strjoin(this->path, " Reads")] = quantize(args[2]); 60 | } 61 | 62 | dmu_buf_hold_array_by_dnode:entry 63 | /args[0]->dn_objset->os_dsl_dataset && !args[3]/ /* Writes */ 64 | { 65 | this->d = args[0]->dn_objset->os_dsl_dataset->ds_dir; 66 | this->path = stringof(this->d->dd_myname); 67 | this->p = this->d->dd_parent; 68 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 69 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 70 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? 
"-" : "/" ,this->path)):this->path; 71 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 72 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 73 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 74 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 75 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 76 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 77 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 78 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 79 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 80 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 81 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 82 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 83 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 84 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 85 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 86 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin((this->p->dd_parent != NULL) ? "-" : "/" ,this->path)):this->path; 87 | 88 | @iow[this->path] = count(); 89 | @tpw[this->path] = sum(args[2]); 90 | @bsw[this->path] = avg(args[2]); 91 | @wts_sec[this->path] = max(walltimestamp / 1000000000); 92 | @distw[strjoin(this->path, " Writes")] = quantize(args[2]); 93 | } 94 | 95 | tick-'$INTERVAL'sec,END 96 | { 97 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-reads %@d:%@d\n", @wts_sec, @ior); 98 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-writes %@d:%@d\n", @wts_sec, @iow); 99 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-r_bytes %@d:%@d\n", @wts_sec, @tpr); 100 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-w_bytes %@d:%@d\n", @wts_sec, @tpw); 101 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-r_bs %@d:%@d\n", @wts_sec, @bsr); 102 | printa("PUTVAL '$HOSTNAME'.zfs.%s/gauge-w_bs %@d:%@d\n", @wts_sec, @bsw); 103 | 104 | 105 | 106 | trunc(@ior); trunc(@tpr); trunc(@iow); trunc(@tpw); trunc(@bsr); trunc(@bsw); trunc(@wts_sec); 107 | /* clear(@ior); clear(@tpr); clear(@iow); clear(@tpw); clear(@bsr); clear(@bsw);*/ 108 | /* TODO: Make script more interactive. Above, uncomment clear() and comment trunc() line in order to change 109 | truncate behavior, or comment out both lines to get cumulative stats. 
*/ 110 | } 111 | ' 112 | -------------------------------------------------------------------------------- /emlxs_reset.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | emlxs_reset:entry 5 | { 6 | @[stack(100)]=count(); 7 | printf("%Y: Reset entry command %d\n",walltimestamp,args[1]); 8 | } 9 | 10 | emlxs_reset:return 11 | { 12 | printf("%Y: Reset return code %d\n",walltimestamp,args[1]); 13 | printa(@); 14 | } 15 | 16 | tick-30sec 17 | { 18 | printa(@);trunc(@); 19 | } 20 | -------------------------------------------------------------------------------- /ixgbe_debug.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -AFs 2 | #pragma D option quiet 3 | 4 | /* This script will enable anonymous tracing of the SFP identification code in ixgbe on boot. 5 | * Based on the error code one can tell what exactly the issue is, and whether it's in the SFP. */ 6 | 7 | /* Usage: run "./ixgbe_debug.d", reboot, and collect the contents of the anon buffer with "dtrace -ae". */ 8 | 9 | /* Author: Kirill.Davydychev@Nexenta.com */ 10 | /* Copyright 2014, Nexenta Systems, Inc. All rights reserved. */ 11 | /* Version: 0.2 */ 12 | /* To get the latest version of this script, 13 | * wget https://raw.github.com/kdavyd/dtrace/master/ixgbe_debug.d --no-ch */ 14 | 15 | string ixgbe_err[uchar_t]; 16 | 17 | dtrace:::BEGIN 18 | { 19 | /* Definitions from /usr/src/uts/common/io/ixgbe/ixgbe_type.h */ 20 | /* Generated using awk '{print "ixgbe_err["$3"] = \"" $2"\";"}' */ 21 | 22 | ixgbe_err[0] = "IXGBE_SUCCESS"; 23 | ixgbe_err[-1] = "IXGBE_ERR_EEPROM"; 24 | ixgbe_err[-2] = "IXGBE_ERR_EEPROM_CHECKSUM"; 25 | ixgbe_err[-3] = "IXGBE_ERR_PHY"; 26 | ixgbe_err[-4] = "IXGBE_ERR_CONFIG"; 27 | ixgbe_err[-5] = "IXGBE_ERR_PARAM"; 28 | ixgbe_err[-6] = "IXGBE_ERR_MAC_TYPE"; 29 | ixgbe_err[-7] = "IXGBE_ERR_UNKNOWN_PHY"; 30 | ixgbe_err[-8] = "IXGBE_ERR_LINK_SETUP"; 31 | ixgbe_err[-9] = "IXGBE_ERR_ADAPTER_STOPPED"; 32 | ixgbe_err[-10] = "IXGBE_ERR_INVALID_MAC_ADDR"; 33 | ixgbe_err[-11] = "IXGBE_ERR_DEVICE_NOT_SUPPORTED"; 34 | ixgbe_err[-12] = "IXGBE_ERR_MASTER_REQUESTS_PENDING"; 35 | ixgbe_err[-13] = "IXGBE_ERR_INVALID_LINK_SETTINGS"; 36 | ixgbe_err[-14] = "IXGBE_ERR_AUTONEG_NOT_COMPLETE"; 37 | ixgbe_err[-15] = "IXGBE_ERR_RESET_FAILED"; 38 | ixgbe_err[-16] = "IXGBE_ERR_SWFW_SYNC"; 39 | ixgbe_err[-17] = "IXGBE_ERR_PHY_ADDR_INVALID"; 40 | ixgbe_err[-18] = "IXGBE_ERR_I2C"; 41 | ixgbe_err[-19] = "IXGBE_ERR_SFP_NOT_SUPPORTED"; 42 | ixgbe_err[-20] = "IXGBE_ERR_SFP_NOT_PRESENT"; 43 | ixgbe_err[-21] = "IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT"; 44 | ixgbe_err[-22] = "IXGBE_ERR_NO_SAN_ADDR_PTR"; 45 | ixgbe_err[-23] = "IXGBE_ERR_FDIR_REINIT_FAILED"; 46 | ixgbe_err[-24] = "IXGBE_ERR_EEPROM_VERSION"; 47 | ixgbe_err[-25] = "IXGBE_ERR_NO_SPACE"; 48 | ixgbe_err[-26] = "IXGBE_ERR_OVERTEMP"; 49 | ixgbe_err[-27] = "IXGBE_ERR_FC_NOT_NEGOTIATED"; 50 | ixgbe_err[-28] = "IXGBE_ERR_FC_NOT_SUPPORTED"; 51 | ixgbe_err[-30] = "IXGBE_ERR_SFP_SETUP_NOT_COMPLETE"; 52 | ixgbe_err[-31] = "IXGBE_ERR_PBA_SECTION"; 53 | ixgbe_err[-32] = "IXGBE_ERR_INVALID_ARGUMENT"; 54 | ixgbe_err[-33] = "IXGBE_ERR_HOST_INTERFACE_COMMAND"; 55 | ixgbe_err[-34] = "IXGBE_ERR_OUT_OF_MEM"; 56 | } 57 | 58 | ixgbe_identify_sfp_module_generic:return, 59 | ixgbe_identify_phy_generic:return, 60 | ixgbe_reset_phy_generic:return, 61 | ixgbe_read_phy_reg_generic:return, 62 | ixgbe_write_phy_reg_generic:return, 63 | ixgbe_setup_phy_link_generic:return, 64 | 
ixgbe_get_copper_link_capabilities_generic:return, 65 | ixgbe_check_phy_link_tnx:return, 66 | ixgbe_setup_phy_link_tnx:return, 67 | ixgbe_get_phy_firmware_version_tnx:return, 68 | ixgbe_get_phy_firmware_version_generic:return, 69 | ixgbe_reset_phy_nl:return, 70 | ixgbe_identify_module_generic:return, 71 | ixgbe_get_sfp_init_sequence_offsets:return, 72 | ixgbe_tn_check_overtemp:return 73 | { 74 | printf("%s:%s\n",probefunc,ixgbe_err[arg1]); 75 | } 76 | 77 | ixgbe_identify_phy_generic:return 78 | /arg1 == -17/ 79 | { 80 | printf("%s: This is normal, the code retries. Ignore and carry on.\n",probefunc); 81 | } 82 | -------------------------------------------------------------------------------- /kd_collect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | # 3 | # Author: Kirill.Davydychev@Nexenta.com 4 | # Copyright 2013, Nexenta Systems, Inc. 5 | # 6 | 7 | # 8 | # Basic sanity check 9 | # 10 | 11 | un="/usr/bin/uname" 12 | nmc_cmd=$(which nmc) 13 | if [[ ! $(${un} -v) =~ NexentaOS ]]; then 14 | printf "%s\n" "System is not a NexentaStor Appliance. Exiting." 15 | exit 1 16 | elif [ ! -x "${nmc_cmd}" ]; then 17 | printf "%s\n" "NMC is not found. Exiting." \ 18 | "This script may not be apporpriate for this system." 19 | exit 1 20 | fi 21 | 22 | # 23 | # Setup the performance logs filesystem, it's fine if we fail - that just means we already have this. 24 | # 25 | 26 | zfs create syspool/perflogs 27 | zfs set compression=gzip-9 syspool/perflogs 28 | zfs set mountpoint=/perflogs syspool/perflogs 29 | cd /perflogs 30 | 31 | # 32 | # Download necessary scripts 33 | # 34 | 35 | wget https://raw.github.com/kdavyd/dtrace/master/nfsutil.d --no-ch 36 | wget https://raw.github.com/kdavyd/dtrace/master/txg_monitor.v3.d --no-ch 37 | wget https://raw.github.com/kdavyd/dtrace/master/kmem_reap_100ms.d --no-ch 38 | wget https://raw.github.com/kdavyd/dtrace/master/zfsio.d --no-ch 39 | wget https://raw.github.com/kdavyd/arcstat/master/arcstat.pl --no-ch 40 | wget https://raw.githubusercontent.com/kdavyd/sparta/master/payload/hotkernel.priv --no-ch 41 | wget https://raw.githubusercontent.com/kdavyd/dtrace/master/kmem_oversize.d --no-ch 42 | chmod +x *.d 43 | chmod +x arcstat.pl 44 | chmod +x hotkernel.priv 45 | 46 | # 47 | # Start the traces 48 | # 49 | 50 | ./nfsutil.d >> nfsutil.out & 51 | IFS=$'\n' zpools=($(zpool list -H -o name)) 52 | for zpool in "${zpools[@]}" ; do 53 | sleep 1 54 | ./txg_monitor.v3.d "$zpool" >> "txg.$zpool.out" & 55 | done 56 | ./kmem_reap_100ms.d >> kmem.out & 57 | ./arcstat.pl -f time,read,hits,miss,hit%,l2read,l2hits,l2miss,l2hit%,arcsz,l2size 1 >> arcstat.out & 58 | ./zfsio.d >> zfsio.out & 59 | ./kmem_oversize.d >> kmem_oversize.out & 60 | zpool iostat -Td 1 >> zpooliostat1.out & 61 | vmstat -Td 1 >> vmstat.out & 62 | prstat -dd 1 >> prstat.out & 63 | mpstat -Td 1 >> mpstat.out & 64 | iostat -Td -xn 1 86400 >> iostat.out & 65 | echo ::taskq | mdb -k >> taskq.out 66 | while true; do date >> arc.out; echo ::arc | mdb -k >> arc.out; sleep 60; done & 67 | while true; do date >> hotkernel.out; ./hotkernel.priv >> hotkernel.out; sleep 570; done & 68 | sleep 5 69 | 70 | # 71 | # Finish 72 | # 73 | 74 | echo "The logging is now set up. It will run indefinitely until the system is rebooted." 75 | echo "Please collect logs from the /perflogs/ folder in the root of the appliance." 
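#
# A minimal follow-up sketch (an assumption, not part of the collection above):
# once the issue has been reproduced, the outputs can be bundled into a single
# archive for upload, for example:
#
#   tar cf /perflogs/perflogs-`hostname`.tar /perflogs/*.out && gzip /perflogs/perflogs-`hostname`.tar
#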
76 | 77 | -------------------------------------------------------------------------------- /kmem_oversize.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | allocb_oversize:entry 5 | { 6 | @[stack()]=count(); 7 | @s=sum(arg0); 8 | } 9 | 10 | tick-10sec 11 | { 12 | printf("%Y\n",walltimestamp); 13 | printa(@); 14 | printa(@s); 15 | trunc(@); 16 | trunc(@s); 17 | } 18 | -------------------------------------------------------------------------------- /kmem_reap_100ms.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option destructive 3 | 4 | fbt::arc_kmem_reap_now:entry 5 | { 6 | self->start[probefunc] = timestamp; 7 | self->strategy = args[0]; 8 | self->in_kmem = 1; 9 | } 10 | 11 | fbt::arc_adjust:entry, 12 | fbt::arc_shrink:entry, 13 | fbt::arc_do_user_evicts:entry, 14 | fbt::dnlc_reduce_cache:entry, 15 | fbt::kmem_reap:entry 16 | /self->in_kmem/ 17 | { 18 | self->start[probefunc] = timestamp; 19 | } 20 | 21 | kmem_depot_ws_reap:entry 22 | { 23 | self->i = 1; 24 | self->start[probefunc] = timestamp; 25 | self->kct = args[0]; 26 | self->magcount = 0; 27 | self->slabcount = 0; 28 | } 29 | 30 | kmem_magazine_destroy:entry 31 | /self->i/ 32 | { 33 | self->magcount += 1; 34 | } 35 | 36 | kmem_slab_free:entry 37 | /self->i/ 38 | { 39 | self->slabcount += 1; 40 | } 41 | 42 | fbt::arc_adjust:return, 43 | fbt::arc_shrink:return, 44 | fbt::arc_do_user_evicts:return, 45 | fbt::dnlc_reduce_cache:return, 46 | fbt::kmem_reap:return 47 | /self->start[probefunc] && self->in_kmem && ((self->end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 48 | { 49 | printf("%Y %d ms", walltimestamp, 50 | (timestamp - self->start[probefunc]) / 1000000); 51 | self->start[probefunc] = NULL; 52 | } 53 | 54 | fbt::arc_adjust:return, 55 | fbt::arc_shrink:return, 56 | fbt::arc_do_user_evicts:return, 57 | fbt::dnlc_reduce_cache:return, 58 | fbt::kmem_reap:return 59 | /self->start[probefunc] && self->in_kmem && ((self->end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 60 | { 61 | self->start[probefunc] = NULL; 62 | } 63 | 64 | 65 | kmem_depot_ws_reap:return 66 | /self->i && ((self->ts_end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 67 | { 68 | self->i = NULL; 69 | printf("%Y %s %d ms %d mags %d slabs", walltimestamp, self->kct->cache_name, (self->ts_end[probefunc])/1000000, self->magcount, self->slabcount); 70 | self->start[probefunc] = NULL; 71 | 72 | } 73 | 74 | kmem_depot_ws_reap:return 75 | /self->i && ((self->ts_end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 76 | { 77 | self->i = NULL; 78 | self->start[probefunc] = NULL; 79 | } 80 | 81 | 82 | fbt::arc_kmem_reap_now:return 83 | /self->start[probefunc] && ((self->end[probefunc] = timestamp - self->start[probefunc]) > 100000000)/ 84 | { 85 | printf("%Y %d ms, strategy %d", walltimestamp, 86 | (timestamp - self->start[probefunc]) / 1000000, self->strategy); 87 | self->start[probefunc] = NULL; 88 | self->in_kmem = NULL; 89 | } 90 | 91 | fbt::arc_kmem_reap_now:return 92 | /self->start[probefunc] && ((self->end[probefunc] = timestamp - self->start[probefunc]) < 100000000)/ 93 | { 94 | self->start[probefunc] = NULL; 95 | self->in_kmem = NULL; 96 | } 97 | -------------------------------------------------------------------------------- /modparams: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 
| 3 | # Description: Print all global symbols of relevant types in a kernel module 4 | # Usage: ./modparams 5 | # Example: ./modparams zfs 6 | # If you run the script without any arguments, it'll grab all modules. 7 | 8 | # Copyright 2014 Kirill.Davydychev@nexenta.com, Paul.Nienaber@nexenta.com 9 | 10 | 11 | mdb -k <<<"::nm -t objt -g -n -x -f ctype,name $1" | tail -n +1 \ 12 | | perl -pe 's/\|\n/\|/' \ 13 | | perl -ne '%a=("uint64_t"=>"/E", 14 | "int64_t"=>"/e", 15 | "int"=>"/D", 16 | "offset_t"=>"/U", 17 | "ssize_t"=>"/U", 18 | "hrtime_t"=>"/U", 19 | "uint_t"=>"/U", 20 | "uint32_t"=>"/U", 21 | "boolean_t"=>"/U", 22 | "uint8_t"=>"/V"); 23 | chomp;s/ +\|/\|/;@b=split /\|/; 24 | defined $a{$b[0]} && print "$b[1]$a{$b[0]}\n"' \ 25 | | mdb -k | tr -s ' ' | egrep -v ':$' 26 | -------------------------------------------------------------------------------- /nfsio.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | dtrace:::BEGIN 5 | { 6 | trace("Tracing... Hit Ctrl-C to end.\n"); 7 | } 8 | 9 | nfsv3:::op-read-done 10 | { 11 | @readbytes[args[1]->noi_curpath] = sum(args[2]->res_u.ok.data.data_len); 12 | @readiops[args[1]->noi_curpath] = count(); 13 | @readbs[args[1]->noi_curpath] = avg(args[2]->res_u.ok.data.data_len); 14 | } 15 | 16 | nfsv4:::op-read-done 17 | { 18 | @readbytes[args[1]->noi_curpath] = sum(args[2]->data_len); 19 | @readiops[args[1]->noi_curpath] = count(); 20 | @readbs[args[1]->noi_curpath] = avg(args[2]->data_len); 21 | } 22 | 23 | nfsv3:::op-write-done 24 | { 25 | @writebytes[args[1]->noi_curpath] = sum(args[2]->res_u.ok.count); 26 | @writeiops[args[1]->noi_curpath] = count(); 27 | @writebs[args[1]->noi_curpath] = avg(args[2]->res_u.ok.count); 28 | } 29 | 30 | nfsv4:::op-write-done 31 | { 32 | @writebytes[args[1]->noi_curpath] = sum(args[2]->count); 33 | @writeiops[args[1]->noi_curpath] = count(); 34 | @writebs[args[1]->noi_curpath] = avg(args[2]->count); 35 | } 36 | 37 | dtrace:::END 38 | { 39 | printf("\n%12s %12s %12s %12s %12s %12s %s\n", "Rbytes", "Rops", "Rbs", "Wbytes", "WOps", "Wbs", "Pathname"); 40 | printa("%@12d %@12d %@12d %@12d %@12d %@12d %s\n", @readbytes, @readiops, @readbs, @writebytes, @writeiops, @writebs); 41 | } 42 | -------------------------------------------------------------------------------- /nfsio30sec.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | dtrace:::BEGIN 5 | { 6 | trace("Tracing... 
Hit Ctrl-C to end.\n"); 7 | } 8 | 9 | nfsv3:::op-read-done 10 | { 11 | @readbytes[args[1]->noi_curpath] = sum(args[2]->res_u.ok.data.data_len); 12 | @readiops[args[1]->noi_curpath] = count(); 13 | @readbs[args[1]->noi_curpath] = avg(args[2]->res_u.ok.data.data_len); 14 | } 15 | 16 | nfsv4:::op-read-done 17 | { 18 | @readbytes[args[1]->noi_curpath] = sum(args[2]->data_len); 19 | @readiops[args[1]->noi_curpath] = count(); 20 | @readbs[args[1]->noi_curpath] = avg(args[2]->data_len); 21 | } 22 | 23 | nfsv3:::op-write-done 24 | { 25 | @writebytes[args[1]->noi_curpath] = sum(args[2]->res_u.ok.count); 26 | @writeiops[args[1]->noi_curpath] = count(); 27 | @writebs[args[1]->noi_curpath] = avg(args[2]->res_u.ok.count); 28 | } 29 | 30 | nfsv4:::op-write-done 31 | { 32 | @writebytes[args[1]->noi_curpath] = sum(args[2]->count); 33 | @writeiops[args[1]->noi_curpath] = count(); 34 | @writebs[args[1]->noi_curpath] = avg(args[2]->count); 35 | } 36 | 37 | tick-30sec 38 | { 39 | printf("\n%12s %12s %12s %12s %12s %12s %s\n", "Rbytes", "Rops", "Rbs", "Wbytes", "WOps", "Wbs", "Pathname"); 40 | printa("%@12d %@12d %@12d %@12d %@12d %@12d %s\n", @readbytes, @readiops, @readbs, @writebytes, @writeiops, @writebs); 41 | exit(0); 42 | } 43 | 44 | dtrace:::END 45 | { 46 | } 47 | -------------------------------------------------------------------------------- /nfsrpclat.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | # pragma D option quiet 3 | 4 | 5 | svc_cots_krecv:entry 6 | { 7 | self->xid = args[0]->xp_xid; 8 | self->ts = timestamp; 9 | } 10 | 11 | 12 | svc_cots_ksend:entry 13 | /self->xid && (timestamp-self->ts)/1000000 > 100/ 14 | { 15 | self->rtaddr = ((struct sockaddr_in *)args[0]->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr; 16 | printf("%Y XID: %d Client: %i.%i.%i.%i lat: %d ms\n", walltimestamp, 17 | self->xid, 18 | self->rtaddr&0xff, self->rtaddr>>8&0xff,self->rtaddr>>16&0xff,self->rtaddr>>24, 19 | (timestamp-self->ts)/1000000); 20 | } 21 | -------------------------------------------------------------------------------- /nfsutil.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script measures the percentage of NFS thread pool currently utilized 5 | * (maximum being NFSD_SERVERS), along with Max pending (queued) NFS requests that have not 6 | * yet been assigned a thread. It is useful in troubleshooting bottlenecks in the Solaris 7 | * NFS server, and determining the correct NFSD_SERVERS value for high-load systems. */ 8 | /* Author: Kirill.Davydychev@Nexenta.com */ 9 | /* Copyright 2013, Nexenta Systems, Inc. All rights reserved. 
*/ 10 | /* Version: 0.1 */ 11 | 12 | svc_xprt_qput:entry 13 | { 14 | @pending_reqs = max(args[0]->p_reqs); 15 | @act_threads = max(args[0]->p_threads - args[0]->p_asleep); 16 | @pool_pct_util = max(100 * (args[0]->p_threads - args[0]->p_asleep) / args[0]->p_maxthreads); 17 | } 18 | 19 | tick-5sec 20 | { 21 | printf("%Y", walltimestamp); 22 | printa(" Max Pending NFS requests: %@d; Max Active threads: %@d; Thread pool utilized percentage: %@d\n", @pending_reqs, @act_threads, @pool_pct_util); 23 | trunc(@pending_reqs); 24 | trunc(@act_threads); 25 | trunc(@pool_pct_util); 26 | } 27 | -------------------------------------------------------------------------------- /parse_zfsio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import pprint 4 | from datetime import datetime,date,time 5 | 6 | def main(): 7 | with open(sys.argv[1]) as f: 8 | dt="" 9 | while True: 10 | line = f.readline() 11 | if dt != False: 12 | prev_dt = dt 13 | print dt 14 | dt = is_timestamp(line) 15 | if dt == False: 16 | if not (line.split()[0].startswith("------") or line.split()[0].startswith("Dataset")): 17 | dp_convert_and_write(line,prev_dt) 18 | 19 | def is_timestamp(t): 20 | try: 21 | return datetime.strptime(" ".join(t.split()[0:4]),"%Y %b %d %H:%M:%S") 22 | except ValueError: 23 | return False 24 | 25 | def dp_convert_and_write(t,t_date): 26 | t = t.split() 27 | f = open (t[0].replace("/","_")+".zfsio.csv", "a") 28 | f.write(datetime.strftime(t_date,'%Y-%m-%d-%T')+','+t[1]+','+t[2]+','+t[3]+','+t[4]+','+t[5]+','+t[6]+'\n') 29 | f.close() 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /resconflict.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: Print kernel and userland stacks for every non-zero return from TUR */ 5 | /* Author: Kirill.Davydychev@Nexenta.com */ 6 | /* Copyright 2014, Nexenta Systems, Inc. All rights reserved. */ 7 | /* Version: 0.1 */ 8 | 9 | sd_ready_and_valid:entry 10 | { 11 | self->i = 1; 12 | } 13 | 14 | sd_send_scsi_TEST_UNIT_READY:return 15 | /self->i && arg1 != 0/ 16 | { 17 | printf("TUR returned code %d \n",arg1); 18 | stack(); 19 | ustack(); 20 | } 21 | 22 | sd_ready_and_valid:return 23 | /self->i/ 24 | { 25 | self->i = 0; 26 | } 27 | -------------------------------------------------------------------------------- /resilver.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show progress of any active resilvers 5 | * happening on the system. It has been tested and sanity-checked on mirror 6 | * and raidz1 vdevs, but since we are tracing deep within zio, should hold 7 | * true for all other types. Note: It displays I/O in terms of read operations 8 | * on the drives that we're resilvering *from*, in order to reflect read 9 | * inflation for raidzN vdevs, where in order to reconstruct a block we have 10 | * to read from all other devices in the same vdev. 11 | */ 12 | /* Author: Kirill.Davydychev@Nexenta.com */ 13 | /* Copyright 2012, Nexenta Systems, Inc. All rights reserved. */ 14 | /* Version: 0.1b */ 15 | 16 | dtrace:::BEGIN 17 | { 18 | printf("Tracing with 10 second interval...\n"); 19 | printf("If there is no resilver happening, only timestamps will appear. 
\n"); 20 | } 21 | 22 | zio_read:entry 23 | /args[7] == 10/ 24 | /* 25 | Priority 10 reads indicate ZIO_PRIORITY_RESILVER 26 | This might change in the future, but for now it 27 | looks like a safe way to detect only resilver IO. 28 | */ 29 | { 30 | @ops = count(); 31 | @bs = quantize(args[4]); 32 | @tp = sum(args[4]); 33 | } 34 | 35 | dsl_scan_scrub_cb:entry 36 | /* 37 | The only reason we're tracing here is 38 | to determine throttling factor. 39 | */ 40 | { 41 | self->in_scrub_cb = 1; 42 | } 43 | 44 | dsl_scan_scrub_cb:return 45 | { 46 | self->in_scrub_cb = NULL; 47 | } 48 | 49 | 50 | fbt:genunix:delay:entry 51 | / self->in_scrub_cb / 52 | /* 53 | Argh. What is a tick - 1ms or 10ms? 54 | Based on observation, appears to be 10ms. 55 | */ 56 | { 57 | @delay_times = count(); 58 | @delay_ticks = sum(args[0]); 59 | } 60 | 61 | tick-10sec 62 | { 63 | normalize(@tp, 10*1024); 64 | normalize(@bs, 10); 65 | normalize(@ops, 10); 66 | printf("\n%Y", walltimestamp); 67 | printa("\n\nResilver IOPs: %@d ",@ops); 68 | printa("\nResilver Blocksize: %@a",@bs); 69 | printa("\nResilver Throughput: %@d KB/sec",@tp); 70 | printa("\nThrottled %@d times by %@d ticks in last interval", @delay_times, @delay_ticks); 71 | trunc(@ops); trunc(@bs); trunc(@tp); trunc(@delay_times); trunc(@delay_ticks); 72 | } 73 | -------------------------------------------------------------------------------- /resilver_v4.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show progress of any active resilvers 5 | * happening on the system. It has been tested and sanity-checked on mirror 6 | * and raidz1 vdevs, but since we are tracing deep within zio, should hold 7 | * true for all other types. Note: It displays I/O in terms of read operations 8 | * on the drives that we're resilvering *from*, in order to reflect read 9 | * inflation for raidzN vdevs, where in order to reconstruct a block we have 10 | * to read from all other devices in the same vdev. 11 | */ 12 | /* Author: Kirill.Davydychev@Nexenta.com */ 13 | /* Copyright 2012-2015, Nexenta Systems, Inc. All rights reserved. */ 14 | /* Version: 0.2b-NS4 */ 15 | 16 | dtrace:::BEGIN 17 | { 18 | printf("Tracing with 10 second interval...\n"); 19 | printf("If there is no resilver happening, only timestamps will appear. \n"); 20 | } 21 | 22 | zio_read:entry 23 | /args[7] == 4/ 24 | /* 25 | Priority 4 reads indicate ZIO_PRIORITY_RESILVER/SCRUB 26 | This might change in the future, but for now it 27 | looks like a safe way to detect only resilver IO. 28 | */ 29 | { 30 | @ops = count(); 31 | @bs = quantize(args[4]); 32 | @tp = sum(args[4]); 33 | } 34 | 35 | dsl_scan_scrub_cb:entry 36 | / args[0]->dp_scan->scn_phys.scn_func == 2 / 37 | /* Scan function 2 is POOL_SCAN_RESILVER. 38 | The only reason we're tracing here is 39 | to determine throttling factor. 40 | */ 41 | { 42 | self->in_scrub_cb = 1; 43 | } 44 | 45 | dsl_scan_scrub_cb:return 46 | / self->in_scrub_cb / 47 | { 48 | self->in_scrub_cb = NULL; 49 | } 50 | 51 | 52 | fbt:genunix:delay:entry 53 | / self->in_scrub_cb / 54 | /* 55 | Argh. What is a tick - 1ms or 10ms? 56 | Based on observation, appears to be 10ms. 
57 | */ 58 | { 59 | @delay_times = count(); 60 | @delay_ticks = sum(args[0]); 61 | } 62 | 63 | tick-10sec 64 | { 65 | normalize(@tp, 10*1024); 66 | normalize(@bs, 10); 67 | normalize(@ops, 10); 68 | printf("\n%Y", walltimestamp); 69 | printa("\n\nResilver IOPs: %@d ",@ops); 70 | printa("\nResilver Blocksize: %@a",@bs); 71 | printa("\nResilver Throughput: %@d KB/sec",@tp); 72 | printa("\nThrottled %@d times by %@d ticks in last interval", @delay_times, @delay_ticks); 73 | trunc(@ops); trunc(@bs); trunc(@tp); trunc(@delay_times); trunc(@delay_ticks); 74 | } 75 | -------------------------------------------------------------------------------- /rpc.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | 3 | 4 | pid$target:a.out:check_rmtcalls:entry 5 | { 6 | self->i1 = 1; 7 | self->ts1 = timestamp; 8 | } 9 | 10 | pid$target:*:svc_getreq_poll:entry 11 | { 12 | self->i2 = 1; 13 | self->ts2 = timestamp; 14 | } 15 | 16 | pid$target:*:svc_sendreply:entry 17 | { 18 | self->i3 = 1; 19 | self->ts3 = timestamp; 20 | } 21 | 22 | svc_sendreply:entry 23 | { 24 | self->i4 = 1; 25 | self->ts4 = timestamp; 26 | } 27 | 28 | 29 | pid$target:a.out:check_rmtcalls:return 30 | /self->i1/ 31 | { 32 | @ts_rmt["check_rmtcalls"]=quantize(timestamp - self->ts1); 33 | self->i1 = 0; 34 | self->ts1 = 0; 35 | 36 | } 37 | 38 | pid$target:*:svc_getreq_poll:return 39 | /self->i2/ 40 | { 41 | @ts_sgp["svc_getreq_poll"]=quantize(timestamp - self->ts2); 42 | self->i2 = 0; 43 | self->ts2 = 0; 44 | 45 | } 46 | 47 | pid$target:*:svc_sendreply:return 48 | /self->i3/ 49 | { 50 | @ts_sr["pid svc_sendreply"]=quantize(timestamp - self->ts3); 51 | self->i3 = 0; 52 | self->ts3 = 0; 53 | 54 | } 55 | 56 | svc_sendreply:return 57 | /self->i4/ 58 | { 59 | @ts_sr4["svc_sendreply"]=quantize(timestamp - self->ts4); 60 | self->i4 = 0; 61 | self->ts4 = 0; 62 | 63 | } 64 | 65 | tick-1sec 66 | { 67 | printf("%Y\n",walltimestamp); 68 | } 69 | 70 | tick-30sec 71 | { 72 | printa(@ts_sr, @ts_sr4, @ts_sgp, @ts_rmt); 73 | trunc(@ts_sr);trunc(@ts_sr4);trunc(@ts_sgp); trunc(@ts_rmt); 74 | } 75 | -------------------------------------------------------------------------------- /rpcbind.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | 3 | 4 | pid$target:a.out:pmapproc_getport:entry 5 | { 6 | self->i = 1; 7 | self->depth = 0; 8 | self->ts = timestamp; 9 | @k[stack()]=count(); 10 | @u[ustack()]=count(); 11 | @in_getport = sum(1); 12 | } 13 | 14 | pid$target:a.out:pmapproc_getport:return 15 | /self->i/ 16 | { 17 | @ts=quantize(timestamp - self->ts); 18 | self->i = 0; 19 | self->ts = 0; 20 | @in_getport = sum(-1); 21 | 22 | } 23 | 24 | tick-1sec 25 | { 26 | printa("%@d in getport",@in_getport); 27 | } 28 | 29 | tick-30sec 30 | { 31 | printa(@ts);printa(@k);printa(@u); 32 | trunc(@ts);trunc(@k);trunc(@u); 33 | } 34 | -------------------------------------------------------------------------------- /scrub.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show progress of any active scrubs 5 | * happening on the system. It has been tested and sanity-checked on mirror 6 | * and raidz1 vdevs, but since we are tracing deep within zio, should hold 7 | * true for all other types. It is functionally equivalent to resilver.d, just 8 | * traces scrub. 
9 | */ 10 | /* Author: Kirill.Davydychev@Nexenta.com */ 11 | /* Copyright 2013, Nexenta Systems, Inc. All rights reserved. */ 12 | /* Version: 0.1b */ 13 | 14 | dtrace:::BEGIN 15 | { 16 | printf("Tracing with 10 second interval...\n"); 17 | printf("If there is no scrub happening, only timestamps will appear. \n"); 18 | } 19 | 20 | zio_read:entry 21 | /args[7] == 20/ 22 | /* 23 | Priority 20 reads indicate ZIO_PRIORITY_SCRUB 24 | This might change in the future, but for now it 25 | looks like a safe way to detect only scrub IO. 26 | */ 27 | { 28 | @ops = count(); 29 | @bs = quantize(args[4]); 30 | @tp = sum(args[4]); 31 | } 32 | 33 | dsl_scan_scrub_cb:entry 34 | /* 35 | The only reason we're tracing here is 36 | to determine throttling factor. 37 | */ 38 | { 39 | self->in_scrub_cb = 1; 40 | } 41 | 42 | dsl_scan_scrub_cb:return 43 | { 44 | self->in_scrub_cb = NULL; 45 | } 46 | 47 | 48 | fbt:genunix:delay:entry 49 | / self->in_scrub_cb / 50 | /* 51 | Argh. What is a tick - 1ms or 10ms? 52 | Based on observation, appears to be 10ms. 53 | */ 54 | { 55 | @delay_times = count(); 56 | @delay_ticks = sum(args[0]); 57 | } 58 | 59 | tick-10sec 60 | { 61 | normalize(@tp, 10*1024); 62 | normalize(@bs, 10); 63 | normalize(@ops, 10); 64 | printf("\n%Y", walltimestamp); 65 | printa("\n\nScrub IOPs: %@d ",@ops); 66 | printa("\nScrub Blocksize: %@a",@bs); 67 | printa("\nScrub Throughput: %@d KB/sec",@tp); 68 | printa("\nThrottled %@d times by %@d ticks in last interval", @delay_times, @delay_ticks); 69 | trunc(@ops); trunc(@bs); trunc(@tp); trunc(@delay_times); trunc(@delay_ticks); 70 | } 71 | -------------------------------------------------------------------------------- /scrub_v4.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show progress of any active scrubs 5 | * happening on the system. It has been tested and sanity-checked on mirror 6 | * and raidz1 vdevs, but since we are tracing deep within zio, should hold 7 | * true for all other types. It is functionally equivalent to resilver.d, just 8 | * traces scrub. 9 | */ 10 | /* Author: Kirill.Davydychev@Nexenta.com */ 11 | /* Copyright 2013-2015, Nexenta Systems, Inc. All rights reserved. */ 12 | /* Version: 0.2b-NS4 */ 13 | 14 | dtrace:::BEGIN 15 | { 16 | printf("Tracing with 10 second interval...\n"); 17 | printf("If there is no scrub happening, only timestamps will appear. \n"); 18 | } 19 | 20 | zio_read:entry 21 | /args[7] == 4/ 22 | /* 23 | Priority 4 reads indicate ZIO_PRIORITY_SCRUB/RESILVER 24 | This might change in the future, but for now it 25 | looks like a safe way to detect only scrub IO. 26 | */ 27 | { 28 | @ops = count(); 29 | @bs = quantize(args[4]); 30 | @tp = sum(args[4]); 31 | } 32 | 33 | dsl_scan_scrub_cb:entry 34 | / args[0]->dp_scan->scn_phys.scn_func == 1 / 35 | /* Scan function 1 is POOL_SCAN_SCRUB. 36 | The only reason we're tracing here is 37 | to determine throttling factor. 38 | */ 39 | { 40 | self->in_scrub_cb = 1; 41 | } 42 | 43 | dsl_scan_scrub_cb:return 44 | / self->in_scrub_cb / 45 | { 46 | self->in_scrub_cb = NULL; 47 | } 48 | 49 | 50 | fbt:genunix:delay:entry 51 | / self->in_scrub_cb / 52 | /* 53 | Argh. What is a tick - 1ms or 10ms? 54 | Based on observation, appears to be 10ms. 
55 | */ 56 | { 57 | @delay_times = count(); 58 | @delay_ticks = sum(args[0]); 59 | } 60 | 61 | tick-10sec 62 | { 63 | normalize(@tp, 10*1024); 64 | normalize(@bs, 10); 65 | normalize(@ops, 10); 66 | printf("\n%Y", walltimestamp); 67 | printa("\n\nScrub IOPs: %@d ",@ops); 68 | printa("\nScrub Blocksize: %@a",@bs); 69 | printa("\nScrub Throughput: %@d KB/sec",@tp); 70 | printa("\nThrottled %@d times by %@d ticks in last interval", @delay_times, @delay_ticks); 71 | trunc(@ops); trunc(@bs); trunc(@tp); trunc(@delay_times); trunc(@delay_ticks); 72 | } 73 | -------------------------------------------------------------------------------- /smb_session.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | smb_session_create:entry 5 | { 6 | self->i = 1; 7 | } 8 | 9 | smb_session_create:return 10 | /self->i && args[1]==NULL/ 11 | { 12 | printf("Session_create returned NULL\n"); 13 | self->i = 0; 14 | } 15 | 16 | smb_server_create_session:entry 17 | { 18 | self->i2 = 1; 19 | } 20 | 21 | taskq_dispatch:entry 22 | /self->i2/ 23 | { 24 | printf("%s taskq dispatched\n",stringof(args[0]->tq_name)); 25 | } 26 | 27 | taskq_dispatch:return 28 | /self->i2 && args[1] == 0/ 29 | { 30 | printf("taskq_dispatch returned 0\n"); 31 | } 32 | 33 | taskq_ent_alloc:return 34 | /self->i2 && args[1]!=NULL/ 35 | { 36 | printf("taskq_ent_alloc returned a value\n"); 37 | } 38 | 39 | taskq_ent_alloc:return 40 | /self->i2 && args[1] == NULL/ 41 | { 42 | printf("taskq_ent_alloc returned NULL\n"); 43 | } 44 | 45 | taskq_bucket_dispatch:return 46 | /self->i2 && args[1]!=NULL/ 47 | { 48 | printf("taskq_bucket_dispatch returned a value\n"); 49 | } 50 | 51 | taskq_bucket_dispatch:return 52 | /self->i2 && args[1] == NULL/ 53 | { 54 | printf("taskq_bucket_dispatch returned NULL\n"); 55 | } 56 | 57 | taskq_bucket_dispatch:entry 58 | /self->i2 && args[0]->tqbucket_nfree == 0/ 59 | { 60 | printf("tqbucket_nfree == 0; tqbucket_nalloc =%d \n",args[0]->tqbucket_nalloc); 61 | } 62 | 63 | taskq_bucket_dispatch:entry 64 | /self->i2 && (args[0]->tqbucket_flags & 0x02)/ 65 | { 66 | printf("tqbucket_flags & TQBUCKET_SUSPEND\n"); 67 | } 68 | 69 | smb_server_create_session:return 70 | /self->i2/ 71 | { 72 | self->i2 = 0; 73 | } 74 | 75 | smb_session_create:return 76 | /self->i && args[1]!=NULL/ 77 | { 78 | printf("Session_create returned with key %d\n",args[1]->sesskey); 79 | self->i = 0; 80 | } 81 | -------------------------------------------------------------------------------- /smb_trace.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | sdt:smbsrv::-smb_op-NtCreateX-start 5 | { 6 | self->sr = (struct smb_request *)arg0; 7 | self->op = (struct open_param *)arg1; 8 | printf("%Y %s %s %s\n", walltimestamp, stringof(self->sr->uid_user->u_name), stringof(self->sr->tid_tree->t_resource), stringof(self->op->fqi.fq_path.pn_path)); 9 | } 10 | -------------------------------------------------------------------------------- /svc_flowcontrol.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace 2 | #pragma D option quiet 3 | 4 | /* Description: Track amount of times NFS QoS has throttled clients. */ 5 | /* USAGE: dtrace -p `pgrep rpcbind` -s svc_flowcontrol.d */ 6 | /* Author: Kirill.Davydychev@Nexenta.com */ 7 | /* Copyright 2015, Nexenta Systems, Inc. All rights reserved. 
*/ 8 | /* Version: 0.1 */ 9 | 10 | dtrace:::BEGIN 11 | /`svc_flowcontrol_disable == 1/ 12 | { 13 | trace("NFS flow control is disabled on this system, no point in tracing. Exiting."); 14 | exit(0); 15 | } 16 | 17 | dtrace:::BEGIN 18 | { 19 | @thr=count(); 20 | clear(@thr); 21 | } 22 | 23 | fbt:rpcmod:svc_flowcontrol:entry 24 | { 25 | self->in = 1; 26 | self->xprt = args[0]; 27 | } 28 | 29 | fbt:rpcmod:svc_flowcontrol:return 30 | /self->in && self->xprt->xp_full/ 31 | { 32 | self->in = 0; 33 | @thr = count(); 34 | self->xprt = NULL; 35 | } 36 | 37 | fbt:rpcmod:svc_flowcontrol:return 38 | /self->in && !self->xprt->xp_full/ 39 | { 40 | self->in = 0; 41 | self->xprt = NULL; 42 | } 43 | 44 | tick-5sec 45 | { 46 | printf("%Y ",walltimestamp); 47 | printa("NFS QoS Throttles: %@d\n",@thr);clear(@thr); 48 | } 49 | -------------------------------------------------------------------------------- /tcp_input.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | tcp_input_listener:entry 5 | { 6 | printf("%Y cnt %d max %d\n",walltimestamp, 7 | ((conn_t *)args[0])->conn_proto_priv.cp_tcp->tcp_conn_req_cnt_q, 8 | ((conn_t *)args[0])->conn_proto_priv.cp_tcp->tcp_conn_req_max); 9 | } 10 | -------------------------------------------------------------------------------- /trace_destroy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # Copyright 2016, Nexenta Systems, Inc. 4 | 5 | # 6 | # Watch in-progress synchronous ZFS destroys that were active at the time of starting the script. 7 | # 8 | 9 | # Determine which function to trace depending on whether we are on 3.x or 4.x 10 | 11 | { 12 | if [[ $(echo old_synchronous_dataset_destroy::whatis|mdb -k) == *"old_synchronous_dataset_destroy"* ]] 13 | then 14 | func="old_synchronous_dataset_destroy" 15 | fi 16 | 17 | if [[ $(echo dsl_dataset_destroy::whatis|mdb -k) == *"dsl_dataset_destroy"* ]] 18 | then 19 | func="dsl_dataset_destroy" 20 | fi 21 | } &> /dev/null 22 | 23 | echo Tracing $func"()... Ctrl-C to exit." 
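# update_threads rescans kernel thread stacks with mdb -k and pulls out the
# dsl_dataset_t pointer for every thread currently inside $func; the loop below
# then prints that dataset's dsl_dir dd_used_bytes every 5 seconds so the
# destroy's progress can be watched, refreshing the thread list after each
# 12-sample pass.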
24 | 25 | update_threads () { threads=$(echo "::walk thread|::findstack -v"|mdb -k|grep $func| cut -d "(" -f 2 | cut -d "," -f 1); } 26 | 27 | update_threads 28 | while true 29 | do 30 | for thread in $threads 31 | do 32 | for i in {1..12} 33 | do 34 | date 35 | echo "$thread::print -t dsl_dataset_t ds_dir | ::print -t struct dsl_dir dd_phys | ::print -t -d dsl_dir_phys_t dd_used_bytes" | mdb -k 36 | sleep 5 37 | done 38 | update_threads 39 | done 40 | sleep 5 41 | done 42 | 43 | -------------------------------------------------------------------------------- /txg_delay.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | 3 | txg_delay:entry 4 | { 5 | @[probefunc]=count(); 6 | } 7 | 8 | dsl_pool_tempreserve_space:return 9 | /args[1]==91/ 10 | { 11 | @r[probefunc]=count(); 12 | } 13 | 14 | tick-10sec 15 | { 16 | printa("Writes delayed %@d times in last interval\n",@); 17 | printa("Writes throttled %@d times in last interval", @r); 18 | trunc(@); 19 | trunc(@r); 20 | } 21 | -------------------------------------------------------------------------------- /txg_monitor.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | inline int MIN_MS = 1; 5 | 6 | dtrace:::BEGIN 7 | { 8 | printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); 9 | @bytes = sum(0); 10 | } 11 | 12 | fbt::spa_sync:entry 13 | /args[0]->spa_name == $$1 && !self->start/ 14 | { 15 | in_spa_sync = 1; 16 | self->start = timestamp; 17 | self->spa = args[0]; 18 | } 19 | 20 | txg_delay:entry 21 | /args[0]->dp_spa->spa_name == $$1/ 22 | { 23 | @delays=count(); 24 | } 25 | 26 | dsl_pool_tempreserve_space:entry 27 | /args[0]->dp_spa->spa_name == $$1/ 28 | { 29 | @wrl_min = min(args[0]->dp_write_limit); 30 | @wrl_max = max(args[0]->dp_write_limit); 31 | @reserved_max = max(args[0]->dp_space_towrite[args[2]->tx_txg & 3] + (args[0]->dp_tempreserved[args[2]->tx_txg & 3] / 2)); 32 | } 33 | 34 | dsl_pool_tempreserve_space:return 35 | /args[1]==91/ 36 | { 37 | @throttles=count(); 38 | } 39 | 40 | io:::start 41 | /in_spa_sync/ 42 | { 43 | @io = count(); 44 | @bytes = sum(args[0]->b_bcount); 45 | } 46 | 47 | fbt::spa_sync:return 48 | /self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ 49 | { 50 | normalize(@bytes, 1048576); 51 | normalize(@wrl_min, 1048576); 52 | normalize(@wrl_max, 1048576); 53 | normalize(@reserved_max, 1048576); 54 | printf("%-20Y %-10s %6d ms, ", walltimestamp, 55 | stringof(self->spa->spa_name), this->ms); 56 | printa("%@d MB %@d I/O %@d delays %@d throttles; Write Limit min: %@d MB; max: %@d MB; reserved_max: %@d MB\n", 57 | @bytes, @io, @delays, @throttles, @wrl_min, @wrl_max, @reserved_max); 58 | } 59 | 60 | fbt::spa_sync:return 61 | /self->start/ 62 | { 63 | self->start = 0; self->spa = 0; in_spa_sync = 0; 64 | clear(@bytes); clear(@io); clear(@delays); clear(@throttles); trunc(@wrl_min); trunc(@wrl_max); trunc(@reserved_max); 65 | } 66 | -------------------------------------------------------------------------------- /txg_monitor.v3.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script measures the ZFS transaction group commit time, and tracks 5 | * several variables that affect it for each individual zpool. 
One of the key things it 6 | * also looks at is the amount of throttling and delaying that happens in each individual 7 | * spa_sync(). 8 | * Some important concepts: 9 | * 1. Delaying (dly) means injecting a one-tick delay into the TXG coalescing 10 | * process, effectively slowing down the rate at which the transaction group 11 | * fills. Throttling (thr), on the other hand means closing this TXG entirely, sending 12 | * it off to quiesce and then flush to disk, and pushing all new incoming data 13 | * into the next TXG that is now "filling". 14 | * 2. The feedback loop which determines when to stop filling the current TXG and 15 | * start a new one depends on a few kernel variables. The cutoff trigger (size) 16 | * is calculated from dp_tempreserved and dp_space_towrite, which this script 17 | * combines into a value of reserved_max (res_max), duplicating the calculation 18 | * that happens in the kernel. When res_max reaches 7/8 of current dp_write_limit, 19 | * system starts delaying writes. When res_max reaches current dp_write_limit, 20 | * system attempts a throttle, which has higher impact on performance. It is not 21 | * normal for a system to be constantly throttling/delaying, but if this happens 22 | * from time to time it's okay - the feedback loop likely set dp_write_limit too 23 | * low because there was no need for it to be high, and when write pattern changes, 24 | * the adjustment happens due to dp_throughput rising. 25 | * 3. dp_write_limit is calculated as dp_throughput (dp_thr) multiplied by 26 | * zfs_txg_synctime_ms, with certain thresholds applied if necessary. NOTE: It 27 | * accounts for write inflation, so it does not actually represent the amount of 28 | * data that goes into any given TXG. The output of this script shows a spread of 29 | * minimum and maximum of dp_write_limit recorded during each TXG, as well as the 30 | * maximum of the reserve, and the current dp_throughput, which is calculated at 31 | * the end of each TXG commit. 32 | * 4. Some comments on other output values: 33 | * The X ms value at the beginning of each line is the length of the spa_sync() call 34 | * in milliseconds. As a general rule, we should strive for it to be less than 35 | * zfs_txg_synctime_ms, but that is not the only condition. When this number is 36 | * pathologically high, this might indicate either a hardware issue or a code 37 | * bottleneck; an example of such code bottleneck might be a metaslab allocator 38 | * issue when pool space utilization reaches 75%-80% (sometimes even earlier), 39 | * also known as free space fragmentation issue. Other causes of slowdowns may 40 | * include checksumming bottleneck on a system with dedup enabled, ongoing ZFS 41 | * operations such as a ZFS destroy, or an ongoing scrub/resilver, which by design 42 | * will borrow time from each TXG commit to do its business. 43 | * wMB and rMB is the amount of data written and read in MB's during the spa_sync() 44 | * call. They are the total data written by the system, not just for the specific 45 | * zpool. 46 | * wIops and rIops are the I/O operations that happened during spa_sync(), also global 47 | * unfortunately. They are already adjusted per second. 48 | * dly+thr are the delays and throttles. Those, normally 0+0, are for the individual 49 | * zpool. 50 | * dp_wrl, res_max and dp_thr are covered above. Also for the individual pool. */ 51 | 52 | /* Author: Kirill.Davydychev@Nexenta.com */ 53 | /* Copyright 2013, Nexenta Systems, Inc. All rights reserved. 
*/ 54 | /* Version: 3.0 */ 55 | /* To get the latest version of this script, 56 | * wget https://raw.github.com/kdavyd/dtrace/master/txg_monitor.v3.d --no-ch */ 57 | 58 | /* This script only works on NexentaStor 3.x and other illumos-based distributions that 59 | * do *not* have the illumos 3464 commit integrated. That commit completely refactors 60 | * the TXG subsystem, and will require a brand new script for similar functionality to 61 | * be exposed */ 62 | 63 | inline int MIN_MS = 1; 64 | 65 | dtrace:::BEGIN 66 | { 67 | printf("Tracing ZFS spa_sync() slower than %d ms...\n", MIN_MS); 68 | @readbytes=sum(0); 69 | @writebytes=sum(0); 70 | } 71 | 72 | fbt::spa_sync:entry 73 | /args[0]->spa_name == $$1 && !self->start/ 74 | { 75 | in_spa_sync = 1; 76 | self->start = timestamp; 77 | self->spa = args[0]; 78 | } 79 | 80 | txg_delay:entry 81 | /args[0]->dp_spa->spa_name == $$1/ 82 | { 83 | @delays=count(); 84 | } 85 | 86 | dsl_pool_tempreserve_space:entry 87 | /args[0]->dp_spa->spa_name == $$1/ 88 | { 89 | @wrl_min = min(args[0]->dp_write_limit); 90 | @wrl_max = max(args[0]->dp_write_limit); 91 | @dp_thr_max = max(args[0]->dp_throughput); 92 | @reserved_max = max(args[0]->dp_space_towrite[args[2]->tx_txg & 3] + (args[0]->dp_tempreserved[args[2]->tx_txg & 3] / 2)); 93 | } 94 | 95 | dsl_pool_tempreserve_space:return 96 | /args[1]==91/ 97 | { 98 | @throttles=count(); 99 | } 100 | 101 | io:::start 102 | /in_spa_sync && (args[0]->b_flags & 0x100)/ 103 | { 104 | @writeio = sum(1000); 105 | @writebytes = sum(args[0]->b_bcount); 106 | } 107 | 108 | io:::start 109 | /in_spa_sync && (args[0]->b_flags & 0x40)/ 110 | { 111 | @readio = sum(1000); 112 | @readbytes = sum(args[0]->b_bcount); 113 | } 114 | 115 | fbt::spa_sync:return 116 | /self->start && (this->ms = (timestamp - self->start) / 1000000) > MIN_MS/ 117 | { 118 | normalize(@writebytes, 1048576); 119 | normalize(@readbytes, 1048576); 120 | normalize(@wrl_min, 1048576); 121 | normalize(@wrl_max, 1048576); 122 | normalize(@reserved_max, 1048576); 123 | normalize(@dp_thr_max, 1049); /* dp_throughput is in bytes/millisec, we are converting to Mbytes/sec */ 124 | normalize(@writeio,this->ms); 125 | normalize(@readio,this->ms); 126 | printf("%-20Y %-10s %6d ms, ", walltimestamp, 127 | stringof(self->spa->spa_name), this->ms); 128 | printa("%@d wMB %@d rMB %@d wIops %@d rIops %@d+%@d dly+thr; dp_wrl %@d MB .. %@d MB; res_max: %@d MB; dp_thr: %@d\n", 129 | @writebytes, @readbytes, @writeio, @readio, @delays, @throttles, @wrl_min, @wrl_max, @reserved_max, @dp_thr_max); 130 | } 131 | 132 | fbt::spa_sync:return 133 | /self->start/ 134 | { 135 | self->start = 0; self->spa = 0; in_spa_sync = 0; 136 | clear(@writebytes); clear(@readbytes); clear(@writeio); clear(@readio); clear(@delays); clear(@throttles); trunc(@wrl_min); trunc(@wrl_max); trunc(@reserved_max); 137 | trunc(@dp_thr_max); 138 | } 139 | -------------------------------------------------------------------------------- /txg_realtime.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Author: Kirill.Davydychev@Nexenta.com */ 5 | /* Copyright 2013, Nexenta Systems, Inc. All rights reserved. */ 6 | /* Version: 1.0 */ 7 | 8 | /* This script only works on NexentaStor 3.x and other illumos-based distributions that 9 | * do *not* have the illumos 3464 commit integrated. 
That commit completely refactors
10 | * the TXG subsystem; a brand new script will be required to expose similar
11 | * functionality. */
12 |
13 | inline int MIN_MS = 1;
14 |
15 | dtrace:::BEGIN
16 | {
17 |         @readbytes=sum(0);
18 |         @writebytes=sum(0);
19 |         wrl_max=0;
20 | }
21 |
22 | fbt::spa_sync:entry
23 | /args[0]->spa_name == $$1 && !self->start/
24 | {
25 |         in_spa_sync = 1;
26 |         self->start = timestamp;
27 |         self->spa = args[0];
28 | }
29 |
30 | txg_delay:entry
31 | /args[0]->dp_spa->spa_name == $$1/
32 | {
33 |         @delays=count();
34 | }
35 |
36 | dsl_pool_tempreserve_space:entry
37 | /args[0]->dp_spa->spa_name == $$1/
38 | {
39 |         wrl_max = (args[0]->dp_write_limit/100)>wrl_max?(args[0]->dp_write_limit/100):wrl_max;
40 |         @reserved_max = max(args[0]->dp_space_towrite[args[2]->tx_txg & 3] + (args[0]->dp_tempreserved[args[2]->tx_txg & 3] / 2));
41 | }
42 |
43 |
44 | dsl_pool_tempreserve_space:return
45 | /args[1]==91/
46 | {
47 |         @throttles=count();
48 | }
49 |
50 | io:::start
51 | /in_spa_sync && (args[0]->b_flags & 0x100)/
52 | {
53 |         @writeio = sum(1);
54 |         @writebytes = sum(args[0]->b_bcount);
55 | }
56 |
57 | io:::start
58 | /in_spa_sync && (args[0]->b_flags & 0x40)/
59 | {
60 |         @readio = sum(1);
61 |         @readbytes = sum(args[0]->b_bcount);
62 | }
63 |
64 |
65 | tick-1sec
66 | {
67 |         normalize(@writebytes, 1048576);
68 |         normalize(@readbytes, 1048576);
69 |         normalize(@reserved_max, wrl_max);
70 |         printf("%Y, %d, ",walltimestamp,in_spa_sync);
71 |         printa("%@d wMB, %@d rMB, %@d wIo, %@d rIo, %@d dly, %@d thr, %@d pct \n",
72 |                 @writebytes, @readbytes, @writeio, @readio, @delays, @throttles, @reserved_max);
73 |         clear(@writebytes); clear(@readbytes); clear(@writeio); clear(@readio); clear(@delays); clear(@throttles); wrl_max=0;
74 | }
75 |
76 |
77 | fbt::spa_sync:return
78 | /self->start/
79 | {
80 |         self->start = 0; self->spa = 0; in_spa_sync = 0; clear(@reserved_max);
81 | }
82 | -------------------------------------------------------------------------------- /watch_destroy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # Author: Kirill.Davydychev@Nexenta.com 4 | # Copyright 2014, Nexenta Systems, Inc. 5 | 6 | # 7 | # Watch in-progress synchronous ZFS destroys that were active when the script was started. 8 | # 9 | 10 | echo "Ctrl-C to exit."
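# How it works: "::walk thread | ::findstack -v" dumps every kernel thread's stack along
# with frame arguments; grepping for dsl_dataset_destroy and cutting out the first
# argument of that frame yields the dsl_dataset_t pointer of each destroy in flight.
# The inner loop then asks mdb every 5 seconds for that dataset's
# ds_dir -> dd_phys -> dd_used_bytes, which should shrink as the destroy frees blocks.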
11 | for i in `echo "::walk thread | ::findstack -v" | mdb -k | grep dsl_dataset_destroy | cut -d "(" -f 2 | cut -d "," -f 1`; do while true; do date; echo "$i::print -t dsl_dataset_t ds_dir | ::print -t struct dsl_dir dd_phys | ::print -t -d dsl_dir_phys_t dd_used_bytes" | mdb -k; sleep 5; done; done 12 | -------------------------------------------------------------------------------- /whoalloc.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | nvp_buf_alloc:entry,kmem_firewall_va_alloc:entry,vmem_nextfit_alloc:entry,segkmem_alloc:entry,vmem_alloc:entry,smbsrv:smb_alloc:entry 5 | { 6 | @[probefunc,stack()]=sum((uint64_t)args[1]); 7 | } 8 | 9 | kmem_alloc:entry,unix:smb_alloc:entry 10 | { 11 | @[probefunc,stack()]=sum((uint64_t)args[0]); 12 | } 13 | 14 | kmem_cache_alloc:entry 15 | { 16 | @cache[args[0]->cache_name,stack()]=sum((uint64_t)(args[0]->cache_bufsize)); 17 | } 18 | 19 | vmem_seg_alloc:entry 20 | { 21 | @[probefunc,stack()]=sum((uint64_t)args[3]); 22 | } 23 | 24 | kmem_slab_create:entry 25 | { 26 | @slab[args[0]->cache_name,stack()]=sum(args[0]->cache_slabsize); 27 | } 28 | 29 | tick-1sec 30 | { 31 | printf("%Y\n",walltimestamp); 32 | printf("================ SLAB ================\n"); 33 | printa(@slab); trunc(@slab); 34 | printf("================ CACHE ===============\n"); 35 | printa(@cache); trunc(@cache); 36 | printf("================ OTHER ===============\n"); 37 | trunc(@,20); printa(@); trunc(@); 38 | } 39 | -------------------------------------------------------------------------------- /zfsio.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show read/write IOPs and throughput for ZFS 5 | * filesystems and zvols on a per-dataset basis. It can be used to estimate 6 | * which dataset is causing the most I/O load on the current system. It should 7 | * only be used for comparative analysis. */ 8 | /* Author: Kirill.Davydychev@Nexenta.com */ 9 | /* Copyright 2012, 2014 Nexenta Systems, Inc. All rights reserved. */ 10 | /* Version: 0.5b */ 11 | 12 | dmu_buf_hold_array_by_dnode:entry 13 | /args[0]->dn_objset->os_dsl_dataset && args[3]/ /* Reads */ 14 | { 15 | this->d = args[0]->dn_objset->os_dsl_dataset->ds_dir; 16 | this->path = stringof(this->d->dd_myname); 17 | this->p = this->d->dd_parent; 18 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 19 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 20 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 21 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 22 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 23 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 24 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 25 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 26 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 27 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 28 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 29 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 30 | this->path = (this->p != NULL) ? 
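/* DTrace has no loops, so the walk up the dataset's parent dsl_dir chain is unrolled by
   hand here and again in the write clause below; the ten prepends are simply an assumed
   upper bound on dataset nesting depth. */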
strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 31 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 32 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 33 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 34 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 35 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 36 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 37 | 38 | @ior[this->path] = count(); 39 | @tpr[this->path] = sum(args[2]); 40 | @bsr[this->path] = avg(args[2]); 41 | @distr[strjoin(this->path, " Reads")] = quantize(args[2]); 42 | } 43 | 44 | dmu_buf_hold_array_by_dnode:entry 45 | /args[0]->dn_objset->os_dsl_dataset && !args[3]/ /* Writes */ 46 | { 47 | this->d = args[0]->dn_objset->os_dsl_dataset->ds_dir; 48 | this->path = stringof(this->d->dd_myname); 49 | this->p = this->d->dd_parent; 50 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 51 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 52 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 53 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 54 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 55 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 56 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 57 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 58 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 59 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 60 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 61 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 62 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 63 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 64 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 65 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 66 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 67 | this->p = (this->p != NULL) ? this->p->dd_parent : NULL; 68 | this->path = (this->p != NULL) ? strjoin(stringof(this->p->dd_myname),strjoin("/",this->path)):this->path; 69 | 70 | @iow[this->path] = count(); 71 | @tpw[this->path] = sum(args[2]); 72 | @bsw[this->path] = avg(args[2]); 73 | @distw[strjoin(this->path, " Writes")] = quantize(args[2]); 74 | } 75 | 76 | tick-1sec,END 77 | { 78 | printf("%Y operations bandwidth blocksize\n",walltimestamp); 79 | printf("Dataset read write read write read write\n"); 80 | printf(" ------ ------ ---------- ---------- ------ ------\n"); 81 | printa("%-40s %@-6d %@-6d %@-10d %@-10d %@-6d %@-6d\n",@ior,@iow,@tpr,@tpw,@bsr,@bsw); 82 | trunc(@ior); trunc(@tpr); trunc(@iow); trunc(@tpw); trunc(@bsr); trunc(@bsw); 83 | /* clear(@ior); clear(@tpr); clear(@iow); clear(@tpw); clear(@bsr); clear(@bsw);*/ 84 | /* TODO: Make script more interactive. 
Above, uncomment clear() and comment trunc() line in order to change 85 | truncate behavior, or comment out both lines to get cumulative stats. */ 86 | } 87 | -------------------------------------------------------------------------------- /zfsio_1fs.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | #pragma D option quiet 3 | 4 | /* Description: This script will show read/write IOPs and throughput for a single ZFS filesystem. 5 | * Usage: ./zfsio_1fs.d fsname */ 6 | /* Author: Kirill.Davydychev@Nexenta.com */ 7 | /* Copyright 2013, Nexenta Systems, Inc. All rights reserved. */ 8 | /* Version: 0.5b */ 9 | 10 | dmu_buf_hold_array_by_dnode:entry 11 | /args[0]->dn_objset->os_dsl_dataset && args[3] && stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname) == $$1/ /* Reads */ 12 | { 13 | this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); 14 | this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); 15 | this->path = strjoin(strjoin(this->parent,"/"),this->ds); /* Dirty hack - parent/this format doesn't guarantee full path */ 16 | @ior[this->path] = count(); 17 | @tpr[this->path] = sum(args[2]); 18 | @bsr[this->path] = avg(args[2]); 19 | @distr[strjoin(this->path, " Reads")] = quantize(args[2]); 20 | } 21 | 22 | dmu_buf_hold_array_by_dnode:entry 23 | /args[0]->dn_objset->os_dsl_dataset && !args[3] && stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname) == $$1/ /* Writes */ 24 | { 25 | this->ds = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_myname); 26 | this->parent = stringof(args[0]->dn_objset->os_dsl_dataset->ds_dir->dd_parent->dd_myname); 27 | this->path = strjoin(strjoin(this->parent,"/"),this->ds); 28 | @iow[this->path] = count(); 29 | @tpw[this->path] = sum(args[2]); 30 | @bsw[this->path] = avg(args[2]); 31 | @distw[strjoin(this->path, " Writes")] = quantize(args[2]); 32 | } 33 | 34 | BEGIN 35 | { 36 | printf("%Y operations bandwidth blocksize\n",walltimestamp); 37 | printf("Dataset read write read write read write\n"); 38 | printf(" ------ ------ ---------- ---------- ------ ------\n"); 39 | } 40 | 41 | tick-1sec 42 | { 43 | printa("%-40s %@-6d %@-6d %@-10d %@-10d %@-6d %@-6d\n",@ior,@iow,@tpr,@tpw,@bsr,@bsw); 44 | trunc(@ior); trunc(@tpr); trunc(@iow); trunc(@tpw); trunc(@bsr); trunc(@bsw); 45 | /* clear(@ior); clear(@tpr); clear(@iow); clear(@tpw); clear(@bsr); clear(@bsw);*/ 46 | /* TODO: Make script more interactive. Above, uncomment clear() and comment trunc() line in order to change 47 | truncate behavior, or comment out both lines to get cumulative stats. */ 48 | } 49 | 50 | tick-30sec 51 | { 52 | printf("%Y operations bandwidth blocksize\n",walltimestamp); 53 | printf("Dataset read write read write read write\n"); 54 | printf(" ------ ------ ---------- ---------- ------ ------\n"); 55 | } -------------------------------------------------------------------------------- /zfsio_plot_example.r: -------------------------------------------------------------------------------- 1 | > t <- read.csv("/Path/to/parsed.zfsio.csv", skip=1, sep=",") 2 | > t[,1] <- as.POSIXct(t[,1], format="%Y-%m-%d-%T") 3 | 4 | The following will plot first 70k data points. Adjust range to narrow down time. 
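The index range used below can also be swapped for a wall-clock window by subsetting on the
timestamp column; something along these lines should work (the timestamps are placeholders):

> sel <- t[,1] >= as.POSIXct("2014-01-01 00:00:00") & t[,1] <= as.POSIXct("2014-01-01 06:00:00")
> plot(t[,1][sel], t[,2][sel], type="l", lwd=0.1, xlab="Time", ylab="rd_iops")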
5 | 6 | > plot(t[,1][c(0:70000)],t[,2][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="rd_iops") 7 | > plot(t[,1][c(0:70000)],t[,3][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="wr_iops") 8 | > plot(t[,1][c(0:70000)],t[,4][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="rd_thr") 9 | > plot(t[,1][c(0:70000)],t[,5][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="wr_thr") 10 | > plot(t[,1][c(0:70000)],t[,6][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="rd_bs") 11 | > plot(t[,1][c(0:70000)],t[,7][c(0:70000)],type="l", lwd=0.1, xlab="Time", ylab="wr_bs") 12 | -------------------------------------------------------------------------------- /ziolatency.d: -------------------------------------------------------------------------------- 1 | #!/usr/sbin/dtrace -s 2 | 3 | #pragma D option quiet 4 | #pragma D option dynvarsize=40m 5 | 6 | /* Description: Trace I/O latency on a per-vdev basis. Script will not be maintained for the foreseeable future. */ 7 | /* Author: Kirill.Davydychev@Nexenta.com */ 8 | /* Copyright 2012, Nexenta Systems, Inc. All rights reserved. */ 9 | 10 | dtrace:::BEGIN 11 | { 12 | trace("Tracing physical I/O latency... Ctrl-C to end."); 13 | 14 | /* see /usr/include/sys/fs/zfs.h */ 15 | 16 | ziotype[0] = "null"; 17 | ziotype[1] = "read"; 18 | ziotype[2] = "write"; 19 | ziotype[3] = "free"; 20 | ziotype[4] = "claim"; 21 | ziotype[5] = "ioctl"; 22 | ziochild[0] = "vdev"; 23 | ziochild[1] = "gang"; 24 | ziochild[2] = "ddt"; 25 | ziochild[3] = "logical"; 26 | } 27 | 28 | fbt::zio_vdev_io_start:entry 29 | /args[0]->io_type != 0/ 30 | { 31 | start_time[arg0] = timestamp; 32 | } 33 | 34 | fbt::zio_vdev_io_done:entry 35 | /args[0]->io_type != 0 && start_time[arg0] > 0/ /* && args[0]->io_vd && args[0]->io_vd->vdev_path */ 36 | { 37 | this->iotime = (timestamp - start_time[arg0])/1000; 38 | this->zpool = stringof(args[0]->io_spa->spa_name); 39 | this->iotype = ziotype[args[0]->io_type]; 40 | this->childtype = ziochild[args[0]->io_child_type]; 41 | this->path = args[0]->io_vd ? 42 | args[0]->io_vd->vdev_path ? 43 | stringof(args[0]->io_vd->vdev_path) : 44 | "top_level_vdev" : 45 | "pool"; 46 | 47 | this->vdev_id = args[0]->io_vd ? 48 | args[0]->io_vd->vdev_id : 49 | 404; /* Not Found (pool) */ 50 | this->vdev_pa = args[0]->io_vd ? 51 | args[0]->io_vd->vdev_parent ? 52 | args[0]->io_vd->vdev_parent->vdev_id : 53 | 12455 : /* L2ARC has no parent - set to 12455 (L2ArcSSd) */ 54 | 404; /* Not found */ 55 | 56 | /* XXX - Describe abnormal behaviors to watch out for */ 57 | 58 | @latency[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = quantize(this->iotime); 59 | @avg_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = avg(this->iotime); 60 | @sum_lat[this->zpool, this->childtype, this->vdev_pa, this->vdev_id, this->iotype, this->path] = sum(this->iotime); 61 | } 62 | 63 | dtrace:::END 64 | { 65 | printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t Latency distribution:%@d\n",@latency); 66 | printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t AvgLatency(us): %@d\n",@avg_lat); 67 | printa("ZPool: %12s IOChild: %7s ParentVdevID: %5d ThisVdevID: %3d IOType: %5s Disk: %s\t TotLatency(us): %@d\n",@sum_lat); 68 | trunc(@latency); 69 | trunc(@avg_lat); 70 | trunc(@sum_lat); 71 | } 72 | --------------------------------------------------------------------------------