├── .screenrc-main ├── DshPerlHostLoop.pm ├── README.pod ├── cl-gatherfile.pl ├── cl-killall.pl ├── cl-netstat.pl ├── cl-ping.pl ├── cl-psgrep.pl ├── cl-rolling-reboot.pl ├── cl-rsync.pl ├── cl-run.pl ├── cl-sendfile.pl ├── generate-screen-config.rb └── nssh.rb /.screenrc-main: -------------------------------------------------------------------------------- 1 | # assembled in bits & pieces from tons of screenrc's 2 | 3 | attrcolor b ".I" 4 | vbell off 5 | defscrollback 65536 6 | caption always '%{gk}[%{--} %= %{g}%n %{y}%t %?- (%h)%? %= %{-}%{gk}][ %D %Y-%m-%d %c PDT ]' 7 | autodetach on 8 | defbce "on" 9 | termcapinfo xterm|xterms|xs|rxvt ti@:te@ 10 | termcapinfo xterm 'Co#256:AB=\E[48;5;%dm:AF=\E[38;5;%dm' 11 | 12 | # xterm understands both im/ic and doesn't have a status line. 13 | # Note: Do not specify im and ic in the real termcap/info file as 14 | # some programs (e.g. vi) will not work anymore. 15 | termcap xterm hs@:cs=\E[%i%d;%dr:im=\E[4h:ei=\E[4l 16 | terminfo xterm hs@:cs=\E[%i%p1%d;%p2%dr:im=\E[4h:ei=\E[4l 17 | 18 | # 80/132 column switching must be enabled for ^AW to work 19 | # change init sequence to not switch width 20 | termcapinfo xterm Z0=\E[?3h:Z1=\E[?3l:is=\E[r\E[m\E[2J\E[H\E[?7h\E[?1;4;6l 21 | 22 | # Make the output buffer large for (fast) xterms. 23 | termcapinfo xterm* OL=10000 24 | 25 | # tell screen that xterm can switch to dark background and has function 26 | # keys. 27 | termcapinfo xterm 'VR=\E[?5h:VN=\E[?5l' 28 | termcapinfo xterm 'k1=\E[11~:k2=\E[12~:k3=\E[13~:k4=\E[14~' 29 | termcapinfo xterm 'kh=\EOH:kI=\E[2~:kD=\E[3~:kH=\EOF:kP=\E[5~:kN=\E[6~' 30 | 31 | # special xterm hardstatus: use the window title. 
32 | termcapinfo xterm 'hs:ts=\E]2;:fs=\007:ds=\E]2;screen\007' 33 | 34 | terminfo xterm 'vb=\E[?5h$<200/>\E[?5l' 35 | termcapinfo xterm 'vi=\E[?25l:ve=\E[34h\E[?25h:vs=\E[34l' 36 | 37 | # xterm-52 tweaks: 38 | # - uses background color for delete operations 39 | termcapinfo xterm* be 40 | 41 | # Do not use xterm's alternative window buffer, it breaks scrollback (see bug #61195) 42 | termcapinfo xterm|xterms|xs ti@:te=\E[2J 43 | 44 | msgwait 2 45 | bind j focus down 46 | bind k focus up 47 | 48 | screen -t "workstation" 000 49 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 50 | screen -t "workstation" 001 51 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 52 | screen -t "workstation" 002 53 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 54 | screen -t "workstation" 003 55 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 56 | screen -t "workstation" 004 57 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 58 | screen -t "workstation" 005 59 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 60 | screen -t "workstation" 006 61 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 62 | screen -t "workstation" 007 63 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 64 | screen -t "workstation" 008 65 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 66 | screen -t "workstation" 009 67 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 68 | screen -t "workstation" 010 69 | stuff ". ~/.bash_profile\015rvm 1.9.2\015" 70 | screen -t "workstation" 011 71 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 72 | screen -t "workstation" 012 73 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 74 | screen -t "workstation" 013 75 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 76 | screen -t "workstation" 014 77 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 78 | screen -t "workstation" 015 79 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 80 | screen -t "workstation" 016 81 | stuff ". 
~/.bash_profile\015rvm 1.9.2\015nssh dev-machine.tobert.org\015" 82 | screen -t "workstation" 017 83 | stuff ". ~/.bash_profile\015rvm 1.9.2\015nssh logs.tobert.org\015" 84 | 85 | ## BEGIN GENERATED CONFIG ## 86 | ## END GENERATED CONFIG ## 87 | -------------------------------------------------------------------------------- /DshPerlHostLoop.pm: -------------------------------------------------------------------------------- 1 | package DshPerlHostLoop; 2 | 3 | ########################################################################### 4 | # # 5 | # Cluster Tools: DshPerlHostLoop.pm # 6 | # Copyright 2007-2011, Albert P. Tobey # 7 | # # 8 | ########################################################################### 9 | 10 | use strict; 11 | use warnings; 12 | use Carp; 13 | use File::Basename; 14 | use Data::Dumper; 15 | use IPC::Open3; 16 | use IO::Select; 17 | use IO::Handle; 18 | use Sys::Hostname (); 19 | use Fcntl ':flock'; 20 | use File::Temp qw(tempfile); 21 | use Tie::IxHash; 22 | eval { use Net::SSH2; }; # optional 23 | use base 'Exporter'; 24 | 25 | # globals! 26 | our @opt_filter_excl; 27 | our @opt_filter_incl; 28 | our $opt_batch; 29 | our $ssh_options .= " -o 'BatchMode yes' -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10'"; 30 | our $tag_output = 1; 31 | our $debug = undef; 32 | our $verbose; 33 | our $tempdir = '/var/tmp'; 34 | our $mainpid = $$; 35 | our $machines_list ||= "$ENV{HOME}/.dsh/machines.list"; # can be overridden with --list $name 36 | our @tempfiles; 37 | our $remote_user ||= $ENV{USER}; 38 | our $sshkey ||= "$ENV{HOME}/.ssh/id_rsa"; 39 | our $retry_wait = 30; 40 | our $lock_fh = tempfile(); 41 | our $hostname_pad = 8; 42 | 43 | # Most shops have a noisy /etc/issue.net. This reads the local issue.net 44 | # and removes any lines matching it from the output from ssh. 
# Cache the local login banner once at load time; scmd() compares ssh's
# stderr against these lines so banner noise can be dropped from the output.
my @issue = read_issue();
my $issue_len = length( join( "\n", @issue ) );

# ANSI escape sequences used to colorize terminal output.
use constant {
    BLACK   => "\x1b[30m",
    RED     => "\x1b[31m",
    GREEN   => "\x1b[32m",
    YELLOW  => "\x1b[33m",
    BLUE    => "\x1b[34m",
    MAGENTA => "\x1b[35m",
    CYAN    => "\x1b[36m",
    WHITE   => "\x1b[37m",
    DKGRAY  => "\x1b[1;30m",
    DKRED   => "\x1b[1;31m",
    RESET   => "\x1b[0m",
};

# please, for the love of FSM, do not copy the style of this module
# @EXPORT is the kind of thing that makes sense when you quickly turn
# a utility (in this case, cl-run.pl's predecessor) into a module so
# you can whip up a bunch of look-alike utilities. The right thing to
# do from the start is to design a clean module, at least using
# class methods so their origin is clearly visible in downstream source.
# It's on my TODO list ;)
our @EXPORT = (
    # host loop & ssh helpers
    qw( func_loop ssh scp hostlist reap verbose my_tempfile tag_output ),
    qw( libssh2_connect libssh2_reconnect libssh2_slurp_cmd ),
    # tunables shared with the calling script
    qw( $ssh_options $remote_user $retry_wait $hostname_pad ),
    qw( @opt_filter_excl @opt_filter_incl $opt_batch ),
    # output serialization
    qw( lock unlock ),
    # color constants
    qw( BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE DKGRAY DKRED RESET ),
);

=head1 NAME

DshPerlHostLoop - loops for running ssh commands across large clusters in parallel

=head1 SYNOPSIS

    use FindBin qw($Bin);
    require "$Bin/DshPerlHostLoop.pm";

    # the hostname is the first argument handed to the callback
    func_loop( sub { system( "ssh $_[0] uname -a" ); } );

=head1 GLOBAL SWITCHES

A few global CLI switches are implemented in this module in a BEGIN block.
=pod

    --incl  - a perl regular expression that filters out non-matching hostnames
    --excl  - a perl regular expression that filters matched hostnames out of the list
    --batch - run in parallel on every N nodes, shifting by 1 until all are complete
    --list  - name of the list, e.g. ~/.dsh/machines.$NAME
    --root  - set remote user to root
    --user  - set the remote username to something other than $USER or root
    -i      - ssh private key file to use
    -u      - don't prefix output with the remote hostname
    -v      - verbose
    -m      - specify a file with a list of hosts to use (default is ~/.dsh/machines.list)

--excl RE's are run before --incl RE's.

=head1 FUNCTIONS

=over 4

=item func_loop()

Execute a callback in parallel for each host. The first argument passed to each
callback will be the hostname, the second is the comment that followed the
hostname in the machine list (an empty string when there was none).

Each host is handled in its own forked child process. When --batch N is in
effect the hosts are processed N at a time, and every batch is reaped before
the next one is started.

Dies if the argument is not a code reference or if a child cannot be forked.

    # hello, cruel world
    func_loop( sub { print "@_\n"; } );

=cut

sub func_loop {
    my $f = shift;

    if ( ref($f) ne 'CODE' ) {
        confess "Argument to DshPerlHostLoop must be a subroutine/closure.";
    }

    my %pids;
    tie my %hosts, 'Tie::IxHash';
    %hosts = hostlist(keep_comments => 1);
    my @hostnames = keys %hosts;

    # support batched commands in increments of $opt_batch
    # This is useful for large clusters where doing the whole cluster at once
    # is a bad idea. Set --batch 1 for serial execution.
    $opt_batch ||= 0;
    my @batches = _make_batches( $opt_batch, @hostnames );

    foreach my $batch (@batches) {
        foreach my $hostname (@{$batch}) {
            my $pid = fork();

            # fork() returns undef on failure; that must not be mistaken for
            # the child (0), or the parent would run the callback and exit.
            if ( !defined $pid ) {
                my $error = $!;
                reap( \%pids );
                confess "Could not fork a child for $hostname: $error";
            }

            if ( $pid ) {
                $pids{$hostname} = $pid;
                next;
            }

            # child: label the process, run the callback, and never return
            eval { $0 = "$0 -- $hostname"; };
            eval { $f->( $hostname, $hosts{$hostname} ); };
            if ( $@ ) {
                confess $@;
            }

            exit 0;
        }

        # should block until all commands exit
        reap( \%pids );
    }
}

# Split a list of hostnames into array refs of at most $size entries each.
# A $size below 1 yields a single batch holding every host. The last batch
# is simply shorter when the host count is not a multiple of $size.
sub _make_batches {
    my( $size, @hostnames ) = @_;

    $size = int( $size || 0 );
    return ( \@hostnames ) if ( $size < 1 );

    my @batches;
    while ( @hostnames ) {
        push @batches, [ splice( @hostnames, 0, $size ) ];
    }
    return @batches;
}

=item read_issue()

Read /etc/issue.net or if that doesn't exist, /etc/issue. This is used to filter out
issues from remote systems to keep your output readable.

Returns an array of chomped lines. The array is empty when neither file is
readable or the chosen file cannot be opened.

=cut

sub read_issue {
    my @issue;

    foreach my $file ( '/etc/issue.net', '/etc/issue' ) {
        next unless ( -r $file );

        # Only the first readable candidate is used; there is no fallback to
        # the second file if opening the first one fails.
        if ( open( my $fh, '<', $file ) ) {
            @issue = <$fh>;
            chomp @issue;
            close $fh;
        }
        last;
    }

    return @issue;
}

=item ssh()

Run a command over ssh.

    func_loop(sub {
        my $host = shift;
        ssh( $host, 'ps -ef' );
    });

=item scp()

scp a file.
=pod

    my($local_file, $remote_file) = ("/etc/hosts", "/etc/hosts");
    func_loop(sub {
        my $host = shift;
        scp( $local_file, "$host:$remote_file" );
    });

=item scmd()

The actual function behind ssh/scp.

    sub ssh { scmd('/usr/bin/ssh', '-o', "'User $remote_user'", @_) }
    sub scp { scmd('/usr/bin/scp', '-o', "'User $remote_user'", '-v', @_) }

The program, the global $ssh_options and all remaining arguments are joined
into one string and run through open3(), so arguments are subject to shell
parsing. Returns the list of non-empty output lines; lines read from stderr
are wrapped in RED/RESET. When nothing at all was read the list holds the
single placeholder string "''".

=cut

# archaic & insecure but fast and convenient
# in other words, don't let untrusted people sudo this!!
sub ssh { scmd('/usr/bin/ssh', '-o', "'User $remote_user'", @_) }
sub scp { scmd('/usr/bin/scp', '-o', "'User $remote_user'", '-v', @_) }
sub scmd {
    my $scmd = shift;

    my @output;
    my( $in, $out, $err ) = (IO::Handle->new, IO::Handle->new, IO::Handle->new);
    my $pid = open3( $in, $out, $err, "$scmd $ssh_options @_" );

    if ( $verbose ) {
        print STDERR "Command($$): $scmd $ssh_options @_\n";
    }

    my $select = IO::Select->new( $out, $err );
    my $ofd = fileno($out);
    my $efd = fileno($err);

    my %eofs;

    my $bytes = 0;
    # the loop ends when both handles were removed or nothing became
    # readable within 10 seconds
    SELECT: while ( my @ready = $select->can_read(10) ) {
        READY: foreach my $r ( @ready ) {
            my $rfd = fileno($r);

            if ( exists($eofs{$rfd}) ) {
                $select->remove($rfd);
                next SELECT;
            }
            elsif ( $rfd == $ofd ) {
                my $line = <$out>;
                chomp $line if ( defined $line );

                # test the length rather than the truth of the line so that
                # an output line consisting of "0" is not thrown away
                if ( defined $line and length $line ) {
                    $bytes += length($line);
                    push @output, $line;
                }

                $eofs{$rfd} = 1 if ( eof($out) );
            }
            elsif ( $rfd == $efd ) {
                my $line = <$err>;
                chomp $line if ( defined $line );

                if ( defined $line and length $line ) {
                    # don't look for issue.net matches after its byte size has past
                    my $is_issue = ( $bytes < $issue_len and grep { $_ eq $line } @issue );
                    unless ( $is_issue ) {
                        $bytes += length($line);
                        # TODO: probably should detect a terminal or have an option to disable color
                        push @output, RED . $line . RESET;
                    }
                }

                $eofs{$rfd} = 1 if ( eof($err) );
            }
            else {
                warn "Got read error on $rfd ...";
            }
        }
    }

    # this usually means success
    if ( $bytes == 0 ) {
        push @output, "''";
        printf STDERR "%sGot zero bytes from command: $scmd @_%s\n", CYAN, RESET if ($verbose);
    }

    waitpid( $pid, 0 );
    if ( $? != 0 ) {
        printf STDERR "%sGot non-zero exit status from command: $scmd @_%s\n", RED, RESET;
    }

    return @output;
}

=item libssh2_connect()

Connect to the remote host over SSH using Net::SSH2 instead
of shelling out. This is a bit more efficient over the long run,
but does not work with ssh agent, and therefore doesn't work
with encrypted ssh keys.

    my $bundle = libssh2_connect( $hostname, $port );

The port defaults to 22. An existing bundle may be passed as the third
argument, in which case it is updated and returned instead of a new one.
Dies with "Could not authenticate." when no authentication method succeeds.

=cut

# sets up the ssh2 connection
sub libssh2_connect {
    my( $hostname, $port, $bundle ) = @_;
    $port ||= 22;

    # TODO: make this configurable
    my @keys = (
        # my monitor-rsa key is unencrypted to work with Net::SSH2
        # it is restricted to '/bin/cat /proc/net/dev etc.' though so very low risk
        [
            $remote_user,
            $ENV{HOME}.'/.ssh/monitor-rsa.pub',
            $ENV{HOME}.'/.ssh/monitor-rsa'
        ],
        [
            $remote_user,
            $sshkey . '.pub', # this should usually be correct
            $sshkey           # settable on the CLI with -i
        ]
    );

    my $ssh2 = Net::SSH2->new();
    my $ok;

    for (my $i=0; $i<@keys; $i++) {
        # NOTE(review): connect() is issued again for every key and its
        # return value is ignored, as in the original code - confirm against
        # the Net::SSH2 docs how it behaves on an already connected object.
        $ssh2->connect( $hostname, $port, Timeout => 3 );

        $ok = $ssh2->auth_agent( $remote_user );
        last if ($ok);

        $ok = $ssh2->auth_publickey( @{$keys[$i]} );
        last if ($ok);

        # report the key that just failed and, if there is one, the next key
        my $next = $i < $#keys
            ? sprintf( ", trying %s:%s", $keys[$i+1]->[0], $keys[$i+1]->[1] )
            : '';
        printf STDERR "%sFailed authentication as user %s with pubkey %s%s%s\n",
            RED, $keys[$i]->[0], $keys[$i]->[1], $next, RESET;
    }
    $ok or die "Could not authenticate.";

    # $ssh2 is always an object at this point, so the bundle can be filled in
    # unconditionally
    if ($bundle) {
        $bundle->host($hostname);
        $bundle->port($port);
        $bundle->ssh2($ssh2);
    }
    else {
        $bundle = bless {
            host => $hostname,
            port => $port,
            ssh2 => $ssh2
        }, 'DshPerlHostLoop::Bundle';
    }

    return $bundle;
}

# Re-establish the connection of a bundle whose ssh2 slot is undefined.
# Returns the bundle when it is (or already was) connected, undef when the
# attempt failed, and an empty list while the retry time has not come yet.
sub libssh2_reconnect {
    my $bundle = shift;

    # on connection failures, wait $retry_wait seconds and try again until it works
    if (not defined $bundle->ssh2) {
        # a bundle that never failed before has no next_attempt/retries yet
        if ( ($bundle->next_attempt || 0) < time ) {
            printf "%sretrying connection to %s ...%s", BLUE, $bundle->host, RESET;
            eval {
                libssh2_connect( $bundle->host, $bundle->port, $bundle );
            };
            if ($@) {
                print RED, "FAILED. Trying again in $retry_wait seconds.", RESET, "\n";

                $bundle->ssh2(undef);
                $bundle->next_attempt(time + $retry_wait);
                $bundle->retries( ($bundle->retries || 0) + 1 );

                return undef;
            }
            else {
                $bundle->retries(0);
                print GREEN, "SUCCESS!", RESET, "\n";
            }
        }
        else {
            return;
        }
    }

    return $bundle;
}

=item libssh2_slurp_cmd()

Run a command over an existing libssh2 connection and capture all
of its output.
=pod

    my $lines = libssh2_slurp_cmd( $bundle, $command );

The first argument is the bundle returned by libssh2_connect(). A bundle
without a live connection is handed to libssh2_reconnect() first.

Returns an array reference holding the output split into lines, or undef when
there is no usable connection or the command failed. On failure the bundle's
connection is cleared and its next attempt is scheduled $retry_wait seconds
ahead.

=cut

sub libssh2_slurp_cmd {
    my( $bundle, $cmd ) = @_;

    # without a bundle object there is nothing to reconnect
    return undef unless ( ref $bundle );

    libssh2_reconnect( $bundle ) unless ( $bundle->ssh2 );
    unless ($bundle->ssh2) {
        return undef;
    }

    my $data = '';
    eval {
        my $chan = $bundle->ssh2->channel();
        $chan->exec( $cmd );

        while ( !$chan->eof() ) {
            $chan->read( my $buffer, 4096 );
            $data .= $buffer if ( defined $buffer );
        }

        $chan->close();
    };
    if ( $@ ) {
        $bundle->ssh2(undef);
        $bundle->next_attempt(time + $retry_wait);
        $bundle->retries(0);
        return undef;
    }
    else {
        return [split(/[\r\n]+/, $data)];
    }
}

=item hostlist()

Returns an array of hosts to be accessed. This reads the hostname list
(the global $machines_list, default ~/.dsh/machines.list) then filters it based
on --excl and --incl regular expressions. Blank lines and lines starting with
# are skipped; text after a # on a host line is that host's comment.

As a side effect the global $hostname_pad is raised to the length of the
longest accepted hostname plus two.

Dies if the machine list cannot be opened.

    my @hosts = hostlist();

    # use Tie::IxHash to preserve insertion order if desired
    tie my %hosts_and_comments, 'Tie::IxHash';
    %hosts_and_comments = hostlist(keep_comments => 1);

=cut

sub hostlist {
    my %options = @_;

    my @hostlist;
    open( my $fh, '<', $machines_list )
        or die "Could not open machine list file '$machines_list' for reading: $!";

    HOST: while ( my $line = <$fh> ) {
        chomp $line;
        next unless ( $line && length $line );
        next if ( $line =~ /^\s*#/ );

        my($hostname, $comment) = split( /\s*#\s*/, $line, 2 );

        $hostname =~ s/\s//g;
        $comment ||= '';
        $comment =~ s/^\s+//;
        $comment =~ s/\s+$//;

        next unless ( length $hostname );

        FILTER_EX: foreach my $excl ( @opt_filter_excl ) {
            if ( $hostname =~ /$excl/ ) {
                printf "%sDshPerlHostLoop: Skipping $hostname because it matched filter $excl.%s\n", BLUE, RESET if ( $debug );
                next HOST;
            }
        }
        FILTER_IN: foreach my $incl ( @opt_filter_incl ) {
            if ( $hostname !~ /$incl/i ) {
                # printf, not print: the message is a format string
                printf "%sDshPerlHostLoop: Skipping $hostname because it didn't match filter $incl.%s\n", BLUE, RESET if ( $debug );
                next HOST;
            }
        }

        # update the global hostname padding variable used for pretty printing
        if (length($hostname) + 2 > $hostname_pad) {
            $hostname_pad = length($hostname) + 2;
        }

        if ($options{keep_comments}) {
            push @hostlist, $hostname, $comment;
        }
        else {
            push @hostlist, $hostname;
        }
    }

    close $fh;

    return @hostlist;
}

=item reap()

Reap child processes from the forkbomb. The hash is { $hostname => $pid }.
Every entry is removed from the hash once its process has been waited for.

    reap(\%pids);

=cut

sub reap {
    my $pids = shift;
    foreach my $host ( keys %$pids ) {
        waitpid( $pids->{$host}, 0 );
        delete $pids->{$host};
    }
}

=item tag_output()

Get whether or not output should be prefixed with the hostname.

=cut

sub tag_output {
    return $tag_output;
}

=item verbose()

Toggle/get whether or not to be verbose.
=pod

=cut

sub verbose {
    if ( @_ == 1 ) {
        $verbose = shift;
    }
    return $verbose;
}

# set by the BEGIN block below when -u was given on the command line
our $opt_no_tag;

# nasty brute force argument stealing :)
# BEGIN makes sure this runs before Getopt::* as long as that module
# isn't also called from within a BEGIN block
#
# Recognized switches are removed from @ARGV together with their values so
# the calling script's own option parsing never sees them.
BEGIN {
    my %to_kill;

    # return the value that follows the switch at index $idx and mark both
    # the switch and its value for removal
    my $take_value = sub {
        my $idx = shift;
        die "Option '$main::ARGV[$idx]' requires a value.\n"
            unless ( defined $main::ARGV[$idx+1] );
        $to_kill{$idx}   = 1;
        $to_kill{$idx+1} = 1;
        return $main::ARGV[$idx+1];
    };

    for ( my $i=0; $i<@main::ARGV; $i++ ) {
        my $arg = $main::ARGV[$i];

        # skip anything after -- by itself, just like GNU convention
        # don't remove it though, so GetOptions can have a whack at processing
        last if ( $arg eq '--' );

        if ( $arg eq '--list' ) {
            my $list = $take_value->($i);

            # absolute path
            if ( -f $list ) {
                $machines_list = $list;
            }
            # short name
            elsif ( -f "$ENV{HOME}/.dsh/machines.$list" ) {
                $machines_list = "$ENV{HOME}/.dsh/machines.$list";
            }
            else {
                die "Could not read machine list in '$list' or '~/.dsh/machines.$list': $!";
            }
        }
        elsif ( $arg eq '--incl' ) {
            my $f = $take_value->($i);
            push @opt_filter_incl, qr/$f/;
        }
        elsif ( $arg eq '--excl' ) {
            my $f = $take_value->($i);
            push @opt_filter_excl, qr/$f/;
        }
        elsif ( $arg eq '--batch' ) {
            $opt_batch = $take_value->($i);
        }
        elsif ( $arg eq '-i' ) {
            # assign the package global (not a new lexical) so that
            # libssh2_connect() sees the key as well
            $sshkey = $take_value->($i);
            $ssh_options .= " -o 'IdentityFile $sshkey'";
        }
        elsif ( $arg eq '--user' ) {
            $remote_user = $take_value->($i);
        }
        elsif ( $arg eq '-u' ) {
            $to_kill{$i} = 1;
            $opt_no_tag = 1;
            $tag_output = undef;
        }
        elsif ( $arg eq '-v' ) {
            $to_kill{$i} = 1;
            $verbose = 1;
        }
        elsif ( $arg eq '--root' ) {
            $to_kill{$i} = 1;
            $remote_user = 'root';
        }

        # a value that was just consumed must not be parsed as a switch
        $i++ if ( $to_kill{$i+1} );
    }

    # Remove from the highest index down so earlier removals don't shift the
    # remaining ones. splice() is used because delete() on an array element
    # only leaves an undef hole behind.
    foreach my $idx ( sort { $b <=> $a } keys %to_kill ) {
        splice( @main::ARGV, $idx, 1 );
    }
}

# The "our $tag_output = 1;" initializer near the top of this file runs at
# run time, i.e. after the BEGIN block above, and would silently undo -u.
$tag_output = undef if ( $opt_no_tag );

=item set_screen_title()

Set the title in GNU screen if it's detected. Screen is detected by a TERM
value starting with "screen" or by a TERMCAP that mentions screen.

    set_screen_title("Cluster Netstat, Cluster: foobar");

=cut

sub set_screen_title {
    my $title = shift;

    # TERM may be unset (cron, some ssh sessions) and is often a variant
    # such as screen-256color
    my $term = defined $ENV{TERM} ? $ENV{TERM} : '';

    if ($term =~ /^screen/ or ($ENV{TERMCAP} and $ENV{TERMCAP} =~ /screen/)) {
        print "\033k$title\033\\";
    }
}

=item lock()

Simple lock backed on flock. For printing mostly, flock doesn't work on STDOUT.

=cut

sub lock {
    flock( $lock_fh, LOCK_EX );
}

=item unlock()

Opposite of above.

=cut

sub unlock {
    flock( $lock_fh, LOCK_UN );
}

=item my_tempfile()

Not secure. Generates a parseable-by-humans tempfile so people can
tell what junk in /tmp is from.

The name is built from the user (unless root), the script name, the local
hostname and the current time. The file is remembered so the END block can
clean it up. Dies if the file cannot be opened for writing.

    my($fh, $name) = my_tempfile();

=cut

sub my_tempfile {
    my @parts;

    if ($ENV{USER} and $ENV{USER} ne 'root') {
        push @parts, $ENV{USER};
    }

    push @parts, basename($0);
    push @parts, Sys::Hostname::hostname;
    push @parts, CORE::time;

    my $filename = $tempdir . '/' . join('-', @parts);
    $filename =~ s/\s+//g;

    open(my $fh, '>', $filename)
        or die "Couldn't open tempfile for write: $!";

    push @tempfiles, $filename;

    return($fh, $filename);
}

# Only the process that loaded the module cleans up; forked children skip it.
# In verbose/debug mode the tempfiles are listed and kept instead of removed.
END {
    if ($$ == $mainpid) {
        if (($verbose or $debug) and @tempfiles > 0) {
            printf STDERR "Leaving tempfiles in $tempdir. They are:\n\t%s\n",
                join("\n\t", @tempfiles);
        }
        else {
            foreach my $tf (@tempfiles) {
                if ($tf =~ m#^$tempdir#) {
                    unlink($tf);
                }
            }
        }
    }

    eval { unlock(); }; # try to unlock
}

# track Net::SSH2 connections & related information in
# a separate object rather than having a bunch of globals
# stuff above will just bless right into this
# this AUTOLOAD just adds method syntax to a hash without dependencies
package DshPerlHostLoop::Bundle;

# Bless the given hash reference (or a fresh empty hash) into the class.
sub new {
    my($type, $self) = @_;
    $self ||= {};
    return bless $self, $type;
}

# Generic accessor: $bundle->name returns the slot, $bundle->name($value)
# stores $value first. Any passed value is stored, including undef and 0,
# which callers use to clear the connection and to reset the retry counter.
sub AUTOLOAD {
    my $self = shift;
    our $AUTOLOAD;

    # take the method name without touching the caller's $_
    my $method = ( split /::/, $AUTOLOAD )[-1];

    # object destruction also lands here and is not an accessor call
    return if ( $method eq 'DESTROY' );

    if ( @_ ) {
        $self->{$method} = shift;
    }
    return $self->{$method};
}

1;

# vim: et ts=4 sw=4 ai smarttab

__END__

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2007-2011 by Al Tobey.

This is free software; you can redistribute it and/or modify it under the terms
of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0,
version 2.0 is GPL compatible by itself, hence there is no benefit to having an
Artistic 2.0 / GPL disjunction.) See the file LICENSE for details.

=cut

--------------------------------------------------------------------------------
/README.pod:
--------------------------------------------------------------------------------
=head1 NAME

cluster-ssh-tools - a collection of cluster ssh tools

=head1 HISTORY

I often work on clusters of machines where I need to do the same operation to many
hosts at the same time. I started out with regular shell loops:

    for i in `seq -f '%02g' 1 20`
    do
        ssh root@hostname$i.dc.domain.com reboot
    done

This worked fine for about half a day.
Then I looked at DSH and similar tools available 16 | and easy to find in 2007. Over the course of time I built up cl-run.pl and a couple copies 17 | like cl-rsync.pl and cl-psgrep.pl. It didn't take long and I split all the common bits 18 | out to a module and made it a bit more generic. Then the rest of the tools were pretty 19 | trivial to throw together as I needed them. 20 | 21 | That's all to say, these tools work well for me but are not good examples of perl coding 22 | nor are they good for everybody. 23 | 24 | =head1 SCALABILITY 25 | 26 | I've had good luck using most of these tools on 300+ hosts at a time from a bastion 27 | host with 8G of RAM and plenty of available CPU cycles. Currently I run these all the 28 | time from a smallish Linux VM (2G RAM, 2 vcpus), my workstation, and my Macbook Air and 29 | haven't ever had a problem with performance. 30 | 31 | =head1 SYNOPSIS 32 | 33 | cl-run.pl # run a command or script 34 | cl-rsync.pl # parallel rsync 35 | cl-sendfile.pl # push a file out 36 | cl-gatherfile.pl # pull a file in (sorted by hostname) 37 | cl-ping.pl # ping hosts 38 | cl-killall.pl # kill a process on hosts with a regular expression 39 | cl-psgrep.pl # look for processes across the cluster 40 | cl-netstat.pl # a distributed network I/O display 41 | nssh.rb # ssh wrapper that sets screen title & other things 42 | 43 | =head1 EXAMPLES 44 | 45 | $> cat > ~/.dsh/machines.nosqldb-dev <<EOF 46 | nosqldb-dev1.tobert.org 47 | nosqldb-dev2.tobert.org 48 | nosqldb-dev3.tobert.org 49 | nosqldb-dev4.tobert.org 50 | nosqldb-dev5.tobert.org 51 | nosqldb-dev6.tobert.org 52 | nosqldb-dev7.tobert.org 53 | nosqldb-dev8.tobert.org 54 | nosqldb-dev9.tobert.org 55 | nosqldb-dev10.tobert.org 56 | nosqldb-dev11.tobert.org 57 | nosqldb-dev12.tobert.org 58 | EOF 59 | 60 | 61 | $> ln -sf ~/.dsh/machines.nosqldb-dev ~/.dsh/machines.list 62 | 63 | # set up a user account (cheezy example, assumes user@localhost has root@remotehost keys set up) 64 | $> cl-run.pl --root -c "useradd -m tobert" 65 | $> cl-rsync.pl --root -l ~/.ssh -r /home/tobert 66 | $> cl-run.pl --root -c "chown -R tobert /home/tobert" 67 | $> cl-run.pl --root -c "(grep -q '^tobert' /etc/sudoers) || echo 'tobert ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers" 68 | 69 | $> cl-run.pl --list nosqldb-dev -c "uname -a" 70 | nosqldb-dev12.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 
#31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 71 | nosqldb-dev1.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 72 | nosqldb-dev11.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 73 | nosqldb-dev5.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 74 | nosqldb-dev2.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 75 | nosqldb-dev4.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #36-Ubuntu SMP Fri Jul 8 18:12:30 UTC 2011 x86_64 GNU/Linux 76 | nosqldb-dev6.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #36-Ubuntu SMP Fri Jul 8 18:12:30 UTC 2011 x86_64 GNU/Linux 77 | nosqldb-dev10.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 78 | nosqldb-dev3.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #36-Ubuntu SMP Fri Jul 8 18:12:30 UTC 2011 x86_64 GNU/Linux 79 | nosqldb-dev8.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 80 | nosqldb-dev9.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 81 | nosqldb-dev7.tobert.org: Linux ip-xx-xx-xx-xx 2.6.32-316-ec2 #31-Ubuntu SMP Wed May 18 14:10:36 UTC 2011 x86_64 GNU/Linux 82 | 83 | $> cl-run.pl -c "sudo nohup dd if=/dev/zero of=/dev/null bs=1M &" 84 | 85 | $> cl-netstat.pl --list nosqldb-dev --device md3 86 | 87 | hostname: eth0_total eth0_recv eth0_send read_iops write_iops 1min 5min 15min 88 | -------------------------------------------------------------------------------------------------------------- 89 | nosqldb-dev12: 864 121 743 0/s 0/s 0.00 0.00 0.00 90 | nosqldb-dev11: 864 121 743 0/s 0/s 0.00 0.00 0.00 91 | nosqldb-dev10: 846 121 725 0/s 0/s 0.00 0.00 0.00 92 | nosqldb-dev9: 840 128 712 0/s 
111,608/s 1.00 1.01 1.00 93 | nosqldb-dev8: 827 117 710 0/s 120,828/s 1.04 1.03 1.00 94 | nosqldb-dev7: 999 175 824 0/s 93,479/s 1.02 1.03 1.00 95 | nosqldb-dev6: 1,674 201 1,473 0/s 0/s 0.00 0.00 0.00 96 | nosqldb-dev5: 947 136 811 0/s 0/s 0.00 0.00 0.00 97 | nosqldb-dev4: 1,674 201 1,473 0/s 0/s 0.00 0.00 0.00 98 | nosqldb-dev3: 961 136 825 0/s 0/s 0.00 0.00 0.00 99 | nosqldb-dev2: 961 136 825 0/s 0/s 0.00 0.00 0.00 100 | nosqldb-dev1: 967 136 831 0/s 0/s 0.00 0.00 0.00 101 | Total: 12,424 Recv: 1,729 Send: 10,695 (0 mbit/s) | 0 read/s 325,915 write/s 102 | Average: 12,828 Recv: 151 Send: 917 (0 mbit/s) | 0 read/s 129,660 write/s 103 | 104 | nssh.rb does a few nice things around sshing inside GNU screen. In the original version, all it did was set 105 | the screen title automatically by grabbing the hostname off the args. Now it does quite a bit more, including 106 | letting you create a bunch of new named sessions in screen without a lot of typing. 107 | 108 | It also tries to flip CNAME's to A names automatically while still setting your screen title to the CNAME. This 109 | can be pretty handy when working with lots of EC2 hosts where you may not necessarily have set up all the CNAME's 110 | in ~/.ssh/config. 111 | 112 | By default, GNU screen has MAXWIN at 40. I almost always run a rebuilt version from the git head with MAXWIN 512. 113 | 114 | $> nssh.rb hostname.tobert.org 115 | 116 | # in screen, ctrl-a c, then 117 | $> nssh.rb reset 118 | $> nssh.rb next --list nosqldb-dev 119 | # ctrl-a c 120 | $> nssh.rb next --list nosqldb-dev 121 | # ctrl-a c 122 | # etc. ... 123 | 124 | And finally, the latest incarnation of my screenrc generation script is included. 
At the moment, I hard-code a list of 125 | clusters I want to connect to at screen startup so I can do something like the following after reboots: 126 | 127 | $> ssh-add 128 | $> generate-screen-config.rb 129 | $> screen -c ~/.screenrc-main -S main -T xterm-color -U 130 | 131 | This repo includes .screenrc-main based on what I use all the time. 132 | 133 | =head1 REQUIREMENTS 134 | 135 | Base perl with Tie::IxHash for most of the tools. cl-netstat.pl requires Net::SSH2 built against a fairly modern libssh2. 136 | The ruby utils are probably fine with a base system ruby 1.8 or 1.9. 137 | 138 | SSH agent support requires Net::SSH2 >= 0.40. 139 | 140 | =head1 INSTALLATION 141 | 142 | I usually symlink all these files into ~/bin, which my ~/.profile sets to be in my PATH. 143 | 144 | $> mkdir ~/bin ~/src 145 | $> cd ~/src 146 | $> git clone https://github.com/tobert/perl-ssh-tools.git 147 | $> ln -s ~/src/perl-ssh-tools/* ~/bin/ 148 | $> export PATH=~/bin:$PATH 149 | 150 | =head1 OSX 151 | 152 | You'll need libssh2 and the perl modules. If you're using Macports: 153 | 154 | $> sudo port install perl 155 | $> sudo port install libssh2 156 | $> sudo /opt/local/bin/perl -MCPAN -e 'install Net::SSH2' 157 | $> sudo /opt/local/bin/perl -MCPAN -e 'install Tie::IxHash' 158 | 159 | =head1 SEE ALSO 160 | 161 | The docs for common options are in DshPerlHostLoop.pm. 162 | 163 | perldoc ~/bin/DshPerlHostLoop.pm 164 | 165 | All of the utilities have their own POD and use Pod::Usage. 166 | 167 | cl-run.pl 168 | 169 | cl-rsync.pl 170 | 171 | cl-sendfile.pl 172 | 173 | cl-gatherfile.pl 174 | 175 | cl-ping.pl 176 | 177 | cl-killall.pl 178 | 179 | cl-psgrep.pl 180 | 181 | cl-netstat.pl 182 | 183 | =head1 AUTHORS 184 | 185 | Al Tobey 186 | 187 | =head1 COPYRIGHT AND LICENSE 188 | 189 | This software is copyright (c) 2007-2013 by Al Tobey. 190 | 191 | This is free software; you can redistribute it and/or modify it under the terms 192 | of the Artistic License 2.0. 
(Note that, unlike the Artistic License 1.0, 193 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 194 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 195 | 196 | =cut 197 | -------------------------------------------------------------------------------- /cl-gatherfile.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-gatherfile.pl # 7 | # Copyright 2007-2011, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | cl-gatherfile.pl - harvest files from remote systems 14 | 15 | =head1 SYNOPSIS 16 | 17 | cl-gatherfile.pl [-a] -r $REMOTE_FILENAME -l $LOCAL_DIRECTORY 18 | -r: remote file to gather 19 | -l: local directory to write files to 20 | -a: append the hostname to the filename when writing it locally 21 | -d: only gather from hosts 22 | -n: number of hosts to gather from 23 | -v: verbose mode 24 | -h: show this help text 25 | 26 | =cut 27 | 28 | use Pod::Usage; 29 | use File::Temp qw/tempfile/; 30 | use File::Basename; 31 | use Getopt::Long; 32 | use strict; 33 | use warnings; 34 | 35 | use FindBin qw($Bin); 36 | use lib $Bin; 37 | use DshPerlHostLoop; 38 | 39 | our $host_cmd = undef; 40 | our $local_dir = undef; 41 | our $remote_file = undef; 42 | our $append_hostname = undef; 43 | our $help = undef; 44 | 45 | GetOptions( 46 | "l=s" => \$local_dir, 47 | "r=s" => \$remote_file, 48 | "a" => \$append_hostname, 49 | "d" => \$host_cmd, 50 | "h" => \$help 51 | ); 52 | # -h, or a missing -l / -r, prints the usage text; a local directory that does not exist yet is created below 53 | if ( $help || !( $local_dir && $remote_file ) ) { 54 | pod2usage(); 55 | } 56 | 57 | unless ( -d $local_dir || mkdir($local_dir) ) { 58 | pod2usage( -message => "Local directory '$local_dir' does not exist and could not be created."
); 59 | } 60 | 61 | func_loop( \&runit ); 62 | 63 | sub runit { 64 | my $host = shift; 65 | my $remote = "$remote_user\@$host:$remote_file"; 66 | my $dest = $local_dir; 67 | 68 | if ( $append_hostname ) { 69 | my $file = basename( $remote_file ); 70 | $dest = "$local_dir/$host-$file"; 71 | } 72 | 73 | print STDERR "Command($$): /usr/bin/scp -q $ssh_options $remote $dest\n" if ( verbose() ); 74 | system( "/usr/bin/scp -q $ssh_options $remote $dest" ); 75 | } 76 | 77 | exit 0; 78 | 79 | # vim: et ts=4 sw=4 ai smarttab 80 | 81 | __END__ 82 | 83 | =head1 COPYRIGHT AND LICENSE 84 | 85 | This software is copyright (c) 2007-2011 by Al Tobey. 86 | 87 | This is free software; you can redistribute it and/or modify it under the terms 88 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 89 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 90 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 91 | 92 | =cut 93 | -------------------------------------------------------------------------------- /cl-killall.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-killall.pl # 7 | # Copyright 2007-2013, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | cl-killall.pl - run killall across the cluster 14 | 15 | =head1 SYNOPSIS 16 | 17 | cl-killall.pl [-s SIG] [-d] [-h] $PROCESS_NAME 18 | -s: which signal to send (e.g. 
9, HUP) 19 | -h: show this help text 20 | 21 | cl-killall.pl -s HUP init 22 | cl-killall.pl -s 9 foobar 23 | 24 | =cut 25 | 26 | use strict; 27 | use warnings; 28 | use Pod::Usage; 29 | use Getopt::Long; 30 | use Scalar::Util qw(looks_like_number); 31 | 32 | use FindBin qw($Bin); 33 | use lib $Bin; 34 | use DshPerlHostLoop; 35 | 36 | our $signal = undef; 37 | our $help = undef; 38 | our $command = '/usr/bin/killall '; 39 | 40 | GetOptions( 41 | "s:s" => \$signal, 42 | "h" => \$help 43 | ); 44 | 45 | if ( $help ) { 46 | pod2usage(); 47 | } 48 | unless ( @ARGV > 0 ) { 49 | pod2usage( -message => "Not enough arguments. At least a program name to kill is required." ); 50 | } 51 | 52 | if ( $signal ) { 53 | pod2usage( -message => "Invalid signal '$signal'." ) 54 | unless ( looks_like_number($signal) or $signal =~ /^(?:HUP|USR1|USR2)$/ ); 55 | 56 | $command .= "-$signal "; 57 | } 58 | 59 | $command .= join(' ', @ARGV); 60 | 61 | func_loop( \&runit ); 62 | 63 | sub runit { 64 | my $host = shift; 65 | my @out = ssh( $host, $command ); 66 | } 67 | 68 | # vim: et ts=4 sw=4 ai smarttab 69 | 70 | __END__ 71 | 72 | =head1 COPYRIGHT AND LICENSE 73 | 74 | This software is copyright (c) 2007-2011 by Al Tobey. 75 | 76 | This is free software; you can redistribute it and/or modify it under the terms 77 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 78 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 79 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 80 | 81 | =cut 82 | -------------------------------------------------------------------------------- /cl-netstat.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $| = 1; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-netstat.pl # 7 | # Copyright 2007-2011, Albert P. 
Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | Cluster Netstat - display cluster network usage 14 | 15 | =head1 DESCRIPTION 16 | 17 | This program opens persistent ssh connections to each of the cluster nodes and 18 | keeps them open until the user quits. 19 | 20 | In my experience, the load incurred on monitored hosts is unmeasurable. 21 | 22 | =head1 SYNOPSIS 23 | 24 | cl-netstat.pl <--list LIST> <--tolerant> <--interval SECONDS> <--device DEVICE> 25 | --list - the name of the list in ~/.dsh to use, e.g ~/.dsh/machines.db-prod 26 | --tolerant - tolerate missing/down hosts in connection creation 27 | --interval - how many seconds to sleep between updates 28 | --device - name of the device to get io stats for, as displayed in /proc/diskstats 29 | 30 | cl-netstat.pl # reads ~/.dsh/machines.list 31 | cl-netstat.pl --list db-prod 32 | cl-netstat.pl --list db-prod --tolerant 33 | cl-netstat.pl --list db-prod --interval 5 34 | cl-netstat.pl --list db-prod --device md3 35 | 36 | =head1 REQUIREMENTS 37 | 38 | 1.) password-less ssh access to all the hosts in the machine list 39 | 2.) ssh key in ~/.ssh/id_rsa or ~/.ssh/monitor-rsa 40 | 3.) 
ability to /bin/cat /proc/net/dev /proc/diskstats 41 | 42 | If you want to have a special key that is restricted to the cat command, here's an example: 43 | 44 | no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/bin/cat /proc/net/dev /proc/diskstats" ssh-rsa AAAA...== al@mybox.com 45 | 46 | =cut 47 | 48 | use strict; 49 | use warnings; 50 | use Carp; 51 | use Pod::Usage; 52 | use Getopt::Long; 53 | use Time::HiRes qw(time); 54 | use Data::Dumper; 55 | use Net::SSH2; 56 | use Tie::IxHash; 57 | 58 | use FindBin qw($Bin); 59 | use lib $Bin; 60 | use DshPerlHostLoop; 61 | 62 | our @ssh; 63 | our @interfaces; 64 | our @sorted_host_list; 65 | our %host_bundles; 66 | our( $opt_device, $opt_tolerant, $opt_interval ); 67 | 68 | GetOptions( 69 | "device:s" => \$opt_device, 70 | "tolerant" => \$opt_tolerant, 71 | "interval:i" => \$opt_interval, "i:i" => \$opt_interval 72 | ); 73 | 74 | $opt_interval ||= 2; 75 | 76 | tie my %hosts, 'Tie::IxHash'; 77 | %hosts = hostlist(keep_comments => 1); 78 | $hostname_pad = length('hostname: '); # reset this since this script uses short hostnames 79 | 80 | foreach my $host ( keys %hosts ) { 81 | # connect to the host over ssh 82 | my $bundle = DshPerlHostLoop::Bundle->new({ 83 | host => $host, 84 | port => 22 85 | }) ; # ssh connection + metadata 86 | 87 | print CYAN, "Connecting to $host via SSH ... 
", RESET; 88 | eval { 89 | $bundle = libssh2_connect($host, 22); 90 | }; 91 | if ($@) { 92 | print RED, "failed, will retry later.\n", RESET; 93 | print RED, "$@\n", RESET; 94 | $bundle->next_attempt(time + $retry_wait); 95 | $bundle->retries(0); 96 | $bundle->ssh2(undef); 97 | } 98 | else { 99 | print GREEN, "connected.\n", RESET; 100 | } 101 | 102 | my $hn = $host; 103 | $hn =~ s/\.[a-zA-Z]+.*$//; 104 | if (length($hn) + 2 > $hostname_pad) { 105 | $hostname_pad = length($hn) + 2; 106 | } 107 | 108 | $bundle->comment($hosts{$host}); 109 | 110 | # set up the polling command and add to the poll list 111 | push @ssh, [ $host, $bundle, '/bin/cat /proc/net/dev /proc/diskstats' ]; 112 | push @sorted_host_list, $host; 113 | $host_bundles{$host} = $bundle; 114 | } 115 | 116 | #tobert@mybox:~/src/dsh-perl$ cat /proc/net/dev 117 | #Inter-| Receive | Transmit 118 | # face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed 119 | #lo: 3636173666 5626431 0 0 0 0 0 0 3636173666 5626431 0 0 0 0 0 0 120 | #eth0: 4592765291 6225806 0 0 0 0 0 0 2378883618 5545356 0 0 0 0 0 0 121 | 122 | sub cl_netstat { 123 | my( $struct, %stats, %times ); 124 | 125 | foreach my $host ( @ssh ) { 126 | $stats{$host->[0]} = libssh2_slurp_cmd($host->[1], $host->[2]); 127 | next unless $stats{$host->[0]}; 128 | $times{$host->[0]} = time(); 129 | push @{$stats{$host->[0]}}, $host->[1]->comment || ''; 130 | } 131 | 132 | foreach my $hostname ( keys %stats ) { 133 | # host is down 134 | if (not defined $stats{$hostname}) { 135 | $struct->{$hostname} = undef; 136 | next; 137 | } 138 | 139 | # pass the host comment through 140 | $struct->{$hostname}{comment} = pop @{$stats{$hostname}}; 141 | 142 | my @legend; 143 | $struct->{$hostname}{dsk_rds} = 0; # read sectors counter 144 | $struct->{$hostname}{dsk_rwt} = 0; # read wait ms counter 145 | $struct->{$hostname}{dsk_wds} = 0; # write sectors counter 146 | $struct->{$hostname}{dsk_wwt} = 0; # write 
wait ms counter 147 | $struct->{$hostname}{net} = {}; 148 | 149 | foreach my $line ( @{$stats{$hostname}} ) { 150 | chomp $line; 151 | if ( $line =~ /bytes\s+packets/ ) { 152 | my( $junk, $rl, $tl ) = split /\|/, $line; 153 | @legend = map { 'r' . $_ } split( /\s+/, $rl ); 154 | push @legend, map { 't' . $_ } split( /\s+/, $tl ); 155 | } 156 | elsif ( $line =~ /^\s*(e\w+)(\d+):\s*(.*)$/ ) { 157 | my( $iface, $data ) = ( $1 . $2, $3 ); 158 | 159 | my @sdata = split /\s+/, $data; 160 | 161 | foreach my $idx ( 0 .. $#sdata ) { 162 | $struct->{$hostname}{net}{$legend[$idx]} ||= 0; 163 | $struct->{$hostname}{net}{$legend[$idx]} += $sdata[$idx] || 0; 164 | } 165 | } 166 | # 8 0 sda 298890 2980 5498843 92328 10123211 2314394 134218078 10756944 0 419132 10866136 167 | # 8 5 sda5 5540 826 44511 1528 15558 55975 572334 68312 0 2932 69848 168 | # 8 32 sdc 913492 273 183151490 8217340 2047310 0 37711114 1259728 0 1267508 9476068 169 | # 8 16 sdb 2640 380 18329 2860 1751748 13461886 121702720 249041290 78 2654720 249048720 170 | # 8 1 sda1 35383589 4096190 515794290 173085956 58990656 100542811 1276270912 205189188 0 135658516 378268412 171 | # EC2 machines get disks with partitions but not whole disks 172 | # TODO: sort out devices to make sure partitions are not double-counted with whole devices 173 | # 174 | # from Documentation/iostats.txt: 175 | # Field 1 -- # of reads completed 176 | # Field 2 -- # of reads merged 177 | # Field 3 -- # of sectors read 178 | # Field 4 -- # of milliseconds spent reading 179 | # Field 5 -- # of writes completed 180 | # Field 6 -- # of writes merged 181 | # Field 7 -- # of sectors written 182 | # Field 8 -- # of milliseconds spent writing 183 | # Field 9 -- # of I/Os currently in progress 184 | # Field 10 -- # of milliseconds spent doing I/Os 185 | # Field 11 -- weighted # of milliseconds spent doing I/Os 186 | # 187 | # capture: major minor $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 ... 
188 | elsif ($line =~ /^\s*\d+\s+\d+\s+(\w+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) { 189 | if (not $opt_device or $opt_device eq $1) { 190 | $struct->{$hostname}{dsk_rds} += $2; 191 | $struct->{$hostname}{dsk_rwt} += $5; 192 | $struct->{$hostname}{dsk_wds} += $6; 193 | $struct->{$hostname}{dsk_wwt} += $9; 194 | } 195 | } 196 | } 197 | $struct->{$hostname}{last_update} = $times{$hostname}; 198 | } 199 | 200 | return $struct; 201 | } 202 | 203 | sub diff_cl_netstat { 204 | my( $s1, $s2 ) = @_; 205 | my %out; 206 | 207 | foreach my $host ( keys %$s1 ) { 208 | if (not defined $s1->{$host}) { 209 | $out{$host} = undef; 210 | #$out{$host} = [ 0, 0, 0, 0, 0, 0 ]; 211 | next; 212 | } 213 | 214 | my $seconds = $s1->{$host}{last_update} - $s2->{$host}{last_update}; 215 | 216 | my @host_traffic; 217 | foreach my $iface ( sort keys %{$s1->{$host}} ) { 218 | if ( $iface eq 'net' ) { 219 | my $rdiff = $s1->{$host}{$iface}{rbytes} - $s2->{$host}{$iface}{rbytes}; 220 | my $tdiff = $s1->{$host}{$iface}{tbytes} - $s2->{$host}{$iface}{tbytes}; 221 | my $tput = ($s1->{$host}{$iface}{rpackets} - $s2->{$host}{$iface}{rpackets}) 222 | + ($s1->{$host}{$iface}{tpackets} - $s2->{$host}{$iface}{tpackets}); 223 | 224 | # counter rollover 225 | if ($s1->{$host}{$iface}{rbytes} < $s2->{$host}{$iface}{rbytes}) { 226 | # this trades off the accuracy of one iteration to avoid having 227 | # to track deltas across iterations 228 | $rdiff = $s2->{$host}{$iface}{rbytes}; 229 | } 230 | if ($s1->{$host}{$iface}{tbytes} < $s2->{$host}{$iface}{tbytes}) { 231 | $tdiff = $s2->{$host}{$iface}{tbytes}; 232 | } 233 | 234 | # 0: read_bytesps, 1: write_bytesps 235 | push @host_traffic, int($rdiff / $seconds), int($tdiff / $seconds); 236 | # 2: total_byteps, 3: 0 (using an array here was silly, should be hash) 237 | push @host_traffic, int($tput / $seconds), 0; 238 | } 239 | } 240 | 241 | # iops 242 | $host_traffic[4] = ($s1->{$host}{dsk_rds} - 
$s2->{$host}{dsk_rds}) / $seconds; 243 | $host_traffic[5] = ($s1->{$host}{dsk_wds} - $s2->{$host}{dsk_wds}) / $seconds; 244 | 245 | # iowait 246 | $host_traffic[6] = ($s1->{$host}{dsk_rwt} - $s2->{$host}{dsk_rwt}); 247 | $host_traffic[7] = ($s1->{$host}{dsk_wwt} - $s2->{$host}{dsk_wwt}); 248 | 249 | $out{$host} = \@host_traffic; 250 | } 251 | return %out; 252 | } 253 | 254 | ### MAIN 255 | 256 | my($iterations, %averages) = (0, ()); 257 | 258 | # these totals are for the lifetime of this process 259 | my($total_net_tx, $total_net_rx) = (0, 0); 260 | my($total_disk_riops, $total_disk_wiops) = (0, 0); 261 | my($total_disk_rwait, $total_disk_wwait) = (0, 0); 262 | 263 | my $previous = cl_netstat(); 264 | print GREEN, "Acquired first round. Output begins in $opt_interval seconds.\n", WHITE; 265 | sleep $opt_interval; 266 | FOREVER: while ( 1 ) { 267 | my $current = cl_netstat(); 268 | $iterations++; 269 | 270 | my %diff = diff_cl_netstat( $current, $previous ); 271 | $previous = $current; 272 | 273 | my $header = sprintf "% ${hostname_pad}s: % 13s % 13s % 14s %8s %8s %8s %8s", 274 | qw( hostname net_packets net_rx_bytes net_tx_bytes dsk_riops dsk_wiops rwait_ms wwait_ms ); 275 | print CYAN, $header, $/, '-' x length($header), $/, RESET; 276 | 277 | # iteration totals 278 | my $host_count = 0; 279 | my($ivl_net_rx_total, $ivl_net_tx_total) = (0, 0); 280 | my($ivl_riops_total, $ivl_wiops_total) = (0, 0); 281 | my($ivl_rwait_total, $ivl_wwait_total) = (0, 0); 282 | 283 | HOST: foreach my $host ( @sorted_host_list ) { 284 | my $hostname = $host; 285 | $hostname =~ s/\.[a-zA-Z]+.*$//; 286 | 287 | # host down, special case 288 | if (not defined $diff{$host}) { 289 | my $bundle = $host_bundles{$host}; 290 | printf "%s% ${hostname_pad}s: disconnected, retry attempt %d in %d seconds ...%s\n", 291 | DKGRAY, $hostname, $bundle->retries || 1, int($bundle->next_attempt - time), RESET; 292 | next HOST; 293 | } 294 | 295 | # network 296 | printf "%s% ${hostname_pad}s: %s% 13s %s% 13s 
%s% 13s%s ", 297 | WHITE, $hostname, 298 | io_c($diff{$host}->[2], 2), # total pps 299 | net_c($diff{$host}->[0]), # read bytes per second 300 | net_c($diff{$host}->[1]), # write bytes per second 301 | RESET; 302 | 303 | # disk iops 304 | printf "%s%8s %s%8s ", 305 | io_c($diff{$host}->[4]), 306 | io_c($diff{$host}->[5]); 307 | 308 | # iowait 309 | my $avg_rwait = $diff{$host}->[6] / ($diff{$host}->[4] || 1); 310 | my $avg_wwait = $diff{$host}->[7] / ($diff{$host}->[5] || 1); 311 | printf "%s%8s %s%8s %s%s%s\n", 312 | io_c($avg_rwait), 313 | io_c($avg_wwait), 314 | DKGRAY, $current->{$host}{comment} || '', RESET; 315 | 316 | # increment totals 317 | $host_count++; 318 | $ivl_net_rx_total += $diff{$host}->[0] + $diff{$host}->[2]; 319 | $ivl_net_tx_total += $diff{$host}->[1] + $diff{$host}->[3]; 320 | $ivl_riops_total += $diff{$host}->[4]; 321 | $ivl_wiops_total += $diff{$host}->[5]; 322 | $ivl_rwait_total += $avg_rwait; 323 | $ivl_wwait_total += $avg_wwait; 324 | } 325 | 326 | printf "%sNetwork total: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s MiB/s)%s\n", 327 | WHITE, net_c($ivl_net_rx_total + $ivl_net_tx_total, 2 * $host_count), WHITE, 328 | net_c($ivl_net_rx_total, $host_count), WHITE, 329 | net_c($ivl_net_tx_total, $host_count), WHITE, 330 | c(($ivl_net_rx_total + $ivl_net_tx_total)/(2**20)), 331 | RESET; 332 | 333 | $total_net_tx += $ivl_net_tx_total; 334 | $total_net_rx += $ivl_net_rx_total; 335 | 336 | printf "%sNetwork average: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s MiB/s)%s\n", 337 | WHITE, net_c(($total_net_rx + $total_net_tx) / $iterations, 2), WHITE, 338 | net_c(($total_net_rx / $iterations) / $host_count), WHITE, 339 | net_c(($total_net_tx / $iterations) / $host_count), WHITE, 340 | c((($total_net_rx + $total_net_tx) / $iterations)/(2**20)), 341 | RESET; 342 | 343 | $total_disk_riops += $ivl_riops_total; 344 | $total_disk_wiops += $ivl_wiops_total; 345 | 346 | printf "%sIOPS: %s% 10s %stotal riops %s% 10s %stotal wiops %s% 6s %savg riops %s% 
6s %savg wiops%s\n", 347 | WHITE, io_c($ivl_riops_total, $host_count), WHITE, 348 | io_c($ivl_wiops_total, $host_count), WHITE, 349 | io_c(($total_disk_riops / $iterations) / $host_count), WHITE, 350 | io_c(($total_disk_wiops / $iterations) / $host_count), WHITE, 351 | RESET; 352 | 353 | $total_disk_rwait += $ivl_rwait_total; 354 | $total_disk_wwait += $ivl_wwait_total; 355 | 356 | printf "%siowait ms: %s% 10s %stotal rwait %s% 10s %stotal wwait %s% 6s %savg rwait %s% 6s %savg wwait%s\n\n", 357 | WHITE, io_c($ivl_rwait_total, $host_count), WHITE, 358 | io_c($ivl_wwait_total, $host_count), WHITE, 359 | io_c(($total_disk_rwait / $iterations) / $host_count), WHITE, 360 | io_c(($total_disk_wwait / $iterations) / $host_count), WHITE, 361 | RESET; 362 | 363 | sleep $opt_interval; 364 | } 365 | # la_c($value): returns (COLOR, $value); the > 10 test has to come before the > 5 test or RED can never be chosen 366 | sub la_c { 367 | my($value, $factor) = @_; 368 | $factor ||= 1; 369 | 370 | if ( $value < 0.80 ) { 371 | return(GREEN, $value); 372 | } 373 | if ( $value < 1.20 ) { 374 | return(CYAN, $value); 375 | } 376 | if ( $value < 3.0 ) { 377 | return(YELLOW, $value); 378 | } 379 | elsif ( $value > 10.0 ) { 380 | return(RED, $value); 381 | } 382 | elsif ( $value > 5.0 ) { 383 | return(MAGENTA, $value); 384 | } 385 | return(CYAN, $value); 386 | } 387 | 388 | sub net_c { 389 | my($value, $factor) = @_; 390 | $factor ||= 1; 391 | my $val = $value / $factor; 392 | my $color = WHITE; 393 | 394 | if ($val < 1_000_000) { 395 | $color = GREEN; 396 | } 397 | elsif ($val < 5_000_000) { 398 | $color = CYAN; 399 | } 400 | elsif ($val < 20_000_000) { 401 | $color = YELLOW; 402 | } 403 | elsif ($val < 50_000_000) { 404 | $color = DKRED; 405 | } 406 | else { 407 | $color = RED; 408 | } 409 | 410 | return($color, c($value)); 411 | } 412 | 413 | sub io_c { 414 | my($value, $factor) = @_; 415 | $factor ||= 1; 416 | my $val = $value / $factor; 417 | my $color = WHITE; 418 | 419 | if ($val < 3000) { 420 | $color = GREEN; 421 | } 422 | elsif ($val < 10_000) { 423 | $color = CYAN; 424 | } 425 | elsif ($val
< 20_000) { 426 | $color = YELLOW; 427 | } 428 | else { 429 | $color = RED; 430 | } 431 | 432 | return($color, c(shift)); 433 | } 434 | 435 | # c($number): truncate to an integer and add thousands separators, e.g. 1234567 -> "1,234,567" 436 | sub c { 437 | my $val = int(shift); 438 | # repeat until no run of four or more leading digits is left, so a value of 439 | # any magnitude is grouped (three fixed passes stopped working at 13 digits) 440 | 1 while $val =~ s/^(-?\d+)(\d{3})/$1,$2/; 441 | return $val; 442 | } 443 | 444 | # vim: et ts=4 sw=4 ai smarttab 445 | 446 | __END__ 447 | 448 | =head1 COPYRIGHT AND LICENSE 449 | 450 | This software is copyright (c) 2007-2011 by Al Tobey. 451 | 452 | This is free software; you can redistribute it and/or modify it under the terms 453 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 454 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 455 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 456 | 457 | =cut 458 | -------------------------------------------------------------------------------- /cl-ping.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-ping.pl # 7 | # Copyright 2007-2011, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | cl-ping.pl - ping the cluster 14 | 15 | =head1 SYNOPSIS 16 | 17 | cl-ping.pl 18 | 19 | =cut 20 | 21 | use strict; 22 | use warnings; 23 | use Getopt::Long; 24 | use Pod::Usage; 25 | 26 | use FindBin qw($Bin); 27 | use lib $Bin; 28 | use DshPerlHostLoop; 29 | 30 | func_loop( sub { 31 | my $hostname = shift; 32 | 33 | my @out = `ping -c 1 -W 2 $hostname 2>&1`; 34 | 35 | if ($?
!= 0) { 36 | print "DOWN: $hostname\n"; 37 | } 38 | 39 | print grep {/bytes from/} @out; 40 | } ); 41 | 42 | # vim: et ts=4 sw=4 ai smarttab 43 | 44 | __END__ 45 | 46 | =head1 COPYRIGHT AND LICENSE 47 | 48 | This software is copyright (c) 2007-2011 by Al Tobey. 49 | 50 | This is free software; you can redistribute it and/or modify it under the terms 51 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 52 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 53 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 54 | 55 | =cut 56 | -------------------------------------------------------------------------------- /cl-psgrep.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | ########################################################################### 4 | # # 5 | # Cluster Tools: cl-psgrep.pl # 6 | # Copyright 2007-2011, Albert P. Tobey # 7 | # # 8 | ########################################################################### 9 | 10 | =head1 NAME 11 | 12 | cl-psgrep.pl - ps/grep across the cluster 13 | 14 | =head1 SYNOPSIS 15 | 16 | This utility, rather than doing the work on its own, simply calls run.pl. Not all of the options are passed through 17 | and some (like -t) are implied. Most of the time, the very simplest usage is best. 
18 | 19 | cl-psgrep.pl snmpd 20 | 21 | cl-psgrep.pl [-d] [-a] [-b] [-n] [-x] 22 | -h: print this message 23 | 24 | =cut 25 | 26 | use Pod::Usage; 27 | use Getopt::Long; 28 | 29 | use FindBin qw($Bin); 30 | use lib $Bin; 31 | use DshPerlHostLoop; 32 | 33 | our $help; 34 | GetOptions( "h" => \$help ); 35 | if ( $help ) { 36 | pod2usage(); 37 | } 38 | pod2usage() if ( @ARGV == 0 ); 39 | 40 | my $proc = pop(@ARGV); 41 | $proc =~ s/^(.)/[$1]/; 42 | 43 | func_loop( \&runit ); 44 | 45 | sub runit { 46 | my $host = shift; 47 | 48 | my @out = ssh( $remote_user.'@'.$host, "ps -ewwwo pid,args" ); 49 | my $fh; 50 | for my $line ( @out ) { 51 | next unless ( $line =~ /$proc/ ); 52 | print "$host: $line\n"; 53 | } 54 | 55 | exit 0; 56 | } 57 | 58 | # vim: et ts=4 sw=4 ai smarttab 59 | 60 | __END__ 61 | 62 | =head1 COPYRIGHT AND LICENSE 63 | 64 | This software is copyright (c) 2007-2011 by Al Tobey. 65 | 66 | This is free software; you can redistribute it and/or modify it under the terms 67 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 68 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 69 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 70 | 71 | =cut 72 | -------------------------------------------------------------------------------- /cl-rolling-reboot.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-rolling-restart.pl # 7 | # Copyright 2013, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | cl-rolling-restart.pl - reboot a cluster as safely as possible 14 | 15 | =head1 SYNOPSIS 16 | 17 | This script attempts to reboot a cluster safely. 
It steps through the host 18 | list serially, rebooting one node at a time and only progresses to the next 19 | node if the previous node comes back online. It is quite verbose on purpose, 20 | with the intent of being run in a screen session and left alone for many hours 21 | or days to do its thing. 22 | 23 | ICMP is used to determine basic network availability. No node is considered 24 | actually available unless a command can be run over ssh. 25 | 26 | Most failures are fatal. Large clusters will typically have a few nodes down 27 | at any given time, so those nodes are skipped if they fail an ICMP test. 28 | 29 | When a run fails, a new list will be written to your ~/.dsh that only contains 30 | the incomplete list, allowing you to resume easily. The name of the file and 31 | the correct command for resuming will be printed. 32 | 33 | cl-rolling-restart.pl --list foo [--timeout 1800] [--wait 60] 34 | --timeout: number of seconds before giving up on a host 35 | --wait: number of seconds to wait between reboots 36 | 37 | =cut 38 | 39 | use Pod::Usage; 40 | use File::Temp qw/tempfile/; 41 | use Getopt::Long; 42 | use IPC::Open3; 43 | use strict; 44 | use warnings; 45 | 46 | use FindBin qw($Bin); 47 | use lib $Bin; 48 | use DshPerlHostLoop; 49 | 50 | our $opt_help = undef; 51 | our $opt_timeout = 1800; # 1/2 hour 52 | our $opt_wait = 60; # one minute 53 | 54 | GetOptions( 55 | "timeout:i" => \$opt_timeout, 56 | "wait:i" => \$opt_wait, 57 | "help" => \$opt_help, "h" => \$opt_help 58 | ); 59 | 60 | if ($opt_help) { 61 | pod2usage(); 62 | } 63 | 64 | =item ping() 65 | 66 | Ping the host once, waiting 3 seconds for a response. Returns 67 | 1 (true) on success and undef (false) on failure. 68 | 69 | This function should move to DshPerlHostLoop at some point. 
70 | 71 | ping($hostname); 72 | 73 | =cut 74 | 75 | sub ping { 76 | my $hostname = shift; 77 | 78 | my $pid = open3(my $w, my $r, my $e, '/bin/ping', '-c', '1', '-W', '3', $hostname); 79 | 80 | waitpid($pid, 0); 81 | 82 | if ($? != 0) { 83 | return undef; 84 | } 85 | 86 | return 1; 87 | } 88 | 89 | =item reboot() 90 | 91 | SSHes in to the host and issues 'sudo reboot'. The return value is 92 | any text printed by the reboot command, but this should not be used 93 | to determine if it was successful. 94 | 95 | reboot($hostname); 96 | 97 | =cut 98 | 99 | sub reboot { 100 | my $host = shift; 101 | my @out = ssh("$remote_user\@$host", "sudo reboot"); 102 | return @out; 103 | } 104 | 105 | =item fail() 106 | 107 | Writes out the incomplete hosts to a new host list, prints some information, 108 | then exits immediately with a return code of 1. 109 | 110 | fail(@hostlist, $index); # will exit 111 | 112 | =cut 113 | 114 | sub fail { 115 | my($hostlist, $i) = @_; 116 | my $now = time; 117 | 118 | # print out a machine list containing only the hosts that failed 119 | # to make resuming the reboot more convenient 120 | open(my $fh, "> $ENV{HOME}/.dsh/machines.reboot-failed-$now"); 121 | for (1; $i<@$hostlist; $i++) { 122 | print $fh "$hostlist->[$i]\n"; 123 | } 124 | close $fh; 125 | 126 | print "\nA machine list containing only the un-rebooted nodes has been written to:\n"; 127 | print "$ENV{HOME}/.dsh/machines.reboot-failed-$now\n"; 128 | print "To resume:\n"; 129 | print "cl-rolling-reboot.pl --list reboot-failed-$now\n\n"; 130 | exit 1; 131 | } 132 | 133 | =item main() 134 | 135 | Try really hard to reboot machines without accidentally taking down more than one 136 | node at a time. 137 | 138 | =cut 139 | 140 | my @hosts = hostlist(); 141 | for (my $i=0; $i<@hosts; $i++) { 142 | # skip hosts that are down 143 | next unless ping($hosts[$i]); 144 | 145 | # failsafe: break and fail if work hangs somewhere 146 | $SIG{'ALRM'} = sub { 147 | print "Timeout. 
Something hung and SIGALRM has fired. Exiting now.\n"; 148 | fail(\@hosts, $i); 149 | }; 150 | alarm($opt_timeout + $opt_wait + 600); 151 | 152 | my $rebooted_at = time; 153 | reboot($hosts[$i]); 154 | print "$hosts[$i]: sent reboot command ...\n"; 155 | 156 | print "Waiting up to five minutes for the host to go offline ...\n"; 157 | my $count = 0; 158 | while (1) { 159 | sleep 1; 160 | my $status = ping($hosts[$i]); 161 | 162 | if ($status) { 163 | $count++; 164 | if ($count % 10 == 0) { 165 | print "$hosts[$i] has not gone offline after $count seconds. Retrying in 10 seconds ...\n"; 166 | } 167 | if ($count > 300) { 168 | print "$hosts[$i] has not gone offline after $count seconds.\n"; 169 | fail(\@hosts, $i); 170 | } 171 | } else { 172 | print "$hosts[$i] is offline. Going to sleep for two minutes ...\n"; 173 | last; 174 | } 175 | } 176 | 177 | # wait two minutes before even trying to ping the box 178 | sleep 120; 179 | 180 | print "Host has been down for at least two minutes. Will start pinging now.\n"; 181 | $count = 0; 182 | my $upcount = 0; 183 | while (1) { 184 | my $status = ping($hosts[$i]); 185 | my $elapsed = time - $rebooted_at; 186 | 187 | if ($status) { 188 | $upcount++; 189 | print "$hosts[$i] network has responded to $upcount pings.\n"; 190 | # require 5 consecutive successes before moving on 191 | if ($upcount == 4) { 192 | last; 193 | } 194 | else { 195 | next; 196 | } 197 | } 198 | 199 | # reset the counter if even a single ping fails 200 | $upcount = 0; 201 | 202 | $count++; 203 | if (not $status && $count % 10 == 0) { 204 | print "$hosts[$i] has been down for $elapsed seconds.\n"; 205 | } 206 | 207 | # wait up to $opt_timeout minutes for the host to come back, if it doesn't, 208 | # stop trying and wait for the operator to clean up 209 | if ($elapsed > $opt_timeout) { 210 | print "Reboot of $hosts[$i] failed, it is still down after $elapsed seconds.\n"; 211 | fail(\@hosts, $i); 212 | } 213 | } 214 | 215 | print "$hosts[$i] network is 
responding. Checking SSH in 5 minutes...\n"; 216 | sleep 300; 217 | 218 | # TODO: retries? 219 | my @out = ssh("$remote_user\@$hosts[$i]", "uptime"); 220 | my $flat = join(' ', map { chomp; $_ } @out); 221 | 222 | if ($flat =~ / up /) { 223 | print "\n-----------------------------------------------------------------------\n"; 224 | print "$hosts[$i] is back online! Moving on.\n"; 225 | print "$hosts[$i] $flat\n"; 226 | print "-----------------------------------------------------------------------\n\n"; 227 | } else { 228 | print "$hosts[$i]: could not run the uptime command.\n"; 229 | fail(\@hosts, $i); 230 | } 231 | 232 | print "Sleeping $opt_wait seconds before moving on to the next host.\n"; 233 | sleep $opt_wait; 234 | } 235 | 236 | # vim: et ts=4 sw=4 ai smarttab 237 | 238 | __END__ 239 | 240 | =head1 COPYRIGHT AND LICENSE 241 | 242 | This software is copyright (c) 2013 by Al Tobey. 243 | 244 | This is free software; you can redistribute it and/or modify it under the terms 245 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 246 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 247 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 248 | 249 | =cut 250 | -------------------------------------------------------------------------------- /cl-rsync.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-rsync.pl # 7 | # Copyright 2007-2011, Albert P. 
Tobey                                                                     #
#                                                                         #
###########################################################################

=head1 NAME

cl-rsync.pl - push files using rsync over ssh, in parallel

=head1 SYNOPSIS

 cl-rsync.pl -l $LOCAL_FILE [-r $REMOTE_FILE] [-x PATTERN] [-L] [-C] [-z] [--delete] [-b] [-t] [-d] [-a] [-n] [-v] [-h]
        -l: local file/directory to rsync - passed through unmodified to rsync
        -r: remote location for rsync to write to - also unmodified
        -x: exclude files/directories (becomes --exclude= on rsync command line),
            may be given more than once
        -L: copy what symlinks point to (becomes --copy-links)
        -C: skip version control files (becomes --cvs-exclude)
        -z: dry run (becomes --dry-run); the rsync command is also printed to STDERR
        --delete: delete remote files that do not exist locally (becomes --delete)
        -n: number of hosts to run on
        -v: verbose output
        -h: print this message

=cut

use Pod::Usage;
use File::Temp qw/tempfile/;
use Getopt::Long;
use strict;
use warnings;


use FindBin qw($Bin);
use lib $Bin;
use DshPerlHostLoop;

our $local_file    = undef;
our $remote_file   = undef;
our $help          = undef;
our $exclude       = undef;
our $copy_symlinks = undef;
our $vcs_exclude   = undef;
our $dryrun        = undef;
our $delete        = undef;

Getopt::Long::Configure("no_ignore_case");
GetOptions(
    "l=s"    => \$local_file,
    "r=s"    => \$remote_file,
    "h"      => \$help,
    "help"   => \$help,
    "x=s@"   => \$exclude,
    "L"      => \$copy_symlinks,
    "C"      => \$vcs_exclude,
    "z"      => \$dryrun,
    "delete" => \$delete
);

# Help is checked first so that "cl-rsync.pl -h" prints the usage text
# instead of complaining about a missing -l.
if ($help) {
    pod2usage({ -exitval => 0 });
}
if (!$local_file) {
    pod2usage({ -message => "no local file specified. dangerous!", -exitval => 1 });
}

# -r is optional; an empty remote path leaves the destination as
# "user@host:" without interpolating an undefined value.
$remote_file = '' unless defined $remote_file;

#print "L: $local_file, R: $remote_file\n";

#if ( @ARGV == 0 or not defined $local_file or not defined $remote_file or not -r $local_file or $help ) {
#    pod2usage();
#}

# Turn every boolean switch into the rsync flag it stands for, or into an
# empty string when the switch was not given.
$delete        = $delete        ? '--delete'      : '';
$vcs_exclude   = $vcs_exclude   ? '--cvs-exclude' : '';
$copy_symlinks = $copy_symlinks ? '--copy-links'  : '';
$dryrun        = $dryrun        ? '--dry-run'     : '';

# "x=s@" always yields an array reference (or leaves $exclude undefined),
# so the patterns only have to be joined into one string.
$exclude = join ' ', map { "--exclude '$_'" } @{ $exclude || [] };

# Per-host callback for func_loop(): runs one rsync for $hostname and
# returns whatever system() returned.
my $routine = sub {
    my $hostname = shift;
    my $command = "rsync $dryrun $delete $copy_symlinks $vcs_exclude $exclude -ave \"ssh $ssh_options\" $local_file $remote_user\@$hostname:$remote_file";
    if ( $dryrun ne '' ) {
        print STDERR "$command\n";
    }

    my $rc = system( $command );
    if ( $? != 0 ) {
        print STDERR "rsync to $hostname failed (wait status $?)\n";
    }
    return $rc;
};

func_loop( $routine );

# vim: et ts=4 sw=4 ai smarttab

__END__

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2007-2011 by Al Tobey.

This is free software; you can redistribute it and/or modify it under the terms
of the Artistic License 2.0.  (Note that, unlike the Artistic License 1.0,
version 2.0 is GPL compatible by itself, hence there is no benefit to having an
Artistic 2.0 / GPL disjunction.)  See the file LICENSE for details.

=cut
--------------------------------------------------------------------------------
/cl-run.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

###########################################################################
#                                                                         #
# Cluster Tools: cl-run.pl                                                #
# Copyright 2007-2011, Albert P. Tobey                                    #
#                                                                         #
###########################################################################

=head1 NAME

cl-run.pl - run commands in parallel across the cluster

=head1 SYNOPSIS

This script parallelizes ssh access to hosts.
17 | 18 | cl-run.pl -s $SCRIPT [-l $FILE] [-r $FILE] [-e $FILE] [-b] [-t] [-d] [-a] [-n] [-h] [-p "SCRIPT_PARAMS"] 19 | cl-run.pl -c '$COMMAND' [-l $FILE] [-r $FILE] [-e $FILE] [-b] [-t] [-d] [-a] [-n] [-h] 20 | -s: script/program to copy out then run 21 | -c: command to run on each host 22 | - this will be written to a mini shell script then pushed out 23 | -l: file to write output into on the local host 24 | -r: place to write program output on the remote hosts 25 | - this only creates a shell variable in the command scripts that can 26 | be redirected to using \$output 27 | -e: file to write errors to 28 | -b: background the jobs on the remote host 29 | - equivalent to \"nohup command &\" 30 | -x: run as root through sudo (requires NOPASSWD: on remote host) 31 | -h: print this message 32 | 33 | =cut 34 | 35 | use strict; 36 | use warnings; 37 | use Pod::Usage; 38 | use Getopt::Long; 39 | use File::Copy qw( copy ); 40 | use IO::Handle; 41 | use Sys::Hostname; 42 | 43 | use FindBin qw($Bin); 44 | use lib $Bin; 45 | use DshPerlHostLoop; 46 | 47 | select(STDERR); 48 | 49 | our $errfile = "&1"; 50 | our $tag_local_output = undef; 51 | our $local_output_file = undef; 52 | our $remote_output = undef; 53 | our $background = undef; 54 | our $command = undef; 55 | our $script = undef; 56 | our $script_parameters = undef; 57 | our $sudo = undef; 58 | our $nowrap = undef; 59 | our $help = undef; 60 | 61 | GetOptions( 62 | "s=s" => \$script, 63 | "p=s" => \$script_parameters, 64 | "c=s" => \$command, 65 | "l=s" => \$local_output_file, 66 | "r=s" => \$remote_output, 67 | "e=s" => \$errfile, 68 | "b" => \$background, 69 | "x" => \$sudo, 70 | "n" => \$nowrap, 71 | "h" => \$help 72 | ); 73 | 74 | if ( $help || (!$command && !$script) ) { 75 | pod2usage(); 76 | } 77 | 78 | if ( $script && $command ) { 79 | pod2usage( -message => "-s and -c are mutually exclusive" ); 80 | } 81 | 82 | func_loop( \&runit ); 83 | 84 | sub runit { 85 | my( $host, $comment ) = @_; 86 | my $cmdfile; 87 
| 88 | if ($nowrap) { 89 | $cmdfile = $command; 90 | } 91 | else { 92 | $cmdfile = create_command_file( $command, $script, { 93 | CNAME => $host, 94 | COMMENT => $comment, 95 | ORIGIN => Sys::Hostname::hostname() 96 | } ); 97 | 98 | my $scp = "/usr/bin/scp -q $ssh_options $cmdfile $remote_user\@$host:$cmdfile"; 99 | if (verbose()) { 100 | $scp =~ s/scp -q/scp/; 101 | print STDERR "COMMAND: $scp\n" if ( verbose() ); 102 | } 103 | 104 | system( $scp ); 105 | if ($? != 0) { 106 | print STDERR RED, "Could not copy command script to $host!", RESET, $/; 107 | } 108 | } 109 | 110 | my $shell = '/bin/bash'; 111 | if ( $sudo ) { 112 | $shell = 'sudo /bin/bash'; 113 | } 114 | 115 | my @out = undef; 116 | if ( $script && $script_parameters ) { 117 | @out = ssh( "$remote_user\@$host", "$shell $cmdfile $script_parameters" ); 118 | } else { 119 | @out = ssh( "$remote_user\@$host", "$shell $cmdfile" ); 120 | } 121 | 122 | my $fh; 123 | if ( $local_output_file ) { 124 | lock(); # uses flock under the hood in DshPerlHostLoop 125 | open( $fh, ">> $local_output_file" ); 126 | foreach my $line ( @out ) { 127 | print $fh "$host: $line\n"; 128 | } 129 | close $fh; 130 | unlock(); 131 | } 132 | else { 133 | foreach my $line ( @out ) { 134 | printf "%s% ${hostname_pad}s : %s%s\n", BLUE, $host, RESET, $line; 135 | } 136 | } 137 | 138 | exit 0; 139 | } 140 | 141 | sub create_command_file { 142 | my( $lcommand, $script, $vars ) = @_; 143 | 144 | my( $fh, $cmdfile ) = my_tempfile(); 145 | if ( $command ) { 146 | print $fh "#!/bin/bash\n\n"; 147 | print $fh "export DEBIAN_FRONTEND=noninteractive\n"; 148 | print $fh "EXIT=0\n"; 149 | 150 | foreach my $var (keys %$vars) { 151 | printf $fh "%s='%s' ; export %s\n", $var, $vars->{$var}, $var; 152 | } 153 | 154 | print $fh "cd /var/tmp\n"; 155 | if ( $remote_output ) { 156 | print $fh "outfile=$remote_output\n"; 157 | } 158 | else { 159 | print $fh "outfile=/var/tmp/`hostname`.output\n"; 160 | } 161 | print $fh "rm -f \$outfile\n"; 162 | if ( 
$background ) { 163 | print $fh "nohup $command &\n"; 164 | } 165 | else { 166 | print $fh "$command\n"; 167 | } 168 | print $fh "EXIT=\$?\n"; 169 | print $fh "rm -f $cmdfile\n"; 170 | print $fh "exit \$EXIT\n"; 171 | close $fh; 172 | } 173 | else { 174 | close $fh; 175 | copy( $script, $cmdfile ); 176 | chmod( 0755, $cmdfile ); 177 | } 178 | 179 | return $cmdfile; 180 | } 181 | 182 | # vim: et ts=4 sw=4 ai smarttab 183 | 184 | __END__ 185 | 186 | =head1 COPYRIGHT AND LICENSE 187 | 188 | This software is copyright (c) 2007-2011 by Al Tobey. 189 | 190 | This is free software; you can redistribute it and/or modify it under the terms 191 | of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 192 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 193 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 194 | 195 | =cut 196 | 197 | -------------------------------------------------------------------------------- /cl-sendfile.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | $|++; 3 | 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: cl-sendfile.pl # 7 | # Copyright 2007-2011, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | 11 | =head1 NAME 12 | 13 | cl-sendfile.pl - push a file over scp, in parallel 14 | 15 | =head1 SYNOPSIS 16 | 17 | Send files to cluster nodes. This also archives those files in the /root/files to make tracking changes to the cluster 18 | from default installs easier. 
19 | 20 | cl-sendfile.pl -a -l /etc/httpd/conf/httpd.conf 21 | cl-sendfile.pl -d -l /tmp/foo.conf -r /usr/local/etc/foo.conf 22 | 23 | cl-sendfile.pl [-l $LOCAL_FILE] [-r $REMOTE_FILE] [-h] [-v] [--incl ] [--excl ] 24 | -l: local file/directory to rsync - passed through unmodified to rsync 25 | -r: remote location for rsync to write to - also unmodified 26 | -x: stage the file as a normal user and relocate using sudo (requires sudo root/NOPASSWD) 27 | -v: verbose output 28 | -h: print this message 29 | =cut 30 | 31 | use Pod::Usage; 32 | use File::Temp qw/tempfile/; 33 | use File::Basename; 34 | use File::Copy; 35 | use Getopt::Long; 36 | use strict; 37 | use warnings; 38 | 39 | use FindBin qw($Bin); 40 | use lib $Bin; 41 | use DshPerlHostLoop; 42 | 43 | our $local_file = undef; 44 | our $remote_file = undef; 45 | our $help = undef; 46 | our $sudo = undef; 47 | our $final_file = undef; 48 | 49 | GetOptions( 50 | "l=s" => \$local_file, 51 | "r=s" => \$remote_file, 52 | "x" => \$sudo, 53 | "h" => \$help 54 | ); 55 | 56 | if ( !$remote_file && $local_file && $local_file =~ m#^/# ) { 57 | $remote_file = $local_file; 58 | } 59 | 60 | unless ( ($local_file && $remote_file && -r $local_file) || $help ) { 61 | pod2usage(); 62 | } 63 | 64 | $final_file = $remote_file; 65 | if ( $sudo ) { 66 | (my $fh, $remote_file) = my_tempfile(); 67 | close $fh; 68 | unlink $remote_file; 69 | } 70 | 71 | func_loop(sub { 72 | my $host = shift; 73 | scp( $local_file, "$host:$remote_file" ); 74 | }); 75 | 76 | if ( $sudo ) { 77 | func_loop(sub { 78 | my $host = shift; 79 | ssh( "$remote_user\@$host", "sudo cp $remote_file $final_file" ); 80 | ssh( "$remote_user\@$host", "rm $remote_file" ); 81 | }); 82 | } 83 | 84 | # vim: et ts=4 sw=4 ai smarttab 85 | 86 | __END__ 87 | 88 | =head1 COPYRIGHT AND LICENSE 89 | 90 | This software is copyright (c) 2007-2011 by Al Tobey. 91 | 92 | This is free software; you can redistribute it and/or modify it under the terms 93 | of the Artistic License 2.0. 
(Note that, unlike the Artistic License 1.0, 94 | version 2.0 is GPL compatible by itself, hence there is no benefit to having an 95 | Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 96 | 97 | =cut 98 | -------------------------------------------------------------------------------- /generate-screen-config.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # intentionally not using /bin/env - this script always works w/ system ruby 3 | # 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: generate-screen-config.rb # 7 | # Copyright 2011-2011, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | # 11 | # Auto-generate the bulk of my screenrc from machine lists. 12 | # 13 | 14 | require 'resolv' 15 | 16 | lists = [ 17 | "cluster1", 18 | "cluster2", 19 | "cluster3", 20 | "cluster4", 21 | "cluster5", 22 | ] 23 | 24 | resolver = Resolv.new 25 | top = [] 26 | gen = [] 27 | bottom = [] 28 | seen = { 29 | :highest => 0, 30 | :begin => false, 31 | :end => false 32 | } 33 | 34 | File.foreach(File.join(ENV['HOME'], ".screenrc-main")) do |line| 35 | if line =~ /## BEGIN GENERATED CONFIG ##/ then 36 | seen[:begin] = true 37 | next 38 | end 39 | 40 | if line =~ /## END GENERATED CONFIG ##/ then 41 | seen[:end] = true 42 | next 43 | end 44 | 45 | next if seen[:begin] and not seen[:end] 46 | 47 | if line =~ /^screen.*\s(\d+)$/ then 48 | num = $1.to_i 49 | if num > seen[:highest] then 50 | seen[:highest] = num 51 | end 52 | end 53 | 54 | if seen[:end] 55 | bottom.push line 56 | else 57 | top.push line 58 | end 59 | end 60 | 61 | count = seen[:highest] + 10 62 | 63 | lists.each do |listname| 64 | file = "machines.#{listname}" 65 | 66 | while count % 10 != 0 do 67 | gen.push "screen -t \"localhost\" #{count}" 68 | gen.push "stuff \". 
~/.profile\\015\"" 69 | count+=1 70 | end 71 | 72 | gen.push "screen -t \"CLUSTER: #{listname}\" #{count}" 73 | gen.push "stuff \". ~/.profile\\015cl-netstat.pl --list #{listname}\"" 74 | count+=1 75 | 76 | File.open(File.join(ENV['HOME'], ".dsh", file), "r") do |f| 77 | f.each_line do |host| 78 | host.chomp! 79 | host, comment = host.split /\s*#\s*/, 2 80 | host = host[/\S+/] 81 | 82 | next if host == nil or host == '' 83 | 84 | match = host.match(/^(#?)(\S+)/) 85 | if match then 86 | if match[1] != nil and match[1].to_s == "#" 87 | host = match[2].to_s 88 | comment = "[DOWN] #{comment}" 89 | end 90 | end 91 | 92 | next if seen.has_key?(host) 93 | seen[host] = true 94 | 95 | gen.push "screen -t \"#{host}\" #{count}" 96 | gen.push "stuff \". ~/.profile\\015nssh --comment '#{comment}' #{host}\\015dstat -lrvn 60\\015\"" 97 | count+=1 98 | end 99 | end 100 | end 101 | 102 | File.open(File.join(ENV['HOME'], ".screenrc-main"), "w") do |f| 103 | top.each do |line| 104 | f.puts line 105 | end 106 | 107 | f.puts "## BEGIN GENERATED CONFIG ##" 108 | gen.each do |line| 109 | f.puts line 110 | end 111 | f.puts "## END GENERATED CONFIG ##" 112 | 113 | bottom.each do |line| 114 | f.puts line 115 | end 116 | end 117 | 118 | # vim: et ts=2 sw=2 ai smarttab 119 | # 120 | # This software is copyright (c) 2011-2011 by Al Tobey. 121 | # 122 | # This is free software; you can redistribute it and/or modify it under the terms 123 | # of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 124 | # version 2.0 is GPL compatible by itself, hence there is no benefit to having an 125 | # Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 
126 | # 127 | 128 | -------------------------------------------------------------------------------- /nssh.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | # intentionally not using /bin/env - this script always works w/ system ruby 3 | # 4 | ########################################################################### 5 | # # 6 | # Cluster Tools: nssh.rb # 7 | # Copyright 2011-2011, Albert P. Tobey # 8 | # # 9 | ########################################################################### 10 | # 11 | # This script does minimal arg parsing to filter out SSH arguments to 12 | # be mostly compatible with plain SSH command line syntax. It's not 13 | # 100% but good enough. 14 | # 15 | # usage: nssh [-1246AaCfgKkMNnqsTtVvXxY] [-b bind_address] [-c cipher_spec] 16 | # [-D [bind_address:]port] [-e escape_char] [-F configfile] 17 | # [-i identity_file] [-L [bind_address:]port:host:hostport] 18 | # [-l login_name] [-m mac_spec] [-O ctl_cmd] [-o option] [-p port] 19 | # [-R [bind_address:]port:host:hostport] [-S ctl_path] 20 | # [-w local_tun[:remote_tun]] [user@]hostname 21 | # 22 | # nssh-specific arguments: 23 | # --list A machine list is a dsh-style ~/.dsh/machines.$listname. 24 | # --comment "[COMMENT]" comment to place after the hostname in the screen title 25 | # 26 | # I ported this to ruby as an exercise, so there may be things that aren't proper. 
#

require 'resolv'

# Holds the options for one nssh invocation and knows how to walk a
# dsh-style machine list ("nssh next").
class NamedSSH
  attr_reader :dsh_config_dir
  attr_reader :dsh_config_file
  attr_reader :dsh_list
  attr_reader :nssh_last_file
  attr_reader :ssh_args
  attr_accessor :hostname
  attr_accessor :comment

  # options: :ssh_args, :dsh_list, :comment, :dsh_config_dir,
  # :nssh_last_file and :hostname.  Raises when the hostname is missing or
  # not longer than three characters.
  def initialize(options = {})
    @ssh_args = options[:ssh_args] || Array.new
    @dsh_list = options[:dsh_list] || "machines.list"
    @comment  = options[:comment]  || ""

    @dsh_config_dir  = options[:dsh_config_dir]
    @dsh_config_file = File.join(@dsh_config_dir, @dsh_list)
    @nssh_last_file  = options[:nssh_last_file]
    @hostname        = options[:hostname]

    unless @hostname != nil and @hostname.length > 3
      raise "Invalid hostname '#@hostname'."
    end
  end

  # most of the arg parsers looked painful to do what this does;
  # it needs to stash & ignore SSH options, while parsing out --list
  # and grab the hostname
  #
  # Returns a hash with :ssh_args, :dsh_list, :hostname and :comment.
  # Raises when an option that needs a value is the last argument.
  def self.parse_options
    ssh_args = Array.new
    dsh_list = nil
    hostname = nil
    comment  = ""

    # value belonging to the option at ARGV[idx]
    value_after = lambda do |idx|
      raise "Option #{ARGV[idx]} requires a value." if idx + 1 >= ARGV.size
      ARGV[idx + 1]
    end

    # manual argument parsing - be intelligent about preserving ssh
    # options while adding custom options for nssh
    idx = 0
    while idx < ARGV.size
      arg = ARGV[idx]

      # ssh switches
      if arg.match(/^-[1246AaCfgKkMNnqsTtVvXxY]$/)
        ssh_args << arg

      # ssh options that take a value
      elsif arg.match(/^-[bcDeFiLlmOopRSw]$/)
        ssh_args << arg << value_after.call(idx)
        idx += 1

      # allow specification of a .dsh list in my style where --list foobar resolves to
      # ~/.dsh/machines.foobar to use with "nssh --list foobar next"
      elsif arg.match(/^--list/)
        dsh_list = "machines.#{value_after.call(idx)}"
        idx += 1

      # --comment
      elsif arg.match(/^--comment/)
        comment = value_after.call(idx)
        idx += 1

      # --user, e.g. nssh next --user root
      elsif arg.match(/^--user/)
        ssh_args << '-o' << "User #{value_after.call(idx)}"
        idx += 1

      # user@hostname is a definite match
      # split it and use -o User instead because hostname needs to be standalone
      elsif arg.match(/^\w+@[-\.\w]+$/)
        user, hostname = arg.split '@'
        ssh_args << '-o' << "User #{user}"

      # a bare, uncaptured argument is likely the hostname
      else
        hostname = arg
      end

      idx += 1
    end

    return {
      :ssh_args => ssh_args,
      :dsh_list => dsh_list,
      :hostname => hostname,
      :comment  => comment
    }
  end

  # Splits one machine-list line ("host # comment") into host and comment.
  # Returns nil, nil for nil input, blank lines and lines that have nothing
  # in front of the '#'.
  def parse_host(host)
    return nil, nil if host == nil

    h, comment = host.chomp.split(/\s*#\s*/, 2)
    h = h.to_s[/\S+/]
    return nil, nil if h.nil?

    return h, comment
  end

  # read the last host from "nssh next" iteration from a file
  def read_last()
    host = nil
    comment = nil

    if File.exist?(@nssh_last_file)
      File.open(@nssh_last_file, 'r') do |f|
        host, comment = parse_host f.gets
      end
    end

    return host, comment
  end

  # save the last host for "nssh next" iteration
  def save_last
    File.open(@nssh_last_file, 'w') do |f|
      f.puts @hostname
    end
  end

  # read the dsh machines file and return the next host in the list
  # after whatever was in the @nssh_last_file
  #
  # Returns host, comment.  Blank and comment-only lines are ignored.  When
  # there is no saved host, or the saved host is not part of this list, the
  # first host is returned.  Raises when the list file is missing or empty
  # and when the saved host is the last one in the list.
  def read_next
    last, _ = read_last()

    unless File.exist?(@dsh_config_file)
      raise "#@dsh_config_file does not exist on the filesystem. --list #@dsh_list is not valid."
    end

    entries = File.readlines(@dsh_config_file).map { |line| parse_host(line) }
    entries = entries.reject { |candidate, _| candidate.nil? }

    if entries.empty?
      raise "#@dsh_config_file does not contain any hosts."
    end

    # last host is not defined, return first in file
    return entries.first if last.nil?

    position = entries.index { |candidate, _| candidate == last }
    if position.nil?
      $stderr.puts "#{last} is not in #@dsh_config_file, starting from the top."
      return entries.first
    end

    if position == entries.size - 1
      raise "Reached end of #@dsh_config_file. There is no next host!"
    end

    return entries[position + 1]
  end
end

# use class method to parse ARGV
options = NamedSSH.parse_options

nssh = NamedSSH.new(
  :dsh_config_dir => File.join(ENV['HOME'], ".dsh"),
  :nssh_last_file => File.join(ENV['HOME'], '.nssh-last'),
  :hostname => options[:hostname],
  :ssh_args => options[:ssh_args],
  :dsh_list => options[:dsh_list],
  :comment => options[:comment]
)

# reset the position in the machine list to the top
if nssh.hostname == "reset"
  # nothing to remove when the list has not been walked yet
  File.unlink(nssh.nssh_last_file) if File.exist?(nssh.nssh_last_file)
  exit
end

# choose the next host in the machine list, great for firing up
# a ton of screen windows in a row in an already-running screen
# If I'm logging into a whole cluster in an existing screen session, I'll load
# "nssh next --list $cluster" into my clipboard then ...
# ctrl-a n, , , ctrl-a n , ...
# (my screenrc spawns with 256 open & ready shells)
if nssh.hostname == "next"
  nssh.hostname, nssh.comment = nssh.read_next
  nssh.save_last
end

# set the terminal title in GNU Screen
if nssh.comment != nil and nssh.comment != "" then
  puts "\033k#{nssh.hostname} [#{nssh.comment}]\033\\"
else
  puts "\033k#{nssh.hostname}\033\\"
end

# set an environment variable to the selected hostname to
# pass through SSH.
# LC_* is accepted by default in most ssh servers
# this is useful for setting a PS1 with a meaningful CNAME on the remote
# host via .profile (esp handy for EC2 boxen)
# Note: this is really just a proof-of-concept and will not work in
# hardened environments.
# e.g.
# ps1host=$(hostname)
# [ -n "$LC_UI_HOSTNAME" ] && ps1host=$LC_UI_HOSTNAME
# if [[ ${EUID} == 0 ]] ; then
#   PS1="\\[\\033[01;31m\\]$ps1host\\[\\033[01;34m\\] \\W \\$\\[\\033[00m\\] "
# else
#   PS1="\\[\\033[01;32m\\]\\u@$ps1host\\[\\033[01;34m\\] \\w \\$\\[\\033[00m\\] "
# fi
ENV['LC_UI_HOSTNAME'] = nssh.hostname

# resolve hostnames to IP then back to the IP's name because
# I don't see where resolv lets you just get the CNAME
# this helps for stuff where we use CNAME's to point at e.g. EC2 hosts
# to switch to the EC2 name before ssh'ing but still set all the display
# stuff to the human hostname
#
# I use this to make ssh match config entries in ~/.ssh/config which is generated
# based on the EC2 instance name. This saves me having to do backflips to provide
# extra aliases in ssh_config while still letting me nssh to the CNAME's.
#
# When either lookup fails the name is used exactly as it was given, so that
# hosts without forward or reverse DNS can still be reached.
resolver = Resolv.new
begin
  addr = resolver.getaddress nssh.hostname
  realname = resolver.getname addr
rescue Resolv::ResolvError
  realname = nssh.hostname
end

# Replaces this process with the given command.  Array arguments are
# expanded in place; because the command and its arguments are handed over
# as separate words, no shell is involved.
def exec_no_sh(*args)
  Kernel.exec(*args.flatten)
end

# run SSH
exec_no_sh "ssh", nssh.ssh_args, realname

# vim: et ts=2 sw=2 ai smarttab
#
# This software is copyright (c) 2011-2011 by Al Tobey.
279 | # 280 | # This is free software; you can redistribute it and/or modify it under the terms 281 | # of the Artistic License 2.0. (Note that, unlike the Artistic License 1.0, 282 | # version 2.0 is GPL compatible by itself, hence there is no benefit to having an 283 | # Artistic 2.0 / GPL disjunction.) See the file LICENSE for details. 284 | 285 | --------------------------------------------------------------------------------