├── .gitignore ├── truncate ├── watch-queue ├── .editorconfig ├── purge-queue ├── timestamp ├── gc-git-repos ├── remove-line ├── deny ├── add-line ├── display-queue ├── notify-if-queue-becomes-large ├── syslog-tee ├── flock ├── LICENSE.TXT ├── config.yml.example ├── confine-to-rsync ├── silence-unless-failed ├── udp-to-syslog ├── set-section ├── collect-iotop-periodically ├── check-web-apps ├── backup-mysql ├── collect-ps-periodically ├── backup-postgresql ├── permit ├── rotate-files ├── shared.rb ├── set-capistrano-permissions ├── monitor-cpu ├── run └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | config.yml 3 | -------------------------------------------------------------------------------- /truncate: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for F in "$@"; do 3 | echo -n > "$F" 4 | done 5 | -------------------------------------------------------------------------------- /watch-queue: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dir=`dirname "$0"` 3 | cd "$dir" 4 | exec watch -d ./display-queue 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://EditorConfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | trailing_comma = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /purge-queue: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | if !ARGV[0] 3 | STDERR.puts "You must specify a queue name." 4 | exit 1 5 | end 6 | require 'rubygems' 7 | require 'bunny' 8 | b = Bunny.new 9 | b.start 10 | q = b.queue(ARGV[0], :durable => true) 11 | q.purge 12 | -------------------------------------------------------------------------------- /timestamp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function cleanup() 4 | { 5 | pids=$(jobs -p) 6 | if [[ "$pids" != "" ]]; then 7 | kill $pids 8 | fi 9 | } 10 | 11 | trap cleanup EXIT 12 | "$@" 2>&1 | perl -p -MIO::Handle -MPOSIX -e 'BEGIN {$!=1; $|=1} $_ = strftime("%F %T -- ", localtime) . 
$_' 13 | exit ${PIPESTATUS[0]} 14 | -------------------------------------------------------------------------------- /gc-git-repos: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'etc' 4 | 5 | DIRS = Dir[*config(:glob)] 6 | DIRS.each do |dir| 7 | puts "# Garbage collecting #{dir}" 8 | Dir.chdir(dir) do 9 | stat = File.stat(dir) 10 | username = Etc.getpwuid(stat.uid).name 11 | quiet_sh "su -c 'git gc' #{username}" 12 | end 13 | puts 14 | end 15 | -------------------------------------------------------------------------------- /remove-line: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: utf-8 3 | 4 | def remove_line(filename, line) 5 | lines = File.open(filename, "r") do |f| 6 | f.binmode 7 | f.read.split("\n", -1) 8 | end 9 | if index = lines.find_index(line) 10 | lines.delete_at(index) 11 | end 12 | File.open(filename, "w") do |f| 13 | f.binmode 14 | f.write(lines.join("\n")) 15 | end 16 | end 17 | 18 | if ARGV.size < 2 19 | abort "Usage: remove-line <FILENAME> <LINE>" 20 | else 21 | remove_line(ARGV[0], ARGV[1 .. ARGV.size - 1].join(" ")) 22 | end 23 | -------------------------------------------------------------------------------- /deny: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | def help 3 | puts "Usage: deny <USERNAME> <DIR>" 4 | puts "Removes access for USERNAME to directory DIR." 5 | exit 1 6 | end 7 | 8 | def sh(command, *args) 9 | puts "#{command} #{args.join(' ')}" 10 | if !system(command, *args) 11 | STDERR.puts "*** ERROR" 12 | exit 1 13 | end 14 | end 15 | 16 | help if ARGV.size != 2 17 | username, dir = ARGV 18 | puts "cd #{dir}" 19 | Dir.chdir(dir) do 20 | sh "setfacl -R -x user:#{username} ." 21 | sh "find -type d -print0 | xargs -0 -n 1000 -r setfacl -d -x user:#{username}" 22 | end 23 | -------------------------------------------------------------------------------- /add-line: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: utf-8 3 | 4 | def add_line(filename, line) 5 | lines = File.open(filename, 'r') do |f| 6 | f.binmode 7 | f.read.split("\n", -1) 8 | end 9 | if !lines.include?(line) 10 | if lines.last && lines.last.empty? 11 | lines.pop 12 | end 13 | lines << "#{line}\n" 14 | File.open(filename, "w") do |f| 15 | f.binmode 16 | f.write(lines.join("\n")) 17 | end 18 | end 19 | end 20 | 21 | if ARGV.size < 2 22 | abort "Usage: add-line <FILENAME> <LINE>" 23 | else 24 | add_line(ARGV[0], ARGV[1 .. 
ARGV.size - 1].join(" ")) 25 | end 26 | -------------------------------------------------------------------------------- /display-queue: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | puts "Name Messages = Ready + Unack Consumers Memory (MB)" 3 | puts "-------------------------------------------------------------------------------" 4 | lines = `rabbitmqctl -q list_queues name messages messages_ready messages_unacknowledged consumers memory` 5 | lines = lines.split("\n") 6 | lines.each do |line| 7 | name, messages, ready, unack, consumers, memory = line.split(/[ \t]+/) 8 | messages = messages.to_i 9 | ready = ready.to_i 10 | unack = unack.to_i 11 | consumers = consumers.to_i 12 | memory = memory.to_i / 1024.0 / 1024 13 | printf("%-23s %-8d = %-5d + %-5d %-9d %1.f\n", name, messages, ready, unack, consumers, memory) 14 | end 15 | -------------------------------------------------------------------------------- /notify-if-queue-becomes-large: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | THRESHOLD = config(:threshold) 4 | FROM = config(:from) 5 | TO = config(:to) 6 | SUBJECT = config(:subject) 7 | 8 | warnings = [] 9 | `rabbitmqctl list_queues -q name messages`.split("\n").each do |line| 10 | name, messages = line.split(/[ \t]+/) 11 | messages = messages.to_i 12 | if messages > THRESHOLD 13 | warnings << "Queue '#{name}' has more than #{THRESHOLD} messages: #{messages}" 14 | end 15 | end 16 | 17 | if !warnings.empty? 18 | IO.popen("sendmail -t", "w") do |f| 19 | f.puts "To: #{TO}" 20 | f.puts "From: #{FROM}" 21 | f.puts "Subject: #{SUBJECT}" 22 | f.puts 23 | f.puts warnings.join("\n") 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /syslog-tee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: binary 3 | 4 | STDOUT.sync = STDERR.sync = true 5 | 6 | class LoggerExited < StandardError 7 | end 8 | 9 | trap "SIGCHLD" do 10 | raise LoggerExited 11 | end 12 | 13 | a, b = IO.pipe 14 | b.sync = true 15 | 16 | begin 17 | pid = fork do 18 | b.close 19 | STDIN.reopen(a) 20 | exec("logger", *ARGV) 21 | end 22 | a.close 23 | 24 | buf = '' 25 | while !STDIN.eof? 26 | STDIN.readpartial(1024, buf) 27 | STDOUT.write(buf) 28 | begin 29 | b.write(buf) 30 | rescue Errno::EPIPE 31 | break 32 | end 33 | end 34 | trap "SIGCHLD", "DEFAULT" 35 | rescue LoggerExited 36 | end 37 | 38 | b.close 39 | if pid 40 | begin 41 | Process.waitpid(pid) 42 | exit($?.exitstatus || 1) 43 | rescue Errno::ECHILD 44 | exit 1 45 | end 46 | else 47 | exit 1 48 | end 49 | -------------------------------------------------------------------------------- /flock: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | def usage 4 | puts "Usage: ./flock <--shared|--exclusive> [OPTIONS...]" 5 | end 6 | 7 | lock_type = ARGV[0] 8 | lock_file = ARGV[1] 9 | ARGV.shift 10 | ARGV.shift 11 | 12 | if !lock_file || (lock_type != '--shared' && lock_type != '--exclusive') || ARGV.empty? 13 | usage 14 | exit 1 15 | end 16 | 17 | trap('CHLD', 'DEFAULT') 18 | 19 | begin 20 | f = File.open(lock_file, 'w') 21 | rescue SystemCallError => e 22 | STDERR.puts "Cannot open #{lock_file} for writing: #{e}" 23 | exit 1 24 | end 25 | 26 | f.flock(lock_type == '--shared' ? 
File::LOCK_SH : File::LOCK_EX) 27 | pid = fork 28 | if pid.nil? 29 | f.close 30 | exec(*ARGV) 31 | else 32 | begin 33 | Process.waitpid(pid) 34 | if $? 35 | exit($?.exitstatus) 36 | end 37 | rescue Errno::ECHILD, Errno::ESRCH 38 | # Do nothing 39 | rescue SignalException => e 40 | Process.kill(e.signo, pid) 41 | exit 1 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Phusion 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /config.yml.example: -------------------------------------------------------------------------------- 1 | backup-mysql: 2 | max-backups: 10 3 | # Uncomment to configure encryption: 4 | #encrypt: 5 | # # Your encryption key's email address. 6 | # key: backup@yourserver.com 7 | # # Email addresses of people to encrypt for. 8 | # recipients: 9 | # - admin1@yourserver.com 10 | # - admin2@yourserver.com 11 | 12 | set-capistrano-permissions: 13 | capistrano_dir: /u/apps 14 | www_user: www-data 15 | 16 | gc-git-repos: 17 | glob: 18 | - /u/apps/**/.git 19 | 20 | notify-if-queue-becomes-large: 21 | threshold: 1000 22 | to: your@email.com 23 | from: your@server.com 24 | subject: Queue going out of control 25 | 26 | monitor-cpu: 27 | total_threshold: 90 28 | per_core_threshold: 10 29 | interval: 60 30 | to: your@gmail.com 31 | from: your@server.com 32 | subject: CPU usage out of control 33 | 34 | check-web-apps: 35 | to: your@gmail.com 36 | from: your@server.com 37 | # The %s is replaced by the web app's domain name. 38 | subject: Web application %s is down 39 | web_apps: 40 | - url: http://app1.com/ 41 | substring: Welcome to App1.com. 42 | #username: admin 43 | #password: 123456 44 | #- url: http://app2.com/ 45 | #- url: http://app3.com/ -------------------------------------------------------------------------------- /confine-to-rsync: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | RSYNC_COMMAND_START = "rsync --server --sender " 3 | RSYNC_OPTIONS_FORMAT = /-[a-z0-9\.]+ /i 4 | 5 | if ARGV.empty? 
6 | abort "*** You must pass a list of directories that rsync is allowed to access" 7 | end 8 | 9 | command = ENV['SSH_ORIGINAL_COMMAND'].to_s.dup 10 | if !command 11 | abort "*** $SSH_ORIGINAL_COMMAND not set" 12 | end 13 | 14 | if !command.start_with?(RSYNC_COMMAND_START) 15 | abort "*** Only rsync is allowed" 16 | end 17 | command.gsub!(/^#{Regexp.escape RSYNC_COMMAND_START}/, '') 18 | 19 | if command !~ /\A#{RSYNC_OPTIONS_FORMAT}/i 20 | abort "*** Invalid rsync options detected" 21 | end 22 | command.gsub!(/\A#{RSYNC_OPTIONS_FORMAT}/, '') 23 | 24 | dirs = command.split(/ +/) 25 | if dirs.size < 2 26 | abort "*** No directories passed to rsync" 27 | end 28 | 29 | if dirs.first != "." 30 | abort "*** Invalid rsync directory arguments" 31 | end 32 | dirs.shift 33 | 34 | allowed_dirs = [] 35 | ARGV.each do |dir| 36 | allowed_dirs << File.expand_path(dir).sub(/\/\Z/, '') 37 | end 38 | 39 | dirs.each do |dir| 40 | dir = File.expand_path(dir).sub(/\/\Z/, '') 41 | if !allowed_dirs.include?(dir) 42 | abort "*** rsync is not allowed to access #{dir}" 43 | end 44 | end 45 | 46 | exec(ENV['SSH_ORIGINAL_COMMAND']) 47 | -------------------------------------------------------------------------------- /silence-unless-failed: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Executes the given command, silencing all its output. 3 | # Its output is only printed to STDERR if the command failed. 4 | # 5 | # The output of the given command is buffered in a temporary file. 6 | # You can also invoke this tool with the following parameters: 7 | # -f FILENAME Buffer to a specific file. The file is truncated on opening. 8 | # -a FILENAME Like -a, but appends to the file. 9 | 10 | if [[ "$1" == "-f" ]]; then 11 | buffer_file="$2" 12 | should_delete=0 13 | should_append=0 14 | shift 15 | shift 16 | elif [[ "$1" == "-a" ]]; then 17 | buffer_file="$2" 18 | should_delete=0 19 | should_append=1 20 | shift 21 | shift 22 | else 23 | buffer_file=`mktemp /tmp/buffer.XXXXXXXXXX` 24 | should_delete=1 25 | should_append=0 26 | fi 27 | 28 | function cleanup() 29 | { 30 | pids=$(jobs -p) 31 | if [[ "$pids" != "" ]]; then 32 | kill $pids 33 | fi 34 | if [[ $should_delete = 1 ]]; then 35 | rm -f "$buffer_file" 36 | fi 37 | } 38 | 39 | trap cleanup EXIT 40 | 41 | if [[ $should_append = 1 ]]; then 42 | "$@" >>"$buffer_file" 2>&1 43 | exit_code=$? 44 | else 45 | "$@" >"$buffer_file" 2>&1 46 | exit_code=$? 47 | fi 48 | 49 | if [[ $exit_code != 0 ]]; then 50 | cat "$buffer_file" >&2 51 | fi 52 | exit $exit_code 53 | -------------------------------------------------------------------------------- /udp-to-syslog: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'socket' 3 | require 'logger' 4 | require 'syslog' 5 | require 'optparse' 6 | 7 | options = { 8 | :ident => 'udp-to-syslog', 9 | :port => 6666 10 | } 11 | 12 | parser = OptionParser.new do |opts| 13 | nl = "\n" + ' ' * 37 14 | opts.banner = "Usage: udp-to-syslog [options]" 15 | opts.separator "Forwards UDP messages to syslog." 16 | opts.separator "" 17 | 18 | opts.separator "Options:" 19 | opts.on("-i", "--ident NAME", 20 | "Use the given syslog ident. Default: udp-to-syslog") do |value| 21 | options[:ident] = value 22 | end 23 | opts.on("-p", "--port PORT", 24 | "Listen on the given port. Default: 6666") do |value| 25 | options[:port] = value 26 | end 27 | end 28 | begin 29 | parser.parse! 
30 | rescue OptionParser::ParseError => e 31 | STDERR.puts e 32 | STDERR.puts 33 | STDERR.puts "Please see '--help' for valid options." 34 | exit 1 35 | end 36 | 37 | include Syslog::Constants 38 | 39 | Syslog.open(options[:ident], 0, LOG_USER) 40 | logger = Logger.new(STDERR) 41 | 42 | sock = UDPSocket.new 43 | sock.bind('0.0.0.0', options[:port]) 44 | begin 45 | while true 46 | msg, addr = sock.recvfrom(1024 * 128) 47 | Syslog.log(LOG_CRIT, "#{addr[2]}: #{msg}") 48 | logger.warn("#{addr[2]}: #{msg}") 49 | end 50 | rescue Interrupt 51 | rescue SignalException => e 52 | raise if e.signo != Signal.list['TERM'] 53 | end 54 | -------------------------------------------------------------------------------- /set-section: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: utf-8 3 | 4 | def set_section(filename, section_name, content) 5 | section_begin_str = "###### BEGIN #{section_name} ######" 6 | section_end_str = "###### END #{section_name} ######" 7 | 8 | lines = File.open(filename, 'r') do |f| 9 | f.binmode 10 | f.read.split("\n", -1) 11 | end 12 | 13 | content.chomp! 14 | 15 | start_index = lines.find_index(section_begin_str) 16 | if !start_index 17 | # Section is not in file. 18 | return if content.empty? 19 | lines << section_begin_str 20 | lines << content 21 | lines << section_end_str 22 | else 23 | end_index = start_index + 1 24 | while end_index < lines.size && lines[end_index] != section_end_str 25 | end_index += 1 26 | end 27 | if end_index == lines.size 28 | # End not found. Pretend like the section is empty. 29 | end_index = start_index 30 | end 31 | lines.slice!(start_index, end_index - start_index + 1) 32 | if !content.empty? 33 | lines.insert(start_index, section_begin_str, content, section_end_str) 34 | end 35 | end 36 | 37 | File.open(filename, "w") do |f| 38 | f.binmode 39 | if lines.last && lines.last.empty? 40 | lines.pop 41 | end 42 | f.write(lines.join("\n")) 43 | f.write("\n") 44 | end 45 | end 46 | 47 | if ARGV.size != 2 48 | abort "Usage: set-section
" 49 | else 50 | set_section(ARGV[0], ARGV[1], STDIN.read) 51 | end 52 | -------------------------------------------------------------------------------- /collect-iotop-periodically: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Collects the output of 'iotop' periodically, and dumps them to 3 | # the given directory, with one file per dump. 4 | # 5 | # This program requires root privileges, because iotop requires it. 6 | # 7 | # ## Usage syntax 8 | # 9 | # collect-iotop-periodically 10 | # 11 | # `DUMP DIRECTORY` specifies where to dump to. This directory must 12 | # already exist. 13 | # 14 | # `INTERVAL` specifies the interval between dumps, in seconds. You 15 | # can also specify a unit suffix, e.g. `5m` (5 minutes) or `2h` (2 hours). 16 | # 17 | # ## Example 18 | # 19 | # Create a dump directory and run the script in the background, 20 | # collecting every 5 minutes: 21 | # 22 | # sudo mkdir ~/iotop-dumps 23 | # sudo nohup collect-iotop-periodically ~/iotop-dumps 5m & 24 | # 25 | # At any point, feel free to stop the script... 26 | # 27 | # ps aux | grep collect-iotop-periodically 28 | # sudo kill 29 | # 30 | # ...and tar the dump directory and send it to someone for analysis: 31 | # 32 | # sudo tar -czf iotop-dumps.tar.gz ~/iotop-dumps 33 | 34 | set -e 35 | 36 | TARGET_DIR="$1" 37 | INTERVAL="$2" 38 | if [[ -z "$TARGET_DIR" || -z "$INTERVAL" ]]; then 39 | echo 'Usage: collect-iotop-periodically ' 40 | exit 1 41 | fi 42 | 43 | function cleanup() 44 | { 45 | # shellcheck disable=SC2155 46 | local PIDS=$(jobs -p) 47 | if [[ -n "$PIDS" ]]; then 48 | # shellcheck disable=SC2086 49 | kill $PIDS || true 50 | fi 51 | } 52 | 53 | trap cleanup EXIT 54 | 55 | while true; do 56 | DATE=$(date --rfc-3339=seconds) 57 | iotop -obPn 1 > "$TARGET_DIR/$DATE iotop.txt" 58 | sleep "$INTERVAL" 59 | done 60 | -------------------------------------------------------------------------------- /check-web-apps: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'uri' 4 | require 'net/http' 5 | require 'net/https' 6 | 7 | def check(web_app) 8 | attempts = 0 9 | begin 10 | response = http_get(web_app['url'], web_app['username'], web_app['password']) 11 | rescue SocketError => e 12 | if attempts < 3 13 | attempts += 1 14 | sleep 1 15 | retry 16 | else 17 | notify_down(web_app, e) 18 | response = nil 19 | end 20 | rescue Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ETIMEDOUT, Timeout::Error => e 21 | notify_down(web_app, e) 22 | response = nil 23 | end 24 | if response && !response.body.include?(web_app['substring']) 25 | notify_down(web_app) 26 | end 27 | rescue => e 28 | STDERR.puts "An exception occurred while checking for #{web_app['url']}" 29 | raise e 30 | end 31 | 32 | def notify_down(web_app, exception = nil) 33 | domain = URI.parse(web_app['url']).host 34 | message = "A web application is down!\n" 35 | if exception 36 | message << "Exception: #{exception}\n" 37 | end 38 | message << "URL: #{web_app['url']}\n" 39 | email(config(:from), config(:to), 40 | sprintf("#{config(:subject)}", domain), 41 | message) 42 | end 43 | 44 | def http_get(url, username = nil, password = nil) 45 | headers = {} 46 | uri = URI.parse(url) 47 | http = Net::HTTP.new(uri.host, uri.port) 48 | if uri.scheme == 'https' 49 | http.use_ssl = true 50 | http.verify_mode = OpenSSL::SSL::VERIFY_NONE 51 | end 52 | http.start do 53 | req = 
Net::HTTP::Get.new(uri.path, headers) 54 | req.basic_auth(username, password) if username 55 | http.request(req) 56 | end 57 | end 58 | 59 | def start 60 | config(:web_apps).each do |web_app| 61 | check(web_app) 62 | end 63 | end 64 | 65 | start 66 | -------------------------------------------------------------------------------- /backup-mysql: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | BACKUP_DIR_ROOT = "/var/backups/mysql" 4 | MAX_BACKUPS = optional_config('max-backups', 10) 5 | 6 | if enc = optional_config(:encrypt) 7 | abort "*** ERROR: please set the configuration option backup-mysql.encrypt.key" if !enc['key'] 8 | abort "*** ERROR: please set the configuration option backup-mysql.encrypt.recipients" if !enc['recipients'] || enc['recipients'].empty? 9 | encrypt_command = "gpg --batch --pinentry-mode loopback --sign --encrypt" 10 | if path = enc['passphrase_file'] 11 | encrypt_command << " --passphrase-file #{Shellwords.escape path}" 12 | end 13 | encrypt_command << " -u '#{enc['key']}'" 14 | enc['recipients'].each do |recipient| 15 | encrypt_command << " --recipient '#{recipient}'" 16 | end 17 | end 18 | 19 | databases = `echo show databases | mysql`.strip.split("\n") 20 | databases.shift 21 | databases.delete("information_schema") 22 | databases.delete("mysql") 23 | 24 | now = Time.now.strftime("%Y-%m-%d-%H:%M:%S") 25 | backup_dir = "#{BACKUP_DIR_ROOT}/#{now}" 26 | 27 | sh "mkdir -p #{backup_dir}" 28 | for database in databases 29 | dump_command = "mysqldump --single-transaction -C #{database} | gzip --best" 30 | if encrypt_command 31 | dump_command << " | #{encrypt_command}" 32 | extension = "sql.gz.gpg" 33 | else 34 | extension = "sql.gz" 35 | end 36 | sh "#{dump_command} | #{pv_or_cat} > #{backup_dir}/#{database}.#{extension}" 37 | end 38 | 39 | puts "Cleaning up, keeping only #{MAX_BACKUPS} most recent backups..." 40 | dirs = Dir["#{BACKUP_DIR_ROOT}/*"].sort.reverse 41 | keep = dirs[0..MAX_BACKUPS] 42 | delete = dirs - keep 43 | delete.each do |dir| 44 | sh "rm -rf #{dir}" 45 | end 46 | sh "chmod -R o-rwx #{BACKUP_DIR_ROOT}" 47 | -------------------------------------------------------------------------------- /collect-ps-periodically: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Collects the output of 'ps' periodically, and dumps them to 3 | # the given directory, with one file per dump. 4 | # 5 | # ## Usage syntax 6 | # 7 | # collect-ps-periodically 8 | # 9 | # `DUMP DIRECTORY` specifies where to dump to. This directory must 10 | # already exist. 11 | # 12 | # `INTERVAL` specifies the interval between dumps, in seconds. You 13 | # can also specify a unit suffix, e.g. `5m` (5 minutes) or `2h` (2 hours). 14 | # 15 | # ## Example 16 | # 17 | # Create a dump directory and run the script in the background, 18 | # collecting every 5 minutes: 19 | # 20 | # mkdir ~/ps-dumps 21 | # nohup collect-ps-periodically ~/ps-dumps 5m & 22 | # 23 | # At any point, feel free to stop the script... 
24 | # 25 | # ps aux | grep collect-ps-periodically 26 | # kill 27 | # 28 | # ...and tar the dump directory and send it to someone for analysis: 29 | # 30 | # tar -czf ps-dumps.tar.gz ~/ps-dumps 31 | 32 | set -e 33 | 34 | TARGET_DIR="$1" 35 | INTERVAL="$2" 36 | if [[ -z "$TARGET_DIR" || -z "$INTERVAL" ]]; then 37 | echo 'Usage: collect-ps-periodically ' 38 | exit 1 39 | fi 40 | 41 | function cleanup() 42 | { 43 | # shellcheck disable=SC2155 44 | local PIDS=$(jobs -p) 45 | if [[ -n "$PIDS" ]]; then 46 | # shellcheck disable=SC2086 47 | kill $PIDS || true 48 | fi 49 | } 50 | 51 | trap cleanup EXIT 52 | 53 | while true; do 54 | DATE=$(date --rfc-3339=seconds) 55 | ps -Ao 'user,pid,ppid,lwp,%cpu,%mem,rss,maj_flt,start_time,wchan,stat,flag,nlwp,cmd' --forest > "$TARGET_DIR/$DATE procs.txt" 56 | ps -ALo 'user,pid,ppid,lwp,%cpu,%mem,rss,maj_flt,start_time,wchan,stat,flag,cmd' > "$TARGET_DIR/$DATE threads.txt" 57 | sleep "$INTERVAL" 58 | done 59 | -------------------------------------------------------------------------------- /backup-postgresql: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | BACKUP_DIR_ROOT = "/var/backups/postgresql" 4 | MAX_BACKUPS = optional_config('max-backups', 10) 5 | 6 | if enc = optional_config(:encrypt) 7 | abort "*** ERROR: please set the configuration option backup-postgresql.encrypt.key" if !enc['key'] 8 | abort "*** ERROR: please set the configuration option backup-postgresql.encrypt.recipients" if !enc['recipients'] || enc['recipients'].empty? 9 | encrypt_command = "gpg --sign --encrypt -u '#{enc['key']}'" 10 | enc['recipients'].each do |recipient| 11 | encrypt_command << " --recipient '#{recipient}'" 12 | end 13 | end 14 | 15 | databases = quiet_capture("psql postgres -t <<<'SELECT datname FROM pg_database WHERE datistemplate = false'").strip.split("\n") 16 | databases.map! { |db| db.strip } 17 | databases.delete("postgres") 18 | if exclude_databases = optional_config(:exclude_databases) 19 | databases -= exclude_databases 20 | end 21 | 22 | now = Time.now.strftime("%Y-%m-%d-%H:%M:%S") 23 | backup_dir = "#{BACKUP_DIR_ROOT}/#{now}" 24 | 25 | sh "mkdir -p #{backup_dir}" 26 | for database in databases 27 | dump_command = "pg_dump --clean --quote-all-identifiers --serializable-deferrable --dbname #{database} | gzip --best" 28 | if encrypt_command 29 | dump_command << " | #{encrypt_command}" 30 | extension = "sql.gz.gpg" 31 | else 32 | extension = "sql.gz" 33 | end 34 | sh "set -o pipefail; #{dump_command} | #{pv_or_cat} > #{backup_dir}/#{database}.#{extension}" 35 | end 36 | 37 | puts "Cleaning up, keeping only #{MAX_BACKUPS} most recent backups..." 38 | dirs = Dir["#{BACKUP_DIR_ROOT}/*"].sort.reverse 39 | keep = dirs[0..MAX_BACKUPS] 40 | delete = dirs - keep 41 | delete.each do |dir| 42 | sh "rm -rf #{dir}" 43 | end 44 | sh "chmod -R o-rwx #{BACKUP_DIR_ROOT}" 45 | -------------------------------------------------------------------------------- /permit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'optparse' 4 | 5 | def parse_options 6 | options = {} 7 | parser = OptionParser.new do |opts| 8 | nl = "\n" + ' ' * 37 9 | opts.banner = "Usage: permit [options]" 10 | opts.separator "Give USERNAME read-only or read-write permission to the given directories." 
11 | opts.separator "" 12 | 13 | opts.separator "Options:" 14 | opts.on("--read-write", 15 | "Give read-write access. Default is#{nl}" + 16 | "read-only unless this option is given.") do 17 | options[:read_write] = true 18 | end 19 | end 20 | begin 21 | parser.parse! 22 | rescue OptionParser::ParseError => e 23 | puts e 24 | puts 25 | puts "Please see '--help' for valid options." 26 | exit 1 27 | end 28 | 29 | if options[:help] 30 | puts parser 31 | exit 32 | elsif ARGV.size < 2 33 | puts parser 34 | exit 1 35 | else 36 | return options 37 | end 38 | end 39 | 40 | options = parse_options 41 | username, *dirs = ARGV 42 | dirs.each do |dir| 43 | print_activity "cd #{dir}" 44 | Dir.chdir(dir) do 45 | executable_files = "/tmp/executable-files.#{$$}" 46 | sh "find -type f -executable -print0 > #{executable_files}" 47 | if options[:read_write] 48 | sh "find -type f -print0 | xargs -0 -n 1000 -r setfacl -m user:#{username}:rw-" 49 | sh "find -type d -print0 | xargs -0 -n 1000 -r setfacl -m user:#{username}:rwx" 50 | sh "find -type d -print0 | xargs -0 -n 1000 -r setfacl -d -m user:#{username}:rwx" 51 | else 52 | sh "find -type f -print0 | xargs -0 -n 1000 -r setfacl -m user:#{username}:r-" 53 | sh "find -type d -print0 | xargs -0 -n 1000 -r setfacl -m user:#{username}:r-x" 54 | sh "find -type d -print0 | xargs -0 -n 1000 -r setfacl -d -m user:#{username}:r-x" 55 | end 56 | sh "find -type f -print0 | xargs -0 -n 1000 -r chmod -x" 57 | sh "cat #{executable_files} | xargs -0 -n 1000 -r chmod +x" 58 | sh "rm -f #{executable_files}" 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /rotate-files: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'optparse' 4 | 5 | class RotateFiles 6 | def initialize(argv) 7 | @argv = argv.dup 8 | end 9 | 10 | def run 11 | parse_options 12 | prepare 13 | create_file 14 | cleanup 15 | end 16 | 17 | private 18 | def parse_options 19 | @max = 50 20 | 21 | parser = OptionParser.new do |opts| 22 | nl = "\n#{' ' * 37}" 23 | opts.banner = "Usage: rotate-files [OUTPUT SUFFIX] [OPTIONS]" 24 | opts.separator 'Copy INPUT to the path specified by OUTPUT_PREFIX + timestamp + OUTPUT_SUFFIX,' 25 | opts.separator 'then delete excess files that match the same pattern.' 26 | opts.separator '' 27 | opts.separator 'Example: rotate-files backup.tar.gz /backups/backup- .tar.gz' 28 | opts.separator 'Creates /backups/backup-.tar.gz, deletes old backup files matching' 29 | opts.separator 'this pattern.' 
30 | opts.separator '' 31 | 32 | opts.on('--max NUMBER', Integer, "Maximum number of files to keep.#{nl}" \ 33 | "Default: 50") do |val| 34 | @max = val 35 | end 36 | opts.on('--dry-run', "Print what will happen, but do not#{nl}" \ 37 | "actually do it") do 38 | @dry_run = true 39 | end 40 | opts.on('-h', '--help', 'Display this help message') do 41 | @help = true 42 | end 43 | end 44 | 45 | begin 46 | parser.parse!(@argv) 47 | rescue OptionParser::ParseError => e 48 | STDERR.puts "*** ERROR: #{e}" 49 | abort parser.to_s 50 | end 51 | 52 | if @help 53 | puts parser 54 | exit 55 | end 56 | 57 | if @argv.size < 2 58 | puts parser 59 | abort 60 | end 61 | 62 | @input = @argv[0] 63 | @output_prefix = @argv[1] 64 | @output_suffix = @argv[2] 65 | end 66 | 67 | def prepare 68 | now = Time.now.strftime('%Y-%m-%d-%H:%M:%S') 69 | @output = "#{@output_prefix}#{now}#{@output_suffix}" 70 | @output_dir = File.dirname(@output) 71 | end 72 | 73 | def create_file 74 | puts "Creating #{@output}" 75 | if @dry_run 76 | puts 'Dry running, not actually creating that file' 77 | else 78 | sh 'cp', @input, @output 79 | end 80 | end 81 | 82 | def cleanup 83 | puts "Cleaning up, keeping only #{@max} most recent files" 84 | files = sorted_files_eligible_for_cleanup 85 | 86 | # Determine which files to keep 87 | keep = files[0..@max] 88 | if !keep.include?(@output) 89 | keep << @output 90 | end 91 | 92 | # Determine which files to delete, then do that 93 | delete = files - keep 94 | if delete.empty? 95 | puts 'Nothing to remove' 96 | else 97 | delete.each do |path| 98 | if @dry_run 99 | puts "Dry running; would have removed #{path}" 100 | else 101 | sh 'rm', path 102 | end 103 | end 104 | end 105 | end 106 | 107 | def sorted_files_eligible_for_cleanup 108 | files = [] 109 | Dir["#{@output_dir}/*"].each do |path| 110 | if File.file?(path) && 111 | path.start_with?(@output_prefix) && 112 | path.end_with?(@output_suffix) 113 | files << path 114 | end 115 | end 116 | files.sort! 117 | files.reverse! 118 | files 119 | end 120 | end 121 | 122 | RotateFiles.new(ARGV).run 123 | -------------------------------------------------------------------------------- /shared.rb: -------------------------------------------------------------------------------- 1 | require 'shellwords' 2 | 3 | TOOLS_DIR = File.expand_path(File.dirname(__FILE__)) 4 | OLD_PATH = ENV['PATH'] 5 | ENV['PATH'] = "#{TOOLS_DIR}:#{ENV['PATH']}" 6 | if ENV['TOOL_LEVEL'] 7 | TOOL_LEVEL = ENV['TOOL_LEVEL'].to_i 8 | else 9 | TOOL_LEVEL = 0 10 | end 11 | ENV['TOOL_LEVEL'] = (TOOL_LEVEL + 1).to_s 12 | 13 | def print_activity(message) 14 | if TOOL_LEVEL == 0 15 | if STDOUT.tty? 16 | puts "\e[1m# #{message}\e[22m" 17 | else 18 | puts "# #{message}" 19 | end 20 | else 21 | puts "#{' ' * TOOL_LEVEL}-> #{message}" 22 | end 23 | end 24 | 25 | def sh(command, *args) 26 | print_activity "#{command} #{args.join(' ')}" 27 | quiet_sh(command, *args) 28 | end 29 | 30 | def quiet_sh(command, *args) 31 | if args.empty? 32 | result = system('bash', '-o', 'pipefail', '-c', command) 33 | else 34 | command_str = Shellwords.join([command] + args) 35 | result = system('bash', '-o', 'pipefail', '-c', command_str) 36 | end 37 | if !result 38 | abort "*** COMMAND FAILED: #{command} #{args.join(' ')}".strip 39 | end 40 | end 41 | 42 | def quiet_capture(command, *args) 43 | if args.empty? 
44 | `/bin/bash -c #{Shellwords.escape command}` 45 | else 46 | command_str = Shellwords.join([command] + args) 47 | `/bin/bash -c #{Shellwords.escape command_str}` 48 | end 49 | end 50 | 51 | # Check whether the specified command is in $PATH, and return its 52 | # absolute filename. Returns nil if the command is not found. 53 | # 54 | # This function exists because system('which') doesn't always behave 55 | # correctly, for some weird reason. 56 | def find_command(name) 57 | name = name.to_s 58 | ENV['PATH'].to_s.split(File::PATH_SEPARATOR).detect do |directory| 59 | path = File.join(directory, name) 60 | if File.file?(path) && File.executable?(path) 61 | return path 62 | end 63 | end 64 | return nil 65 | end 66 | 67 | # Returns "pv" if that command is installed, or "cat" if not. 68 | # "pv" is the Pipe Viewer tool, very useful for displaying 69 | # progress bars in pipe operations (apt-get install pv). 70 | def pv_or_cat 71 | if find_command('pv') 72 | return 'pv' 73 | else 74 | return 'cat' 75 | end 76 | end 77 | 78 | def load_config 79 | require 'yaml' 80 | tool_name = File.basename($0) 81 | filenames = [ 82 | "#{TOOLS_DIR}/config/#{tool_name}.yml", 83 | "#{TOOLS_DIR}/config/config.yml", 84 | "#{TOOLS_DIR}/config.yml", 85 | "/etc/phusion-server-tools/#{tool_name}.yml", 86 | "/etc/phusion-server-tools/config.yml", 87 | "/etc/phusion-server-tools.yml" 88 | ] 89 | config_filename = nil 90 | filenames.each do |filename| 91 | if File.exist?(filename) 92 | config_filename = filename 93 | break 94 | end 95 | end 96 | if !config_filename 97 | STDERR.puts "*** ERROR: you must create a config file at one of the following locations:\n" 98 | filenames.each do |filename| 99 | STDERR.puts " * #{filename}" 100 | end 101 | STDERR.puts "Please see #{TOOLS_DIR}/config.yml.example for an example." 102 | exit 1 103 | end 104 | all_config = YAML.load_file(config_filename) 105 | $TOOL_CONFIG = (all_config && all_config[tool_name]) || {} 106 | end 107 | 108 | def config(name) 109 | load_config if !$TOOL_CONFIG 110 | value = $TOOL_CONFIG[name.to_s] 111 | if !value 112 | abort "*** ERROR: configuration option #{File.basename($0)}.#{name} not set." 113 | end 114 | return value 115 | end 116 | 117 | def optional_config(name, default = nil) 118 | load_config if !$TOOL_CONFIG 119 | name = name.to_s 120 | if $TOOL_CONFIG.has_key?(name) 121 | return $TOOL_CONFIG[name] 122 | else 123 | return default 124 | end 125 | end 126 | 127 | def hostname 128 | @hostname ||= `hostname`.strip 129 | end 130 | 131 | def email(from, to, subject, body) 132 | to = to.join(", ") if to.is_a?(Array) 133 | IO.popen("sendmail -t", "w") do |f| 134 | f.puts "To: #{to}" 135 | f.puts "From: #{from}" if from 136 | f.puts "Subject: #{subject}" 137 | f.puts 138 | f.puts body 139 | end 140 | end 141 | -------------------------------------------------------------------------------- /set-capistrano-permissions: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'optparse' 4 | 5 | def parse_options 6 | options = {} 7 | parser = OptionParser.new do |opts| 8 | nl = "\n" + ' ' * 37 9 | opts.banner = "Usage: set-capistrano-permissions [options]" 10 | opts.separator "" 11 | 12 | opts.separator "Options:" 13 | opts.on("--for-app-dir DIR", 14 | "Set only permissions for the given app#{nl}" + 15 | "directory") do |value| 16 | options[:for_app_dir] = value 17 | end 18 | end 19 | begin 20 | parser.parse! 
21 | rescue OptionParser::ParseError => e 22 | puts e 23 | puts 24 | puts "Please see '--help' for valid options." 25 | exit 1 26 | end 27 | 28 | if options[:help] 29 | puts parser 30 | exit 31 | else 32 | return options 33 | end 34 | end 35 | 36 | # Set the permissions on a /u/apps/.../releases/... directory. 37 | def set_permissions_on_release_dir(dir) 38 | # Deny web server access to everything except for a few things. 39 | sh "deny #{WWW_USER} #{dir}" 40 | 41 | # Executable access to the directory itself 42 | # so that the web server can access subdirectories. 43 | sh "setfacl -m user:#{WWW_USER}:--x #{dir}" 44 | 45 | # Read-only access to the 'public' directory so that 46 | # the web server can serve static assets. 47 | if File.directory?("#{dir}/public") 48 | sh "permit #{WWW_USER} #{dir}/public" 49 | end 50 | 51 | # Executable access to the 'config' directory so that 52 | # Phusion Passenger's app autodetection works. 53 | if File.directory?("#{dir}/config") 54 | sh "setfacl -m user:#{WWW_USER}:--x #{dir}/config" 55 | end 56 | end 57 | 58 | # Set the permissions on a /u/apps/... directory. 59 | def set_permissions_on_app_dir(dir) 60 | # Give the web server read-only access to everything. 61 | # We tighten up permissions in later commands. 62 | sh "permit #{WWW_USER} #{dir}" 63 | 64 | # Make the application directory itself executable-only 65 | # by the web server. 66 | if File.directory?("#{dir}/releases") || File.directory?("#{dir}/shared") 67 | sh "setfacl -m user:#{WWW_USER}:--x #{dir}" 68 | sh "setfacl -d -m user:#{WWW_USER}:--x #{dir}" 69 | end 70 | 71 | # Make the 'releases' directory executable-only by the web server 72 | # and set correct permissions on each release subdirectory. 73 | if File.directory?("#{dir}/releases") 74 | sh "setfacl -m user:#{WWW_USER}:--x #{dir}/releases" 75 | Dir["#{dir}/releases/*"].each do |release_subdir| 76 | set_permissions_on_release_dir(release_subdir) 77 | end 78 | end 79 | 80 | # Deny web server access to everything in the 'shared' 81 | # directory, with some exceptions. 82 | if File.directory?("#{dir}/shared") 83 | sh "deny #{WWW_USER} #{dir}/shared" 84 | 85 | # If there's a Capistrano repository cache, then give 86 | # it the same permissions as a release directory because 87 | # Capistrano actually makes a release directory but copying 88 | # cached-copy with 'cp -dpR', thereby copying all ACLs too. 
89 | if File.directory?("#{dir}/shared/cached-copy") 90 | set_permissions_on_release_dir("#{dir}/shared/cached-copy") 91 | end 92 | 93 | # If you store attachment files in the 'shared' 94 | # directory then you can allow read-only access to that: 95 | # sh "setfacl -m user:#{WWW_USER}:--x #{dir}/shared" 96 | # sh "permit #{dir}/shared/attachments" 97 | end 98 | end 99 | 100 | def start 101 | options = parse_options 102 | if options[:for_app_dir] 103 | sh "setfacl -m user:#{WWW_USER}:--x #{CAPISTRANO_DIR}" 104 | sh "setfacl -d -m user:#{WWW_USER}:r-x #{CAPISTRANO_DIR}" 105 | set_permissions_on_app_dir(options[:for_app_dir]) 106 | else 107 | sh "chmod -R g+w,o-rwx #{CAPISTRANO_DIR}" 108 | sh "setfacl -m user:#{WWW_USER}:--x #{CAPISTRANO_DIR}" 109 | sh "setfacl -d -m user:#{WWW_USER}:r-x #{CAPISTRANO_DIR}" 110 | 111 | Dir["#{CAPISTRANO_DIR}/*"].each do |dir| 112 | set_permissions_on_app_dir(dir) 113 | end 114 | end 115 | end 116 | 117 | CAPISTRANO_DIR = config(:capistrano_dir) 118 | WWW_USER = config(:www_user) 119 | 120 | start 121 | -------------------------------------------------------------------------------- /monitor-cpu: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | require 'pp' 4 | 5 | class MonitorCpu 6 | def initialize(interval, total_threshold, per_core_threshold) 7 | @interval = interval 8 | @total_threshold = total_threshold 9 | @per_core_threshold = per_core_threshold 10 | @start_time = Time.now 11 | @iteration = 1 12 | @cpu_usages = [] 13 | end 14 | 15 | def start 16 | while true 17 | measure_and_take_action! 18 | end 19 | end 20 | 21 | def measure_and_take_action! 22 | while @cpu_usages.size >= @interval 23 | @cpu_usages.shift 24 | end 25 | @cpu_usages << measure_cpu_usage 26 | if current_time - @start_time >= @interval * 60 && (exceeded_cpu_id = threshold_exceeded?) 27 | send_alert(exceeded_cpu_id) 28 | sleep(60) 29 | @start_time = current_time 30 | @iteration = 1 31 | @cpu_usages.clear 32 | return true 33 | else 34 | sleep_time = next_wake_time - current_time 35 | sleep(sleep_time) if sleep_time > 0 36 | @iteration += 1 37 | return false 38 | end 39 | end 40 | 41 | def measurement_points 42 | return @cpu_usages.size 43 | end 44 | 45 | def average(cpu_id = :all) 46 | total = 0 47 | @cpu_usages.each do |cpu_usage| 48 | total += cpu_usage[cpu_id] 49 | end 50 | return total / @cpu_usages.size 51 | end 52 | 53 | def threshold_exceeded? 54 | @cpu_usages.last.each_key do |cpu_id| 55 | usage = average(cpu_id) 56 | if cpu_id == :all && usage >= @total_threshold || usage >= @per_core_threshold 57 | return cpu_id 58 | end 59 | end 60 | return nil 61 | end 62 | 63 | private 64 | def sleep(sleep_time) 65 | Kernel.sleep(sleep_time) 66 | end 67 | 68 | def current_time 69 | return Time.now 70 | end 71 | 72 | def query_cpu_stats 73 | result = {} 74 | stat = File.read("/proc/stat").split("\n") 75 | stat.each do |line| 76 | # Filter out the CPU statistics lines. 77 | if line =~ /^cpu(\d*) / 78 | cpu_id = $1.empty? ? :all : $1.to_i 79 | columns = line.split(/\s+/) 80 | # Discard the "cpu" prefix. 
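# A CPU line in /proc/stat looks roughly like "cpu0 4705 150 1120 16250 520 22 78 0 0 0":
# after the label, the fields are jiffies spent in user, nice, system, idle, iowait and so on,
# which is why columns[3] (after the shift below) is read as the idle time and the sum of
# all fields as the total.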
81 | columns.shift 82 | 83 | total = 0 84 | idle = columns[3].to_i 85 | columns.each do |column| 86 | total += column.to_i 87 | end 88 | result[cpu_id] = { :total => total, :idle => idle } 89 | end 90 | end 91 | return result 92 | end 93 | 94 | def measure_cpu_usage(sleep_time = 1) 95 | stats1 = query_cpu_stats 96 | sleep sleep_time 97 | stats2 = query_cpu_stats 98 | 99 | result = {} 100 | stats1.each_key do |cpu_id| 101 | diff_total = stats2[cpu_id][:total] - stats1[cpu_id][:total] 102 | diff_idle = stats2[cpu_id][:idle] - stats1[cpu_id][:idle] 103 | if diff_total == 0 104 | diff_usage = 0.0 105 | else 106 | diff_usage = 100 * (diff_total - diff_idle) / diff_total.to_f 107 | end 108 | result[cpu_id] = diff_usage 109 | end 110 | 111 | return result 112 | end 113 | 114 | def next_wake_time 115 | return @start_time + @iteration * 60 116 | end 117 | 118 | def send_alert(exceeded_cpu_id) 119 | average = sprintf("%.1f", self.average(exceeded_cpu_id)) 120 | if exceeded_cpu_id == :all 121 | message = "Average total CPU usage over the past #{@interval} minutes is #{average}% (>= #{@total_threshold}%)." 122 | else 123 | message = "Average CPU usage of core #{exceeded_cpu_id} over the past #{@interval} minutes is #{average}% (>= #{@per_core_threshold}%)." 124 | end 125 | email(config(:from), config(:to), config(:subject), message) 126 | end 127 | end 128 | 129 | if defined?(Spec) || defined?(RSpec) 130 | describe MonitorCpu do 131 | before :each do 132 | @monitor = MonitorCpu.new(5, 90, 90) 133 | @monitor.stub!(:sleep).and_return do |sleep_time| 134 | new_time = @monitor.send(:current_time) + sleep_time 135 | @monitor.stub!(:current_time).and_return(new_time) 136 | end 137 | @monitor.stub!(:email) 138 | @monitor.stub!(:config).with(:from).and_return("noreply@phusion.nl") 139 | @monitor.stub!(:config).with(:to).and_return("info@phusion.nl") 140 | @monitor.stub!(:config).with(:subject).and_return("CPU usage exceeded!") 141 | @now = Time.now 142 | end 143 | 144 | def mock_time(minutes_passed) 145 | @monitor.stub!(:current_time).and_return(@now + minutes_passed * 60) 146 | end 147 | 148 | def mock_cpu_usage(usage) 149 | @monitor.stub!(:measure_cpu_usage).and_return(usage) 150 | end 151 | 152 | it "sends an alert and resets the state when the average total CPU over a period of time exceeds the threshold" do 153 | 5.times do |i| 154 | mock_time(i) 155 | mock_cpu_usage(:all => 100, 0 => 100) 156 | @monitor.measure_and_take_action!.should be_false 157 | @monitor.measurement_points.should == i + 1 158 | @monitor.average.should == 100 159 | end 160 | 161 | mock_time(5) 162 | mock_cpu_usage(:all => 100, 0 => 100) 163 | @monitor.measure_and_take_action!.should be_true 164 | @monitor.measurement_points.should == 0 165 | 166 | 5.times do |i| 167 | mock_time(6 + i) 168 | mock_cpu_usage(:all => 100, 0 => 100) 169 | @monitor.measure_and_take_action!.should be_false 170 | @monitor.measurement_points.should == i + 1 171 | @monitor.average.should == 100 172 | end 173 | 174 | mock_time(11) 175 | mock_cpu_usage(:all => 100, 0 => 100) 176 | @monitor.measure_and_take_action!.should be_true 177 | @monitor.measurement_points.should == 0 178 | end 179 | 180 | it "sends an alert and resets the state when the average CPU of a single core over a period of time exceeds the threshold" do 181 | 5.times do |i| 182 | mock_time(i) 183 | mock_cpu_usage(:all => 50, 0 => 100, 1 => 0) 184 | @monitor.measure_and_take_action!.should be_false 185 | @monitor.measurement_points.should == i + 1 186 | @monitor.average.should == 50 187 | 
@monitor.average(0).should == 100 188 | @monitor.average(1).should == 0 189 | end 190 | 191 | mock_time(5) 192 | mock_cpu_usage(:all => 50, 0 => 100, 1 => 0) 193 | @monitor.measure_and_take_action!.should be_true 194 | @monitor.measurement_points.should == 0 195 | 196 | 5.times do |i| 197 | mock_time(6 + i) 198 | mock_cpu_usage(:all => 50, 0 => 100, 1 => 0) 199 | @monitor.measure_and_take_action!.should be_false 200 | @monitor.measurement_points.should == i + 1 201 | @monitor.average.should == 50 202 | @monitor.average(0).should == 100 203 | @monitor.average(1).should == 0 204 | end 205 | 206 | mock_time(11) 207 | mock_cpu_usage(:all => 50, 0 => 100, 1 => 0) 208 | @monitor.measure_and_take_action!.should be_true 209 | @monitor.measurement_points.should == 0 210 | end 211 | 212 | it "deletes old measurement points that are no longer relevant" do 213 | 5.times do |i| 214 | mock_time(i) 215 | mock_cpu_usage(:all => 50, 0 => 50) 216 | @monitor.measure_and_take_action!.should be_false 217 | @monitor.measurement_points.should == i + 1 218 | end 219 | 220 | mock_time(5) 221 | mock_cpu_usage(:all => 70, 0 => 70) 222 | @monitor.measure_and_take_action!.should be_false 223 | @monitor.measurement_points.should == 5 224 | @monitor.average.should == (70 + 50 + 50 + 50 + 50) / 5.0 225 | 226 | mock_time(6) 227 | mock_cpu_usage(:all => 100, 0 => 100) 228 | @monitor.measure_and_take_action!.should be_false 229 | @monitor.measurement_points.should == 5 230 | @monitor.average.should == (100 + 70 + 50 + 50 + 50) / 5.0 231 | end 232 | end 233 | 234 | else 235 | config(:from) 236 | config(:to) 237 | config(:subject) 238 | begin 239 | monitor = MonitorCpu.new(config(:interval), config(:total_threshold), config(:per_core_threshold)) 240 | trap 'QUIT' do 241 | pp monitor 242 | STDOUT.flush 243 | end 244 | monitor.start 245 | rescue Interrupt 246 | # Do nothing. 247 | rescue SignalException => e 248 | if e.message == "SIGTERM" 249 | # Do nothing 250 | else 251 | raise 252 | end 253 | end 254 | end 255 | -------------------------------------------------------------------------------- /run: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require File.expand_path(File.dirname(__FILE__) + '/shared') 3 | ENV['PATH'] = OLD_PATH 4 | STDOUT.sync = STDERR.sync = true 5 | 6 | require 'rubygems' 7 | require 'optparse' 8 | 9 | OPTIONS = {} 10 | 11 | def parse_options 12 | parser = OptionParser.new do |opts| 13 | nl = "\n" + ' ' * 37 14 | opts.banner = "Usage: ./run [options] COMMAND..." 15 | opts.separator "Run a command with various options." 
16 | opts.separator "" 17 | 18 | opts.separator "Options:" 19 | opts.on("--log-file FILE", "Log to file in addition to printing to terminal") do |value| 20 | OPTIONS[:log_file] = value 21 | end 22 | opts.on("--append", "Append to log file instead of overwriting it.") do 23 | OPTIONS[:append] = true 24 | end 25 | opts.on("--syslog", "Log to syslog in additional to printing to terminal") do 26 | OPTIONS[:syslog] = true 27 | end 28 | opts.on("--pv", "Pipe output through pv") do 29 | OPTIONS[:pv] = true 30 | end 31 | opts.on("--program-name NAME", "Run command with the given argv[0]") do |value| 32 | OPTIONS[:program_name] = value 33 | end 34 | opts.on("--status-file FILE") do |value| 35 | OPTIONS[:status_file] = value 36 | end 37 | opts.on("--lock-file FILE") do |value| 38 | OPTIONS[:lock_file] = value 39 | end 40 | opts.on("--email-to ADDRESSES", "Separated by comma") do |value| 41 | OPTIONS[:email_to] = value 42 | end 43 | end 44 | begin 45 | parser.parse! 46 | rescue OptionParser::ParseError => e 47 | STDERR.puts e 48 | STDERR.puts 49 | STDERR.puts "Please see '--help' for valid options." 50 | exit 1 51 | end 52 | 53 | if ARGV.size < 1 54 | STDERR.puts parser 55 | exit 1 56 | end 57 | end 58 | 59 | def can_exec_directly? 60 | return !OPTIONS[:log_file] && !OPTIONS[:syslog] && !OPTIONS[:pv] && !OPTIONS[:status_file] && !OPTIONS[:lock_file] && !OPTIONS[:email_to] 61 | end 62 | 63 | def start 64 | parse_options 65 | 66 | begin 67 | lock_file = create_lock_file 68 | create_log_file 69 | write_status_file('') 70 | 71 | STDIN.reopen("/dev/null", "r") 72 | 73 | if has_sink? 74 | main_process = spawn(main_command, 75 | :out => :pipe, 76 | :err => :pipe) 77 | sink_process = spawn_sink(main_process) 78 | [:in, :out].each do |channel| 79 | main_process[channel].close if main_process[channel] 80 | main_process.delete(channel) 81 | end 82 | elsif can_exec_directly? 83 | exec(*main_command) 84 | else 85 | command = spawn(main_command) 86 | end 87 | 88 | while true 89 | begin 90 | Process.waitpid(main_process[:pid]) 91 | exit_code = ($?.exitstatus || 2) 92 | main_process.delete(:pid) 93 | break 94 | rescue Errno::ECHILD 95 | exit_code = 1 96 | main_process.delete(:pid) 97 | break 98 | rescue SignalException => e 99 | signame = get_signal_name(e) 100 | Process.kill(signame, main_process[:pid]) 101 | end 102 | end 103 | 104 | # TODO: are we supposed to wait for the output sink process? 105 | # If we only wait for the command then the output sink process 106 | # may not have finished processing all the output yet. 107 | # But if we wait for both, and the command spawns subprocesses, 108 | # then the output sink process doesn't exit until all those 109 | # subprocesses have also exited. Maybe we should provide a 110 | # command line option for this. 111 | if sink_process 112 | sink_process[:pids].each do |pid| 113 | begin 114 | Process.waitpid(pid) 115 | rescue Errno::ECHILD 116 | # Ignore exception. 
117 | end 118 | end 119 | sink_process = nil 120 | end 121 | 122 | write_status_file(exit_code) 123 | if OPTIONS[:email_to] 124 | email( 125 | OPTIONS[:email_from], 126 | OPTIONS[:email_to], 127 | "Command finished with exit code #{exit_code}: #{ARGV.join(' ')}", 128 | 129 | "Command: #{ARGV.join(' ')}\n" + 130 | "Exit code: #{exit_code}\n" + 131 | "Host: #{`hostname`.strip}\n" + 132 | "Log file: #{OPTIONS[:log_file]}\n" 133 | ) 134 | end 135 | exit(exit_code) 136 | 137 | rescue SystemExit 138 | raise 139 | 140 | rescue Exception => e 141 | if OPTIONS[:log_file] 142 | f = File.open(OPTIONS[:log_file], 'a') 143 | else 144 | f = IO.popen("logger -t '#{program_name}:runner[#{$$}]'", "w") 145 | end 146 | begin 147 | f.puts("#{e.class}: #{e.message || e}\n " + 148 | e.backtrace.join("\n ")) 149 | ensure 150 | f.close 151 | end 152 | Process.kill('SIGTERM', main_process[:pid]) if main_process && main_process[:pid] 153 | raise e 154 | 155 | ensure 156 | delete_lock_file(lock_file) if lock_file 157 | end 158 | end 159 | 160 | def spawn(command, options) 161 | result = {} 162 | if options[:in] == :pipe 163 | stdin_pipe = IO.pipe 164 | result[:in] = stdin_pipe[1] 165 | end 166 | if options[:out] == :pipe 167 | stdout_pipe = IO.pipe 168 | result[:out] = stdout_pipe[0] 169 | end 170 | result[:pid] = fork do 171 | if options[:in] == :pipe 172 | STDIN.reopen(stdin_pipe[0]) 173 | elsif options[:in].is_a?(Array) 174 | STDIN.reopen(*options[:in]) 175 | elsif options[:in] 176 | STDIN.reopen(options[:in]) 177 | end 178 | if options[:out] == :pipe 179 | STDOUT.reopen(stdout_pipe[1]) 180 | elsif options[:out].is_a?(Array) 181 | STDOUT.reopen(*options[:out]) 182 | elsif options[:out] 183 | STDOUT.reopen(options[:out]) 184 | end 185 | if options[:err] == :pipe 186 | STDERR.reopen(stdout_pipe[1]) 187 | elsif options[:err].is_a?(Array) 188 | STDERR.reopen(*options[:err]) 189 | elsif options[:err] 190 | STDERR.reopen(options[:err]) 191 | end 192 | stdin_pipe[1].close if stdin_pipe 193 | stdout_pipe[0].close if stdout_pipe 194 | if options[:setsid] 195 | Process.setsid 196 | end 197 | begin 198 | exec(*command) 199 | rescue SystemCallError => e 200 | STDERR.puts "Cannot execute '#{command.join(' ')}': #{e}" 201 | exit! 127 202 | end 203 | end 204 | stdin_pipe[0].close if stdin_pipe 205 | stdout_pipe[1].close if stdout_pipe 206 | return result 207 | end 208 | 209 | def has_sink? 210 | return OPTIONS[:syslog] || OPTIONS[:log_file] || OPTIONS[:pv] 211 | end 212 | 213 | def spawn_sink(main_process) 214 | if OPTIONS[:syslog] 215 | command = ["#{TOOLS_DIR}/syslog-tee", "-t", "#{program_name}[#{main_process[:pid]}]"] 216 | elsif OPTIONS[:log_file] 217 | if OPTIONS[:append] 218 | command = ["tee", "-a", OPTIONS[:log_file]] 219 | else 220 | command = ["tee", OPTIONS[:log_file]] 221 | end 222 | elsif OPTIONS[:pv] 223 | command = pv_command 224 | else 225 | raise "Unknown options combination" 226 | end 227 | 228 | # We setsid because we don't want to let terminal signals reach any sink processes. 
229 | if (OPTIONS[:syslog] || OPTIONS[:log_file]) && OPTIONS[:pv] 230 | # Pipeline: main_process | sink | pv 231 | sink_process = spawn(command, :setsid => true, :in => main_process[:out], :out => :pipe) 232 | pv_process = spawn(pv_command, :setsid => true, :in => sink_process[:out]) 233 | sink_process[:out].close 234 | sink_process.delete(:out) 235 | return { :pids => [sink_process[:pid], pv_process[:pid]] } 236 | else 237 | # Pipeline: main_process | sink 238 | sink_process = spawn(command, :setsid => true, :in => main_process[:out]) 239 | return { :pids => [sink_process[:pid]] } 240 | end 241 | end 242 | 243 | def main_command 244 | if OPTIONS[:program_name] 245 | args = ARGV.dup 246 | argv0 = args.shift 247 | return [[argv0, OPTIONS[:program_name]], *args] 248 | else 249 | return ARGV 250 | end 251 | end 252 | 253 | def program_name 254 | return OPTIONS[:program_name] || File.basename(ARGV[0]) 255 | end 256 | 257 | def pv_command 258 | return ["pv"] 259 | end 260 | 261 | def get_signal_name(signal_exception) 262 | if signal_exception.is_a?(Interrupt) 263 | return "SIGINT" 264 | else 265 | return signal_exception.signm 266 | end 267 | end 268 | 269 | def create_lock_file 270 | if OPTIONS[:lock_file] 271 | File.open(OPTIONS[:lock_file], File::WRONLY | File::EXCL | File::CREAT) do |f| 272 | f.puts Process.pid 273 | end 274 | return true 275 | else 276 | return nil 277 | end 278 | rescue Errno::EEXIST 279 | raise "Lock file #{OPTIONS[:lock_file]} already exists!" 280 | end 281 | 282 | def create_log_file 283 | if OPTIONS[:log_file] 284 | File.open(OPTIONS[:log_file], OPTIONS[:append] ? 'a' : 'w').close 285 | end 286 | end 287 | 288 | def delete_lock_file(lock_file) 289 | File.unlink(OPTIONS[:lock_file]) 290 | end 291 | 292 | def write_status_file(content) 293 | if OPTIONS[:status_file] 294 | File.open(OPTIONS[:status_file], "w") do |f| 295 | f.write(content.to_s) 296 | end 297 | end 298 | end 299 | 300 | start 301 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Phusion Server Tools 2 | 3 | A collection of server administration tools that we use. Everything is written in Ruby and designed to work with Debian. These scripts may work with other operating systems or distributions as well, but this hasn't been tested. [Read documentation with table of contents.](http://phusion.github.com/phusion-server-tools/) 4 | 5 | Install with: 6 | 7 | git clone https://github.com/phusion/phusion-server-tools.git /tools 8 | 9 | It's not necessary to install to /tools, you can install it anywhere, but this document assumes that you have installed to /tools. 10 | 11 | Each tool has its own prerequisites, but here are some common prerequisites: 12 | 13 | * Ruby (obviously) 14 | * `pv` - `apt-get install pv`. Not required but very useful; allows display of progress bars. 15 | 16 | Some tools require additional configuration through `config.yml`, which must be located in the same directory as the tool or in `/etc/phusion-server-tools.yml`. Please see `config.yml.example` for an example. 17 | 18 | ## Cryptographic verification 19 | 20 | We do not release source tarballs for this project. Users are expected to get the source code from Github. 21 | 22 | From time to time, we create Git tags for milestones. These milestones are signed with the [Phusion Software Signing key](http://www.phusion.nl/about/gpg). 
43 | ### backup-postgresql - Rotated, compressed, encrypted PostgreSQL dumps
44 |
45 | A script which backs up all PostgreSQL databases to `/var/backups/postgresql`. By default at most 10 backups are kept, but this can be configured. All backups are compressed with gzip and can optionally be encrypted. The backup directory is denied all world access.
46 |
47 | It uses `psql` to obtain a list of databases and `pg_dump` to dump the database contents. If you want to run this script unattended, you should therefore set the right login information through the relevant environment variables, such as `PGUSER`.
48 |
49 | Encryption can be configured through the 'encrypt' option in config.yml.
50 |
51 | Make it run twice a day, at midnight and noon, in cron:
52 |
53 |     0 0,12 * * * /tools/silence-unless-failed /tools/backup-postgresql
54 |
55 | ## Monitoring and alerting
56 |
57 | ### monitor-cpu - Monitors CPU usage and sends email on suspicious activity
58 |
59 | A daemon which measures the total CPU usage and per-core CPU usage every minute, and sends an email if the average total usage or the average per-core usage over a period of time equals or exceeds a threshold.
60 |
61 | Config options (see the example below):
62 |
63 | * total_threshold: The total CPU usage threshold (0-100) to check against.
64 | * per_core_threshold: The per-core CPU usage threshold (0-100) to check against.
65 | * interval: The interval, in minutes, over which the average is calculated.
66 | * to, from, subject: Configuration for the email alert.
67 |
68 | You should run monitor-cpu with daemontools:
69 |
70 |     mkdir -p /etc/service/monitor-cpu
71 |     cat <<EOF > /etc/service/monitor-cpu/run.tmp
72 |     #!/bin/bash
73 |     exec setuidgid daemon /tools/run --syslog /tools/monitor-cpu
74 |     EOF
75 |     chmod +x /etc/service/monitor-cpu/run.tmp
76 |     mv /etc/service/monitor-cpu/run.tmp /etc/service/monitor-cpu/run
77 |
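As a rough sketch of what the monitor-cpu settings might look like in `config.yml`: the key names are the ones listed above, but the enclosing `monitor_cpu` section name and all values are assumptions here, so check `config.yml.example` for the actual layout.

    monitor_cpu:
      total_threshold: 90
      per_core_threshold: 95
      interval: 5
      to: admin@example.com
      from: monitor@example.com
      subject: High CPU usage detected

With `interval: 5`, an alert is sent once the average usage over the last 5 minutes reaches one of the thresholds.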
78 | ### notify-if-queue-becomes-large - Monitor RabbitMQ queue sizes
79 |
80 | This script monitors all RabbitMQ queues on the localhost RabbitMQ installation and sends an email if one of them contains more messages than a defined threshold. You can configure the settings in `config.yml`.
81 |
82 | Run it every 15 minutes in cron:
83 |
84 |     0,15,30,45 * * * * /tools/notify-if-queue-becomes-large
85 |
86 | ### check-web-apps - Checks web applications' health
87 |
88 | This script sends HTTP requests to all listed web applications and checks whether the response contains a certain substring. If not, an email is sent.
89 |
90 | Run it every 10 minutes in cron:
91 |
92 |     0,10,20,30,40,50 * * * * /tools/check-web-apps
93 |
94 |
95 | ## File management
96 |
97 | ### permit and deny - Easily set fine-grained permissions using ACLs
98 |
99 | `permit` recursively gives a user access to a directory by using ACLs. The default ACL is modified too, so that any new files created in that directory or in subdirectories inherit the ACL rules that allow access for the given user.
100 |
101 | `deny` recursively removes all ACLs for a given user on a directory, including default ACLs.
102 |
103 | The standard `setfacl` tool is too hard to use and sometimes does stupid things, such as unexpectedly making files executable. These scripts are simple and work as expected.
104 |
105 |     # Recursively give web server read-only access to /webapps/foo.
106 |     /tools/permit www-data /webapps/foo
107 |
108 |     # Recursively give user 'deploy' read-write access to /webapps/bar.
109 |     /tools/permit deploy /webapps/bar --read-write
110 |
111 |     # Recursively remove all ACLs for user 'joe' on /secrets/area66.
112 |     /tools/deny joe /secrets/area66
113 |
114 | You need the `getfacl` and `setfacl` commands:
115 |
116 |     apt-get install acl
117 |
118 | You must also make sure your filesystem is mounted with ACL support, e.g.:
119 |
120 |     mount -o remount,acl /
121 |
122 | Don't forget to update /etc/fstab too.
123 |
124 | ### add-line
125 |
126 | Adds a line to the given file if the file doesn't already include it.
127 |
128 |     /tools/add-line foo.log "hello world"
129 |     # Same effect:
130 |     /tools/add-line foo.log hello world
131 |
132 | ### remove-line
133 |
134 | Removes the first instance of a line from the given file. Does nothing if the file doesn't include that line.
135 |
136 |     /tools/remove-line foo.log "hello world"
137 |     # Same effect:
138 |     /tools/remove-line foo.log hello world
139 |
140 | ### set-section
141 |
142 | Sets the content of a named section inside a text file while preserving all other text. Contents are read from stdin. A section looks like this:
143 |
144 |     ###### BEGIN #{section_name} ######
145 |     some text
146 |     ###### END #{section_name} ######
147 |
148 | If the section doesn't exist, then it will be created.
149 |
150 |     $ cat foo.txt
151 |     hello world
152 |     $ echo hamburger | /tools/set-section foo.txt "mcdonalds menu"
153 |     $ cat foo.txt
154 |     hello world
155 |     ##### BEGIN mcdonalds menu #####
156 |     hamburger
157 |     ##### END mcdonalds menu #####
158 |
159 | If the section already exists then its contents will be updated.
160 |
161 |     # Using above foo.txt.
162 |     $ echo french fries | /tools/set-section foo.txt "mcdonalds menu"
163 |     $ cat foo.txt
164 |     hello world
165 |     ##### BEGIN mcdonalds menu #####
166 |     french fries
167 |     ##### END mcdonalds menu #####
168 |
169 | If the content is empty then the section will be removed if it exists.
170 |
171 |     # Using above foo.txt
172 |     $ echo | /tools/set-section foo.txt "mcdonalds menu"
173 |     $ cat foo.txt
174 |     hello world
175 |
176 | ### truncate
177 |
178 | Truncates all given files to 0 bytes.
179 |
180 | ### rotate-files
181 |
182 | Allows you to use the common pattern of creating a new file while deleting files that are too old. The most common use case for this tool is to store a backup file while deleting older backups.
183 |
184 | The usage is as follows:
185 |
186 |     rotate-files <INPUT FILE> <OUTPUT PREFIX> [OUTPUT SUFFIX] [OPTIONS]
187 |
188 | Suppose you have used some tool to create a database dump at `/tmp/backup.tar.gz`. If you run the following command...
189 |
190 |     rotate-files /tmp/backup.tar.gz /backups/backup- .tar.gz
191 |
192 | ...then it will create the file `/backups/backup-<timestamp>.tar.gz`. It will also delete old backup files matching this same pattern.
193 |
194 | Old file deletion works by keeping only the most recent 50 files. This way, running `rotate-files` on an old directory won't result in all old backups being deleted. You can customize the number of files to keep with the `--max` parameter.
195 |
196 | Recency is determined through the timestamp in the filename, not the file timestamp metadata.
197 |
198 |
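For example, to keep only the 10 most recent backups instead of the default 50 (the paths are illustrative, and `--max` is the parameter described above):

    rotate-files /tmp/backup.tar.gz /backups/backup- .tar.gz --max 10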
199 | ## RabbitMQ
200 |
201 | ### display-queue - Display statistics for local RabbitMQ queues
202 |
203 | This tool displays statistics for RabbitMQ queues in a friendlier format than `rabbitmqctl list_queues`. The meanings of the columns are as follows:
204 |
205 | * Messages - Total number of messages in the queue. Equal to `Ready + Unack`.
206 | * Ready - Number of messages in the queue not yet consumed.
207 | * Unack - Number of messages in the queue that have been consumed, but not yet acknowledged.
208 | * Consumers - Number of consumers subscribed to this queue.
209 | * Memory - The amount of memory that RabbitMQ is using for this queue.
210 |
211 | ### watch-queue - Display changes in local RabbitMQ queues
212 |
213 | `watch-queue` combines the `watch` tool with `display-queue`. It continuously displays the latest queue statistics and highlights changes.
214 |
215 | ### purge-queue - Remove all messages from a local RabbitMQ queue
216 |
217 | `purge-queue` removes all messages from the given RabbitMQ queue. It connects to a RabbitMQ server on localhost on the default port. Note that consumed-but-unacknowledged messages in the queue cannot be removed.
218 |
219 |     purge-queue <queue name>
220 |
221 | ### notify-if-queue-becomes-large - Monitor RabbitMQ queue sizes
222 |
223 | See the related documentation under "Monitoring and alerting".
224 |
225 |
226 | ## Security
227 |
228 | ### confine-to-rsync
229 |
230 | To be used in combination with SSH for confining an account to rsync-only access. Very useful for locking down automated backup users.
231 |
232 | Consider two hypothetical servers, `backup.org` and `production.org`. Once in a while backup.org runs an automated `rsync` command, copying data from production.org to its local disk. Backup.org's SSH key is installed on production.org. If someone hacks into backup.org, we don't want them to be able to log in to production.org or do anything else that might cause damage, so we need to make sure that backup.org can only rsync from production.org, and only for certain directories.
233 |
234 | `confine-to-rsync` is to be installed into production.org's `authorized_keys` file as a forced command:
235 |
236 |     command="/tools/confine-to-rsync /directory1 /directory2",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-dss AAAAB3Nza(...rest of backup.org's key here...)
237 |
238 | `confine-to-rsync` checks whether the client is trying to execute rsync in server mode, and if so, whether rsync is only being run on either /directory1 or /directory2. If not, it aborts with an error.
239 |
240 |
241 | ## Other
242 |
243 | ### silence-unless-failed
244 |
245 | Runs the given command but only prints its output (both STDOUT and STDERR) if its exit code is non-zero. The script's own exit code is the same as the command's exit code.
246 |
247 |     /tools/silence-unless-failed my-command arg1 arg2 --arg3
248 |
249 | ### timestamp
250 |
251 | Runs the given command, and prepends timestamps to all its output. This will cause stdout and stderr to be merged and printed to stdout.
252 |
253 |     /tools/timestamp my-command arg1 arg2 --arg3
254 |
255 | ### run
256 |
257 | This tool allows running a command in various ways (see the usage sketch below for a combined example). Supported features:
258 |
259 | * Running the command with a different name (`argv[0]` value). Specify `--program-name NAME` to use this feature.
260 | * Sending a copy of the output to a log file. Specify `--log-file FILENAME` to use this feature. It will overwrite the log file by default; specify `--append` to append to the file instead.
261 | * Sending a copy of the output to syslog. Specify `--syslog` to use this feature. `run` will use the command's program name as the syslog program name. `--program-name` is respected.
262 | * Sending the output to [pv](http://www.ivarch.com/programs/pv.shtml). Specify `--pv` to use this feature. You can combine this with `--log-file` and `--syslog`.
263 | * Printing the exit code of the command to a status file once the command exits. Specify `--status-file FILENAME` to use this feature.
264 | * Holding a lock file while the command is running. If the lock file already exists, then `run` will abort with an error. Otherwise, it will create the lock file, write the command's PID to the file and delete the file after the command has finished.
265 | * Sending an email after the command has finished. Specify `--email-to EMAILS` to use this feature. It should be a comma-separated list of addresses.
266 |
267 | `run` always exhibits the following properties:
268 |
269 | * It redirects stdin to /dev/null.
270 | * It exits with the same exit code as the command, unlike bash, which exits with the exit code of the last command in the pipeline.
271 | * stdout and stderr are both combined into a single stream. If you specify `--log-file`, `--syslog` or `--pv` then both stdout and stderr will be redirected to the pipeline.
272 | * All signals are forwarded to the command process.
273 |
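Putting a few of these options together, a cron entry for a nightly job could look roughly like this sketch (the schedule, paths and email address are placeholders; only flags documented above are used):

    0 2 * * * /tools/run --log-file /var/log/nightly-job.log --append --status-file /var/run/nightly-job.status --email-to admin@example.com /usr/local/bin/nightly-job

After the job exits, the status file contains its exit code and `run` itself exits with that same code, so other tooling can check either.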
274 | ### syslog-tee
275 |
276 | This is like `tee`, but writes to syslog instead of a file. Accepts the same arguments as the `logger` command.
277 |
278 | ### udp-to-syslog
279 |
280 | Forwards all incoming data on a UDP port to syslog. For each message, the source address is also noted. Originally written to be used in combination with Linux's netconsole.
281 |
282 | See `./udp-to-syslog --help` for options.
283 |
284 | ### gc-git-repos
285 |
286 | Garbage collects all git repositories defined in `config.yml`. For convenience, the list of repositories to garbage collect can be a glob, e.g. `/u/apps/**/*.git`.
287 |
288 | In order to preserve file permissions, the `git gc` command is run as the owner of the repository directory by invoking `su`. Therefore this tool must be run as root, or it must be run as the owner of all given git repositories.
289 |
290 | Make it run every Sunday at 0:00 (midnight) in cron with low I/O priority:
291 |
292 |     0 0 * * sun /tools/silence-unless-failed ionice -n 7 /tools/gc-git-repos
293 |
--------------------------------------------------------------------------------