├── templates
│   └── default
│       ├── http.erb
│       ├── https.erb
│       ├── whitelist.erb
│       ├── apache.conf.erb
│       ├── ganglia_graphite.rb.erb
│       ├── gmetad.conf.erb
│       ├── gmond_unicast.conf.erb
│       └── gmond.conf.erb
├── resources
│   ├── python.rb
│   └── gmetric.rb
├── metadata.rb
├── recipes
│   ├── iptables.rb
│   ├── graphite.rb
│   ├── source.rb
│   ├── web.rb
│   ├── gmetad.rb
│   └── default.rb
├── providers
│   ├── gmetric.rb
│   └── python.rb
├── attributes
│   └── ganglia.rb
└── README.rdoc

/templates/default/http.erb:
--------------------------------------------------------------------------------
1 | -A FWR -p tcp --dport 80 -j ACCEPT
2 | 
--------------------------------------------------------------------------------
/templates/default/https.erb:
--------------------------------------------------------------------------------
1 | -A FWR -p tcp --dport 443 -j ACCEPT
2 | 
--------------------------------------------------------------------------------
/templates/default/whitelist.erb:
--------------------------------------------------------------------------------
1 | # Whitelist an IP address
2 | -A FWR -s <%= @subnet %> -j ACCEPT
3 | 
--------------------------------------------------------------------------------
/resources/python.rb:
--------------------------------------------------------------------------------
1 | 
2 | actions :enable, :disable
3 | 
4 | attribute :module_name, :kind_of => String, :name_attribute => true
5 | attribute :options, :kind_of => Hash, :default => {}
--------------------------------------------------------------------------------
/resources/gmetric.rb:
--------------------------------------------------------------------------------
1 | 
2 | actions :enable, :disable
3 | 
4 | attribute :script_name, :kind_of => String, :name_attribute => true
5 | attribute :options, :kind_of => Hash, :default => {}
--------------------------------------------------------------------------------
/metadata.rb:
--------------------------------------------------------------------------------
1 | maintainer       "Heavy Water Software Inc."
2 | maintainer_email "darrin@heavywater.ca"
3 | license          "Apache 2.0"
4 | description      "Installs/Configures ganglia"
5 | long_description IO.read(File.join(File.dirname(__FILE__), 'README.rdoc'))
6 | version          "0.1.1"
7 | 
8 | %w{ debian ubuntu redhat centos fedora }.each do |os|
9 |   supports os
10 | end
11 | 
12 | recommends "graphite"
13 | suggests "iptables"
14 | 
--------------------------------------------------------------------------------
/recipes/iptables.rb:
--------------------------------------------------------------------------------
1 | include_recipe "iptables"
2 | 
3 | iptables_rule "http"
4 | iptables_rule "https"
5 | 
6 | workers = search(:node, "*:*") || []
7 | subnets = []
8 | 
9 | workers.each do |w|
10 |   subnets << [ w.name, "#{w['ipaddress']}/32" ]
11 | end
12 | 
13 | subnets.each do |h|
14 |   template "/etc/iptables.d/#{h[0]}" do
15 |     source "whitelist.erb"
16 |     mode "644"
17 |     variables :subnet => h[1]
18 |     notifies :run, "execute[rebuild-iptables]"
19 |   end
20 | end
21 | 
--------------------------------------------------------------------------------
/recipes/graphite.rb:
--------------------------------------------------------------------------------
1 | graphite_host = search(:node, "role:#{node['ganglia']['server_role']} AND chef_environment:#{node.chef_environment}").map {|n| n.ipaddress}.first
2 | if graphite_host.nil?
3 |   graphite_host = "localhost"
4 | end
5 | 
6 | template "/usr/local/sbin/ganglia_graphite.rb" do
7 |   source "ganglia_graphite.rb.erb"
8 |   mode "744"
9 |   variables :graphite_host => graphite_host
10 | end
11 | 
12 | cron "ganglia_graphite" do
13 |   command "/usr/local/sbin/ganglia_graphite.rb"
14 | end
15 | 
--------------------------------------------------------------------------------
/templates/default/apache.conf.erb:
--------------------------------------------------------------------------------
1 | <VirtualHost <%= @config['vhost_addr'] -%>:<%= @config['vhost_port'] -%>>
2 |   ServerAdmin <%= @config['server_admin'] %>
3 |   ErrorLog <%= @config['error_log'] %>
4 |   LogLevel <%= @config['log_level'] %>
5 | 
6 |   Alias <%= @config['alias'] -%> "/usr/share/ganglia-webfrontend"
7 |   <Directory "/usr/share/ganglia-webfrontend">
8 |     Options Indexes MultiViews FollowSymLinks
9 |     AllowOverride None
10 |     <% if @config['allow_from'] %>
11 |     Order deny,allow
12 |     Deny from all
13 |     Allow from <%= @config['allow_from'] %>
14 |     <% else %>
15 |     Order allow,deny
16 |     Allow from all
17 |     <% end %>
18 |   </Directory>
19 | </VirtualHost>
--------------------------------------------------------------------------------
/providers/gmetric.rb:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | action :enable do
4 | 
5 |   #script
6 |   template "/usr/local/bin/#{new_resource.script_name}-ganglia" do
7 |     source "ganglia/#{new_resource.script_name}.gmetric.erb"
8 |     owner "root"
9 |     group "root"
10 |     mode "755"
11 |     variables :options => new_resource.options
12 |   end
13 | 
14 |   #cron
15 |   template "/etc/cron.d/#{new_resource.script_name}-ganglia" do
16 |     source "ganglia/#{new_resource.script_name}.cron.erb"
17 |     owner "root"
18 |     group "root"
19 |     mode "644"
20 |     variables :options => new_resource.options
21 |   end
22 | 
23 | end
24 | 
25 | action :disable do
26 | 
27 |   file "/usr/local/bin/#{new_resource.script_name}-ganglia" do
28 |     action :delete
29 |   end
30 | 
31 |   file "/etc/cron.d/#{new_resource.script_name}-ganglia" do
32 |     action :delete
33 |   end
34 | 
35 | end
--------------------------------------------------------------------------------
/providers/python.rb:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | action :enable do
4 | 
5 |   #python module
6 |   template "/usr/lib/ganglia/python_modules/#{new_resource.module_name}.py" do
7 |     source "ganglia/#{new_resource.module_name}.py.erb"
8 |     owner "root"
9 |     group "root"
10 |     mode "644"
11 |     variables :options => new_resource.options
12 |     notifies :restart, resources(:service => "ganglia-monitor")
13 |   end
14 | 
15 |   #configuration
16 |   template "/etc/ganglia/conf.d/#{new_resource.module_name}.pyconf" do
17 |     source "ganglia/#{new_resource.module_name}.pyconf.erb"
18 |     owner "root"
19 |     group "root"
20 |     mode "644"
21 |     variables :options => new_resource.options
22 |     notifies :restart, resources(:service => "ganglia-monitor")
23 |   end
24 | 
25 | end
26 | 
27 | action :disable do
28 | 
29 |   file "/usr/lib/ganglia/python_modules/#{new_resource.module_name}.py" do
30 |     action :delete
31 |     notifies :restart, resources(:service => "ganglia-monitor")
32 |   end
33 | 
34 |   file "/etc/ganglia/conf.d/#{new_resource.module_name}.pyconf" do
35 |     action :delete
36 |     notifies :restart, resources(:service => "ganglia-monitor")
37 |   end
38 | 
39 | end
--------------------------------------------------------------------------------
/recipes/source.rb:
--------------------------------------------------------------------------------
1 | if platform?( "redhat", "centos", "fedora" )
2 |   package "apr-devel"
3 |   package "libconfuse-devel"
"libconfuse-devel" 4 | package "expat-devel" 5 | package "rrdtool-devel" 6 | end 7 | 8 | remote_file "/usr/src/ganglia-#{node[:ganglia][:version]}.tar.gz" do 9 | source node[:ganglia][:uri] 10 | checksum node[:ganglia][:checksum] 11 | end 12 | 13 | src_path = "/usr/src/ganglia-#{node[:ganglia][:version]}" 14 | 15 | execute "untar ganglia" do 16 | command "tar xzf ganglia-#{node[:ganglia][:version]}.tar.gz" 17 | creates src_path 18 | cwd "/usr/src" 19 | end 20 | 21 | execute "configure ganglia build" do 22 | command "./configure --with-gmetad --with-libpcre=no --sysconfdir=/etc/ganglia" 23 | creates "#{src_path}/config.log" 24 | cwd src_path 25 | end 26 | 27 | execute "build ganglia" do 28 | command "make" 29 | creates "#{src_path}/gmond/gmond" 30 | cwd src_path 31 | end 32 | 33 | execute "install ganglia" do 34 | command "make install" 35 | creates "/usr/sbin/gmond" 36 | cwd src_path 37 | end 38 | 39 | link "/usr/lib/ganglia" do 40 | to "/usr/lib64/ganglia" 41 | only_if do 42 | node[:kernel][:machine] == "x86_64" and 43 | platform?( "redhat", "centos", "fedora" ) 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /recipes/web.rb: -------------------------------------------------------------------------------- 1 | directory "/etc/ganglia-webfrontend" 2 | 3 | case node[:platform] 4 | when "ubuntu", "debian" 5 | package "ganglia-webfrontend" 6 | 7 | link "/etc/apache2/sites-enabled/ganglia" do 8 | to "/etc/ganglia-webfrontend/apache.conf" 9 | notifies :restart, "service[apache2]" 10 | not_if do 11 | node[:ganglia][:apache][:write_config_file] 12 | end 13 | end 14 | 15 | when "redhat", "centos", "fedora" 16 | package "httpd" 17 | package "php" 18 | include_recipe "ganglia::source" 19 | include_recipe "ganglia::gmetad" 20 | 21 | execute "copy web directory" do 22 | command "cp -r web /var/www/html/ganglia" 23 | creates "/var/www/html/ganglia" 24 | cwd "/usr/src/ganglia-#{node[:ganglia][:version]}" 25 | end 26 | end 27 | 28 | # If applicable, write an apache config file for Ganglia 29 | if node[:ganglia][:apache][:write_config_file] 30 | template "/etc/apache2/sites-available/ganglia" do 31 | source "apache.conf.erb" 32 | owner "root" 33 | group "root" 34 | mode 0644 35 | action :create 36 | variables( 37 | :config => node[:ganglia][:apache] 38 | ) 39 | end 40 | 41 | bash "enable ganlia web" do 42 | user "root" 43 | code "a2ensite ganglia" 44 | notifies :reload, resources( :service => "apache2"), :delayed 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /recipes/gmetad.rb: -------------------------------------------------------------------------------- 1 | case node[:platform] 2 | when "ubuntu", "debian" 3 | package "gmetad" 4 | when "redhat", "centos", "fedora" 5 | include_recipe "ganglia::source" 6 | execute "copy gmetad init script" do 7 | command "cp " + 8 | "/usr/src/ganglia-#{node[:ganglia][:version]}/gmetad/gmetad.init " + 9 | "/etc/init.d/gmetad" 10 | not_if "test -f /etc/init.d/gmetad" 11 | end 12 | end 13 | 14 | directory "/var/lib/ganglia/rrds" do 15 | owner "nobody" 16 | recursive true 17 | end 18 | 19 | case node[:ganglia][:unicast][:enable] 20 | when true 21 | template "/etc/ganglia/gmetad.conf" do 22 | source "gmetad.conf.erb" 23 | variables( :hosts => "localhost", 24 | :cluster_name => node[:ganglia][:cluster_name]) 25 | notifies :restart, "service[gmetad]" 26 | end 27 | if node[:recipes].include? 
"iptables" 28 | include_recipe "ganglia::iptables" 29 | end 30 | when false 31 | ips = [] 32 | node[:ganglia][:cluster_nodes].each do |node| 33 | ips << search(:node, "name:#{node}").map {|n| n.ipaddress} 34 | end 35 | template "/etc/ganglia/gmetad.conf" do 36 | source "gmetad.conf.erb" 37 | variables( :hosts => ips.join(" "), 38 | :cluster_name => node[:ganglia][:cluster_name]) 39 | notifies :restart, "service[gmetad]" 40 | end 41 | end 42 | 43 | service "gmetad" do 44 | supports :restart => true 45 | action [ :enable, :start ] 46 | end 47 | -------------------------------------------------------------------------------- /attributes/ganglia.rb: -------------------------------------------------------------------------------- 1 | default[:ganglia][:version] = "3.1.7" 2 | default[:ganglia][:uri] = "http://sourceforge.net/projects/ganglia/files/ganglia%20monitoring%20core/3.1.7/ganglia-3.1.7.tar.gz/download" 3 | default[:ganglia][:checksum] = "bb1a4953" 4 | 5 | default[:ganglia][:server_role] = "ganglia" 6 | 7 | # Cluster Information 8 | default[:ganglia][:cluster][:name] = "unspecified" 9 | default[:ganglia][:cluster][:owner] = "unspecified" 10 | default[:ganglia][:cluster][:latlong] = "unspecified" 11 | default[:ganglia][:cluster][:url] = "unspecified" 12 | 13 | # Multicast send/receive config 14 | default[:ganglia][:multicast][:send_channel][:mcast_join] = "239.2.11.71" 15 | default[:ganglia][:multicast][:send_channel][:port] = "8649" 16 | default[:ganglia][:multicast][:send_channel][:ttl] = 1 17 | 18 | default[:ganglia][:multicast][:recv_channel][:mcast_join] = "239.2.11.71" 19 | default[:ganglia][:multicast][:recv_channel][:port] = "8649" 20 | default[:ganglia][:multicast][:recv_channel][:bind] = "239.2.11.71" 21 | 22 | # Unicast send/receive config 23 | default[:ganglia][:unicast][:enable] = false 24 | default[:ganglia][:unicast][:port] = "8649" 25 | default[:ganglia][:unicast][:ttl] = 1 26 | 27 | # A list if chef node names that are in a cluster. This is 28 | # ignored if Unicast is used. 29 | default[:ganglia][:cluster_nodes] = [] 30 | 31 | # Some attributes used to write an apache config file 32 | default[:ganglia][:apache][:write_config_file] = true 33 | default[:ganglia][:apache][:vhost_addr] = "*" 34 | default[:ganglia][:apache][:vhost_port] = "80" 35 | default[:ganglia][:apache][:server_admin] = "root@localhost" 36 | default[:ganglia][:apache][:error_log] = "/var/log/apache2/error.log" 37 | default[:ganglia][:apache][:log_level] = "warn" 38 | default[:ganglia][:apache][:alias] = "/ganglia" 39 | # Setting `allow_from` will restrict the addresses from which Ganglia 40 | # can be viewed. E.G. setting to "127.0.0.0/255.0.0.0 ::1/128" will 41 | # only allow local connections 42 | default[:ganglia][:apache][:allow_from] = nil 43 | 44 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = DESCRIPTION: 2 | 3 | Installs and configures Ganglia. 4 | 5 | http://ganglia.sourceforge.net/ 6 | 7 | = REQUIREMENTS: 8 | 9 | * SELinux must be disabled on CentOS 10 | * iptables must allow access to port 80 11 | 12 | = ATTRIBUTES: 13 | 14 | See the `attributes/ganglia.rb` file. 15 | 16 | = USAGE: 17 | 18 | A run list with "recipe[ganglia]" enables monitoring. 19 | 20 | A run list with "recipe[ganglia::web]" enables the web interface. 
21 | NOTE: if you're using apache, and you want to run the ganglia web interface on a
22 | different port, you should configure that with attributes available from
23 | the apache cookbook:
24 | 
25 |   default[:apache][:listen_ports]
26 | 
27 | However, this cookbook *does* make several attributes available to write an
28 | apache virtual host config file.
29 | 
30 | A run list with "recipe[ganglia::graphite]" enables graphite graphs.
31 | 
32 | = LWRP:
33 | 
34 | == gmetric
35 | 
36 | Installs a gmetric plugin.
37 | 
38 | The plugin is composed of two templates:
39 | * One for the script
40 | * One for the cron job that will call the script
41 | 
42 | The templates must live in the calling cookbook.
43 | 
44 | Example:
45 | 
46 |   ganglia_gmetric 'memcache' do
47 |     options :port => 11211
48 |   end
49 | 
50 | templates:
51 |   cookbooks/memcache/templates/default/memcache.gmetric.erb
52 |   cookbooks/memcache/templates/default/memcache.cron.erb
53 | 
54 | The content of 'options' will be passed to the templates.
55 | 
56 | == python
57 | 
58 | Installs a python plugin.
59 | 
60 | The plugin is composed of two templates:
61 | * One for the python module
62 | * One for the configuration of the module
63 | 
64 | The templates must live in the calling cookbook.
65 | 
66 | Example:
67 | 
68 |   ganglia_python 'memcache' do
69 |     options :port => 11211
70 |   end
71 | 
72 | templates:
73 |   cookbooks/memcache/templates/default/memcache.py.erb
74 |   cookbooks/memcache/templates/default/memcache.pyconf.erb
75 | 
76 | The content of 'options' will be passed to the templates.
77 | 
78 | = CAVEATS:
79 | 
80 | This cookbook has been tested on Ubuntu 10.04 and CentOS 5.5.
81 | 
82 | Search seems to take a moment or two to index.
83 | You may need to converge again to see recently added nodes.
--------------------------------------------------------------------------------
/recipes/default.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Cookbook Name:: ganglia
3 | # Recipe:: default
4 | #
5 | # Copyright 2011, Heavy Water Software Inc.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | case node[:platform]
21 | when "ubuntu", "debian"
22 |   package "ganglia-monitor"
23 | when "redhat", "centos", "fedora"
24 |   include_recipe "ganglia::source"
25 | 
26 |   execute "copy ganglia-monitor init script" do
27 |     command "cp " +
28 |       "/usr/src/ganglia-#{node[:ganglia][:version]}/gmond/gmond.init " +
29 |       "/etc/init.d/ganglia-monitor"
30 |     not_if "test -f /etc/init.d/ganglia-monitor"
31 |   end
32 | 
33 |   user "ganglia"
34 | end
35 | 
36 | directory "/etc/ganglia"
37 | 
38 | case node[:ganglia][:unicast][:enable]
39 | when true
40 |   host = search(:node, "role:#{node['ganglia']['server_role']} AND chef_environment:#{node.chef_environment}").map {|n| n.ipaddress}.first
41 |   if host.nil?
42 | host = "127.0.0.1" 43 | end 44 | template "/etc/ganglia/gmond.conf" do 45 | source "gmond_unicast.conf.erb" 46 | variables( 47 | :cluster => node[:ganglia][:cluster], 48 | :unicast => node[:ganglia][:unicast], 49 | :host => host 50 | ) 51 | notifies :restart, "service[ganglia-monitor]" 52 | end 53 | when false 54 | template "/etc/ganglia/gmond.conf" do 55 | source "gmond.conf.erb" 56 | variables( 57 | :cluster => node[:ganglia][:cluster], 58 | :mcast_send => node[:ganglia][:multicast][:send_channel], 59 | :mcast_recv => node[:ganglia][:multicast][:recv_channel], 60 | :host => host 61 | ) 62 | notifies :restart, "service[ganglia-monitor]" 63 | end 64 | end 65 | 66 | service "ganglia-monitor" do 67 | pattern "gmond" 68 | supports :restart => true 69 | action [ :enable, :start ] 70 | end 71 | -------------------------------------------------------------------------------- /templates/default/ganglia_graphite.rb.erb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | ################################################################################# 4 | # Parse Ganglia XML stream and send metrics to Graphite 5 | # License: Same as Ganglia 6 | # Author: Vladimir Vuksan 7 | # Modified from script written by: Kostas Georgiou 8 | ################################################################################# 9 | require "rexml/document" 10 | require 'socket' 11 | 12 | # Adjust to the appropriate values 13 | ganglia_hostname = 'localhost' 14 | ganglia_port = 8649 15 | graphite_host = '<%= @graphite_host %>' 16 | graphite_port = 2003 17 | Debug = false 18 | 19 | begin 20 | # Open up a socket to gmond 21 | file = TCPSocket.open(ganglia_hostname, ganglia_port) 22 | # Open up a socket to graphite 23 | graphite = TCPSocket.open(graphite_host, graphite_port) 24 | # We need current time stamp in UNIX time 25 | now = Time.now.to_i 26 | # Parse the XML we got from gmond 27 | doc = REXML::Document.new file 28 | #doc.write( $stdout, 0 ) 29 | 30 | grid=nil 31 | doc.elements.each("GANGLIA_XML/GRID") { |element| 32 | grid=element.attributes["NAME"] 33 | } 34 | puts "GRID: #{grid}\n" if Debug 35 | 36 | cluster=nil 37 | doc.elements.each("GANGLIA_XML/GRID/CLUSTER") { |element| 38 | cluster=element.attributes["NAME"] 39 | puts "CLUSTER: #{cluster}\n" if Debug 40 | 41 | doc.elements.each("GANGLIA_XML/GRID[@NAME='#{grid}']/CLUSTER[@NAME='#{cluster}']/HOST") { |host| 42 | metric_prefix=host.attributes["NAME"].gsub(".", "_") 43 | host.elements.each("METRIC") { |metric| 44 | # Set metric prefix to the host name. Graphite uses dots to separate subtrees 45 | # therefore we have to change dots in hostnames to _ 46 | # Do substitution of whitespace after XML parsing to avoid problems with 47 | # pre-exiting whitespace in GRID / CLUSTER names in XML. 
48 |         grid.gsub!(/\W/, "_")
49 |         cluster.gsub!(/\W/, "_")
50 |         if metric.attributes["TYPE"] != "string"
51 |           graphite.puts "#{grid}.#{cluster}.#{metric_prefix}.#{metric.attributes["NAME"]} #{metric.attributes["VAL"]} #{now}\n" if !Debug
52 |           puts "#{grid}.#{cluster}.#{metric_prefix}.#{metric.attributes["NAME"]} #{metric.attributes["VAL"]} #{now}\n" if Debug
53 |         end
54 |       }
55 |     }
56 |   }
57 | 
58 |   graphite.close()
59 |   file.close()
60 | rescue
61 | end
62 | 
--------------------------------------------------------------------------------
/templates/default/gmetad.conf.erb:
--------------------------------------------------------------------------------
1 | # This is an example of a Ganglia Meta Daemon configuration file
2 | # http://ganglia.sourceforge.net/
3 | #
4 | # $Id: gmetad.conf.in 1639 2008-08-09 23:30:32Z carenas $
5 | #
6 | #-------------------------------------------------------------------------------
7 | # Setting the debug_level to 1 will keep daemon in the foreground and
8 | # show only error messages. Setting this value higher than 1 will make
9 | # gmetad output debugging information and stay in the foreground.
10 | # default: 0
11 | # debug_level 10
12 | #
13 | #-------------------------------------------------------------------------------
14 | # What to monitor. The most important section of this file.
15 | #
16 | # The data_source tag specifies either a cluster or a grid to
17 | # monitor. If we detect the source is a cluster, we will maintain a complete
18 | # set of RRD databases for it, which can be used to create historical
19 | # graphs of the metrics. If the source is a grid (it comes from another gmetad),
20 | # we will only maintain summary RRDs for it.
21 | #
22 | # Format:
23 | # data_source "my cluster" [polling interval] address1:port address2:port ...
24 | #
25 | # The keyword 'data_source' must immediately be followed by a unique
26 | # string which identifies the source, then an optional polling interval in
27 | # seconds. The source will be polled at this interval on average.
28 | # If the polling interval is omitted, 15sec is assumed.
29 | #
30 | # A list of machines which service the data source follows, in the
31 | # format ip:port, or name:port. If a port is not specified then 8649
32 | # (the default gmond port) is assumed.
33 | # default: There is no default value
34 | #
35 | # data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655
36 | # data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651
37 | # data_source "another source" 1.3.4.7:8655 1.3.4.8
38 | 
39 | data_source "<%= @cluster_name %>" <%= @hosts %>
40 | 
41 | #
42 | # Round-Robin Archives
43 | # You can specify custom Round-Robin archives here (defaults are listed below)
44 | #
45 | # RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \
46 | #      "RRA:AVERAGE:0.5:5760:374"
47 | #
48 | 
49 | #
50 | #-------------------------------------------------------------------------------
51 | # Scalability mode. If on, we summarize over downstream grids, and respect
52 | # authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output
53 | # in <GRID></GRID> tags, we ignore all <GRID> tags we see, and always assume
54 | # we are the "authority" on data source feeds. This approach does not scale to
55 | # large groups of clusters, but is provided for backwards compatibility.
56 | # default: on 57 | # scalable off 58 | # 59 | #------------------------------------------------------------------------------- 60 | # The name of this Grid. All the data sources above will be wrapped in a GRID 61 | # tag with this name. 62 | # default: unspecified 63 | gridname "<%= @cluster_name %>" 64 | # 65 | #------------------------------------------------------------------------------- 66 | # The authority URL for this grid. Used by other gmetads to locate graphs 67 | # for our data sources. Generally points to a ganglia/ 68 | # website on this machine. 69 | # default: "http://hostname/ganglia/", 70 | # where hostname is the name of this machine, as defined by gethostname(). 71 | # authority "http://mycluster.org/newprefix/" 72 | # 73 | #------------------------------------------------------------------------------- 74 | # List of machines this gmetad will share XML with. Localhost 75 | # is always trusted. 76 | # default: There is no default value 77 | # trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org 78 | # 79 | #------------------------------------------------------------------------------- 80 | # If you want any host which connects to the gmetad XML to receive 81 | # data, then set this value to "on" 82 | # default: off 83 | # all_trusted on 84 | # 85 | #------------------------------------------------------------------------------- 86 | # If you don't want gmetad to setuid then set this to off 87 | # default: on 88 | # setuid off 89 | # 90 | #------------------------------------------------------------------------------- 91 | # User gmetad will setuid to (defaults to "nobody") 92 | # default: "nobody" 93 | # setuid_username "nobody" 94 | # 95 | #------------------------------------------------------------------------------- 96 | # The port gmetad will answer requests for XML 97 | # default: 8651 98 | # xml_port 8651 99 | # 100 | #------------------------------------------------------------------------------- 101 | # The port gmetad will answer queries for XML. This facility allows 102 | # simple subtree and summation views of the XML tree. 103 | # default: 8652 104 | # interactive_port 8652 105 | # 106 | #------------------------------------------------------------------------------- 107 | # The number of threads answering XML requests 108 | # default: 4 109 | # server_threads 10 110 | # 111 | #------------------------------------------------------------------------------- 112 | # Where gmetad stores its round-robin databases 113 | # default: "/var/lib/ganglia/rrds" 114 | # rrd_rootdir "/some/other/place" 115 | -------------------------------------------------------------------------------- /templates/default/gmond_unicast.conf.erb: -------------------------------------------------------------------------------- 1 | /* This configuration is as close to 2.5.x default behavior as possible 2 | The values closely match ./gmond/metric.h definitions in 2.5.x */ 3 | globals { 4 | daemonize = yes 5 | setuid = yes 6 | user = ganglia 7 | debug_level = 0 8 | max_udp_msg_len = 1472 9 | mute = no 10 | deaf = no 11 | host_dmax = 0 /*secs */ 12 | cleanup_threshold = 300 /*secs */ 13 | gexec = no 14 | send_metadata_interval = 30 15 | } 16 | 17 | /* If a cluster attribute is specified, then all gmond hosts are wrapped inside 18 | * of a tag. If you do not specify a cluster tag, then all will 19 | * NOT be wrapped inside of a tag. 
*/ 20 | cluster { 21 | name = "<%= @cluster[:name] %>" 22 | owner = "<%= @cluster[:owner] %>" 23 | latlong = "<%= @cluster[:latlong] %>" 24 | url = "<%= @cluster[:url] %>" 25 | } 26 | 27 | /* The host section describes attributes of the host, like the location */ 28 | host { 29 | location = "unspecified" 30 | } 31 | 32 | /* Feel free to specify as many udp_send_channels as you like. Gmond 33 | used to only support having a single channel */ 34 | udp_send_channel { 35 | host = <%= @host %> 36 | port = <%= @unicast[:port] %> 37 | ttl = <%= @unicast[:ttl] %> 38 | } 39 | 40 | /* You can specify as many udp_recv_channels as you like as well. */ 41 | udp_recv_channel { 42 | port = <%= @unicast[:port] %> 43 | } 44 | 45 | /* You can specify as many tcp_accept_channels as you like to share 46 | an xml description of the state of the cluster */ 47 | tcp_accept_channel { 48 | port = <%= @unicast[:port] %> 49 | } 50 | 51 | /* Each metrics module that is referenced by gmond must be specified and 52 | loaded. If the module has been statically linked with gmond, it does not 53 | require a load path. However all dynamically loadable modules must include 54 | a load path. */ 55 | modules { 56 | module { 57 | name = "core_metrics" 58 | } 59 | module { 60 | name = "cpu_module" 61 | path = "/usr/lib/ganglia/modcpu.so" 62 | } 63 | module { 64 | name = "disk_module" 65 | path = "/usr/lib/ganglia/moddisk.so" 66 | } 67 | module { 68 | name = "load_module" 69 | path = "/usr/lib/ganglia/modload.so" 70 | } 71 | module { 72 | name = "mem_module" 73 | path = "/usr/lib/ganglia/modmem.so" 74 | } 75 | module { 76 | name = "net_module" 77 | path = "/usr/lib/ganglia/modnet.so" 78 | } 79 | module { 80 | name = "proc_module" 81 | path = "/usr/lib/ganglia/modproc.so" 82 | } 83 | module { 84 | name = "sys_module" 85 | path = "/usr/lib/ganglia/modsys.so" 86 | } 87 | } 88 | 89 | include ('/etc/ganglia/conf.d/*.conf') 90 | 91 | 92 | /* The old internal 2.5.x metric array has been replaced by the following 93 | collection_group directives. What follows is the default behavior for 94 | collecting and sending metrics that is as close to 2.5.x behavior as 95 | possible. */ 96 | 97 | /* This collection group will cause a heartbeat (or beacon) to be sent every 98 | 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses 99 | the age of the running gmond. */ 100 | collection_group { 101 | collect_once = yes 102 | time_threshold = 20 103 | metric { 104 | name = "heartbeat" 105 | } 106 | } 107 | 108 | /* This collection group will send general info about this host every 1200 secs. 109 | This information doesn't change between reboots and is only collected once. */ 110 | collection_group { 111 | collect_once = yes 112 | time_threshold = 1200 113 | metric { 114 | name = "cpu_num" 115 | title = "CPU Count" 116 | } 117 | metric { 118 | name = "cpu_speed" 119 | title = "CPU Speed" 120 | } 121 | metric { 122 | name = "mem_total" 123 | title = "Memory Total" 124 | } 125 | /* Should this be here? Swap can be added/removed between reboots. 
*/ 126 | metric { 127 | name = "swap_total" 128 | title = "Swap Space Total" 129 | } 130 | metric { 131 | name = "boottime" 132 | title = "Last Boot Time" 133 | } 134 | metric { 135 | name = "machine_type" 136 | title = "Machine Type" 137 | } 138 | metric { 139 | name = "os_name" 140 | title = "Operating System" 141 | } 142 | metric { 143 | name = "os_release" 144 | title = "Operating System Release" 145 | } 146 | metric { 147 | name = "location" 148 | title = "Location" 149 | } 150 | } 151 | 152 | /* This collection group will send the status of gexecd for this host every 300 secs */ 153 | /* Unlike 2.5.x the default behavior is to report gexecd OFF. */ 154 | collection_group { 155 | collect_once = yes 156 | time_threshold = 300 157 | metric { 158 | name = "gexec" 159 | title = "Gexec Status" 160 | } 161 | } 162 | 163 | /* This collection group will collect the CPU status info every 20 secs. 164 | The time threshold is set to 90 seconds. In honesty, this time_threshold could be 165 | set significantly higher to reduce unneccessary network chatter. */ 166 | collection_group { 167 | collect_every = 20 168 | time_threshold = 90 169 | /* CPU status */ 170 | metric { 171 | name = "cpu_user" 172 | value_threshold = "1.0" 173 | title = "CPU User" 174 | } 175 | metric { 176 | name = "cpu_system" 177 | value_threshold = "1.0" 178 | title = "CPU System" 179 | } 180 | metric { 181 | name = "cpu_idle" 182 | value_threshold = "5.0" 183 | title = "CPU Idle" 184 | } 185 | metric { 186 | name = "cpu_nice" 187 | value_threshold = "1.0" 188 | title = "CPU Nice" 189 | } 190 | metric { 191 | name = "cpu_aidle" 192 | value_threshold = "5.0" 193 | title = "CPU aidle" 194 | } 195 | metric { 196 | name = "cpu_wio" 197 | value_threshold = "1.0" 198 | title = "CPU wio" 199 | } 200 | /* The next two metrics are optional if you want more detail... 201 | ... since they are accounted for in cpu_system. 202 | metric { 203 | name = "cpu_intr" 204 | value_threshold = "1.0" 205 | title = "CPU intr" 206 | } 207 | metric { 208 | name = "cpu_sintr" 209 | value_threshold = "1.0" 210 | title = "CPU sintr" 211 | } 212 | */ 213 | } 214 | 215 | collection_group { 216 | collect_every = 20 217 | time_threshold = 90 218 | /* Load Averages */ 219 | metric { 220 | name = "load_one" 221 | value_threshold = "1.0" 222 | title = "One Minute Load Average" 223 | } 224 | metric { 225 | name = "load_five" 226 | value_threshold = "1.0" 227 | title = "Five Minute Load Average" 228 | } 229 | metric { 230 | name = "load_fifteen" 231 | value_threshold = "1.0" 232 | title = "Fifteen Minute Load Average" 233 | } 234 | } 235 | 236 | /* This group collects the number of running and total processes */ 237 | collection_group { 238 | collect_every = 80 239 | time_threshold = 950 240 | metric { 241 | name = "proc_run" 242 | value_threshold = "1.0" 243 | title = "Total Running Processes" 244 | } 245 | metric { 246 | name = "proc_total" 247 | value_threshold = "1.0" 248 | title = "Total Processes" 249 | } 250 | } 251 | 252 | /* This collection group grabs the volatile memory metrics every 40 secs and 253 | sends them at least every 180 secs. This time_threshold can be increased 254 | significantly to reduce unneeded network traffic. 
*/ 255 | collection_group { 256 | collect_every = 40 257 | time_threshold = 180 258 | metric { 259 | name = "mem_free" 260 | value_threshold = "1024.0" 261 | title = "Free Memory" 262 | } 263 | metric { 264 | name = "mem_shared" 265 | value_threshold = "1024.0" 266 | title = "Shared Memory" 267 | } 268 | metric { 269 | name = "mem_buffers" 270 | value_threshold = "1024.0" 271 | title = "Memory Buffers" 272 | } 273 | metric { 274 | name = "mem_cached" 275 | value_threshold = "1024.0" 276 | title = "Cached Memory" 277 | } 278 | metric { 279 | name = "swap_free" 280 | value_threshold = "1024.0" 281 | title = "Free Swap Space" 282 | } 283 | } 284 | 285 | collection_group { 286 | collect_every = 40 287 | time_threshold = 300 288 | metric { 289 | name = "bytes_out" 290 | value_threshold = 4096 291 | title = "Bytes Sent" 292 | } 293 | metric { 294 | name = "bytes_in" 295 | value_threshold = 4096 296 | title = "Bytes Received" 297 | } 298 | metric { 299 | name = "pkts_in" 300 | value_threshold = 256 301 | title = "Packets Received" 302 | } 303 | metric { 304 | name = "pkts_out" 305 | value_threshold = 256 306 | title = "Packets Sent" 307 | } 308 | } 309 | 310 | /* Different than 2.5.x default since the old config made no sense */ 311 | collection_group { 312 | collect_every = 1800 313 | time_threshold = 3600 314 | metric { 315 | name = "disk_total" 316 | value_threshold = 1.0 317 | title = "Total Disk Space" 318 | } 319 | } 320 | 321 | collection_group { 322 | collect_every = 40 323 | time_threshold = 180 324 | metric { 325 | name = "disk_free" 326 | value_threshold = 1.0 327 | title = "Disk Space Available" 328 | } 329 | metric { 330 | name = "part_max_used" 331 | value_threshold = 1.0 332 | title = "Maximum Disk Space Used" 333 | } 334 | } 335 | 336 | -------------------------------------------------------------------------------- /templates/default/gmond.conf.erb: -------------------------------------------------------------------------------- 1 | /* This configuration is as close to 2.5.x default behavior as possible 2 | The values closely match ./gmond/metric.h definitions in 2.5.x */ 3 | globals { 4 | daemonize = yes 5 | setuid = yes 6 | user = ganglia 7 | debug_level = 0 8 | max_udp_msg_len = 1472 9 | mute = no 10 | deaf = no 11 | host_dmax = 0 /*secs */ 12 | cleanup_threshold = 300 /*secs */ 13 | gexec = no 14 | send_metadata_interval = 0 15 | } 16 | 17 | /* If a cluster attribute is specified, then all gmond hosts are wrapped inside 18 | * of a tag. If you do not specify a cluster tag, then all will 19 | * NOT be wrapped inside of a tag. */ 20 | cluster { 21 | name = "<%= @cluster[:name] %>" 22 | owner = "<%= @cluster[:owner] %>" 23 | latlong = "<%= @cluster[:latlong] %>" 24 | url = "<%= @cluster[:url] %>" 25 | } 26 | 27 | /* The host section describes attributes of the host, like the location */ 28 | host { 29 | location = "unspecified" 30 | } 31 | 32 | /* Feel free to specify as many udp_send_channels as you like. Gmond 33 | used to only support having a single channel */ 34 | udp_send_channel { 35 | mcast_join = <%= @mcast_send[:mcast_join] %> 36 | port = <%= @mcast_send[:port] %> 37 | ttl = <%= @mcast_send[:ttl] %> 38 | } 39 | 40 | /* You can specify as many udp_recv_channels as you like as well. 
*/ 41 | udp_recv_channel { 42 | mcast_join = <%= @mcast_recv[:mcast_join] %> 43 | port = <%= @mcast_recv[:port] %> 44 | bind = <%= @mcast_recv[:bind] %> 45 | } 46 | 47 | /* You can specify as many tcp_accept_channels as you like to share 48 | an xml description of the state of the cluster */ 49 | tcp_accept_channel { 50 | port = <%= @mcast_recv[:port] %> 51 | } 52 | 53 | <% if node[:recipes].include? "ganglia::gmetad" && "ganglia::graphite" -%> 54 | /* Additional receive channel for graphite integration */ 55 | udp_recv_channel { 56 | bind = 127.0.0.1 57 | port = <%= @mcast_recv[:port] %> 58 | } 59 | <% end -%> 60 | 61 | /* Each metrics module that is referenced by gmond must be specified and 62 | loaded. If the module has been statically linked with gmond, it does not 63 | require a load path. However all dynamically loadable modules must include 64 | a load path. */ 65 | modules { 66 | module { 67 | name = "core_metrics" 68 | } 69 | module { 70 | name = "cpu_module" 71 | path = "/usr/lib/ganglia/modcpu.so" 72 | } 73 | module { 74 | name = "disk_module" 75 | path = "/usr/lib/ganglia/moddisk.so" 76 | } 77 | module { 78 | name = "load_module" 79 | path = "/usr/lib/ganglia/modload.so" 80 | } 81 | module { 82 | name = "mem_module" 83 | path = "/usr/lib/ganglia/modmem.so" 84 | } 85 | module { 86 | name = "net_module" 87 | path = "/usr/lib/ganglia/modnet.so" 88 | } 89 | module { 90 | name = "proc_module" 91 | path = "/usr/lib/ganglia/modproc.so" 92 | } 93 | module { 94 | name = "sys_module" 95 | path = "/usr/lib/ganglia/modsys.so" 96 | } 97 | } 98 | 99 | include ('/etc/ganglia/conf.d/*.conf') 100 | 101 | 102 | /* The old internal 2.5.x metric array has been replaced by the following 103 | collection_group directives. What follows is the default behavior for 104 | collecting and sending metrics that is as close to 2.5.x behavior as 105 | possible. */ 106 | 107 | /* This collection group will cause a heartbeat (or beacon) to be sent every 108 | 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses 109 | the age of the running gmond. */ 110 | collection_group { 111 | collect_once = yes 112 | time_threshold = 20 113 | metric { 114 | name = "heartbeat" 115 | } 116 | } 117 | 118 | /* This collection group will send general info about this host every 1200 secs. 119 | This information doesn't change between reboots and is only collected once. */ 120 | collection_group { 121 | collect_once = yes 122 | time_threshold = 1200 123 | metric { 124 | name = "cpu_num" 125 | title = "CPU Count" 126 | } 127 | metric { 128 | name = "cpu_speed" 129 | title = "CPU Speed" 130 | } 131 | metric { 132 | name = "mem_total" 133 | title = "Memory Total" 134 | } 135 | /* Should this be here? Swap can be added/removed between reboots. */ 136 | metric { 137 | name = "swap_total" 138 | title = "Swap Space Total" 139 | } 140 | metric { 141 | name = "boottime" 142 | title = "Last Boot Time" 143 | } 144 | metric { 145 | name = "machine_type" 146 | title = "Machine Type" 147 | } 148 | metric { 149 | name = "os_name" 150 | title = "Operating System" 151 | } 152 | metric { 153 | name = "os_release" 154 | title = "Operating System Release" 155 | } 156 | metric { 157 | name = "location" 158 | title = "Location" 159 | } 160 | } 161 | 162 | /* This collection group will send the status of gexecd for this host every 300 secs */ 163 | /* Unlike 2.5.x the default behavior is to report gexecd OFF. 
*/ 164 | collection_group { 165 | collect_once = yes 166 | time_threshold = 300 167 | metric { 168 | name = "gexec" 169 | title = "Gexec Status" 170 | } 171 | } 172 | 173 | /* This collection group will collect the CPU status info every 20 secs. 174 | The time threshold is set to 90 seconds. In honesty, this time_threshold could be 175 | set significantly higher to reduce unneccessary network chatter. */ 176 | collection_group { 177 | collect_every = 20 178 | time_threshold = 90 179 | /* CPU status */ 180 | metric { 181 | name = "cpu_user" 182 | value_threshold = "1.0" 183 | title = "CPU User" 184 | } 185 | metric { 186 | name = "cpu_system" 187 | value_threshold = "1.0" 188 | title = "CPU System" 189 | } 190 | metric { 191 | name = "cpu_idle" 192 | value_threshold = "5.0" 193 | title = "CPU Idle" 194 | } 195 | metric { 196 | name = "cpu_nice" 197 | value_threshold = "1.0" 198 | title = "CPU Nice" 199 | } 200 | metric { 201 | name = "cpu_aidle" 202 | value_threshold = "5.0" 203 | title = "CPU aidle" 204 | } 205 | metric { 206 | name = "cpu_wio" 207 | value_threshold = "1.0" 208 | title = "CPU wio" 209 | } 210 | /* The next two metrics are optional if you want more detail... 211 | ... since they are accounted for in cpu_system. 212 | metric { 213 | name = "cpu_intr" 214 | value_threshold = "1.0" 215 | title = "CPU intr" 216 | } 217 | metric { 218 | name = "cpu_sintr" 219 | value_threshold = "1.0" 220 | title = "CPU sintr" 221 | } 222 | */ 223 | } 224 | 225 | collection_group { 226 | collect_every = 20 227 | time_threshold = 90 228 | /* Load Averages */ 229 | metric { 230 | name = "load_one" 231 | value_threshold = "1.0" 232 | title = "One Minute Load Average" 233 | } 234 | metric { 235 | name = "load_five" 236 | value_threshold = "1.0" 237 | title = "Five Minute Load Average" 238 | } 239 | metric { 240 | name = "load_fifteen" 241 | value_threshold = "1.0" 242 | title = "Fifteen Minute Load Average" 243 | } 244 | } 245 | 246 | /* This group collects the number of running and total processes */ 247 | collection_group { 248 | collect_every = 80 249 | time_threshold = 950 250 | metric { 251 | name = "proc_run" 252 | value_threshold = "1.0" 253 | title = "Total Running Processes" 254 | } 255 | metric { 256 | name = "proc_total" 257 | value_threshold = "1.0" 258 | title = "Total Processes" 259 | } 260 | } 261 | 262 | /* This collection group grabs the volatile memory metrics every 40 secs and 263 | sends them at least every 180 secs. This time_threshold can be increased 264 | significantly to reduce unneeded network traffic. 
*/ 265 | collection_group { 266 | collect_every = 40 267 | time_threshold = 180 268 | metric { 269 | name = "mem_free" 270 | value_threshold = "1024.0" 271 | title = "Free Memory" 272 | } 273 | metric { 274 | name = "mem_shared" 275 | value_threshold = "1024.0" 276 | title = "Shared Memory" 277 | } 278 | metric { 279 | name = "mem_buffers" 280 | value_threshold = "1024.0" 281 | title = "Memory Buffers" 282 | } 283 | metric { 284 | name = "mem_cached" 285 | value_threshold = "1024.0" 286 | title = "Cached Memory" 287 | } 288 | metric { 289 | name = "swap_free" 290 | value_threshold = "1024.0" 291 | title = "Free Swap Space" 292 | } 293 | } 294 | 295 | collection_group { 296 | collect_every = 40 297 | time_threshold = 300 298 | metric { 299 | name = "bytes_out" 300 | value_threshold = 4096 301 | title = "Bytes Sent" 302 | } 303 | metric { 304 | name = "bytes_in" 305 | value_threshold = 4096 306 | title = "Bytes Received" 307 | } 308 | metric { 309 | name = "pkts_in" 310 | value_threshold = 256 311 | title = "Packets Received" 312 | } 313 | metric { 314 | name = "pkts_out" 315 | value_threshold = 256 316 | title = "Packets Sent" 317 | } 318 | } 319 | 320 | /* Different than 2.5.x default since the old config made no sense */ 321 | collection_group { 322 | collect_every = 1800 323 | time_threshold = 3600 324 | metric { 325 | name = "disk_total" 326 | value_threshold = 1.0 327 | title = "Total Disk Space" 328 | } 329 | } 330 | 331 | collection_group { 332 | collect_every = 40 333 | time_threshold = 180 334 | metric { 335 | name = "disk_free" 336 | value_threshold = 1.0 337 | title = "Disk Space Available" 338 | } 339 | metric { 340 | name = "part_max_used" 341 | value_threshold = 1.0 342 | title = "Maximum Disk Space Used" 343 | } 344 | } 345 | 346 | --------------------------------------------------------------------------------
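
Illustrative sketches (not files from this repository):

The README's gmetric LWRP expects the calling cookbook to supply a script template and a cron template, but no example of either appears above. The sketch below is illustrative only: the "memcache" plugin name, the metric name and the stats parsing are assumptions, not shipped code. It assumes the standard gmetric command-line tool is installed, and it uses the @options hash that providers/gmetric.rb passes to both templates. Note that the provider renders source "ganglia/<script_name>.gmetric.erb", so the templates may need to live under a ganglia/ subdirectory of templates/default/ in the calling cookbook, even though the README lists them without it.

A hypothetical memcache.gmetric.erb (rendered to /usr/local/bin/memcache-ganglia):

  #!/bin/bash
  # Illustrative only: report memcached's current connection count to Ganglia.
  # The port is filled in from the LWRP call: options :port => 11211
  conns=$(echo stats | nc -w 1 127.0.0.1 <%= @options[:port] %> \
          | awk '/curr_connections/ {print $3}' | tr -d '\r')
  gmetric --name "memcache_curr_connections" --value "${conns}" \
          --type uint32 --units "connections"

A hypothetical memcache.cron.erb (rendered to /etc/cron.d/memcache-ganglia, so it needs the user field):

  # Illustrative only: run the script installed by the provider once a minute.
  * * * * * root /usr/local/bin/memcache-ganglia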
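Similarly, the python LWRP expects a Ganglia Python metric module plus a matching .pyconf file in the calling cookbook. The sketch below is a hypothetical memcache module, not part of this cookbook; it relies only on the documented Ganglia module interface (metric_init returning a list of metric descriptors, a call_back function, and metric_cleanup) and assumes gmond's Python module support (modpython) is enabled.

A hypothetical memcache.py.erb (rendered to /usr/lib/ganglia/python_modules/memcache.py):

  import socket

  PORT = <%= @options[:port] %>  # filled in from the LWRP's options hash

  def metric_handler(name):
      # Ask memcached for its stats and return the current connection count.
      try:
          sock = socket.create_connection(("127.0.0.1", PORT), 2)
          sock.sendall("stats\r\n")
          data = sock.recv(4096)
          sock.close()
          for line in data.splitlines():
              parts = line.split()
              if len(parts) == 3 and parts[1] == "curr_connections":
                  return int(parts[2])
      except Exception:
          pass
      return 0

  def metric_init(params):
      # One descriptor; gmond calls metric_handler with the metric name.
      return [{
          "name": "memcache_curr_connections",
          "call_back": metric_handler,
          "time_max": 90,
          "value_type": "uint",
          "units": "connections",
          "slope": "both",
          "format": "%u",
          "description": "Current memcached connections",
          "groups": "memcache",
      }]

  def metric_cleanup():
      pass

A hypothetical memcache.pyconf.erb (rendered to /etc/ganglia/conf.d/memcache.pyconf; the module name must match the .py filename):

  modules {
    module {
      name = "memcache"
      language = "python"
    }
  }

  collection_group {
    collect_every = 20
    time_threshold = 90
    metric {
      name = "memcache_curr_connections"
      title = "Memcache Current Connections"
    }
  }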
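Finally, the README's USAGE section lists the recipes but not how a server node is typically wired up. The role below is a minimal hypothetical sketch: the recipe names and the ganglia attributes are taken from this cookbook, and the role name matches the default node[:ganglia][:server_role] of "ganglia"; the cluster name and everything else are assumptions.

  # roles/ganglia.rb -- hypothetical example, not shipped with the cookbook
  name "ganglia"
  description "Ganglia server: gmond, gmetad, web UI and Graphite bridge"
  run_list(
    "recipe[ganglia]",
    "recipe[ganglia::gmetad]",
    "recipe[ganglia::web]",
    "recipe[ganglia::graphite]"
  )
  default_attributes(
    "ganglia" => {
      "unicast" => { "enable" => true },
      "cluster" => { "name" => "production" }
    }
  )

Note that node[:ganglia][:unicast][:enable] has to be set consistently on the monitored nodes as well (for example via an environment or a base role), since recipe[ganglia] uses it to choose between the unicast and multicast gmond templates.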