├── templates
│   └── default
│       ├── http.erb
│       ├── https.erb
│       ├── whitelist.erb
│       ├── apache.conf.erb
│       ├── ganglia_graphite.rb.erb
│       ├── gmetad.conf.erb
│       ├── gmond_unicast.conf.erb
│       └── gmond.conf.erb
├── resources
│   ├── python.rb
│   └── gmetric.rb
├── metadata.rb
├── recipes
│   ├── iptables.rb
│   ├── graphite.rb
│   ├── source.rb
│   ├── web.rb
│   ├── gmetad.rb
│   └── default.rb
├── providers
│   ├── gmetric.rb
│   └── python.rb
├── attributes
│   └── ganglia.rb
└── README.rdoc
/templates/default/http.erb:
--------------------------------------------------------------------------------
1 | -A FWR -p tcp --dport 80 -j ACCEPT
2 |
--------------------------------------------------------------------------------
/templates/default/https.erb:
--------------------------------------------------------------------------------
1 | -A FWR -p tcp --dport 443 -j ACCEPT
2 |
--------------------------------------------------------------------------------
/templates/default/whitelist.erb:
--------------------------------------------------------------------------------
1 | # Whitelist an IP address
2 | -A FWR -s <%= @subnet %> -j ACCEPT
3 |
--------------------------------------------------------------------------------
/resources/python.rb:
--------------------------------------------------------------------------------
1 |
2 | actions :enable, :disable
3 | default_action :enable
4 | attribute :module_name, :kind_of => String, :name_attribute => true
5 | attribute :options, :kind_of => Hash, :default => {}
6 |
--------------------------------------------------------------------------------
/resources/gmetric.rb:
--------------------------------------------------------------------------------
1 |
2 | actions :enable, :disable
3 | default_action :enable
4 | attribute :script_name, :kind_of => String, :name_attribute => true
5 | attribute :options, :kind_of => Hash, :default => {}
6 |
--------------------------------------------------------------------------------
/metadata.rb:
--------------------------------------------------------------------------------
1 | maintainer "Heavy Water Software Inc."
2 | maintainer_email "darrin@heavywater.ca"
3 | license "Apache 2.0"
4 | description "Installs/Configures ganglia"
5 | long_description IO.read(File.join(File.dirname(__FILE__), 'README.rdoc'))
6 | version "0.1.1"
7 |
8 | %w{ debian ubuntu redhat centos fedora }.each do |os|
9 | supports os
10 | end
11 |
12 | recommends "graphite"
13 | suggests "iptables"
14 |
15 |
--------------------------------------------------------------------------------
/recipes/iptables.rb:
--------------------------------------------------------------------------------
1 | include_recipe "iptables"
2 |
3 | iptables_rule "http"
4 | iptables_rule "https"
5 |
6 | workers = search(:node, "*:*") || []
7 | subnets = []
8 |
9 | workers.each do |w|
10 | subnets << [ w.name, "#{w['ipaddress']}/32" ]
11 | end
12 |
13 | subnets.each do |h|
14 | template "/etc/iptables.d/#{h[0]}" do
15 | source "whitelist.erb"
16 | mode "644"
17 | variables :subnet => h[1]
18 | notifies :run, "execute[rebuild-iptables]"
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/recipes/graphite.rb:
--------------------------------------------------------------------------------
1 | graphite_host = search(:node, "role:#{node['ganglia']['server_role']} AND chef_environment:#{node.chef_environment}").map { |n| n['ipaddress'] }.first
2 | if graphite_host.nil?
3 |   graphite_host = "localhost"
4 | end
5 |
6 | template "/usr/local/sbin/ganglia_graphite.rb" do
7 | source "ganglia_graphite.rb.erb"
8 | mode "744"
9 | variables :graphite_host => graphite_host
10 | end
11 |
12 | cron "ganglia_graphite" do
13 | command "/usr/local/sbin/ganglia_graphite.rb"
14 | end
15 |
--------------------------------------------------------------------------------
/templates/default/apache.conf.erb:
--------------------------------------------------------------------------------
1 | <VirtualHost <%= @config['vhost_addr'] -%>:<%= @config['vhost_port'] -%>>
2 |   ServerAdmin <%= @config['server_admin'] %>
3 |   ErrorLog <%= @config['error_log'] %>
4 |   LogLevel <%= @config['log_level'] %>
5 |
6 |   Alias <%= @config['alias'] -%> "/usr/share/ganglia-webfrontend"
7 |   <Directory "/usr/share/ganglia-webfrontend">
8 |     Options Indexes MultiViews FollowSymLinks
9 |     AllowOverride None
10 |     <% if @config['allow_from'] %>
11 |     Order deny,allow
12 |     Deny from all
13 |     Allow from <%= @config['allow_from'] %>
14 |     <% else %>
15 |     Order allow,deny
16 |     Allow from all
17 |     <% end %>
18 |   </Directory>
19 | </VirtualHost>
20 |
--------------------------------------------------------------------------------
/providers/gmetric.rb:
--------------------------------------------------------------------------------
1 |
2 |
3 | action :enable do
4 |
5 | #script
6 | template "/usr/local/bin/#{new_resource.script_name}-ganglia" do
7 | source "ganglia/#{new_resource.script_name}.gmetric.erb"
8 | owner "root"
9 | group "root"
10 | mode "755"
11 | variables :options => new_resource.options
12 | end
13 |
14 | #cron
15 | template "/etc/cron.d/#{new_resource.script_name}-ganglia" do
16 | source "ganglia/#{new_resource.script_name}.cron.erb"
17 | owner "root"
18 | group "root"
19 | mode "644"
20 | variables :options => new_resource.options
21 | end
22 |
23 | end
24 |
25 | action :disable do
26 |
27 | file "/usr/local/bin/#{new_resource.script_name}-ganglia" do
28 | action :delete
29 | end
30 |
31 | file "/etc/cron.d/#{new_resource.script_name}-ganglia" do
32 | action :delete
33 | end
34 |
35 | end
--------------------------------------------------------------------------------
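
The gmetric provider above renders two templates that must live in the calling
cookbook (see the gmetric LWRP section of README.rdoc). A minimal sketch for a
hypothetical 'memcache' plugin follows; the stats pipeline, metric name, and
units are illustrative assumptions, not part of this cookbook:

    # cookbooks/memcache/templates/default/memcache.gmetric.erb
    #!/bin/bash
    # Query memcached on the configured port and publish one gauge via gmetric.
    PORT=<%= @options[:port] %>
    VALUE=$(echo stats | nc -q 1 localhost $PORT | awk '/curr_connections/ {print $3}' | tr -d '\r')
    /usr/bin/gmetric --name memcache_curr_connections --value "$VALUE" --type uint32 --units connections

    # cookbooks/memcache/templates/default/memcache.cron.erb
    # Run the rendered script (installed as /usr/local/bin/memcache-ganglia) every minute.
    * * * * * root /usr/local/bin/memcache-ganglia

The resource's options hash is passed to both templates, so anything set on the
ganglia_gmetric resource is available as @options.
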
/providers/python.rb:
--------------------------------------------------------------------------------
1 |
2 |
3 | action :enable do
4 |
5 | #python module
6 | template "/usr/lib/ganglia/python_modules/#{new_resource.module_name}.py" do
7 | source "ganglia/#{new_resource.module_name}.py.erb"
8 | owner "root"
9 | group "root"
10 | mode "644"
11 | variables :options => new_resource.options
12 | notifies :restart, resources(:service => "ganglia-monitor")
13 | end
14 |
15 | #configuration
16 | template "/etc/ganglia/conf.d/#{new_resource.module_name}.pyconf" do
17 | source "ganglia/#{new_resource.module_name}.pyconf.erb"
18 | owner "root"
19 | group "root"
20 | mode "644"
21 | variables :options => new_resource.options
22 | notifies :restart, resources(:service => "ganglia-monitor")
23 | end
24 |
25 | end
26 |
27 | action :disable do
28 |
29 | file "/usr/lib/ganglia/python_modules/#{new_resource.module_name}.py" do
30 | action :delete
31 | notifies :restart, resources(:service => "ganglia-monitor")
32 | end
33 |
34 | file "/etc/ganglia/conf.d/#{new_resource.module_name}.pyconf" do
35 | action :delete
36 | notifies :restart, resources(:service => "ganglia-monitor")
37 | end
38 |
39 | end
--------------------------------------------------------------------------------
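
Likewise, the python provider expects a module template and a pyconf template in
the calling cookbook. A minimal sketch of the pyconf side for a hypothetical
'memcache' module (the metric name and collection intervals are assumptions; the
matching memcache.py.erb would implement Ganglia's standard metric_init /
metric_cleanup / callback interface for python modules):

    # cookbooks/memcache/templates/default/memcache.pyconf.erb
    modules {
      module {
        name = "memcache"
        language = "python"
        param port {
          value = "<%= @options[:port] %>"
        }
      }
    }

    collection_group {
      collect_every = 20
      time_threshold = 60
      metric {
        name = "memcache_curr_connections"
        title = "Memcache Connections"
      }
    }
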
/recipes/source.rb:
--------------------------------------------------------------------------------
1 | if platform?( "redhat", "centos", "fedora" )
2 | package "apr-devel"
3 | package "libconfuse-devel"
4 | package "expat-devel"
5 | package "rrdtool-devel"
6 | end
7 |
8 | remote_file "/usr/src/ganglia-#{node[:ganglia][:version]}.tar.gz" do
9 | source node[:ganglia][:uri]
10 | checksum node[:ganglia][:checksum]
11 | end
12 |
13 | src_path = "/usr/src/ganglia-#{node[:ganglia][:version]}"
14 |
15 | execute "untar ganglia" do
16 | command "tar xzf ganglia-#{node[:ganglia][:version]}.tar.gz"
17 | creates src_path
18 | cwd "/usr/src"
19 | end
20 |
21 | execute "configure ganglia build" do
22 | command "./configure --with-gmetad --with-libpcre=no --sysconfdir=/etc/ganglia"
23 | creates "#{src_path}/config.log"
24 | cwd src_path
25 | end
26 |
27 | execute "build ganglia" do
28 | command "make"
29 | creates "#{src_path}/gmond/gmond"
30 | cwd src_path
31 | end
32 |
33 | execute "install ganglia" do
34 | command "make install"
35 | creates "/usr/sbin/gmond"
36 | cwd src_path
37 | end
38 |
39 | link "/usr/lib/ganglia" do
40 | to "/usr/lib64/ganglia"
41 | only_if do
42 | node[:kernel][:machine] == "x86_64" and
43 | platform?( "redhat", "centos", "fedora" )
44 | end
45 | end
46 |
--------------------------------------------------------------------------------
/recipes/web.rb:
--------------------------------------------------------------------------------
1 | directory "/etc/ganglia-webfrontend"
2 |
3 | case node[:platform]
4 | when "ubuntu", "debian"
5 | package "ganglia-webfrontend"
6 |
7 | link "/etc/apache2/sites-enabled/ganglia" do
8 | to "/etc/ganglia-webfrontend/apache.conf"
9 | notifies :restart, "service[apache2]"
10 | not_if do
11 | node[:ganglia][:apache][:write_config_file]
12 | end
13 | end
14 |
15 | when "redhat", "centos", "fedora"
16 | package "httpd"
17 | package "php"
18 | include_recipe "ganglia::source"
19 | include_recipe "ganglia::gmetad"
20 |
21 | execute "copy web directory" do
22 | command "cp -r web /var/www/html/ganglia"
23 | creates "/var/www/html/ganglia"
24 | cwd "/usr/src/ganglia-#{node[:ganglia][:version]}"
25 | end
26 | end
27 |
28 | # If applicable, write an apache config file for Ganglia
29 | if node[:ganglia][:apache][:write_config_file]
30 | template "/etc/apache2/sites-available/ganglia" do
31 | source "apache.conf.erb"
32 | owner "root"
33 | group "root"
34 | mode 0644
35 | action :create
36 | variables(
37 | :config => node[:ganglia][:apache]
38 | )
39 | end
40 |
41 |   bash "enable ganglia web" do
42 | user "root"
43 | code "a2ensite ganglia"
44 | notifies :reload, resources( :service => "apache2"), :delayed
45 | end
46 | end
47 |
--------------------------------------------------------------------------------
/recipes/gmetad.rb:
--------------------------------------------------------------------------------
1 | case node[:platform]
2 | when "ubuntu", "debian"
3 | package "gmetad"
4 | when "redhat", "centos", "fedora"
5 | include_recipe "ganglia::source"
6 | execute "copy gmetad init script" do
7 | command "cp " +
8 | "/usr/src/ganglia-#{node[:ganglia][:version]}/gmetad/gmetad.init " +
9 | "/etc/init.d/gmetad"
10 | not_if "test -f /etc/init.d/gmetad"
11 | end
12 | end
13 |
14 | directory "/var/lib/ganglia/rrds" do
15 | owner "nobody"
16 | recursive true
17 | end
18 |
19 | case node[:ganglia][:unicast][:enable]
20 | when true
21 | template "/etc/ganglia/gmetad.conf" do
22 | source "gmetad.conf.erb"
23 | variables( :hosts => "localhost",
24 |                :cluster_name => node[:ganglia][:cluster][:name])
25 | notifies :restart, "service[gmetad]"
26 | end
27 | if node[:recipes].include? "iptables"
28 | include_recipe "ganglia::iptables"
29 | end
30 | when false
31 |   ips = []
32 |   node[:ganglia][:cluster_nodes].each do |cluster_node|
33 |     ips += search(:node, "name:#{cluster_node}").map { |n| n['ipaddress'] }
34 |   end
35 | template "/etc/ganglia/gmetad.conf" do
36 | source "gmetad.conf.erb"
37 | variables( :hosts => ips.join(" "),
38 |                :cluster_name => node[:ganglia][:cluster][:name])
39 | notifies :restart, "service[gmetad]"
40 | end
41 | end
42 |
43 | service "gmetad" do
44 | supports :restart => true
45 | action [ :enable, :start ]
46 | end
47 |
--------------------------------------------------------------------------------
/attributes/ganglia.rb:
--------------------------------------------------------------------------------
1 | default[:ganglia][:version] = "3.1.7"
2 | default[:ganglia][:uri] = "http://sourceforge.net/projects/ganglia/files/ganglia%20monitoring%20core/3.1.7/ganglia-3.1.7.tar.gz/download"
3 | default[:ganglia][:checksum] = "bb1a4953"
4 |
5 | default[:ganglia][:server_role] = "ganglia"
6 |
7 | # Cluster Information
8 | default[:ganglia][:cluster][:name] = "unspecified"
9 | default[:ganglia][:cluster][:owner] = "unspecified"
10 | default[:ganglia][:cluster][:latlong] = "unspecified"
11 | default[:ganglia][:cluster][:url] = "unspecified"
12 |
13 | # Multicast send/receive config
14 | default[:ganglia][:multicast][:send_channel][:mcast_join] = "239.2.11.71"
15 | default[:ganglia][:multicast][:send_channel][:port] = "8649"
16 | default[:ganglia][:multicast][:send_channel][:ttl] = 1
17 |
18 | default[:ganglia][:multicast][:recv_channel][:mcast_join] = "239.2.11.71"
19 | default[:ganglia][:multicast][:recv_channel][:port] = "8649"
20 | default[:ganglia][:multicast][:recv_channel][:bind] = "239.2.11.71"
21 |
22 | # Unicast send/receive config
23 | default[:ganglia][:unicast][:enable] = false
24 | default[:ganglia][:unicast][:port] = "8649"
25 | default[:ganglia][:unicast][:ttl] = 1
26 |
27 | # A list of chef node names that are in a cluster. This is
28 | # ignored if unicast is used.
29 | default[:ganglia][:cluster_nodes] = []
30 |
31 | # Some attributes used to write an apache config file
32 | default[:ganglia][:apache][:write_config_file] = true
33 | default[:ganglia][:apache][:vhost_addr] = "*"
34 | default[:ganglia][:apache][:vhost_port] = "80"
35 | default[:ganglia][:apache][:server_admin] = "root@localhost"
36 | default[:ganglia][:apache][:error_log] = "/var/log/apache2/error.log"
37 | default[:ganglia][:apache][:log_level] = "warn"
38 | default[:ganglia][:apache][:alias] = "/ganglia"
39 | # Setting `allow_from` will restrict the addresses from which Ganglia
40 | # can be viewed. E.g. setting it to "127.0.0.0/255.0.0.0 ::1/128" will
41 | # only allow local connections.
42 | default[:ganglia][:apache][:allow_from] = nil
43 |
44 |
--------------------------------------------------------------------------------
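
As a usage sketch of these attributes, a role that enables unicast mode and runs
the full stack on a dedicated Ganglia server could look like this (the role name
matches the default server_role above; the cluster values are illustrative):

    name "ganglia"
    description "Ganglia server: gmond, gmetad and the web frontend"
    run_list "recipe[ganglia]", "recipe[ganglia::gmetad]", "recipe[ganglia::web]"
    default_attributes(
      :ganglia => {
        :unicast => { :enable => true },
        :cluster => { :name => "production", :owner => "ops" }
      }
    )

Monitored nodes then carry only recipe[ganglia]; with unicast enabled, their
gmond sends metrics to the address of the node holding this role, found via
search in recipes/default.rb.
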
/README.rdoc:
--------------------------------------------------------------------------------
1 | = DESCRIPTION:
2 |
3 | Installs and configures Ganglia.
4 |
5 | http://ganglia.sourceforge.net/
6 |
7 | = REQUIREMENTS:
8 |
9 | * SELinux must be disabled on CentOS
10 | * iptables must allow access to port 80
11 |
12 | = ATTRIBUTES:
13 |
14 | See the `attributes/ganglia.rb` file.
15 |
16 | = USAGE:
17 |
18 | A run list with "recipe[ganglia]" enables monitoring.
19 |
20 | A run list with "recipe[ganglia::web]" enables the web interface. NOTE: if
21 | you are using Apache and want to run the ganglia web interface on a
22 | different port, you should configure that with the attributes available from
23 | the apache cookbook:
24 |
25 | default[:apache][:listen_ports]
26 |
27 | However, this cookbook *does* make several attributes available for writing
28 | an apache virtual host config file.
29 |
30 | A run list with "recipe[ganglia::graphite]" enables graphite graphs.
31 |
32 | = LWRP:
33 |
34 | == gmetric
35 |
36 | Installs a gmetric plugin.
37 |
38 | The plugin is composed of two templates:
39 | * One for the script
40 | * One for the cron job that will call the script
41 |
42 | The templates must be in the calling cookbook.
43 |
44 | Example:
45 |
46 | ganglia_gmetric 'memcache' do
47 | options :port => 11211
48 | end
49 |
50 | templates:
51 | cookbooks/memcache/templates/default/memcache.gmetric.erb
52 | cookbooks/memcache/templates/default/memcache.cron.erb
53 |
54 | The content of 'options' will be passed to the templates.
55 |
56 | == python
57 |
58 | Installs a python plugin.
59 |
60 | The plugin is composed of two templates:
61 | * One for the python module
62 | * One for the configuration of the module
63 |
64 | The templates must be in the calling cookbook.
65 |
66 | Example:
67 |
68 | ganglia_python 'memcache' do
69 | options :port => 11211
70 | end
71 |
72 | templates:
73 | cookbooks/memcache/templates/default/memcache.py.erb
74 | cookbooks/memcache/templates/default/memcache.pyconf.erb
75 |
76 | The content of 'options' will be passed to the templates.
77 |
78 | = CAVEATS:
79 |
80 | This cookbook has been tested on Ubuntu 10.04 and CentOS 5.5.
81 |
82 | Search seems to take a moment or two to index.
83 | You may need to converge again to see recently added nodes.
84 |
--------------------------------------------------------------------------------
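
Expanding on the README note about running the web interface on a non-default
port, a sketch of the attribute overrides, assuming the apache2 cookbook is also
in the run list (the :apache attribute belongs to that cookbook, not this one):

    # in a role or environment
    default_attributes(
      :apache  => { :listen_ports => [ "80", "8080" ] },
      :ganglia => { :apache => { :vhost_port => "8080" } }
    )
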
/recipes/default.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Cookbook Name:: ganglia
3 | # Recipe:: default
4 | #
5 | # Copyright 2011, Heavy Water Software Inc.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | case node[:platform]
21 | when "ubuntu", "debian"
22 | package "ganglia-monitor"
23 | when "redhat", "centos", "fedora"
24 | include_recipe "ganglia::source"
25 |
26 | execute "copy ganglia-monitor init script" do
27 | command "cp " +
28 | "/usr/src/ganglia-#{node[:ganglia][:version]}/gmond/gmond.init " +
29 | "/etc/init.d/ganglia-monitor"
30 | not_if "test -f /etc/init.d/ganglia-monitor"
31 | end
32 |
33 | user "ganglia"
34 | end
35 |
36 | directory "/etc/ganglia"
37 |
38 | case node[:ganglia][:unicast][:enable]
39 | when true
40 |   host = search(:node, "role:#{node['ganglia']['server_role']} AND chef_environment:#{node.chef_environment}").map { |n| n['ipaddress'] }.first
41 |   if host.nil?
42 |     host = "127.0.0.1"
43 |   end
44 | template "/etc/ganglia/gmond.conf" do
45 | source "gmond_unicast.conf.erb"
46 | variables(
47 | :cluster => node[:ganglia][:cluster],
48 | :unicast => node[:ganglia][:unicast],
49 | :host => host
50 | )
51 | notifies :restart, "service[ganglia-monitor]"
52 | end
53 | when false
54 | template "/etc/ganglia/gmond.conf" do
55 | source "gmond.conf.erb"
56 |     variables(
57 |       :cluster => node[:ganglia][:cluster],
58 |       :mcast_send => node[:ganglia][:multicast][:send_channel],
59 |       :mcast_recv => node[:ganglia][:multicast][:recv_channel]
60 |     )
61 |     notifies :restart, "service[ganglia-monitor]"
62 |   end
63 | end
64 |
65 | service "ganglia-monitor" do
66 |   pattern "gmond"
67 |   supports :restart => true
68 |   action [ :enable, :start ]
69 | end
70 |
--------------------------------------------------------------------------------
/templates/default/ganglia_graphite.rb.erb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | #################################################################################
4 | # Parse Ganglia XML stream and send metrics to Graphite
5 | # License: Same as Ganglia
6 | # Author: Vladimir Vuksan
7 | # Modified from script written by: Kostas Georgiou
8 | #################################################################################
9 | require "rexml/document"
10 | require 'socket'
11 |
12 | # Adjust to the appropriate values
13 | ganglia_hostname = 'localhost'
14 | ganglia_port = 8649
15 | graphite_host = '<%= @graphite_host %>'
16 | graphite_port = 2003
17 | Debug = false
18 |
19 | begin
20 | # Open up a socket to gmond
21 | file = TCPSocket.open(ganglia_hostname, ganglia_port)
22 | # Open up a socket to graphite
23 | graphite = TCPSocket.open(graphite_host, graphite_port)
24 | # We need current time stamp in UNIX time
25 | now = Time.now.to_i
26 | # Parse the XML we got from gmond
27 | doc = REXML::Document.new file
28 | #doc.write( $stdout, 0 )
29 |
30 | grid=nil
31 | doc.elements.each("GANGLIA_XML/GRID") { |element|
32 | grid=element.attributes["NAME"]
33 | }
34 | puts "GRID: #{grid}\n" if Debug
35 |
36 | cluster=nil
37 | doc.elements.each("GANGLIA_XML/GRID/CLUSTER") { |element|
38 | cluster=element.attributes["NAME"]
39 | puts "CLUSTER: #{cluster}\n" if Debug
40 |
41 | doc.elements.each("GANGLIA_XML/GRID[@NAME='#{grid}']/CLUSTER[@NAME='#{cluster}']/HOST") { |host|
42 | metric_prefix=host.attributes["NAME"].gsub(".", "_")
43 | host.elements.each("METRIC") { |metric|
44 | # Set metric prefix to the host name. Graphite uses dots to separate subtrees
45 | # therefore we have to change dots in hostnames to _
46 | # Do substitution of whitespace after XML parsing to avoid problems with
47 |       # pre-existing whitespace in GRID / CLUSTER names in XML.
48 | grid.gsub!(/\W/, "_")
49 | cluster.gsub!(/\W/, "_")
50 | if metric.attributes["TYPE"] != "string"
51 | graphite.puts "#{grid}.#{cluster}.#{metric_prefix}.#{metric.attributes["NAME"]} #{metric.attributes["VAL"]} #{now}\n" if !Debug
52 | puts "#{grid}.#{cluster}.#{metric_prefix}.#{metric.attributes["NAME"]} #{metric.attributes["VAL"]} #{now}\n" if Debug
53 | end
54 | }
55 | }
56 | }
57 |
58 | graphite.close()
59 | file.close()
60 | rescue
61 | end
62 |
--------------------------------------------------------------------------------
/templates/default/gmetad.conf.erb:
--------------------------------------------------------------------------------
1 | # This is an example of a Ganglia Meta Daemon configuration file
2 | # http://ganglia.sourceforge.net/
3 | #
4 | # $Id: gmetad.conf.in 1639 2008-08-09 23:30:32Z carenas $
5 | #
6 | #-------------------------------------------------------------------------------
7 | # Setting the debug_level to 1 will keep the daemon in the foreground and
8 | # show only error messages. Setting this value higher than 1 will make
9 | # gmetad output debugging information and stay in the foreground.
10 | # default: 0
11 | # debug_level 10
12 | #
13 | #-------------------------------------------------------------------------------
14 | # What to monitor. The most important section of this file.
15 | #
16 | # The data_source tag specifies either a cluster or a grid to
17 | # monitor. If we detect the source is a cluster, we will maintain a complete
18 | # set of RRD databases for it, which can be used to create historical
19 | # graphs of the metrics. If the source is a grid (it comes from another gmetad),
20 | # we will only maintain summary RRDs for it.
21 | #
22 | # Format:
23 | # data_source "my cluster" [polling interval] address1:port address2:port ...
24 | #
25 | # The keyword 'data_source' must immediately be followed by a unique
26 | # string which identifies the source, then an optional polling interval in
27 | # seconds. The source will be polled at this interval on average.
28 | # If the polling interval is omitted, 15sec is assumed.
29 | #
30 | # A list of machines which service the data source follows, in the
31 | # format ip:port, or name:port. If a port is not specified then 8649
32 | # (the default gmond port) is assumed.
33 | # default: There is no default value
34 | #
35 | # data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655
36 | # data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651
37 | # data_source "another source" 1.3.4.7:8655 1.3.4.8
38 |
39 | data_source "<%= @cluster_name %>" <%= @hosts %>
40 |
41 | #
42 | # Round-Robin Archives
43 | # You can specify custom Round-Robin archives here (defaults are listed below)
44 | #
45 | # RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \
46 | # "RRA:AVERAGE:0.5:5760:374"
47 | #
48 |
49 | #
50 | #-------------------------------------------------------------------------------
51 | # Scalability mode. If on, we summarize over downstream grids, and respect
52 | # authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output
53 | # in <GRID></GRID> tags, we ignore all <GRID> tags we see, and always assume
54 | # we are the "authority" on data source feeds. This approach does not scale to
55 | # large groups of clusters, but is provided for backwards compatibility.
56 | # default: on
57 | # scalable off
58 | #
59 | #-------------------------------------------------------------------------------
60 | # The name of this Grid. All the data sources above will be wrapped in a GRID
61 | # tag with this name.
62 | # default: unspecified
63 | gridname "<%= @cluster_name %>"
64 | #
65 | #-------------------------------------------------------------------------------
66 | # The authority URL for this grid. Used by other gmetads to locate graphs
67 | # for our data sources. Generally points to a ganglia/
68 | # website on this machine.
69 | # default: "http://hostname/ganglia/",
70 | # where hostname is the name of this machine, as defined by gethostname().
71 | # authority "http://mycluster.org/newprefix/"
72 | #
73 | #-------------------------------------------------------------------------------
74 | # List of machines this gmetad will share XML with. Localhost
75 | # is always trusted.
76 | # default: There is no default value
77 | # trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org
78 | #
79 | #-------------------------------------------------------------------------------
80 | # If you want any host which connects to the gmetad XML to receive
81 | # data, then set this value to "on"
82 | # default: off
83 | # all_trusted on
84 | #
85 | #-------------------------------------------------------------------------------
86 | # If you don't want gmetad to setuid then set this to off
87 | # default: on
88 | # setuid off
89 | #
90 | #-------------------------------------------------------------------------------
91 | # User gmetad will setuid to (defaults to "nobody")
92 | # default: "nobody"
93 | # setuid_username "nobody"
94 | #
95 | #-------------------------------------------------------------------------------
96 | # The port gmetad will answer requests for XML
97 | # default: 8651
98 | # xml_port 8651
99 | #
100 | #-------------------------------------------------------------------------------
101 | # The port gmetad will answer queries for XML. This facility allows
102 | # simple subtree and summation views of the XML tree.
103 | # default: 8652
104 | # interactive_port 8652
105 | #
106 | #-------------------------------------------------------------------------------
107 | # The number of threads answering XML requests
108 | # default: 4
109 | # server_threads 10
110 | #
111 | #-------------------------------------------------------------------------------
112 | # Where gmetad stores its round-robin databases
113 | # default: "/var/lib/ganglia/rrds"
114 | # rrd_rootdir "/some/other/place"
115 |
--------------------------------------------------------------------------------
/templates/default/gmond_unicast.conf.erb:
--------------------------------------------------------------------------------
1 | /* This configuration is as close to 2.5.x default behavior as possible
2 | The values closely match ./gmond/metric.h definitions in 2.5.x */
3 | globals {
4 | daemonize = yes
5 | setuid = yes
6 | user = ganglia
7 | debug_level = 0
8 | max_udp_msg_len = 1472
9 | mute = no
10 | deaf = no
11 | host_dmax = 0 /*secs */
12 | cleanup_threshold = 300 /*secs */
13 | gexec = no
14 | send_metadata_interval = 30
15 | }
16 |
17 | /* If a cluster attribute is specified, then all gmond hosts are wrapped inside
18 |  * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
19 |  * NOT be wrapped inside of a <CLUSTER> tag. */
20 | cluster {
21 | name = "<%= @cluster[:name] %>"
22 | owner = "<%= @cluster[:owner] %>"
23 | latlong = "<%= @cluster[:latlong] %>"
24 | url = "<%= @cluster[:url] %>"
25 | }
26 |
27 | /* The host section describes attributes of the host, like the location */
28 | host {
29 | location = "unspecified"
30 | }
31 |
32 | /* Feel free to specify as many udp_send_channels as you like. Gmond
33 | used to only support having a single channel */
34 | udp_send_channel {
35 | host = <%= @host %>
36 | port = <%= @unicast[:port] %>
37 | ttl = <%= @unicast[:ttl] %>
38 | }
39 |
40 | /* You can specify as many udp_recv_channels as you like as well. */
41 | udp_recv_channel {
42 | port = <%= @unicast[:port] %>
43 | }
44 |
45 | /* You can specify as many tcp_accept_channels as you like to share
46 | an xml description of the state of the cluster */
47 | tcp_accept_channel {
48 | port = <%= @unicast[:port] %>
49 | }
50 |
51 | /* Each metrics module that is referenced by gmond must be specified and
52 | loaded. If the module has been statically linked with gmond, it does not
53 | require a load path. However all dynamically loadable modules must include
54 | a load path. */
55 | modules {
56 | module {
57 | name = "core_metrics"
58 | }
59 | module {
60 | name = "cpu_module"
61 | path = "/usr/lib/ganglia/modcpu.so"
62 | }
63 | module {
64 | name = "disk_module"
65 | path = "/usr/lib/ganglia/moddisk.so"
66 | }
67 | module {
68 | name = "load_module"
69 | path = "/usr/lib/ganglia/modload.so"
70 | }
71 | module {
72 | name = "mem_module"
73 | path = "/usr/lib/ganglia/modmem.so"
74 | }
75 | module {
76 | name = "net_module"
77 | path = "/usr/lib/ganglia/modnet.so"
78 | }
79 | module {
80 | name = "proc_module"
81 | path = "/usr/lib/ganglia/modproc.so"
82 | }
83 | module {
84 | name = "sys_module"
85 | path = "/usr/lib/ganglia/modsys.so"
86 | }
87 | }
88 |
89 | include ('/etc/ganglia/conf.d/*.conf')
90 |
91 |
92 | /* The old internal 2.5.x metric array has been replaced by the following
93 | collection_group directives. What follows is the default behavior for
94 | collecting and sending metrics that is as close to 2.5.x behavior as
95 | possible. */
96 |
97 | /* This collection group will cause a heartbeat (or beacon) to be sent every
98 | 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
99 | the age of the running gmond. */
100 | collection_group {
101 | collect_once = yes
102 | time_threshold = 20
103 | metric {
104 | name = "heartbeat"
105 | }
106 | }
107 |
108 | /* This collection group will send general info about this host every 1200 secs.
109 | This information doesn't change between reboots and is only collected once. */
110 | collection_group {
111 | collect_once = yes
112 | time_threshold = 1200
113 | metric {
114 | name = "cpu_num"
115 | title = "CPU Count"
116 | }
117 | metric {
118 | name = "cpu_speed"
119 | title = "CPU Speed"
120 | }
121 | metric {
122 | name = "mem_total"
123 | title = "Memory Total"
124 | }
125 | /* Should this be here? Swap can be added/removed between reboots. */
126 | metric {
127 | name = "swap_total"
128 | title = "Swap Space Total"
129 | }
130 | metric {
131 | name = "boottime"
132 | title = "Last Boot Time"
133 | }
134 | metric {
135 | name = "machine_type"
136 | title = "Machine Type"
137 | }
138 | metric {
139 | name = "os_name"
140 | title = "Operating System"
141 | }
142 | metric {
143 | name = "os_release"
144 | title = "Operating System Release"
145 | }
146 | metric {
147 | name = "location"
148 | title = "Location"
149 | }
150 | }
151 |
152 | /* This collection group will send the status of gexecd for this host every 300 secs */
153 | /* Unlike 2.5.x the default behavior is to report gexecd OFF. */
154 | collection_group {
155 | collect_once = yes
156 | time_threshold = 300
157 | metric {
158 | name = "gexec"
159 | title = "Gexec Status"
160 | }
161 | }
162 |
163 | /* This collection group will collect the CPU status info every 20 secs.
164 | The time threshold is set to 90 seconds. In honesty, this time_threshold could be
165 |    set significantly higher to reduce unnecessary network chatter. */
166 | collection_group {
167 | collect_every = 20
168 | time_threshold = 90
169 | /* CPU status */
170 | metric {
171 | name = "cpu_user"
172 | value_threshold = "1.0"
173 | title = "CPU User"
174 | }
175 | metric {
176 | name = "cpu_system"
177 | value_threshold = "1.0"
178 | title = "CPU System"
179 | }
180 | metric {
181 | name = "cpu_idle"
182 | value_threshold = "5.0"
183 | title = "CPU Idle"
184 | }
185 | metric {
186 | name = "cpu_nice"
187 | value_threshold = "1.0"
188 | title = "CPU Nice"
189 | }
190 | metric {
191 | name = "cpu_aidle"
192 | value_threshold = "5.0"
193 | title = "CPU aidle"
194 | }
195 | metric {
196 | name = "cpu_wio"
197 | value_threshold = "1.0"
198 | title = "CPU wio"
199 | }
200 | /* The next two metrics are optional if you want more detail...
201 | ... since they are accounted for in cpu_system.
202 | metric {
203 | name = "cpu_intr"
204 | value_threshold = "1.0"
205 | title = "CPU intr"
206 | }
207 | metric {
208 | name = "cpu_sintr"
209 | value_threshold = "1.0"
210 | title = "CPU sintr"
211 | }
212 | */
213 | }
214 |
215 | collection_group {
216 | collect_every = 20
217 | time_threshold = 90
218 | /* Load Averages */
219 | metric {
220 | name = "load_one"
221 | value_threshold = "1.0"
222 | title = "One Minute Load Average"
223 | }
224 | metric {
225 | name = "load_five"
226 | value_threshold = "1.0"
227 | title = "Five Minute Load Average"
228 | }
229 | metric {
230 | name = "load_fifteen"
231 | value_threshold = "1.0"
232 | title = "Fifteen Minute Load Average"
233 | }
234 | }
235 |
236 | /* This group collects the number of running and total processes */
237 | collection_group {
238 | collect_every = 80
239 | time_threshold = 950
240 | metric {
241 | name = "proc_run"
242 | value_threshold = "1.0"
243 | title = "Total Running Processes"
244 | }
245 | metric {
246 | name = "proc_total"
247 | value_threshold = "1.0"
248 | title = "Total Processes"
249 | }
250 | }
251 |
252 | /* This collection group grabs the volatile memory metrics every 40 secs and
253 | sends them at least every 180 secs. This time_threshold can be increased
254 | significantly to reduce unneeded network traffic. */
255 | collection_group {
256 | collect_every = 40
257 | time_threshold = 180
258 | metric {
259 | name = "mem_free"
260 | value_threshold = "1024.0"
261 | title = "Free Memory"
262 | }
263 | metric {
264 | name = "mem_shared"
265 | value_threshold = "1024.0"
266 | title = "Shared Memory"
267 | }
268 | metric {
269 | name = "mem_buffers"
270 | value_threshold = "1024.0"
271 | title = "Memory Buffers"
272 | }
273 | metric {
274 | name = "mem_cached"
275 | value_threshold = "1024.0"
276 | title = "Cached Memory"
277 | }
278 | metric {
279 | name = "swap_free"
280 | value_threshold = "1024.0"
281 | title = "Free Swap Space"
282 | }
283 | }
284 |
285 | collection_group {
286 | collect_every = 40
287 | time_threshold = 300
288 | metric {
289 | name = "bytes_out"
290 | value_threshold = 4096
291 | title = "Bytes Sent"
292 | }
293 | metric {
294 | name = "bytes_in"
295 | value_threshold = 4096
296 | title = "Bytes Received"
297 | }
298 | metric {
299 | name = "pkts_in"
300 | value_threshold = 256
301 | title = "Packets Received"
302 | }
303 | metric {
304 | name = "pkts_out"
305 | value_threshold = 256
306 | title = "Packets Sent"
307 | }
308 | }
309 |
310 | /* Different than 2.5.x default since the old config made no sense */
311 | collection_group {
312 | collect_every = 1800
313 | time_threshold = 3600
314 | metric {
315 | name = "disk_total"
316 | value_threshold = 1.0
317 | title = "Total Disk Space"
318 | }
319 | }
320 |
321 | collection_group {
322 | collect_every = 40
323 | time_threshold = 180
324 | metric {
325 | name = "disk_free"
326 | value_threshold = 1.0
327 | title = "Disk Space Available"
328 | }
329 | metric {
330 | name = "part_max_used"
331 | value_threshold = 1.0
332 | title = "Maximum Disk Space Used"
333 | }
334 | }
335 |
336 |
--------------------------------------------------------------------------------
/templates/default/gmond.conf.erb:
--------------------------------------------------------------------------------
1 | /* This configuration is as close to 2.5.x default behavior as possible
2 | The values closely match ./gmond/metric.h definitions in 2.5.x */
3 | globals {
4 | daemonize = yes
5 | setuid = yes
6 | user = ganglia
7 | debug_level = 0
8 | max_udp_msg_len = 1472
9 | mute = no
10 | deaf = no
11 | host_dmax = 0 /*secs */
12 | cleanup_threshold = 300 /*secs */
13 | gexec = no
14 | send_metadata_interval = 0
15 | }
16 |
17 | /* If a cluster attribute is specified, then all gmond hosts are wrapped inside
18 |  * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
19 |  * NOT be wrapped inside of a <CLUSTER> tag. */
20 | cluster {
21 | name = "<%= @cluster[:name] %>"
22 | owner = "<%= @cluster[:owner] %>"
23 | latlong = "<%= @cluster[:latlong] %>"
24 | url = "<%= @cluster[:url] %>"
25 | }
26 |
27 | /* The host section describes attributes of the host, like the location */
28 | host {
29 | location = "unspecified"
30 | }
31 |
32 | /* Feel free to specify as many udp_send_channels as you like. Gmond
33 | used to only support having a single channel */
34 | udp_send_channel {
35 | mcast_join = <%= @mcast_send[:mcast_join] %>
36 | port = <%= @mcast_send[:port] %>
37 | ttl = <%= @mcast_send[:ttl] %>
38 | }
39 |
40 | /* You can specify as many udp_recv_channels as you like as well. */
41 | udp_recv_channel {
42 | mcast_join = <%= @mcast_recv[:mcast_join] %>
43 | port = <%= @mcast_recv[:port] %>
44 | bind = <%= @mcast_recv[:bind] %>
45 | }
46 |
47 | /* You can specify as many tcp_accept_channels as you like to share
48 | an xml description of the state of the cluster */
49 | tcp_accept_channel {
50 | port = <%= @mcast_recv[:port] %>
51 | }
52 |
53 | <% if node[:recipes].include?("ganglia::gmetad") && node[:recipes].include?("ganglia::graphite") -%>
54 | /* Additional receive channel for graphite integration */
55 | udp_recv_channel {
56 | bind = 127.0.0.1
57 | port = <%= @mcast_recv[:port] %>
58 | }
59 | <% end -%>
60 |
61 | /* Each metrics module that is referenced by gmond must be specified and
62 | loaded. If the module has been statically linked with gmond, it does not
63 | require a load path. However all dynamically loadable modules must include
64 | a load path. */
65 | modules {
66 | module {
67 | name = "core_metrics"
68 | }
69 | module {
70 | name = "cpu_module"
71 | path = "/usr/lib/ganglia/modcpu.so"
72 | }
73 | module {
74 | name = "disk_module"
75 | path = "/usr/lib/ganglia/moddisk.so"
76 | }
77 | module {
78 | name = "load_module"
79 | path = "/usr/lib/ganglia/modload.so"
80 | }
81 | module {
82 | name = "mem_module"
83 | path = "/usr/lib/ganglia/modmem.so"
84 | }
85 | module {
86 | name = "net_module"
87 | path = "/usr/lib/ganglia/modnet.so"
88 | }
89 | module {
90 | name = "proc_module"
91 | path = "/usr/lib/ganglia/modproc.so"
92 | }
93 | module {
94 | name = "sys_module"
95 | path = "/usr/lib/ganglia/modsys.so"
96 | }
97 | }
98 |
99 | include ('/etc/ganglia/conf.d/*.conf')
100 |
101 |
102 | /* The old internal 2.5.x metric array has been replaced by the following
103 | collection_group directives. What follows is the default behavior for
104 | collecting and sending metrics that is as close to 2.5.x behavior as
105 | possible. */
106 |
107 | /* This collection group will cause a heartbeat (or beacon) to be sent every
108 | 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
109 | the age of the running gmond. */
110 | collection_group {
111 | collect_once = yes
112 | time_threshold = 20
113 | metric {
114 | name = "heartbeat"
115 | }
116 | }
117 |
118 | /* This collection group will send general info about this host every 1200 secs.
119 | This information doesn't change between reboots and is only collected once. */
120 | collection_group {
121 | collect_once = yes
122 | time_threshold = 1200
123 | metric {
124 | name = "cpu_num"
125 | title = "CPU Count"
126 | }
127 | metric {
128 | name = "cpu_speed"
129 | title = "CPU Speed"
130 | }
131 | metric {
132 | name = "mem_total"
133 | title = "Memory Total"
134 | }
135 | /* Should this be here? Swap can be added/removed between reboots. */
136 | metric {
137 | name = "swap_total"
138 | title = "Swap Space Total"
139 | }
140 | metric {
141 | name = "boottime"
142 | title = "Last Boot Time"
143 | }
144 | metric {
145 | name = "machine_type"
146 | title = "Machine Type"
147 | }
148 | metric {
149 | name = "os_name"
150 | title = "Operating System"
151 | }
152 | metric {
153 | name = "os_release"
154 | title = "Operating System Release"
155 | }
156 | metric {
157 | name = "location"
158 | title = "Location"
159 | }
160 | }
161 |
162 | /* This collection group will send the status of gexecd for this host every 300 secs */
163 | /* Unlike 2.5.x the default behavior is to report gexecd OFF. */
164 | collection_group {
165 | collect_once = yes
166 | time_threshold = 300
167 | metric {
168 | name = "gexec"
169 | title = "Gexec Status"
170 | }
171 | }
172 |
173 | /* This collection group will collect the CPU status info every 20 secs.
174 | The time threshold is set to 90 seconds. In honesty, this time_threshold could be
175 |    set significantly higher to reduce unnecessary network chatter. */
176 | collection_group {
177 | collect_every = 20
178 | time_threshold = 90
179 | /* CPU status */
180 | metric {
181 | name = "cpu_user"
182 | value_threshold = "1.0"
183 | title = "CPU User"
184 | }
185 | metric {
186 | name = "cpu_system"
187 | value_threshold = "1.0"
188 | title = "CPU System"
189 | }
190 | metric {
191 | name = "cpu_idle"
192 | value_threshold = "5.0"
193 | title = "CPU Idle"
194 | }
195 | metric {
196 | name = "cpu_nice"
197 | value_threshold = "1.0"
198 | title = "CPU Nice"
199 | }
200 | metric {
201 | name = "cpu_aidle"
202 | value_threshold = "5.0"
203 | title = "CPU aidle"
204 | }
205 | metric {
206 | name = "cpu_wio"
207 | value_threshold = "1.0"
208 | title = "CPU wio"
209 | }
210 | /* The next two metrics are optional if you want more detail...
211 | ... since they are accounted for in cpu_system.
212 | metric {
213 | name = "cpu_intr"
214 | value_threshold = "1.0"
215 | title = "CPU intr"
216 | }
217 | metric {
218 | name = "cpu_sintr"
219 | value_threshold = "1.0"
220 | title = "CPU sintr"
221 | }
222 | */
223 | }
224 |
225 | collection_group {
226 | collect_every = 20
227 | time_threshold = 90
228 | /* Load Averages */
229 | metric {
230 | name = "load_one"
231 | value_threshold = "1.0"
232 | title = "One Minute Load Average"
233 | }
234 | metric {
235 | name = "load_five"
236 | value_threshold = "1.0"
237 | title = "Five Minute Load Average"
238 | }
239 | metric {
240 | name = "load_fifteen"
241 | value_threshold = "1.0"
242 | title = "Fifteen Minute Load Average"
243 | }
244 | }
245 |
246 | /* This group collects the number of running and total processes */
247 | collection_group {
248 | collect_every = 80
249 | time_threshold = 950
250 | metric {
251 | name = "proc_run"
252 | value_threshold = "1.0"
253 | title = "Total Running Processes"
254 | }
255 | metric {
256 | name = "proc_total"
257 | value_threshold = "1.0"
258 | title = "Total Processes"
259 | }
260 | }
261 |
262 | /* This collection group grabs the volatile memory metrics every 40 secs and
263 | sends them at least every 180 secs. This time_threshold can be increased
264 | significantly to reduce unneeded network traffic. */
265 | collection_group {
266 | collect_every = 40
267 | time_threshold = 180
268 | metric {
269 | name = "mem_free"
270 | value_threshold = "1024.0"
271 | title = "Free Memory"
272 | }
273 | metric {
274 | name = "mem_shared"
275 | value_threshold = "1024.0"
276 | title = "Shared Memory"
277 | }
278 | metric {
279 | name = "mem_buffers"
280 | value_threshold = "1024.0"
281 | title = "Memory Buffers"
282 | }
283 | metric {
284 | name = "mem_cached"
285 | value_threshold = "1024.0"
286 | title = "Cached Memory"
287 | }
288 | metric {
289 | name = "swap_free"
290 | value_threshold = "1024.0"
291 | title = "Free Swap Space"
292 | }
293 | }
294 |
295 | collection_group {
296 | collect_every = 40
297 | time_threshold = 300
298 | metric {
299 | name = "bytes_out"
300 | value_threshold = 4096
301 | title = "Bytes Sent"
302 | }
303 | metric {
304 | name = "bytes_in"
305 | value_threshold = 4096
306 | title = "Bytes Received"
307 | }
308 | metric {
309 | name = "pkts_in"
310 | value_threshold = 256
311 | title = "Packets Received"
312 | }
313 | metric {
314 | name = "pkts_out"
315 | value_threshold = 256
316 | title = "Packets Sent"
317 | }
318 | }
319 |
320 | /* Different than 2.5.x default since the old config made no sense */
321 | collection_group {
322 | collect_every = 1800
323 | time_threshold = 3600
324 | metric {
325 | name = "disk_total"
326 | value_threshold = 1.0
327 | title = "Total Disk Space"
328 | }
329 | }
330 |
331 | collection_group {
332 | collect_every = 40
333 | time_threshold = 180
334 | metric {
335 | name = "disk_free"
336 | value_threshold = 1.0
337 | title = "Disk Space Available"
338 | }
339 | metric {
340 | name = "part_max_used"
341 | value_threshold = 1.0
342 | title = "Maximum Disk Space Used"
343 | }
344 | }
345 |
346 |
--------------------------------------------------------------------------------