├── templates └── default │ ├── zookeeper-myid.erb │ ├── storm.yaml.erb │ ├── generic-env.sh.erb │ ├── generic.properties.erb │ ├── storm_env.ini.erb │ ├── generic-site.xml.erb │ ├── fair-scheduler.xml.erb │ └── jaas.conf.erb ├── test ├── fixtures │ └── cookbooks │ │ └── hadoop_test │ │ ├── metadata.rb │ │ ├── recipes │ │ ├── systemd_reload.rb │ │ ├── default.rb │ │ ├── hdfs.rb │ │ └── yarn.rb │ │ └── attributes │ │ └── default.rb └── integration │ └── default │ ├── inspec.yml │ └── controls │ └── default_spec.rb ├── .delivery └── project.toml ├── spec ├── spec_helper.rb └── unit │ └── recipes │ ├── pig_spec.rb │ ├── slider_spec.rb │ ├── hive_server_spec.rb │ ├── avro_spec.rb │ ├── hadoop_yarn_proxyserver_spec.rb │ ├── parquet_spec.rb │ ├── spark_master_spec.rb │ ├── oozie_client_spec.rb │ ├── storm_ui_spec.rb │ ├── storm_nimbus_spec.rb │ ├── storm_supervisor_spec.rb │ ├── hadoop_hdfs_zkfc_spec.rb │ ├── hadoop_kms_server_spec.rb │ ├── hive_server2_spec.rb │ ├── hadoop_hdfs_datanode_spec.rb │ ├── hadoop_mapreduce_historyserver_spec.rb │ ├── flume_agent_spec.rb │ ├── _system_tuning_spec.rb │ ├── spark_worker_spec.rb │ ├── hbase_rest_spec.rb │ ├── hadoop_hdfs_journalnode_spec.rb │ ├── hbase_thrift_spec.rb │ ├── hadoop_yarn_resourcemanager_spec.rb │ ├── spark_historyserver_spec.rb │ ├── flume_spec.rb │ ├── hbase_regionserver_spec.rb │ ├── hadoop_hdfs_namenode_spec.rb │ ├── tez_spec.rb │ ├── hadoop_hdfs_secondarynamenode_spec.rb │ ├── hadoop_kms_spec.rb │ ├── _sql_connectors_spec.rb │ ├── zookeeper_spec.rb │ ├── hadoop_yarn_nodemanager_spec.rb │ ├── hbase_master_spec.rb │ ├── repo_spec.rb │ ├── hbase_spec.rb │ ├── oozie_spec.rb │ ├── _compression_libs_spec.rb │ ├── hive_spec.rb │ ├── spark_spec.rb │ ├── hive2_spec.rb │ ├── spark2_spec.rb │ └── hive_metastore_spec.rb ├── .gitignore ├── .codeclimate.yml ├── attributes ├── hbase.rb ├── capacity_scheduler.rb ├── spark2.rb ├── spark.rb ├── tez.rb ├── storm.rb ├── zookeeper.rb └── kms.rb ├── recipes ├── hcatalog.rb ├── pig.rb ├── oozie_client.rb ├── _storm_checkconfig.rb ├── avro.rb ├── _zookeeper_checkconfig.rb ├── slider.rb ├── parquet.rb ├── flume_agent.rb ├── _hadoop_hdfs_checkconfig.rb ├── _hadoop_checkconfig.rb ├── zookeeper.rb ├── _system_tuning.rb ├── flume.rb ├── _hive_checkconfig.rb ├── _compression_libs.rb ├── hbase_rest.rb ├── _sql_connectors.rb ├── hbase_thrift.rb ├── hive_server.rb ├── hbase_regionserver.rb ├── hadoop_kms_server.rb ├── hive_server2.rb ├── spark_master.rb ├── storm_ui.rb ├── storm_nimbus.rb ├── hadoop_yarn_proxyserver.rb ├── storm_supervisor.rb ├── _hbase_checkconfig.rb ├── hadoop_hdfs_zkfc.rb ├── spark_worker.rb ├── spark_historyserver.rb ├── tez.rb ├── hadoop_hdfs_journalnode.rb ├── hadoop_yarn_nodemanager.rb ├── hadoop_hdfs_secondarynamenode.rb ├── hadoop_kms.rb ├── hadoop_hdfs_datanode.rb ├── hadoop_mapreduce_historyserver.rb ├── hadoop_hdfs_namenode.rb ├── hbase_master.rb └── spark2.rb ├── CONTRIBUTING.md ├── .travis.yml ├── metadata.rb ├── .kitchen.yml ├── Berksfile ├── Rakefile ├── TESTING.md ├── chefignore └── .kitchen.dokken.yml /templates/default/zookeeper-myid.erb: -------------------------------------------------------------------------------- 1 | <%=@myid%> 2 | 3 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/metadata.rb: -------------------------------------------------------------------------------- 1 | name 'hadoop_test' 2 | version '0.0.1' 3 | 4 | depends 'hadoop' 5 | depends 'java' 6 | 
-------------------------------------------------------------------------------- /.delivery/project.toml: -------------------------------------------------------------------------------- 1 | remote_file = "https://raw.githubusercontent.com/chef-cookbooks/community_cookbook_tools/master/delivery/project.toml" 2 | -------------------------------------------------------------------------------- /templates/default/storm.yaml.erb: -------------------------------------------------------------------------------- 1 | # generated by Chef for <%= node['fqdn'] %>, changes will be overwritten 2 | 3 | <%= JSON.parse(@storm_conf.to_json).to_yaml %> 4 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/recipes/systemd_reload.rb: -------------------------------------------------------------------------------- 1 | execute 'systemd-daemon-reload' do 2 | command 'systemctl daemon-reload' 3 | action :nothing 4 | end 5 | -------------------------------------------------------------------------------- /templates/default/generic-env.sh.erb: -------------------------------------------------------------------------------- 1 | # generated by chef, changes will be overwritten 2 | 3 | <% @options.each do |key, value| %> 4 | export <%= key.upcase %>="<%= value %>" 5 | <% end %> 6 | -------------------------------------------------------------------------------- /templates/default/generic.properties.erb: -------------------------------------------------------------------------------- 1 | # generated by Chef for <%= node['fqdn'] %>, changes will be overwritten 2 | 3 | <% @properties.each do |key, value| %> 4 | <%= "#{key}=#{value}" %> 5 | <% end %> 6 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'chefspec' 2 | require 'chefspec/berkshelf' 3 | 4 | RSpec.configure do |config| 5 | config.formatter = :documentation 6 | config.color = true 7 | end 8 | 9 | at_exit { ChefSpec::Coverage.report! 
} 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | Berksfile.lock 3 | *~ 4 | *# 5 | .#* 6 | \#*# 7 | .*.sw[a-z] 8 | *.un~ 9 | /cookbooks 10 | 11 | # Bundler 12 | Gemfile.lock 13 | bin/* 14 | .bundle/* 15 | 16 | .kitchen/ 17 | .kitchen.local.yml 18 | 19 | .chef 20 | -------------------------------------------------------------------------------- /test/integration/default/inspec.yml: -------------------------------------------------------------------------------- 1 | name: hadoop-simple 2 | title: Integration tests for HDFS and YARN basic functionality 3 | summary: This InSpec profile contains integration tests for Hadoop HDFS and YARN 4 | version: 0.0.1 5 | supports: 6 | - os-family: linux 7 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | engines: 3 | duplication: 4 | enabled: true 5 | config: 6 | languages: 7 | ruby: 8 | mass_threshold: 60 9 | fixme: 10 | enabled: true 11 | foodcritic: 12 | enabled: true 13 | rubocop: 14 | enabled: true 15 | ratings: 16 | paths: 17 | - "**.rb" 18 | exclude_paths: 19 | - spec/ 20 | -------------------------------------------------------------------------------- /templates/default/storm_env.ini.erb: -------------------------------------------------------------------------------- 1 | # generated by Chef for <%= node['fqdn'] %>, changes will be overwritten 2 | 3 | # Environment variables in the following section will be used 4 | # in storm python script. They override the environment variables 5 | # set in the shell. 6 | [environment] 7 | <% @options.each do |key, value| %> 8 | <%= key.upcase %>:<%= value %> 9 | <% end %> 10 | -------------------------------------------------------------------------------- /templates/default/generic-site.xml.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | <% (@options || {}).sort.each do |name, value| %> 8 | 9 | <%= name %> 10 | <%= value %> 11 | 12 | <% end %> 13 | 14 | -------------------------------------------------------------------------------- /attributes/hbase.rb: -------------------------------------------------------------------------------- 1 | # These are the defaults for hbase-site.xml 2 | default['hbase']['hbase_site']['hbase.rootdir'] = "#{node['hadoop']['core_site']['fs.defaultFS']}/hbase" 3 | # These are the defaults for hbase-policy.xml 4 | default['hbase']['hbase_policy']['security.client.protocol.acl'] = '*' 5 | default['hbase']['hbase_policy']['security.admin.protocol.acl'] = '*' 6 | default['hbase']['hbase_policy']['security.masterregion.protocol.acl'] = '*' 7 | # limits.d settings 8 | default['hbase']['limits']['nofile'] = '32768' 9 | default['hbase']['limits']['memlock'] = 'unlimited' 10 | -------------------------------------------------------------------------------- /spec/unit/recipes/pig_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::pig' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | 
end.converge(described_recipe) 11 | end 12 | 13 | it 'install pig package' do 14 | expect(chef_run).to install_package('pig_2_3_4_7_4') 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /spec/unit/recipes/slider_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::slider' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.override['hadoop']['distribution_version'] = '2.6.0.3' 10 | end.converge(described_recipe) 11 | end 12 | pkg = 'slider_2_6_0_3_8' 13 | 14 | it "installs #{pkg} package" do 15 | expect(chef_run).to install_package(pkg) 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/recipes/default.rb: -------------------------------------------------------------------------------- 1 | # The default sets up HDFS/YARN 2 | include_recipe 'java::default' 3 | 4 | if node.key?('java') && node['java'].key?('java_home') 5 | 6 | Chef::Log.info("JAVA_HOME = #{node['java']['java_home']}") 7 | 8 | # set in ruby environment for commands like hdfs namenode -format 9 | ENV['JAVA_HOME'] = node['java']['java_home'] 10 | # set in hadoop_env 11 | node.default['hadoop']['hadoop_env']['java_home'] = node['java']['java_home'] 12 | # set in hbase_env 13 | node.default['hbase']['hbase_env']['java_home'] = node['java']['java_home'] 14 | # set in hive_env 15 | node.default['hive']['hive_env']['java_home'] = node['java']['java_home'] 16 | end 17 | 18 | include_recipe 'hadoop_test::hdfs' 19 | include_recipe 'hadoop_test::yarn' 20 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/recipes/hdfs.rb: -------------------------------------------------------------------------------- 1 | include_recipe 'hadoop::hadoop_hdfs_namenode' 2 | include_recipe 'hadoop::hadoop_hdfs_datanode' 3 | 4 | include_recipe 'hadoop_test::systemd_reload' 5 | 6 | ruby_block 'Format and start HDFS' do 7 | block do 8 | true 9 | end 10 | notifies :run, 'execute[systemd-daemon-reload]', :immediately if 11 | (node['platform_family'] == 'rhel' && node['platform_version'].to_i >= 7) || 12 | (node['platform'] == 'ubuntu' && node['platform_version'].to_i >= 16) || 13 | (node['platform'] == 'debian' && node['platform_version'].to_i >= 8) 14 | notifies :run, 'execute[hdfs-namenode-format]', :immediately 15 | notifies :start, 'service[hadoop-hdfs-namenode]', :immediately 16 | notifies :start, 'service[hadoop-hdfs-datanode]', :immediately 17 | end 18 | -------------------------------------------------------------------------------- /recipes/hcatalog.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hcatalog 4 | # 5 | # Copyright © 2013-2014 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::hive' 21 | 22 | package 'hcatalog' do 23 | action :install 24 | end 25 | -------------------------------------------------------------------------------- /recipes/pig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: pig 4 | # 5 | # Copyright © 2013-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | package hadoop_package('pig') do 23 | action :install 24 | end 25 | -------------------------------------------------------------------------------- /recipes/oozie_client.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: oozie_client 4 | # 5 | # Copyright © 2013-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | package hadoop_package('oozie-client') do 23 | action :install 24 | end 25 | -------------------------------------------------------------------------------- /test/integration/default/controls/default_spec.rb: -------------------------------------------------------------------------------- 1 | # 2 | 3 | describe directory('/etc/hadoop/conf.chef') do 4 | it { should exist } 5 | it { should be_readable } 6 | end 7 | 8 | %w( 9 | core-site.xml 10 | hdfs-site.xml 11 | mapred-site.xml 12 | yarn-site.xml 13 | ).each do |sitefile| 14 | describe file("/etc/hadoop/conf.chef/#{sitefile}") do 15 | it { should exist } 16 | end 17 | end 18 | 19 | %w( 20 | hadoop-hdfs-namenode 21 | hadoop-hdfs-datanode 22 | hadoop-yarn-resourcemanager 23 | hadoop-yarn-nodemanager 24 | ).each do |svc| 25 | describe service(svc) do 26 | it { should be_installed } 27 | it { should be_running } 28 | end 29 | end 30 | 31 | [ 32 | 'hdfs dfs -ls /', 33 | 'yarn application -list', 34 | ].each do |cmd| 35 | describe command(cmd) do 36 | its('exit_status') { should eq 0 } 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /recipes/_storm_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _storm_checkconfig 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # If using JAAS, make sure it's configured fully 21 | check_deprecated_jaas_config('storm') 22 | check_jaas_config('storm') 23 | -------------------------------------------------------------------------------- /recipes/avro.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: avro 4 | # 5 | # Copyright © 2013-2014 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | package 'avro-tools' do 23 | action :install 24 | only_if { node['hadoop']['distribution'] == 'cdh' } 25 | end 26 | -------------------------------------------------------------------------------- /recipes/_zookeeper_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _zookeeper_checkconfig 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # If using JAAS, make sure it's configured fully 21 | check_deprecated_jaas_config('zookeeper') 22 | check_jaas_config('zookeeper') 23 | -------------------------------------------------------------------------------- /templates/default/fair-scheduler.xml.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | <% if @pools %> 8 | <% @pools.each do |pool, pool_value| %> 9 | 10 | <% pool_value.each do |k, v| %> 11 | <%= "<#{k}>#{v}" %> 12 | <% end %> 13 | 14 | <% end %> 15 | <% end %> 16 | 17 | <% if @users %> 18 | <% @users.each do |user, user_value| %> 19 | 20 | <% user_value.each do |k, v| %> 21 | <%= "<#{k}>#{v}" %> 22 | <% end %> 23 | 24 | <% end %> 25 | <% end %> 26 | 27 | <% if @defaults %> 28 | <% @defaults.each do |k, v| %> 29 | <%= "<#{k}>#{v}" %> 30 | <% end %> 31 | <% end %> 32 | 33 | 34 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/attributes/default.rb: -------------------------------------------------------------------------------- 1 | # force HDFS format when notified 2 | override['hadoop']['force_format'] = true 3 | # disable setting limits 4 | override['hadoop']['limits'] = [] 5 | # shrink heaps, use minimal settings 6 | override['hadoop']['hadoop_env']['hadoop_heapsize'] = '256' 7 | override['hadoop']['mapred_site']['mapreduce.map.java.opts'] = '-Xmx384m' 8 | override['hadoop']['mapred_site']['mapreduce.reduce.java.opts'] = '-Xmx384m' 9 | override['hadoop']['mapred_site']['mapreduce.job.maps'] = '3' 10 | override['hadoop']['mapred_site']['mapreduce.job.reduces'] = '3' 11 | override['hadoop']['mapred_site']['mapreduce.task.io.sort.factor'] = '10' 12 | override['hadoop']['mapred_site']['mapreduce.task.io.sort.mb'] = '100' 13 | override['hadoop']['yarn_env']['yarn_nodemanager_heapsize'] = '256' 14 | override['hadoop']['yarn_site']['yarn.scheduler.minimum-allocation-mb'] = '256' 15 | -------------------------------------------------------------------------------- /recipes/slider.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook Name:: hadoop 3 | # Recipe:: slider 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | pkg = hadoop_package('slider') 22 | 23 | package pkg do 24 | action :install 25 | end 26 | 27 | # TODO: slider-env.sh log4j.properties slider-client.xml 28 | -------------------------------------------------------------------------------- /recipes/parquet.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: parquet 4 | # 5 | # Copyright © 2013-2014 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' if node['hadoop']['distribution'] == 'cdh' 21 | 22 | package 'parquet-format' do 23 | action :install 24 | only_if { node['hadoop']['distribution'] == 'cdh' } 25 | end 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to hadoop_cookbook 2 | 3 | If you want to help make the hadoop cookbook better by adding new features, fixing bugs, or suggesting improvements to something that's already there, here's how you can contribute: 4 | 5 | * Fork hadoop_cookbook into your own GitHub repository 6 | * Create a topic branch with an appropriate name 7 | * Work on your favorite feature to your heart's content 8 | * Once you are satisfied, create a pull request against the caskdata/hadoop_cookbook project. 9 | * Address all the review comments 10 | * Once addressed, the changes will be committed to the caskdata/hadoop_cookbook repo. 11 | 12 | Issues can be reported using Cask's [issue tracker](https://issues.cask.co/browse/COOK/component/10600) for this cookbook. 13 | 14 | ## License 15 | 16 | By contributing to this repo, you agree that your contributions will be licensed under the [Apache License Version 2.0 (APLv2)](LICENSE.txt).
17 | -------------------------------------------------------------------------------- /spec/unit/recipes/hive_server_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hive_server' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/test -L /).and_return(false) 9 | stub_command(/update-alternatives --display /).and_return(false) 10 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hive-server' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | 4 | addons: 5 | apt: 6 | sources: 7 | - chef-current-trusty 8 | packages: 9 | - chefdk 10 | 11 | # Don't `bundle install` which takes about 1.5 mins 12 | install: echo "skip bundle install" 13 | 14 | # Comment this, so we test everything, all the time 15 | # branches: 16 | # only: 17 | # - master 18 | 19 | services: docker 20 | 21 | env: 22 | matrix: 23 | - INSTANCE=default-centos-7 24 | - INSTANCE=default-ubuntu-1404 25 | - INSTANCE=default-ubuntu-1604 26 | 27 | before_script: 28 | - sudo iptables -L DOCKER || ( echo "DOCKER iptables chain missing" ; sudo iptables -N DOCKER ) 29 | - eval "$(chef shell-init bash)" 30 | - chef --version 31 | - cookstyle --version 32 | - foodcritic --version 33 | 34 | script: KITCHEN_LOCAL_YAML=.kitchen.dokken.yml kitchen verify ${INSTANCE} 35 | 36 | matrix: 37 | include: 38 | - script: 39 | - chef exec delivery local all 40 | env: UNIT_AND_LINT=1 41 | -------------------------------------------------------------------------------- /spec/unit/recipes/avro_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::avro' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | end.converge(described_recipe) 9 | end 10 | 11 | it 'does not install avro-tools package' do 12 | expect(chef_run).not_to install_package('avro-tools') 13 | end 14 | 15 | context 'using CDH 5' do 16 | let(:chef_run) do 17 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 18 | node.override['hadoop']['distribution'] = 'cdh' 19 | node.default['hadoop']['distribution_version'] = '5.3.2' 20 | node.automatic['domain'] = 'example.com' 21 | end.converge(described_recipe) 22 | end 23 | 24 | it 'installs avro-tools package' do 25 | expect(chef_run).to install_package('avro-tools') 26 | end 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_yarn_proxyserver_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_yarn_proxyserver' do 4 | 
context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['yarn_site']['yarn.web-proxy.address'] = '127.0.0.1' 9 | stub_command(/update-alternatives --display /).and_return(false) 10 | stub_command(/test -L /).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hadoop-yarn-proxyserver' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /metadata.rb: -------------------------------------------------------------------------------- 1 | name 'hadoop' 2 | maintainer 'Cask Data, Inc.' 3 | maintainer_email 'ops@cask.co' 4 | license 'Apache-2.0' 5 | description 'Installs/Configures Hadoop (HDFS/YARN/MRv2), HBase, Hive, Flume, Oozie, Pig, Spark, Storm, Tez, and ZooKeeper' 6 | long_description IO.read(File.join(File.dirname(__FILE__), 'README.md')) 7 | version '2.14.0' 8 | 9 | depends 'yum', '>= 3.0' 10 | depends 'apt', '>= 2.1.2' 11 | 12 | %w(dpkg_autostart selinux sysctl ulimit).each do |cb| 13 | depends cb 14 | end 15 | 16 | # RHEL-like distributions 17 | %w( 18 | centos 19 | redhat 20 | scientific 21 | ).each do |os| 22 | supports os, '>= 6.0' 23 | end 24 | 25 | %w( 26 | amazon 27 | debian 28 | ubuntu 29 | ).each do |os| 30 | supports os 31 | end 32 | 33 | source_url 'https://github.com/caskdata/hadoop_cookbook' if respond_to?(:source_url) 34 | issues_url 'https://issues.cask.co/browse/COOK/component/10600' if respond_to?(:issues_url) 35 | chef_version '>= 11' if respond_to?(:chef_version) 36 | -------------------------------------------------------------------------------- /spec/unit/recipes/parquet_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::parquet' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | end.converge(described_recipe) 9 | end 10 | 11 | it 'does not install parquet-format package' do 12 | expect(chef_run).not_to install_package('parquet-format') 13 | end 14 | 15 | context 'using CDH 5' do 16 | let(:chef_run) do 17 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 18 | node.override['hadoop']['distribution'] = 'cdh' 19 | node.default['hadoop']['distribution_version'] = '5.3.2' 20 | node.automatic['domain'] = 'example.com' 21 | end.converge(described_recipe) 22 | end 23 | 24 | it 'installs parquet-format package' do 25 | expect(chef_run).to install_package('parquet-format') 26 | end 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /spec/unit/recipes/spark_master_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::spark_master' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['spark']['release']['install'] = true 9 | 
stub_command(/test -L /).and_return(false) 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 12 | end.converge(described_recipe) 13 | end 14 | pkg = 'spark-master' 15 | 16 | %W( 17 | /etc/default/#{pkg} 18 | /etc/init.d/#{pkg} 19 | ).each do |file| 20 | it "creates #{file} from template" do 21 | expect(chef_run).to create_template(file) 22 | end 23 | end 24 | 25 | it "creates #{pkg} service resource, but does not run it" do 26 | expect(chef_run.service(pkg)).to do_nothing 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /templates/default/jaas.conf.erb: -------------------------------------------------------------------------------- 1 | <% unless @client.nil? %> 2 | Client { 3 | com.sun.security.auth.module.Krb5LoginModule required 4 | <% if @client['useticketcache'].to_s == 'true' %> 5 | useTicketCache=true 6 | <% else %> 7 | useTicketCache=false 8 | <% end %> 9 | <% if @client['usekeytab'].to_s == 'true' %> 10 | useKeyTab=<%= @client['usekeytab'] %> 11 | keyTab="<%= @client['keytab'] %>" 12 | principal="<%= @client['principal'] %>"; 13 | <% else %> 14 | useKeyTab=false; 15 | <% end %> 16 | }; 17 | 18 | <% end %> 19 | <% unless @server.nil? %> 20 | Server { 21 | com.sun.security.auth.module.Krb5LoginModule required 22 | storeKey=true 23 | <% if @server['useticketcache'].to_s == 'true' %> 24 | useTicketCache=true 25 | <% else %> 26 | useTicketCache=false 27 | <% end %> 28 | <% if @server['usekeytab'].to_s == 'true' %> 29 | useKeyTab=<%= @server['usekeytab'] %> 30 | keyTab="<%= @server['keytab'] %>" 31 | principal="<%= @server['principal'] %>"; 32 | <% else %> 33 | useKeyTab=false; 34 | <% end %> 35 | }; 36 | 37 | <% end %> 38 | -------------------------------------------------------------------------------- /spec/unit/recipes/oozie_client_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::oozie_client' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | end.converge(described_recipe) 11 | end 12 | 13 | it 'install oozie-client package' do 14 | expect(chef_run).to install_package('oozie_2_3_4_7_4-client') 15 | end 16 | 17 | context 'on CDH' do 18 | let(:chef_run) do 19 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 20 | node.automatic['domain'] = 'example.com' 21 | node.override['hadoop']['distribution'] = 'cdh' 22 | end.converge(described_recipe) 23 | end 24 | 25 | it 'install oozie-client package' do 26 | expect(chef_run).to install_package('oozie-client') 27 | end 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /attributes/capacity_scheduler.rb: -------------------------------------------------------------------------------- 1 | # Defaults, as shipped with HDP 2.0 2 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.maximum-applications'] = '10000' 3 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.maximum-am-resource-percent'] = '0.1' 4 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.queues'] = 'default' 5 | 
default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.capacity'] = '100' 6 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.user-limit-factor'] = '1' 7 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.maximum-capacity'] = '100' 8 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.state'] = 'RUNNING' 9 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.acl_submit_applications'] = '*' 10 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.root.default.acl_administer_queue'] = '*' 11 | default['hadoop']['capacity_scheduler']['yarn.scheduler.capacity.node-locality-delay'] = '-1' 12 | -------------------------------------------------------------------------------- /spec/unit/recipes/storm_ui_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::storm_ui' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | node.default['storm']['release']['install'] = false 11 | stub_command(/test -L /).and_return(false) 12 | stub_command(/update-alternatives --display /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'storm-ui' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | /etc/storm/conf.chef/#{pkg}-env.sh 21 | ).each do |file| 22 | it "creates #{file} from template" do 23 | expect(chef_run).to create_template(file) 24 | end 25 | end 26 | 27 | it "creates #{pkg} service resource, but does not run it" do 28 | expect(chef_run.service(pkg)).to do_nothing 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /spec/unit/recipes/storm_nimbus_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::storm_nimbus' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | node.default['storm']['release']['install'] = false 11 | stub_command(/test -L /).and_return(false) 12 | stub_command(/update-alternatives --display /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'storm-nimbus' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | /etc/storm/conf.chef/#{pkg}-env.sh 21 | ).each do |file| 22 | it "creates #{file} from template" do 23 | expect(chef_run).to create_template(file) 24 | end 25 | end 26 | 27 | it "creates #{pkg} service resource, but does not run it" do 28 | expect(chef_run.service(pkg)).to do_nothing 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /spec/unit/recipes/storm_supervisor_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::storm_supervisor' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 
'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | node.default['storm']['release']['install'] = false 11 | stub_command(/test -L /).and_return(false) 12 | stub_command(/update-alternatives --display /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'storm-supervisor' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | /etc/storm/conf.chef/#{pkg}-env.sh 21 | ).each do |file| 22 | it "creates #{file} from template" do 23 | expect(chef_run).to create_template(file) 24 | end 25 | end 26 | 27 | it "creates #{pkg} service resource, but does not run it" do 28 | expect(chef_run.service(pkg)).to do_nothing 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_hdfs_zkfc_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_hdfs_zkfc' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['hdfs_site']['dfs.nameservices'] = 'hdfs' 9 | node.default['hadoop']['hdfs_site']['fs.defaultFS'] = 'hdfs://hdfs' 10 | node.default['hadoop']['hdfs_site']['dfs.ha.fencing.methods'] = 'something' 11 | stub_command(/update-alternatives --display /).and_return(false) 12 | stub_command(/test -L /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'hadoop-hdfs-zkfc' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /recipes/flume_agent.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: flume_agent 4 | # 5 | # Copyright © 2013-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::flume' 21 | 22 | pkg = 23 | if node['hadoop']['distribution'] == 'cdh' 24 | 'flume-ng-agent' 25 | else 26 | hadoop_package('flume-agent') 27 | end 28 | 29 | dpkg_autostart pkg do 30 | allow false 31 | end 32 | 33 | package pkg do 34 | action :install 35 | end 36 | 37 | service 'flume-agent' do 38 | supports [restart: true, reload: false, status: true] 39 | action :nothing 40 | end 41 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_kms_server_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_kms_server' do 4 | context 'on CentOS 6.9 with CDH' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.override['hadoop']['distribution'] = 'cdh' 9 | node.default['hadoop_kms']['kms_env']['kms_log'] = '/var/log/hadoop-kms' 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 12 | stub_command(/test -L /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'hadoop-kms-server' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /spec/unit/recipes/hive_server2_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hive_server2' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hive']['hive_site']['hive.support.concurrency'] = 'true' 9 | node.default['hive']['hive_site']['hive.zookeeper.quorum'] = 'localhost' 10 | stub_command(/test -L /).and_return(false) 11 | stub_command(/update-alternatives --display /).and_return(false) 12 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'hive-server2' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /recipes/_hadoop_hdfs_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _hadoop_hdfs_checkconfig 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # For HDFS functionality, we require fs.defaultFS property to be set 21 | if node['hadoop'].key?('core_site') && node['hadoop']['core_site'].key?('fs.defaultFS') 22 | Chef::Log.info("HDFS NameNode configured at #{node['hadoop']['core_site']['fs.defaultFS']}") 23 | else 24 | Chef::Application.fatal!("HDFS NameNode must be configured! Set default['hadoop']['core_site']['fs.defaultFS'] to the NameNode.") 25 | end 26 | -------------------------------------------------------------------------------- /test/fixtures/cookbooks/hadoop_test/recipes/yarn.rb: -------------------------------------------------------------------------------- 1 | include_recipe 'hadoop::hadoop_yarn_resourcemanager' 2 | include_recipe 'hadoop::hadoop_yarn_nodemanager' 3 | 4 | include_recipe 'hadoop_test::systemd_reload' 5 | 6 | execute 'wait-for-hdfs-safemode' do 7 | command 'hadoop dfsadmin -safemode wait' 8 | end 9 | 10 | ruby_block 'Start YARN' do 11 | block do 12 | true 13 | end 14 | notifies :run, 'execute[hdfs-tmpdir]', :immediately 15 | notifies :run, 'execute[yarn-remote-app-log-dir]', :immediately 16 | notifies :run, 'execute[yarn-app-mapreduce-am-staging-dir]', :immediately 17 | notifies :run, 'execute[hdp22-mapreduce-tarball]', :immediately if hdp22? 18 | notifies :run, 'execute[iop-mapreduce-tarball]', :immediately if iop? 
19 | notifies :run, 'execute[systemd-daemon-reload]', :immediately if 20 | (node['platform_family'] == 'rhel' && node['platform_version'].to_i >= 7) || 21 | (node['platform'] == 'ubuntu' && node['platform_version'].to_i >= 16) || 22 | (node['platform'] == 'debian' && node['platform_version'].to_i >= 8) 23 | notifies :start, 'service[hadoop-yarn-resourcemanager]', :immediately 24 | notifies :start, 'service[hadoop-yarn-nodemanager]', :immediately 25 | end 26 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_hdfs_datanode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_hdfs_datanode' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/update-alternatives --display /).and_return(false) 9 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 10 | stub_command(/test -L /).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hadoop-hdfs-datanode' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | 28 | it 'creates HDFS data dir' do 29 | expect(chef_run).to create_directory('/tmp/hadoop-hdfs/dfs/data').with( 30 | user: 'hdfs', 31 | group: 'hdfs' 32 | ) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_mapreduce_historyserver_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_mapreduce_historyserver' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/update-alternatives --display /).and_return(false) 9 | stub_command(/test -L /).and_return(false) 10 | end.converge(described_recipe) 11 | end 12 | pkg = 'hadoop-mapreduce-historyserver' 13 | 14 | %W( 15 | /etc/default/#{pkg} 16 | /etc/init.d/#{pkg} 17 | ).each do |file| 18 | it "creates #{file} from template" do 19 | expect(chef_run).to create_template(file) 20 | end 21 | end 22 | 23 | it "creates #{pkg} service resource, but does not run it" do 24 | expect(chef_run.service(pkg)).to do_nothing 25 | end 26 | 27 | %w( 28 | mapreduce-jobhistory-done-dir 29 | mapreduce-jobhistory-intermediate-done-dir 30 | ).each do |dir| 31 | it "creates #{dir} resource, but does not run it" do 32 | expect(chef_run.execute(dir)).to do_nothing 33 | end 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /spec/unit/recipes/flume_agent_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::flume_agent' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | 
end.converge(described_recipe) 11 | end 12 | 13 | it 'install flume-agent package' do 14 | expect(chef_run).to install_package('flume_2_3_4_7_4-agent') 15 | end 16 | 17 | it 'creates flume-agent service resource, but does not run it' do 18 | expect(chef_run.service('flume-agent')).to do_nothing 19 | end 20 | 21 | context 'using CDH' do 22 | let(:chef_run) do 23 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 24 | node.automatic['domain'] = 'example.com' 25 | node.override['hadoop']['distribution'] = 'cdh' 26 | end.converge(described_recipe) 27 | end 28 | 29 | it 'install flume-ng package' do 30 | expect(chef_run).to install_package('flume-ng-agent') 31 | end 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /.kitchen.yml: -------------------------------------------------------------------------------- 1 | --- 2 | driver: 3 | name: vagrant 4 | # customize: 5 | # cpus: 2 6 | # memory: 4096 7 | 8 | provisioner: 9 | name: chef_zero 10 | require_chef_omnibus: true 11 | 12 | verifier: 13 | name: inspec 14 | 15 | platforms: 16 | - name: centos-6.7 17 | - name: centos-7.3 18 | - name: debian-6.0.10 19 | run_list: 20 | - apt::default 21 | - name: ubuntu-12.04 22 | run_list: 23 | - apt::default 24 | - name: ubuntu-14.04 25 | run_list: 26 | - apt::default 27 | - name: ubuntu-16.04 28 | run_list: 29 | - apt::default 30 | 31 | suites: 32 | - name: default 33 | run_list: 34 | - recipe[java::default] 35 | - recipe[hadoop::default] 36 | - recipe[hadoop::hbase] 37 | - recipe[hadoop::hive] 38 | - recipe[hadoop::oozie] 39 | - recipe[hadoop::pig] 40 | - recipe[hadoop::spark] 41 | - recipe[hadoop::storm] 42 | - recipe[hadoop::zookeeper] 43 | attributes: 44 | hadoop: 45 | hdfs_site: 46 | 'dfs.datanode.max.transfer.threads': 4096 47 | hive: 48 | hive_site: 49 | 'hive.support.concurrency': true 50 | 'hive.zookeeper.quorum': 'localhost' 51 | java: 52 | jdk_version: '7' 53 | -------------------------------------------------------------------------------- /spec/unit/recipes/_system_tuning_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::_system_tuning' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do 7 | allow(::File).to receive_messages(file?: true) 8 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 9 | end.converge(described_recipe) 10 | end 11 | 12 | it 'disables swap' do 13 | expect(chef_run).to apply_sysctl_param('vm.swappiness') 14 | end 15 | 16 | it 'disables transparent hugepage compaction' do 17 | expect(chef_run).to run_execute('disable-transparent-hugepage-compaction') 18 | end 19 | end 20 | 21 | context 'on Ubuntu 14.04' do 22 | let(:chef_run) do 23 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do 24 | allow(::File).to receive_messages(file?: true) 25 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 26 | end.converge(described_recipe) 27 | end 28 | 29 | it 'disables transparent hugepage compaction' do 30 | expect(chef_run).to run_execute('disable-transparent-hugepage-compaction') 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /Berksfile: -------------------------------------------------------------------------------- 1 | source 'https://supermarket.chef.io' 2 | 3 | require 'chef/version' 4 | 5 | if 
Chef::VERSION.to_f < 12.0 6 | cookbook 'apt', '< 4.0' 7 | cookbook 'build-essential', '< 3.0' 8 | cookbook 'homebrew', '< 3.0' 9 | cookbook 'mingw', '< 1.0' 10 | cookbook 'ohai', '< 4.0' 11 | cookbook 'selinux', '< 1.0' 12 | cookbook 'sysctl', '< 0.10' 13 | cookbook 'windows', '< 2.0' 14 | cookbook 'yum', '< 4.0' 15 | cookbook 'yum-epel', '< 2.0' 16 | elsif Chef::VERSION.to_f < 12.5 17 | cookbook 'apt', '< 6.0' 18 | cookbook 'build-essential', '< 8.0' 19 | cookbook 'homebrew', '< 4.0' 20 | cookbook 'mingw', '< 2.0' 21 | cookbook 'ohai', '< 5.0' 22 | cookbook 'selinux', '< 1.0' 23 | cookbook 'sysctl', '< 0.10' 24 | cookbook 'windows', '< 3.0' 25 | cookbook 'yum', '< 5.0' 26 | elsif Chef::VERSION.to_f < 12.6 27 | cookbook 'apt', '< 6.0' 28 | cookbook 'windows', '< 3.0' 29 | cookbook 'yum', '< 5.0' 30 | elsif Chef::VERSION.to_f < 12.9 31 | cookbook 'apt', '< 6.0' 32 | cookbook 'yum', '< 5.0' 33 | elsif Chef::VERSION.to_f < 12.14 34 | cookbook 'yum', '< 5.0' 35 | end 36 | 37 | group :integration do 38 | cookbook 'java' 39 | cookbook 'hadoop_test', path: './test/fixtures/cookbooks/hadoop_test' 40 | end 41 | 42 | metadata 43 | -------------------------------------------------------------------------------- /spec/unit/recipes/spark_worker_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::spark_worker' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['spark']['release']['install'] = true 9 | node.default['spark']['spark_env']['spark_worker_dir'] = '/data/spark/work' 10 | stub_command(/test -L /).and_return(false) 11 | stub_command(/update-alternatives --display /).and_return(false) 12 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'spark-worker' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | 30 | it 'creates /data/spark/work directory' do 31 | expect(chef_run).to create_directory('/data/spark/work') 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | 3 | require 'bundler/setup' 4 | 5 | # chefspec task against spec/*_spec.rb 6 | require 'rspec/core/rake_task' 7 | RSpec::Core::RakeTask.new(:chefspec) 8 | 9 | # foodcritic rake task 10 | desc 'Foodcritic linter' 11 | task :foodcritic do 12 | sh 'foodcritic -f correctness .' 
13 | end 14 | 15 | # rubocop rake task 16 | desc 'Ruby style guide linter' 17 | task :rubocop do 18 | sh 'rubocop -D' 19 | end 20 | 21 | # creates metadata.json 22 | desc 'Create metadata.json from metadata.rb' 23 | task :metadata do 24 | sh 'knife cookbook metadata from file metadata.rb' 25 | end 26 | 27 | # share cookbook to Chef community site 28 | desc 'Share cookbook to community site' 29 | task :share do 30 | sh 'knife cookbook site share hadoop databases' 31 | end 32 | 33 | # test-kitchen 34 | begin 35 | require 'kitchen/rake_tasks' 36 | desc 'Run Test Kitchen integration tests' 37 | task :integration do 38 | Kitchen.logger = Kitchen.default_file_logger 39 | Kitchen::Config.new.instances.each do |instance| 40 | instance.test(:always) 41 | end 42 | end 43 | rescue LoadError 44 | puts '>>>>> Kitchen gem not loaded, omitting tasks' unless ENV['CI'] 45 | end 46 | 47 | # default tasks are quick, commit tests 48 | task default: %w(foodcritic rubocop chefspec) 49 | -------------------------------------------------------------------------------- /recipes/_hadoop_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _hadoop_checkconfig 4 | # 5 | # Copyright © 2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # When using the Fair Scheduler, we must have a configuration 21 | if node['hadoop'].key?('yarn_site') && node['hadoop']['yarn_site'].key?('yarn.resourcemanager.scheduler.class') && \ 22 | node['hadoop']['yarn_site']['yarn.resourcemanager.scheduler.class'] == \ 23 | 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler' 24 | unless node['hadoop'].key?('fair_scheduler') && !node['hadoop']['fair_scheduler'].empty? 
25 | Chef::Application.fatal!('Cannot set YARN scheduler to fair-scheduler without configuring it!') 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /spec/unit/recipes/hbase_rest_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hbase_rest' do 4 | context 'on CentOS 6.9' do 5 | context 'in distributed mode' do 6 | let(:chef_run) do 7 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 8 | node.automatic['domain'] = 'example.com' 9 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'] = '4096' 10 | node.default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase' 11 | node.default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost' 12 | node.default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'true' 13 | stub_command(/test -L /).and_return(false) 14 | stub_command(/update-alternatives --display /).and_return(false) 15 | end.converge(described_recipe) 16 | end 17 | pkg = 'hbase-rest' 18 | 19 | %W( 20 | /etc/default/#{pkg} 21 | /etc/init.d/#{pkg} 22 | ).each do |file| 23 | it "creates #{file} from template" do 24 | expect(chef_run).to create_template(file) 25 | end 26 | end 27 | 28 | it "creates #{pkg} service resource, but does not run it" do 29 | expect(chef_run.service(pkg)).to do_nothing 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_hdfs_journalnode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_hdfs_journalnode' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['hdfs_site']['dfs.journalnode.edits.dir'] = '/tmp/hadoop-hdfs/dfs/journal' 9 | stub_command(/update-alternatives --display /).and_return(false) 10 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 11 | stub_command(/test -L /).and_return(false) 12 | end.converge(described_recipe) 13 | end 14 | pkg = 'hadoop-hdfs-journalnode' 15 | 16 | %W( 17 | /etc/default/#{pkg} 18 | /etc/init.d/#{pkg} 19 | ).each do |file| 20 | it "creates #{file} from template" do 21 | expect(chef_run).to create_template(file) 22 | end 23 | end 24 | 25 | it "creates #{pkg} service resource, but does not run it" do 26 | expect(chef_run.service(pkg)).to do_nothing 27 | end 28 | 29 | it 'creates HDFS journalnode edits dir' do 30 | expect(chef_run).to create_directory('/tmp/hadoop-hdfs/dfs/journal').with( 31 | user: 'hdfs', 32 | group: 'hdfs' 33 | ) 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /spec/unit/recipes/hbase_thrift_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hbase_thrift' do 4 | context 'on CentOS 6.9' do 5 | context 'in distributed mode' do 6 | let(:chef_run) do 7 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 8 | node.automatic['domain'] = 'example.com' 9 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'] = '4096' 10 | node.default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase' 11 | 
node.default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost' 12 | node.default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'true' 13 | stub_command(/test -L /).and_return(false) 14 | stub_command(/update-alternatives --display /).and_return(false) 15 | end.converge(described_recipe) 16 | end 17 | pkg = 'hbase-thrift' 18 | 19 | %W( 20 | /etc/default/#{pkg} 21 | /etc/init.d/#{pkg} 22 | ).each do |file| 23 | it "creates #{file} from template" do 24 | expect(chef_run).to create_template(file) 25 | end 26 | end 27 | 28 | it "creates #{pkg} service resource, but does not run it" do 29 | expect(chef_run.service(pkg)).to do_nothing 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_yarn_resourcemanager_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_yarn_resourcemanager' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/update-alternatives --display /).and_return(false) 9 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 10 | stub_command(/test -L /).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hadoop-yarn-resourcemanager' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | 28 | %w( 29 | hdfs-tmpdir 30 | hdp22-mapreduce-tarball 31 | yarn-app-mapreduce-am-staging-dir 32 | yarn-remote-app-log-dir 33 | ).each do |exec| 34 | it "creates #{exec} resource, but does not run it" do 35 | expect(chef_run.execute(exec)).to do_nothing 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/unit/recipes/spark_historyserver_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::spark_historyserver' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['spark']['release']['install'] = true 9 | stub_command(/test -L /).and_return(false) 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'spark-history-server' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | 28 | it 'creates hdfs-spark-userdir execute resource, but does not run it' do 29 | expect(chef_run).to_not run_execute('hdfs-spark-userdir').with(user: 'hdfs') 30 | end 31 | 32 | it 'creates hdfs-spark-eventlog-dir execute resource, but does not run it' do 33 | expect(chef_run).to_not run_execute('hdfs-spark-eventlog-dir').with(user: 'hdfs') 34 | end 35 | end 36 | end 37 | 
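As the spec above documents, the cookbook only defines the spark-history-server service with action :nothing and never starts it; starting daemons is left to the operator. A minimal sketch of a hypothetical wrapper recipe that enables and starts the daemon after this cookbook has configured it might look like the following (the wrapper name is illustrative, not part of this cookbook, and edit_resource assumes a reasonably recent Chef client):

# hypothetical wrapper recipe, e.g. my_wrapper/recipes/spark_historyserver.rb
include_recipe 'hadoop::spark_historyserver'

# the hadoop cookbook registers this service with action :nothing,
# so the wrapper decides when it is enabled and started
edit_resource(:service, 'spark-history-server') do
  action [:enable, :start]
end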
-------------------------------------------------------------------------------- /recipes/zookeeper.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: zookeeper 4 | # 5 | # Copyright © 2013-2014 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | include_recipe 'hadoop::_zookeeper_checkconfig' 22 | 23 | # HDP 2.2+ zookeeper debian packages have a missing package dependency on hdp-select 24 | package 'hdp-select' do 25 | action :install 26 | only_if { node['platform_family'] == 'debian' && hdp22? } 27 | end 28 | 29 | package hadoop_package('zookeeper') do 30 | action :install 31 | end 32 | 33 | zookeeper_conf_dir = "/etc/zookeeper/#{node['zookeeper']['conf_dir']}" 34 | 35 | directory zookeeper_conf_dir do 36 | mode '0755' 37 | owner 'root' 38 | group 'root' 39 | action :create 40 | recursive true 41 | end 42 | 43 | write_deprecated_jaas_config('zookeeper') 44 | write_jaas_config('zookeeper') 45 | -------------------------------------------------------------------------------- /attributes/spark2.rb: -------------------------------------------------------------------------------- 1 | # spark2-env.sh 2 | default['spark2']['spark_env']['standalone_spark_master_host'] = node['fqdn'] 3 | default['spark2']['spark_env']['spark_master_ip'] = node['fqdn'] 4 | default['spark2']['spark_env']['spark_launch_with_scala'] = 0 5 | default['spark2']['spark_env']['spark_library_path'] = '${SPARK_HOME}/lib' 6 | default['spark2']['spark_env']['scala_library_path'] = '${SPARK_HOME}/lib' 7 | default['spark2']['spark_env']['spark_master_webui_port'] = 18_080 8 | default['spark2']['spark_env']['spark_worker_webui_port'] = 18_081 9 | default['spark2']['spark_env']['spark_master_port'] = 7077 10 | default['spark2']['spark_env']['spark_worker_port'] = 7078 11 | default['spark2']['spark_env']['spark_pid_dir'] = '/var/run/spark2/' 12 | default['spark2']['spark_env']['spark_history_server_log_dir'] = 'hdfs:///user/spark/applicationHistory' 13 | default['spark2']['spark_env']['hadoop_conf_dir'] = '/etc/hadoop/conf' 14 | default['spark2']['spark_env']['spark_dist_classpath'] = '$(hadoop classpath)' 15 | # spark2-defaults.xml 16 | default['spark2']['spark_defaults']['spark.eventLog.dir'] = 'hdfs:///user/spark/applicationHistory' 17 | default['spark2']['spark_defaults']['spark.eventLog.enabled'] = true 18 | default['spark2']['spark_defaults']['spark.yarn.historyServer.address'] = "#{node['fqdn']}:10020" 19 | -------------------------------------------------------------------------------- /recipes/_system_tuning.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _system_tuning 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Disable SELinux 21 | include_recipe 'selinux::disabled' 22 | 23 | include_recipe 'sysctl::default' 24 | # Disable swapping 25 | sysctl_param 'vm.swappiness' do 26 | value 0 27 | end 28 | 29 | # Disable transparent_hugepage compaction 30 | # COOK-57 location can vary within CentOS 31 | %w(transparent_hugepage redhat_transparent_hugepage).each do |dir| 32 | thp_defrag = "/sys/kernel/mm/#{dir}/defrag" 33 | next unless ::File.file?(thp_defrag) 34 | 35 | # disable transparent_hugepage (if not already disabled) 36 | execute 'disable-transparent-hugepage-compaction' do 37 | command "echo never > #{thp_defrag}" 38 | not_if "grep '\\[never\\]' #{thp_defrag}" 39 | end 40 | break 41 | end 42 | -------------------------------------------------------------------------------- /spec/unit/recipes/flume_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::flume' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['flume']['flume_conf']['key'] = 'value' 9 | node.default['hadoop']['distribution'] = 'hdp' 10 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 11 | end.converge(described_recipe) 12 | end 13 | conf_dir = '/etc/flume/conf.chef' 14 | 15 | it 'installs flume package' do 16 | expect(chef_run).to install_package('flume_2_3_4_7_4') 17 | end 18 | 19 | it "creates #{conf_dir} directory" do 20 | expect(chef_run).to create_directory(conf_dir) 21 | end 22 | 23 | it "creates #{conf_dir}/flume.conf from template" do 24 | expect(chef_run).to create_template("#{conf_dir}/flume.conf") 25 | end 26 | 27 | context 'using CDH' do 28 | let(:chef_run) do 29 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 30 | node.automatic['domain'] = 'example.com' 31 | node.override['hadoop']['distribution'] = 'cdh' 32 | end.converge(described_recipe) 33 | end 34 | 35 | it 'install flume-ng package' do 36 | expect(chef_run).to install_package('flume-ng') 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/unit/recipes/hbase_regionserver_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hbase_regionserver' do 4 | context 'on CentOS 6.9' do 5 | context 'in distributed mode' do 6 | let(:chef_run) do 7 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 8 | node.automatic['domain'] = 'example.com' 9 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] = '4096' 10 | node.default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase' 11 | node.default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost' 12 | 
node.default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'true' 13 | stub_command(/test -L /).and_return(false) 14 | stub_command(/update-alternatives --display /).and_return(false) 15 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 16 | end.converge(described_recipe) 17 | end 18 | pkg = 'hbase-regionserver' 19 | 20 | %W( 21 | /etc/default/#{pkg} 22 | /etc/init.d/#{pkg} 23 | ).each do |file| 24 | it "creates #{file} from template" do 25 | expect(chef_run).to create_template(file) 26 | end 27 | end 28 | 29 | it "creates #{pkg} service resource, but does not run it" do 30 | expect(chef_run.service(pkg)).to do_nothing 31 | end 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_hdfs_namenode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_hdfs_namenode' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/update-alternatives --display /).and_return(false) 9 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 10 | stub_command(/test -L /).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hadoop-hdfs-namenode' 14 | 15 | %W( 16 | /etc/default/#{pkg} 17 | /etc/init.d/#{pkg} 18 | ).each do |file| 19 | it "creates #{file} from template" do 20 | expect(chef_run).to create_template(file) 21 | end 22 | end 23 | 24 | it "creates #{pkg} service resource, but does not run it" do 25 | expect(chef_run.service(pkg)).to do_nothing 26 | end 27 | 28 | it 'creates HDFS name dir' do 29 | expect(chef_run).to create_directory('/tmp/hadoop-hdfs/dfs/name').with( 30 | user: 'hdfs', 31 | group: 'hdfs' 32 | ) 33 | end 34 | 35 | %w(hdfs-namenode-format hdfs-namenode-bootstrap-standby hdfs-namenode-initialize-sharededits).each do |exec| 36 | it "creates #{exec} execute resource, but does not run it" do 37 | expect(chef_run.execute(exec)).to do_nothing 38 | end 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /recipes/flume.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: flume 4 | # 5 | # Copyright © 2013-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | pkg = 23 | if node['hadoop']['distribution'] == 'cdh' 24 | 'flume-ng' 25 | else 26 | hadoop_package('flume') 27 | end 28 | 29 | package pkg do 30 | action :install 31 | end 32 | 33 | flume_conf_dir = "/etc/flume/#{node['flume']['conf_dir']}" 34 | 35 | directory flume_conf_dir do 36 | mode '0755' 37 | owner 'root' 38 | group 'root' 39 | action :create 40 | recursive true 41 | end 42 | 43 | # Setup flume.conf 44 | my_vars = { properties: node['flume']['flume_conf'] } 45 | 46 | template "#{flume_conf_dir}/flume.conf" do 47 | source 'generic.properties.erb' 48 | mode '0644' 49 | owner 'root' 50 | group 'root' 51 | action :create 52 | variables my_vars 53 | only_if { node['flume'].key?('flume_conf') && !node['flume']['flume_conf'].empty? } 54 | end # End flume.conf 55 | -------------------------------------------------------------------------------- /spec/unit/recipes/tez_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::tez' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | stub_command('hdfs dfs -test -d hdfs://fauxhai.local/apps/tez').and_return(false) 11 | stub_command(/update-alternatives --display /).and_return(false) 12 | end.converge(described_recipe) 13 | end 14 | 15 | it 'install tez package' do 16 | expect(chef_run).to install_package('tez_2_3_4_7_4') 17 | end 18 | 19 | it 'does not executes execute[tez-hdfs-appdir]' do 20 | expect(chef_run).to_not run_execute('tez-hdfs-appdir') 21 | end 22 | 23 | it 'does not executes execute[hive-hdfs-appdir]' do 24 | expect(chef_run).to_not run_execute('hive-hdfs-appdir') 25 | end 26 | 27 | it 'executes execute[update tez-conf alternatives]' do 28 | expect(chef_run).to run_execute('update tez-conf alternatives') 29 | end 30 | 31 | it 'creates /etc/tez/conf.chef directory' do 32 | expect(chef_run).to create_directory('/etc/tez/conf.chef') 33 | end 34 | 35 | %w(/etc/tez/conf.chef/tez-site.xml /etc/tez/conf.chef/tez-env.sh).each do |template| 36 | it "creates #{template} template" do 37 | expect(chef_run).to create_template(template) 38 | end 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /TESTING.md: -------------------------------------------------------------------------------- 1 | This cookbook uses a variety of testing components: 2 | 3 | - Unit tests: [ChefSpec](https://docs.chef.io/chefspec.html) 4 | - Integration tests: [Test Kitchen](http://kitchen.ci/) 5 | - Chef Style lints: [Foodcritic](http://www.foodcritic.io/) 6 | - Ruby Style lints: [cookstyle](https://github.com/chef/cookstyle) 7 | 8 | 9 | Prerequisites 10 | ------------- 11 | You can install the [Chef Development Kit (Chef-DK)](http://downloads.chef.io/chef-dk/) to more easily install the above components. 12 | 13 | You must also have Vagrant and VirtualBox installed: 14 | 15 | - [Vagrant](https://vagrantup.com) 16 | - [VirtualBox](https://virtualbox.org) 17 | 18 | 19 | Development 20 | ----------- 21 | 1. Clone the git repository from GitHub: 22 | 23 | - `git clone git@github.com:caskdata/hadoop_cookbook.git` 24 | 25 | 2. Install the dependencies using bundler: 26 | 27 | - `chef exec bundle install --path ../vendor` 28 | 29 | 3. 
Create a branch for your changes: 30 | 31 | - `git checkout -b my_bug_fix` 32 | 33 | 4. Make any changes 34 | 5. Write tests to support those changes. It is highly recommended you write both unit and integration tests. 35 | 6. Run the tests: 36 | 37 | - `chef exec bundle exec rspec` 38 | - `chef exec bundle exec foodcritic .` 39 | - `chef exec bundle exec rubocop` 40 | - `chef exec bundle exec kitchen test` 41 | 42 | 7. Assuming the tests pass, open a Pull Request on GitHub 43 | 44 | For more information, see [the cookbook's Contribution Guidelines](https://github.com/caskdata/hadoop_cookbook/blob/master/CONTRIBUTING.md) 45 | -------------------------------------------------------------------------------- /recipes/_hive_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _hive_checkconfig 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # HiveServer2 requires Hive Table Lock Manager 21 | # http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/4.2.0/CDH4-Installation-Guide/cdh4ig_topic_18_5.html 22 | if node['hive'].key?('hive_site') && node['hive']['hive_site'].key?('hive.support.concurrency') && 23 | node['hive']['hive_site']['hive.support.concurrency'].to_s == 'true' && node['hive']['hive_site'].key?('hive.zookeeper.quorum') 24 | Chef::Log.info('Hive Table Lock Manager enabled') 25 | Chef::Log.info("Hive ZooKeeper Quorum: #{node['hive']['hive_site']['hive.zookeeper.quorum']}") 26 | else 27 | Chef::Application.fatal!("You *must* set node['hive']['hive_site']['hive.support.concurrency'] and node['hive']['hive_site']['hive.zookeeper.quorum']") 28 | end 29 | 30 | # If using JAAS, make sure it's configured fully 31 | check_deprecated_jaas_config('hive') 32 | check_jaas_config('hive') 33 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_hdfs_secondarynamenode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_hdfs_secondarynamenode' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['hdfs_site']['dfs.namenode.checkpoint.dir'] = '/tmp/hadoop-hdfs/dfs/namesecondary' 9 | node.default['hadoop']['hdfs_site']['dfs.namenode.checkpoint.edits.dir'] = '/tmp/hadoop-hdfs/dfs/namesecondaryedits' 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 12 | stub_command(/test -L /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'hadoop-hdfs-secondarynamenode' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from 
template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | 30 | it 'creates HDFS checkpoint dirs' do 31 | expect(chef_run).to create_directory('/tmp/hadoop-hdfs/dfs/namesecondary').with( 32 | user: 'hdfs', 33 | group: 'hdfs' 34 | ) 35 | expect(chef_run).to create_directory('/tmp/hadoop-hdfs/dfs/namesecondaryedits').with( 36 | user: 'hdfs', 37 | group: 'hdfs' 38 | ) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /attributes/spark.rb: -------------------------------------------------------------------------------- 1 | 2 | # spark release 3 | default['spark']['release']['install'] = false 4 | default['spark']['release']['install_path'] = '/opt' 5 | default['spark']['release']['version'] = '1.6.0' 6 | default['spark']['release']['hadoop_version'] = 'hadoop2.6' 7 | 8 | # spark-env.sh 9 | default['spark']['spark_env']['standalone_spark_master_host'] = node['fqdn'] 10 | default['spark']['spark_env']['spark_master_ip'] = node['fqdn'] 11 | default['spark']['spark_env']['spark_launch_with_scala'] = 0 12 | default['spark']['spark_env']['spark_library_path'] = '${SPARK_HOME}/lib' 13 | default['spark']['spark_env']['scala_library_path'] = '${SPARK_HOME}/lib' 14 | default['spark']['spark_env']['spark_master_webui_port'] = 18_080 15 | default['spark']['spark_env']['spark_worker_webui_port'] = 18_081 16 | default['spark']['spark_env']['spark_master_port'] = 7077 17 | default['spark']['spark_env']['spark_worker_port'] = 7078 18 | default['spark']['spark_env']['spark_pid_dir'] = '/var/run/spark/' 19 | default['spark']['spark_env']['spark_history_server_log_dir'] = 'hdfs:///user/spark/applicationHistory' 20 | default['spark']['spark_env']['hadoop_conf_dir'] = '/etc/hadoop/conf' 21 | default['spark']['spark_env']['spark_dist_classpath'] = '$(hadoop classpath)' 22 | # spark-defaults.xml 23 | default['spark']['spark_defaults']['spark.eventLog.dir'] = 'hdfs:///user/spark/applicationHistory' 24 | default['spark']['spark_defaults']['spark.eventLog.enabled'] = true 25 | default['spark']['spark_defaults']['spark.yarn.historyServer.address'] = "#{node['fqdn']}:10020" 26 | -------------------------------------------------------------------------------- /chefignore: -------------------------------------------------------------------------------- 1 | # Put files/directories that should be ignored in this file when uploading 2 | # or sharing to the community site. 3 | # Lines that start with '# ' are comments. 4 | 5 | # OS generated files # 6 | ###################### 7 | .DS_Store 8 | Icon? 
9 | nohup.out 10 | ehthumbs.db 11 | Thumbs.db 12 | 13 | # SASS # 14 | ######## 15 | .sass-cache 16 | 17 | # EDITORS # 18 | ########### 19 | \#* 20 | .#* 21 | *~ 22 | *.sw[a-z] 23 | *.bak 24 | REVISION 25 | TAGS* 26 | tmtags 27 | *_flymake.* 28 | *_flymake 29 | *.tmproj 30 | .project 31 | .settings 32 | mkmf.log 33 | 34 | ## COMPILED ## 35 | ############## 36 | a.out 37 | *.o 38 | *.pyc 39 | *.so 40 | *.com 41 | *.class 42 | *.dll 43 | *.exe 44 | */rdoc/ 45 | 46 | # Testing # 47 | ########### 48 | .watchr 49 | .rspec 50 | .rubocop.yml 51 | .rubocop_todo.yml 52 | .kitchen.yml 53 | .codeclimate.yml 54 | spec/* 55 | spec/fixtures/* 56 | test/* 57 | features/* 58 | files/default/tests/* 59 | Gemfile 60 | Guardfile 61 | Procfile 62 | Rakefile 63 | Thorfile 64 | 65 | # SCM # 66 | ####### 67 | .git 68 | */.git 69 | .gitignore 70 | .gitmodules 71 | .gitconfig 72 | .gitattributes 73 | .svn 74 | */.bzr/* 75 | */.hg/* 76 | */.svn/* 77 | 78 | # Berkshelf # 79 | ############# 80 | Berksfile 81 | Berksfile.lock 82 | cookbooks/* 83 | tmp 84 | 85 | # Cookbooks # 86 | ############# 87 | CONTRIBUTING* 88 | CHANGELOG* 89 | chefignore 90 | 91 | # Strainer # 92 | ############ 93 | Colanderfile 94 | Strainerfile 95 | .colander 96 | .strainer 97 | 98 | # Vagrant # 99 | ########### 100 | .vagrant 101 | Vagrantfile 102 | 103 | # Travis # 104 | ########## 105 | .travis.yml 106 | -------------------------------------------------------------------------------- /recipes/_compression_libs.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _compression_libs 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | pkgs = [] 23 | 24 | # Everybody gets snappy 25 | case node['platform_family'] 26 | when 'debian' 27 | # HDP-UTILS repo provides its own libsnappy1, which conflicts with Ubuntu's libsnappy1v5 28 | pkgs += if node['platform_version'].to_i >= 16 && node['hadoop']['distribution'] != 'hdp' 29 | ['libsnappy1v5', 'libsnappy-dev'] 30 | else 31 | ['libsnappy1', 'libsnappy-dev'] 32 | end 33 | when 'rhel', 'amazon' 34 | pkgs += ['snappy', 'snappy-devel'] 35 | end 36 | 37 | # HDP 2.2+ has lzo 38 | if hdp22? 39 | case node['platform_family'] 40 | when 'debian' 41 | pkgs += ['liblzo2-2', 'liblzo2-dev', 'hadooplzo'] 42 | when 'rhel', 'amazon' 43 | pkgs += ['lzo', 'lzo-devel', 'hadooplzo', 'hadooplzo-native'] 44 | end 45 | elsif iop? 
46 | pkgs += ['lzo', 'lzo-devel', 'hadoop-lzo', 'hadoop-lzo-native'] 47 | end 48 | 49 | pkgs.each do |pkg| 50 | package pkg do 51 | action :install 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_kms_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_kms' do 4 | context 'on CentOS 6.9 with CDH' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.override['hadoop']['distribution'] = 'cdh' 9 | node.default['hadoop_kms']['kms_env']['kms_log'] = '/var/log/hadoop-kms' 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 12 | stub_command(/test -L /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | 16 | it 'installs hadoop-kms package' do 17 | expect(chef_run).to install_package('hadoop-kms') 18 | end 19 | 20 | it 'creates hadoop-kms conf_dir' do 21 | expect(chef_run).to create_directory('/etc/hadoop-kms/conf.chef').with( 22 | user: 'root', 23 | group: 'root' 24 | ) 25 | end 26 | 27 | it 'creates /var/log/hadoop-kms' do 28 | expect(chef_run).to create_directory('/var/log/hadoop-kms').with( 29 | mode: '0755' 30 | ) 31 | end 32 | 33 | %w( 34 | core-site.xml 35 | kms-acls.xml 36 | kms-env.sh 37 | kms-site.xml 38 | kms-log4j.properties 39 | ).each do |file| 40 | it "creates #{file} from template" do 41 | expect(chef_run).to create_template("/etc/hadoop-kms/conf.chef/#{file}") 42 | end 43 | end 44 | 45 | it 'runs execute[update hadoop-kms-conf alternatives]' do 46 | expect(chef_run).to run_execute('update hadoop-kms-conf alternatives') 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /attributes/tez.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Attribute:: tez 4 | # 5 | # Copyright © 2013-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | default['tez']['tez_env']['tez_conf_dir'] = "/etc/tez/#{node['tez']['conf_dir']}" 21 | default['tez']['tez_env']['tez_jars'] = '/usr/lib/tez/*:/usr/lib/tez/lib/*' 22 | default['tez']['tez_site']['tez.lib.uris'] = '${fs.defaultFS}/apps/tez/,${fs.defaultFS}/apps/tez/lib/' 23 | 24 | if node['hadoop']['distribution'] == 'hdp' && node['hadoop']['distribution_version'].to_f >= 2.2 25 | default['tez']['tez_env']['tez_jars'] = '/usr/hdp/current/tez-client/*:/usr/hdp/current/tez-client/lib/*' 26 | default['tez']['tez_site']['tez.lib.uris'] = '${fs.defaultFS}/hdp/apps/${hdp.version}/tez/tez.tar.gz' 27 | end 28 | 29 | default['hadoop']['hadoop_env']['hadoop_classpath'] = 30 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_classpath') 31 | "$HADOOP_CLASSPATH:#{default['hadoop']['hadoop_env']['hadoop_classpath']}:#{node['tez']['tez_env']['tez_conf_dir']}:#{node['tez']['tez_env']['tez_jars']}" 32 | else 33 | "$HADOOP_CLASSPATH:#{node['tez']['tez_env']['tez_conf_dir']}:#{node['tez']['tez_env']['tez_jars']}" 34 | end 35 | -------------------------------------------------------------------------------- /.kitchen.dokken.yml: -------------------------------------------------------------------------------- 1 | driver: 2 | name: dokken 3 | privileged: true 4 | chef_version: <%= ENV['CHEF_VERSION'] || 'current' %> 5 | 6 | transport: 7 | name: dokken 8 | 9 | provisioner: 10 | name: dokken 11 | # These three lines enforce Chef 13 compatibility 12 | client_rb: 13 | treat_deprecation_warnings_as_errors: true 14 | resource_cloning: false 15 | 16 | platforms: 17 | - name: centos-6 18 | driver: 19 | chef_version: '12.21.3' 20 | hostname: 'localhost.localdomain' 21 | image: dokken/centos-6 22 | pid_one_command: /sbin/init 23 | platform: rhel 24 | - name: centos-7 25 | driver: 26 | image: dokken/centos-7 27 | intermediate_instructions: 28 | - RUN /usr/bin/yum install -y systemd-sysv 29 | pid_one_command: /usr/lib/systemd/systemd 30 | platform: rhel 31 | - name: ubuntu-12.04 32 | driver: 33 | chef_version: '12.21.3' 34 | image: dokken/ubuntu-12.04 35 | intermediate_instructions: 36 | - RUN /usr/bin/apt-get update 37 | pid_one_command: /sbin/init 38 | - name: ubuntu-14.04 39 | driver: 40 | image: dokken/ubuntu-14.04 41 | intermediate_instructions: 42 | - RUN /usr/bin/apt-get update 43 | pid_one_command: /sbin/init 44 | - name: ubuntu-16.04 45 | driver: 46 | image: dokken/ubuntu-16.04 47 | intermediate_instructions: 48 | - RUN /usr/bin/apt-get update 49 | pid_one_command: /bin/systemd 50 | 51 | suites: 52 | - name: default 53 | run_list: 54 | - recipe[java::default] 55 | - recipe[hadoop_test::default] 56 | attributes: 57 | hadoop: 58 | hdfs_site: 59 | 'dfs.datanode.max.transfer.threads': 4096 60 | hive: 61 | hive_site: 62 | 'hive.support.concurrency': true 63 | 'hive.zookeeper.quorum': 'localhost' 64 | java: 65 | jdk_version: '7' 66 | -------------------------------------------------------------------------------- /spec/unit/recipes/_sql_connectors_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::_sql_connectors' do 4 | context 'on CentOS 6.9' do 5 | context 'Using MySQL' do 6 | let(:chef_run) do 7 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 8 | node.default['hadoop']['sql_connector'] = 'mysql' 9 | end.converge(described_recipe) 10 | end 11 | 12 | it 'install mysql-connector-java package' do 13 | expect(chef_run).to 
install_package('mysql-connector-java') 14 | end 15 | end 16 | 17 | context 'using PostgreSQL' do 18 | let(:chef_run) do 19 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 20 | node.default['hadoop']['sql_connector'] = 'postgresql' 21 | end.converge(described_recipe) 22 | end 23 | 24 | it 'install postgresql-jdbc package' do 25 | expect(chef_run).to install_package('postgresql-jdbc') 26 | end 27 | end 28 | end 29 | 30 | context 'on Ubuntu 14.04' do 31 | context 'Using MySQL on CDH' do 32 | let(:chef_run) do 33 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 34 | node.default['hadoop']['sql_connector'] = 'mysql' 35 | node.override['hadoop']['distribution'] = 'cdh' 36 | end.converge(described_recipe) 37 | end 38 | 39 | it 'install libmysql-java package' do 40 | expect(chef_run).to install_package('libmysql-java') 41 | end 42 | end 43 | 44 | context 'Using PostgreSQL' do 45 | let(:chef_run) do 46 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 47 | node.default['hadoop']['sql_connector'] = 'postgresql' 48 | end.converge(described_recipe) 49 | end 50 | 51 | it 'install libpostgresql-jdbc-java package' do 52 | expect(chef_run).to install_package('libpostgresql-jdbc-java') 53 | end 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /attributes/storm.rb: -------------------------------------------------------------------------------- 1 | # storm release 2 | default['storm']['release']['install'] = false 3 | default['storm']['release']['install_path'] = '/opt' 4 | default['storm']['release']['version'] = '0.9.5' 5 | default['storm']['release']['checksum'] = 6 | case node['storm']['release']['version'] 7 | when '0.9.5' 8 | '2e8337126de8d1e180abe77fb81af7c971f8c4b2dad94e446ac86c0f02ba3fb2' 9 | when '0.10.0-beta1' 10 | 'e54b400b1e1a012149e74602e2441b355c433f27773e7191172342c8b595467d' 11 | end 12 | 13 | # storm environment 14 | # default['storm']['storm_env']['JAVA_HOME'] = 'home' 15 | default['storm']['storm_env']['STORM_JAR_JVM_OPTS'] = '-Xmx64M -Xms32M' 16 | 17 | # storm configuration 18 | default['storm']['storm_conf']['nimbus.host'] = '127.0.0.1' 19 | default['storm']['storm_conf']['storm.local.dir'] = '/var/lib/storm' 20 | default['storm']['storm_conf']['storm.log.dir'] = '/var/log/storm' 21 | default['storm']['storm_conf']['storm.zookeeper.servers'] = ['127.0.0.1'] 22 | 23 | # Define the amount of workers than can be run on this machine 24 | # default['storm']['storm_conf']['supervisor.slots.ports'] = [ 25 | # 6700, 26 | # 6701, 27 | # 6702, 28 | # 6703 29 | # ] 30 | 31 | ## List of custom serializations 32 | # default['storm']['storm_conf']['topology.kryo.register'] = [ 33 | # 'org.mycompany.MyType', 34 | # { 'org.mycompany.MyType2' => 'org.mycompany.MyType2Serializer' } 35 | # ] 36 | 37 | ## List of custom kryo decorators 38 | # default['storm']['storm_conf']['topology.kryo.decorators'] = ['org.mycompany.MyDecorator'] 39 | 40 | ## Locations of the drpc servers 41 | # default['storm']['storm_conf']['drpc.servers'] = [ 42 | # 'server1', 43 | # 'server2' 44 | # ] 45 | 46 | ## Metrics Consumers 47 | # default['storm']['storm_conf']['topology.metrics.consumer.register'] = [ 48 | # {'class' => 'backtype.storm.metric.LoggingMetricsConsumer', 49 | # 'parallelism.hint' => 1}, 50 | # {'class' => 'org.mycompany.MyMetricsConsumer', 51 | # 'parallelism.hint' => 1, 52 | # 'argument' => [{ 'endpoint' => 'metrics-collector.mycompany.org'}]} 53 | # ] 54 | 
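The defaults above describe a single-node layout with everything pointed at 127.0.0.1. As a rough sketch only (hostnames, ports, and heap sizes below are illustrative and not part of this cookbook), a wrapper cookbook's attributes file might override them for a small cluster like so:

# hypothetical wrapper attributes file, e.g. my_wrapper/attributes/storm.rb
# point supervisors and the UI at the real nimbus host instead of 127.0.0.1
override['storm']['storm_conf']['nimbus.host'] = 'nimbus.example.com'
# reuse the cluster's existing ZooKeeper ensemble
override['storm']['storm_conf']['storm.zookeeper.servers'] = %w(zk1.example.com zk2.example.com zk3.example.com)
# four slot ports allow up to four worker JVMs per supervisor node
override['storm']['storm_conf']['supervisor.slots.ports'] = [6700, 6701, 6702, 6703]
# give the topology-submission JVM more headroom than the 64M default
override['storm']['storm_env']['STORM_JAR_JVM_OPTS'] = '-Xmx256M -Xms64M'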
-------------------------------------------------------------------------------- /spec/unit/recipes/zookeeper_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::zookeeper' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['distribution'] = 'hdp' 9 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 10 | node.default['zookeeper']['master_jaas']['client']['foo'] = 'bar' 11 | stub_command(/update-alternatives --display /).and_return(false) 12 | end.converge(described_recipe) 13 | end 14 | 15 | it 'install zookeeper package' do 16 | expect(chef_run).to install_package('zookeeper_2_3_4_7_4') 17 | end 18 | 19 | it 'creates /etc/zookeeper/conf.chef/master_jaas.conf from template' do 20 | expect(chef_run).to create_template('/etc/zookeeper/conf.chef/master_jaas.conf') 21 | end 22 | end 23 | 24 | context 'on HDP 2.1' do 25 | let(:chef_run) do 26 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 27 | node.automatic['domain'] = 'example.com' 28 | node.default['hadoop']['distribution'] = 'hdp' 29 | node.default['hadoop']['distribution_version'] = '2.1.15.0' 30 | stub_command(/update-alternatives --display /).and_return(false) 31 | end.converge(described_recipe) 32 | end 33 | 34 | it 'install zookeeper package' do 35 | expect(chef_run).to install_package('zookeeper') 36 | end 37 | end 38 | 39 | context 'on Ubuntu 14.04 on CDH 5.6' do 40 | let(:chef_run) do 41 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 42 | node.automatic['domain'] = 'example.com' 43 | node.override['hadoop']['distribution'] = 'cdh' 44 | node.default['hadoop']['distribution_version'] = '5.6.0' 45 | stub_command(/update-alternatives --display /).and_return(false) 46 | end.converge(described_recipe) 47 | end 48 | 49 | it 'install zookeeper package' do 50 | expect(chef_run).to install_package('zookeeper') 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /recipes/hbase_rest.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hbase_rest 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::hbase' 21 | pkg = 'hbase-rest' 22 | 23 | hbase_log_dir = 24 | if node['hbase'].key?('hbase_env') && node['hbase']['hbase_env'].key?('hbase_log_dir') 25 | node['hbase']['hbase_env']['hbase_log_dir'] 26 | else 27 | '/var/log/hbase' 28 | end 29 | 30 | # Create /etc/default configuration 31 | template "/etc/default/#{pkg}" do 32 | source 'generic-env.sh.erb' 33 | mode '0644' 34 | owner 'root' 35 | group 'root' 36 | action :create 37 | variables options: { 38 | 'hbase_home' => "#{hadoop_lib_dir}/hbase", 39 | 'hbase_pid_dir' => '/var/run/hbase', 40 | 'hbase_log_dir' => hbase_log_dir, 41 | 'hbase_ident_string' => 'hbase', 42 | 'hbase_conf_dir' => '/etc/hbase/conf', 43 | } 44 | end 45 | 46 | template "/etc/init.d/#{pkg}" do 47 | source 'hadoop-init.erb' 48 | mode '0755' 49 | owner 'root' 50 | group 'root' 51 | action :create 52 | variables options: { 53 | 'desc' => 'HBase REST Service', 54 | 'name' => pkg, 55 | 'process' => 'java', 56 | 'binary' => "#{hadoop_lib_dir}/hbase/bin/hbase-daemon.sh", 57 | 'args' => '--config ${CONF_DIR} start rest', 58 | 'confdir' => '${HBASE_CONF_DIR}', 59 | 'user' => 'hbase', 60 | 'home' => "#{hadoop_lib_dir}/hbase", 61 | 'pidfile' => "${HBASE_PID_DIR}/hbase-#{pkg}.pid", 62 | 'logfile' => "${HBASE_LOG_DIR}/#{pkg}.log", 63 | } 64 | end 65 | 66 | service pkg do 67 | status_command "service #{pkg} status" 68 | supports [restart: true, reload: false, status: true] 69 | action :nothing 70 | end 71 | -------------------------------------------------------------------------------- /recipes/_sql_connectors.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _sql_connectors 4 | # 5 | # Copyright © 2015-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | pkgs = [] 23 | jars = [] 24 | 25 | # rubocop: disable Metrics/BlockNesting 26 | if node['hadoop'].key?('sql_connector') 27 | case node['hadoop']['sql_connector'] 28 | when 'mysql' 29 | if node['platform_family'] == 'rhel' && node['platform_version'].to_i == 5 30 | Chef::Log.warn('You must download and install JDBC connectors, manually') 31 | pkgs = [] 32 | elsif node['platform_family'] == 'debian' && node['hadoop']['distribution'] != 'hdp' 33 | pkgs = ['libmysql-java'] 34 | jars = ['mysql-connector-java'] 35 | else 36 | pkgs = ['mysql-connector-java'] 37 | jars = pkgs 38 | end 39 | when 'postgresql' 40 | if platform_family?('rhel', 'amazon') 41 | if node['platform_version'].to_i == 5 42 | Chef::Log.warn('You must download and install JDBC connectors, manually') 43 | pkgs = [] 44 | else 45 | pkgs = ['postgresql-jdbc'] 46 | end 47 | jars = pkgs 48 | else # Assume debian 49 | pkgs = ['libpostgresql-jdbc-java'] 50 | jars = ['postgresql-jdbc4'] 51 | end 52 | ### TODO: Oracle support 53 | when 'oracle' 54 | Chef::Log.warn('You must download and install JDBC connectors, manually') 55 | pkgs = [] 56 | jars = pkgs 57 | else 58 | Chef::Log.info('No JDBC driver necessary') 59 | end 60 | end 61 | # rubocop: enable Metrics/BlockNesting 62 | 63 | pkgs.each do |p| 64 | package p do 65 | action :install 66 | end 67 | end 68 | 69 | node.default['hadoop']['sql_jars'] = jars 70 | -------------------------------------------------------------------------------- /recipes/hbase_thrift.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hbase_thrift 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License.
18 | # 19 | 20 | include_recipe 'hadoop::hbase' 21 | pkg = 'hbase-thrift' 22 | 23 | hbase_log_dir = 24 | if node['hbase'].key?('hbase_env') && node['hbase']['hbase_env'].key?('hbase_log_dir') 25 | node['hbase']['hbase_env']['hbase_log_dir'] 26 | else 27 | '/var/log/hbase' 28 | end 29 | 30 | # Create /etc/default configuration 31 | template "/etc/default/#{pkg}" do 32 | source 'generic-env.sh.erb' 33 | mode '0644' 34 | owner 'root' 35 | group 'root' 36 | action :create 37 | variables options: { 38 | 'hbase_home' => "#{hadoop_lib_dir}/hbase", 39 | 'hbase_pid_dir' => '/var/run/hbase', 40 | 'hbase_log_dir' => hbase_log_dir, 41 | 'hbase_ident_string' => 'hbase', 42 | 'hbase_conf_dir' => '/etc/hbase/conf', 43 | 'hbase_thrift_mode' => '-nonblocking', 44 | } 45 | end 46 | 47 | template "/etc/init.d/#{pkg}" do 48 | source 'hadoop-init.erb' 49 | mode '0755' 50 | owner 'root' 51 | group 'root' 52 | action :create 53 | variables options: { 54 | 'desc' => 'HBase Thrift Interface', 55 | 'name' => pkg, 56 | 'process' => 'java', 57 | 'binary' => "#{hadoop_lib_dir}/hbase/bin/hbase-daemon.sh", 58 | 'args' => '--config ${CONF_DIR} start thrift', 59 | 'confdir' => '${HBASE_CONF_DIR}', 60 | 'user' => 'hbase', 61 | 'home' => "#{hadoop_lib_dir}/hbase", 62 | 'pidfile' => "${HBASE_PID_DIR}/hbase-#{pkg}.pid", 63 | 'logfile' => "${HBASE_LOG_DIR}/#{pkg}.log", 64 | } 65 | end 66 | 67 | service pkg do 68 | status_command "service #{pkg} status" 69 | supports [restart: true, reload: false, status: true] 70 | action :nothing 71 | end 72 | -------------------------------------------------------------------------------- /recipes/hive_server.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hive_server 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::hive' 21 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 22 | pkg = 'hive-server' 23 | 24 | hive_log_dir = 25 | if node['hive'].key?('hive_env') && node['hive']['hive_env'].key?('hive_log_dir') 26 | node['hive']['hive_env']['hive_log_dir'] 27 | else 28 | '/var/log/hive' 29 | end 30 | 31 | # Create /etc/default configuration 32 | template "/etc/default/#{pkg}" do 33 | source 'generic-env.sh.erb' 34 | mode '0644' 35 | owner 'root' 36 | group 'root' 37 | action :create 38 | variables options: { 39 | 'hive_home' => "#{hadoop_lib_dir}/hive", 40 | 'hive_pid_dir' => '/var/run/hive', 41 | 'hive_log_dir' => hive_log_dir, 42 | 'hive_ident_string' => 'hive', 43 | 'hive_conf_dir' => '/etc/hive/conf', 44 | } 45 | end 46 | 47 | template "/etc/init.d/#{pkg}" do 48 | source 'hadoop-init.erb' 49 | mode '0755' 50 | owner 'root' 51 | group 'root' 52 | action :create 53 | variables options: { 54 | 'desc' => 'Hive Server', 55 | 'name' => pkg, 56 | 'process' => 'java', 57 | 'binary' => "#{hadoop_lib_dir}/hive/bin/hive", 58 | 'args' => '--config ${CONF_DIR} --service server > ${LOG_FILE} 2>&1 < /dev/null &', 59 | 'confdir' => '${HIVE_CONF_DIR}', 60 | 'user' => 'hive', 61 | 'home' => "#{hadoop_lib_dir}/hive", 62 | 'pidfile' => "${HIVE_PID_DIR}/#{pkg}.pid", 63 | 'logfile' => "${HIVE_LOG_DIR}/#{pkg}.log", 64 | } 65 | end 66 | 67 | service pkg do 68 | status_command "service #{pkg} status" 69 | supports [restart: true, reload: false, status: true] 70 | action :nothing 71 | end 72 | -------------------------------------------------------------------------------- /spec/unit/recipes/hadoop_yarn_nodemanager_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hadoop_yarn_nodemanager' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.default['hadoop']['distribution'] = 'hdp' 8 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 9 | node.automatic['domain'] = 'example.com' 10 | stub_command(/update-alternatives --display /).and_return(false) 11 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 12 | stub_command(/test -L /).and_return(false) 13 | end.converge(described_recipe) 14 | end 15 | pkg = 'hadoop-yarn-nodemanager' 16 | 17 | %W( 18 | /etc/default/#{pkg} 19 | /etc/init.d/#{pkg} 20 | ).each do |file| 21 | it "creates #{file} from template" do 22 | expect(chef_run).to create_template(file) 23 | end 24 | end 25 | 26 | it "creates #{pkg} service resource, but does not run it" do 27 | expect(chef_run.service(pkg)).to do_nothing 28 | end 29 | 30 | it 'ensures /usr/hdp/2.3.4.7-4/hadoop-yarn/bin/container-executor has proper permissions' do 31 | expect(chef_run).to create_file('/usr/hdp/2.3.4.7-4/hadoop-yarn/bin/container-executor').with( 32 | user: 'root', 33 | group: 'yarn', 34 | mode: '6050' 35 | ) 36 | end 37 | 38 | context 'using HDP 2.1.15.0' do 39 | let(:chef_run) do 40 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 41 | node.default['hadoop']['distribution'] = 'hdp' 42 | node.default['hadoop']['distribution_version'] = '2.1.15.0' 43 | node.automatic['domain'] = 'example.com' 44 | stub_command(/update-alternatives --display /).and_return(false) 45 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 46 | stub_command(/test -L /).and_return(false) 47 | 
end.converge(described_recipe) 48 | end 49 | 50 | it 'ensures /usr/lib/hadoop-yarn/bin/container-executor has proper permissions' do 51 | expect(chef_run).to create_file('/usr/lib/hadoop-yarn/bin/container-executor').with( 52 | user: 'root', 53 | group: 'yarn', 54 | mode: '6050' 55 | ) 56 | end 57 | end 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /recipes/hbase_regionserver.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hbase_regionserver 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::hbase' 21 | include_recipe 'hadoop::_hbase_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | pkg = 'hbase-regionserver' 24 | 25 | hbase_log_dir = 26 | if node['hbase'].key?('hbase_env') && node['hbase']['hbase_env'].key?('hbase_log_dir') 27 | node['hbase']['hbase_env']['hbase_log_dir'] 28 | else 29 | '/var/log/hbase' 30 | end 31 | 32 | # Create /etc/default configuration 33 | template "/etc/default/#{pkg}" do 34 | source 'generic-env.sh.erb' 35 | mode '0644' 36 | owner 'root' 37 | group 'root' 38 | action :create 39 | variables options: { 40 | 'hbase_home' => "#{hadoop_lib_dir}/hbase", 41 | 'hbase_pid_dir' => '/var/run/hbase', 42 | 'hbase_log_dir' => hbase_log_dir, 43 | 'hbase_ident_string' => 'hbase', 44 | 'hbase_conf_dir' => '/etc/hbase/conf', 45 | } 46 | end 47 | 48 | template "/etc/init.d/#{pkg}" do 49 | source 'hadoop-init.erb' 50 | mode '0755' 51 | owner 'root' 52 | group 'root' 53 | action :create 54 | variables options: { 55 | 'desc' => 'HBase RegionServer', 56 | 'name' => pkg, 57 | 'process' => 'java', 58 | 'binary' => "#{hadoop_lib_dir}/hbase/bin/hbase-daemon.sh", 59 | 'args' => '--config ${CONF_DIR} start regionserver', 60 | 'confdir' => '${HBASE_CONF_DIR}', 61 | 'user' => 'hbase', 62 | 'home' => "#{hadoop_lib_dir}/hbase", 63 | 'pidfile' => "${HBASE_PID_DIR}/hbase-#{pkg}.pid", 64 | 'logfile' => "${HBASE_LOG_DIR}/#{pkg}.log", 65 | } 66 | end 67 | 68 | service pkg do 69 | status_command "service #{pkg} status" 70 | supports [restart: true, reload: false, status: true] 71 | action :nothing 72 | end 73 | -------------------------------------------------------------------------------- /recipes/hadoop_kms_server.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_kms_server 4 | # 5 | # Copyright © 2015-2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::hadoop_kms' 21 | pkg = 'hadoop-kms-server' 22 | 23 | hadoop_kms_log_dir = 24 | if node['hadoop_kms'].key?('kms_env') && node['hadoop_kms']['kms_env'].key?('kms_log') 25 | node['hadoop_kms']['kms_env']['kms_log'] 26 | else 27 | '/var/log/hadoop-kms' 28 | end 29 | 30 | # Create /etc/default/hadoop-kms-server 31 | template "/etc/default/#{pkg}" do 32 | source 'generic-env.sh.erb' 33 | mode '0755' 34 | owner 'root' 35 | group 'root' 36 | action :create 37 | variables options: { 38 | 'kms_user' => 'kms', 39 | 'kms_config' => '/etc/hadoop-kms/conf', 40 | 'kms_log' => hadoop_kms_log_dir, 41 | 'kms_temp' => '/var/run/hadoop-kms', 42 | 'kms_catalina_home' => '/usr/lib/bigtop-tomcat', 43 | 'catalina_pid' => '/var/run/hadoop-kms/hadoop-kms-kms.pid', 44 | 'catalina_base' => '/var/lib/hadoop-kms/tomcat-deployment', 45 | 'catalina_tmpdir' => '/var/run/hadoop-kms', 46 | } 47 | end 48 | 49 | template "/etc/init.d/#{pkg}" do 50 | source 'hadoop-init.erb' 51 | mode '0755' 52 | owner 'root' 53 | group 'root' 54 | action :create 55 | variables options: { 56 | 'desc' => 'Hadoop Key Management Service Server', 57 | 'name' => pkg, 58 | 'process' => 'java', 59 | 'binary' => "#{hadoop_lib_dir}/hadoop-kms/sbin/kms.sh", 60 | 'args' => '--config ${CONF_DIR} start kms', 61 | 'confdir' => '${KMS_CONFIG}', 62 | 'user' => 'kms', 63 | 'home' => "#{hadoop_lib_dir}/hadoop-kms", 64 | 'pidfile' => '${CATALINA_PID}', 65 | 'logfile' => "${KMS_LOG}/#{pkg}.log", 66 | } 67 | end 68 | 69 | service pkg do 70 | status_command "service #{pkg} status" 71 | supports [restart: true, reload: false, status: true] 72 | action :nothing 73 | end 74 | -------------------------------------------------------------------------------- /recipes/hive_server2.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hive_server 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::hive' 21 | include_recipe 'hadoop::_hive_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | include_recipe 'hadoop::zookeeper' 24 | pkg = 'hive-server2' 25 | 26 | hive_conf_dir = "/etc/hive/#{node['hive']['conf_dir']}" 27 | 28 | write_deprecated_jaas_config('hive') 29 | write_jaas_config('hive') 30 | 31 | hive_log_dir = 32 | if node['hive'].key?('hive_env') && node['hive']['hive_env'].key?('hive_log_dir') 33 | node['hive']['hive_env']['hive_log_dir'] 34 | else 35 | '/var/log/hive' 36 | end 37 | 38 | # Create /etc/default configuration 39 | template "/etc/default/#{pkg}" do 40 | source 'generic-env.sh.erb' 41 | mode '0644' 42 | owner 'root' 43 | group 'root' 44 | action :create 45 | variables options: { 46 | 'hive_home' => "#{hadoop_lib_dir}/hive", 47 | 'hive_pid_dir' => '/var/run/hive', 48 | 'hive_log_dir' => hive_log_dir, 49 | 'hive_ident_string' => 'hive', 50 | 'hive_conf_dir' => hive_conf_dir, 51 | } 52 | end 53 | 54 | template "/etc/init.d/#{pkg}" do 55 | source 'hadoop-init.erb' 56 | mode '0755' 57 | owner 'root' 58 | group 'root' 59 | action :create 60 | variables options: { 61 | 'desc' => 'Hive Server2', 62 | 'name' => pkg, 63 | 'process' => 'java', 64 | 'binary' => "#{hadoop_lib_dir}/hive/bin/hive", 65 | 'args' => '--config ${CONF_DIR} --service hiveserver2 > ${LOG_FILE} 2>&1 < /dev/null &', 66 | 'confdir' => '${HIVE_CONF_DIR}', 67 | 'user' => 'hive', 68 | 'home' => "#{hadoop_lib_dir}/hive", 69 | 'pidfile' => "${HIVE_PID_DIR}/#{pkg}.pid", 70 | 'logfile' => "${HIVE_LOG_DIR}/#{pkg}.log", 71 | } 72 | end 73 | 74 | service pkg do 75 | status_command "service #{pkg} status" 76 | supports [restart: true, reload: false, status: true] 77 | action :nothing 78 | end 79 | -------------------------------------------------------------------------------- /spec/unit/recipes/hbase_master_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hbase_master' do 4 | context 'on CentOS 6.9' do 5 | context 'in distributed mode' do 6 | let(:chef_run) do 7 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 8 | node.automatic['domain'] = 'example.com' 9 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] = '4096' 10 | node.default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase' 11 | node.default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost' 12 | node.default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'true' 13 | stub_command(/test -L /).and_return(false) 14 | stub_command(/update-alternatives --display /).and_return(false) 15 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 16 | end.converge(described_recipe) 17 | end 18 | pkg = 'hbase-master' 19 | 20 | %W( 21 | /etc/default/#{pkg} 22 | /etc/init.d/#{pkg} 23 | ).each do |file| 24 | it "creates #{file} from template" do 25 | expect(chef_run).to create_template(file) 26 | end 27 | end 28 | 29 | it "creates #{pkg} service resource, but does not run it" do 30 | expect(chef_run.service(pkg)).to do_nothing 31 | end 32 | 33 | %w( 34 | hbase-bulkload-stagingdir 35 | hbase-hdfs-rootdir 36 | ).each do |dir| 37 | it "creates #{dir} execute resource, but does not run it" do 38 | expect(chef_run.execute(dir)).to do_nothing 39 | end 40 | end 41 | end 42 | 43 | context 'in local mode' do 44 | let(:chef_run) do 45 | ChefSpec::SoloRunner.new(platform: 'centos', 
version: 6.9) do |node| 46 | node.automatic['domain'] = 'example.com' 47 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] = '4096' 48 | node.override['hbase']['hbase_site']['hbase.rootdir'] = 'file:///tmp/hbase' 49 | node.default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost' 50 | node.default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'false' 51 | stub_command(/test -L /).and_return(false) 52 | stub_command(/update-alternatives --display /).and_return(false) 53 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 54 | end.converge(described_recipe) 55 | end 56 | 57 | it 'creates hbase.rootdir directory' do 58 | expect(chef_run).to create_directory('/tmp/hbase') 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /recipes/spark_master.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: spark_master 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::spark' 21 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 22 | pkg = 'spark-master' 23 | 24 | spark_log_dir = 25 | if node['spark'].key?('spark_env') && node['spark']['spark_env'].key?('spark_log_dir') 26 | node['spark']['spark_env']['spark_log_dir'] 27 | else 28 | '/var/log/spark' 29 | end 30 | 31 | eventlog_dir = 32 | if node['spark']['spark_defaults'].key?('spark.eventLog.dir') 33 | node['spark']['spark_defaults']['spark.eventLog.dir'] 34 | else 35 | '/user/spark/applicationHistory' 36 | end 37 | 38 | # Create /etc/default configuration 39 | template "/etc/default/#{pkg}" do 40 | source 'generic-env.sh.erb' 41 | mode '0644' 42 | owner 'root' 43 | group 'root' 44 | action :create 45 | variables options: { 46 | 'spark_home' => "#{hadoop_lib_dir}/spark", 47 | 'spark_pid_dir' => '/var/run/spark', 48 | 'spark_log_dir' => spark_log_dir, 49 | 'spark_ident_string' => 'spark', 50 | 'spark_history_server_log_dir' => eventlog_dir, 51 | 'spark_history_opts' => '$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=${SPARK_HISTORY_SERVER_LOG_DIR}', 52 | 'spark_conf_dir' => '/etc/spark/conf', 53 | } 54 | end 55 | 56 | template "/etc/init.d/#{pkg}" do 57 | source 'hadoop-init.erb' 58 | mode '0755' 59 | owner 'root' 60 | group 'root' 61 | action :create 62 | variables options: { 63 | 'desc' => 'Spark Master', 64 | 'name' => pkg, 65 | 'process' => 'java', 66 | 'binary' => "#{hadoop_lib_dir}/spark/bin/spark-class", 67 | 'confdir' => '${SPARK_CONF_DIR}', 68 | 'args' => 'org.apache.spark.deploy.master.Master > ${LOG_FILE} 2>&1 < /dev/null &', 69 | 'user' => 'spark', 70 | 'home' => "#{hadoop_lib_dir}/spark", 71 | 'pidfile' => "${SPARK_PID_DIR}/#{pkg}.pid", 72 | 'logfile' => "${SPARK_LOG_DIR}/#{pkg}.log", 73 | } 74 | end 75 | 76 | service pkg do 77 | status_command "service 
#{pkg} status" 78 | supports [restart: true, reload: false, status: true] 79 | action :nothing 80 | end 81 | -------------------------------------------------------------------------------- /attributes/zookeeper.rb: -------------------------------------------------------------------------------- 1 | # Minimal configuration: http://zookeeper.apache.org/doc/r3.4.5/zookeeperStarted.html#sc_InstallingSingleMode 2 | default['zookeeper']['zoocfg']['clientPort'] = '2181' 3 | default['zookeeper']['zoocfg']['dataDir'] = '/var/lib/zookeeper' 4 | 5 | # default log4j configuration parameters from the distribution - note that some of these are overridden in /usr/bin/zookeeper-server 6 | default['zookeeper']['log4j']['zookeeper.root.logger'] = 'INFO, CONSOLE' 7 | default['zookeeper']['log4j']['zookeeper.console.threshold'] = 'INFO' 8 | default['zookeeper']['log4j']['zookeeper.log.dir'] = '.' 9 | default['zookeeper']['log4j']['zookeeper.log.file'] = 'zookeeper.log' 10 | default['zookeeper']['log4j']['zookeeper.log.threshold'] = 'DEBUG' 11 | default['zookeeper']['log4j']['zookeeper.tracelog.dir'] = '.' 12 | default['zookeeper']['log4j']['zookeeper.tracelog.file'] = 'zookeeper_trace.log' 13 | default['zookeeper']['log4j']['log4j.rootLogger'] = '${zookeeper.root.logger}' 14 | default['zookeeper']['log4j']['log4j.appender.CONSOLE'] = 'org.apache.log4j.ConsoleAppender' 15 | default['zookeeper']['log4j']['log4j.appender.CONSOLE.Threshold'] = '${zookeeper.console.threshold}' 16 | default['zookeeper']['log4j']['log4j.appender.CONSOLE.layout'] = 'org.apache.log4j.PatternLayout' 17 | default['zookeeper']['log4j']['log4j.appender.CONSOLE.layout.ConversionPattern'] = '%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n' 18 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE'] = 'org.apache.log4j.RollingFileAppender' 19 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.Threshold'] = '${zookeeper.log.threshold}' 20 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.File'] = '${zookeeper.log.dir}/${zookeeper.log.file}' 21 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.MaxFileSize'] = '10MB' 22 | # Uncomment the following to set a maximum number of logs to keep 23 | # default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.MaxBackupIndex'] = '10' 24 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.layout'] = 'org.apache.log4j.PatternLayout' 25 | default['zookeeper']['log4j']['log4j.appender.ROLLINGFILE.layout.ConversionPattern'] = '%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n' 26 | default['zookeeper']['log4j']['log4j.appender.TRACEFILE'] = 'org.apache.log4j.FileAppender' 27 | default['zookeeper']['log4j']['log4j.appender.TRACEFILE.Threshold'] = 'TRACE' 28 | default['zookeeper']['log4j']['log4j.appender.TRACEFILE.File'] = '${zookeeper.tracelog.dir}/${zookeeper.tracelog.file}' 29 | default['zookeeper']['log4j']['log4j.appender.TRACEFILE.layout'] = 'org.apache.log4j.PatternLayout' 30 | default['zookeeper']['log4j']['log4j.appender.TRACEFILE.layout.ConversionPattern'] = '%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n' 31 | -------------------------------------------------------------------------------- /recipes/storm_ui.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: storm_ui 4 | # 5 | # Copyright © 2015 VAHNA 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | require 'yaml' 20 | 21 | include_recipe 'hadoop::storm' 22 | 23 | pkg = 'storm-ui' 24 | 25 | storm_conf_dir = "/etc/storm/#{node['storm']['conf_dir']}" 26 | 27 | storm_log_dir = 28 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_log_dir') 29 | node['storm']['storm_env']['storm_log_dir'] 30 | else 31 | '/var/log/storm' 32 | end 33 | 34 | storm_home_dir = 35 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_home') 36 | node['storm']['storm_env']['storm_home'] 37 | else 38 | "#{hadoop_lib_dir}/storm" 39 | end 40 | 41 | template "/etc/init.d/#{pkg}" do 42 | source 'hadoop-init.erb' 43 | mode '0755' 44 | owner 'root' 45 | group 'root' 46 | action :create 47 | variables options: { 48 | 'desc' => 'Storm UI Server', 49 | 'name' => pkg, 50 | 'process' => 'java', 51 | 'binary' => "#{storm_home_dir}/bin/storm", 52 | 'confdir' => storm_conf_dir, 53 | 'args' => 'ui &', 54 | 'user' => 'storm', 55 | 'home' => storm_home_dir, 56 | 'pidfile' => "${STORM_PID_DIR}/#{pkg}.pid", 57 | 'logfile' => "${STORM_LOG_DIR}/#{pkg}.log", 58 | } 59 | end 60 | 61 | # Start storm-env.sh 62 | template "#{storm_conf_dir}/#{pkg}-env.sh" do 63 | source 'generic-env.sh.erb' 64 | mode '0755' 65 | owner 'root' 66 | group 'root' 67 | action :create 68 | variables options: node['storm']['storm_env'] 69 | only_if { node['storm'].key?('storm_env') && !node['storm']['storm_env'].empty? } 70 | end # End storm-env.sh 71 | 72 | # Create /etc/default configuration 73 | template "/etc/default/#{pkg}" do 74 | source 'generic-env.sh.erb' 75 | mode '0644' 76 | owner 'root' 77 | group 'root' 78 | action :create 79 | variables options: { 80 | 'storm_home' => storm_home_dir, 81 | 'storm_pid_dir' => '/var/run/storm', 82 | 'storm_log_dir' => storm_log_dir, 83 | 'storm_conf_dir' => storm_conf_dir, 84 | } 85 | end 86 | 87 | service pkg do 88 | status_command "service #{pkg} status" 89 | supports [restart: true, reload: false, status: true] 90 | action :nothing 91 | end 92 | -------------------------------------------------------------------------------- /recipes/storm_nimbus.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: storm_nimbus 4 | # 5 | # Copyright © 2015 VAHNA 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License.
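#
# For illustration only (hypothetical values): the storm-nimbus-env.sh template
# below is rendered only when node['storm']['storm_env'] is non-empty, e.g.
#   default['storm']['storm_env']['storm_log_dir'] = '/data/log/storm'
#   default['storm']['storm_env']['storm_home'] = '/opt/storm'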
18 | # 19 | require 'yaml' 20 | 21 | include_recipe 'hadoop::storm' 22 | 23 | pkg = 'storm-nimbus' 24 | 25 | storm_conf_dir = "/etc/storm/#{node['storm']['conf_dir']}" 26 | 27 | storm_log_dir = 28 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_log_dir') 29 | node['storm']['storm_env']['storm_log_dir'] 30 | else 31 | '/var/log/storm' 32 | end 33 | 34 | storm_home_dir = 35 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_home') 36 | node['storm']['storm_env']['storm_home'] 37 | else 38 | "#{hadoop_lib_dir}/storm" 39 | end 40 | 41 | template "/etc/init.d/#{pkg}" do 42 | source 'hadoop-init.erb' 43 | mode '0755' 44 | owner 'root' 45 | group 'root' 46 | action :create 47 | variables options: { 48 | 'desc' => 'Storm Nimbus Server', 49 | 'name' => pkg, 50 | 'process' => 'java', 51 | 'binary' => "#{storm_home_dir}/bin/storm", 52 | 'confdir' => storm_conf_dir, 53 | 'args' => 'nimbus &', 54 | 'user' => 'storm', 55 | 'home' => storm_home_dir, 56 | 'pidfile' => "${STORM_PID_DIR}/#{pkg}.pid", 57 | 'logfile' => "${STORM_LOG_DIR}/#{pkg}.log", 58 | } 59 | end 60 | 61 | # Start storm-env.sh 62 | template "#{storm_conf_dir}/#{pkg}-env.sh" do 63 | source 'generic-env.sh.erb' 64 | mode '0755' 65 | owner 'root' 66 | group 'root' 67 | action :create 68 | variables options: node['storm']['storm_env'] 69 | only_if { node['storm'].key?('storm_env') && !node['storm']['storm_env'].empty? } 70 | end # End storm-env.sh 71 | 72 | # Create /etc/default configuration 73 | template "/etc/default/#{pkg}" do 74 | source 'generic-env.sh.erb' 75 | mode '0644' 76 | owner 'root' 77 | group 'root' 78 | action :create 79 | variables options: { 80 | 'storm_home' => storm_home_dir, 81 | 'storm_pid_dir' => '/var/run/storm', 82 | 'storm_log_dir' => storm_log_dir, 83 | 'storm_conf_dir' => storm_conf_dir, 84 | } 85 | end 86 | 87 | service pkg do 88 | status_command "service #{pkg} status" 89 | supports [restart: true, reload: false, status: true] 90 | action :nothing 91 | end 92 | -------------------------------------------------------------------------------- /recipes/hadoop_yarn_proxyserver.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_yarn_proxyserver 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | 22 | if node['hadoop'].key?('yarn_site') && node['hadoop']['yarn_site'].key?('yarn.web-proxy.address') 23 | Chef::Log.info("Setting up YARN Web Proxy at #{node['hadoop']['yarn_site']['yarn.web-proxy.address']}") 24 | else 25 | Chef::Application.fatal!("YARN Web Proxy must be configured! 
Set default['hadoop']['yarn_site']['yarn.web-proxy.address']!") 26 | end 27 | pkg = 'hadoop-yarn-proxyserver' 28 | 29 | yarn_log_dir = 30 | if node['hadoop'].key?('yarn_env') && node['hadoop']['yarn_env'].key?('yarn_log_dir') 31 | node['hadoop']['yarn_env']['yarn_log_dir'] 32 | elsif hdp22? || iop? 33 | '/var/log/hadoop/yarn' 34 | else 35 | '/var/log/hadoop-yarn' 36 | end 37 | 38 | yarn_pid_dir = 39 | if hdp22? || iop? 40 | '/var/run/hadoop/yarn' 41 | else 42 | '/var/run/hadoop-yarn' 43 | end 44 | 45 | # Create /etc/default configuration 46 | template "/etc/default/#{pkg}" do 47 | source 'generic-env.sh.erb' 48 | mode '0644' 49 | owner 'root' 50 | group 'root' 51 | action :create 52 | variables options: { 53 | 'yarn_pid_dir' => yarn_pid_dir, 54 | 'yarn_log_dir' => yarn_log_dir, 55 | 'yarn_ident_string' => 'yarn', 56 | 'yarn_conf_dir' => '/etc/hadoop/conf', 57 | } 58 | end 59 | 60 | template "/etc/init.d/#{pkg}" do 61 | source 'hadoop-init.erb' 62 | mode '0755' 63 | owner 'root' 64 | group 'root' 65 | action :create 66 | variables options: { 67 | 'desc' => 'Hadoop YARN Proxy Server', 68 | 'name' => pkg, 69 | 'process' => 'java', 70 | 'binary' => "#{hadoop_lib_dir}/hadoop-yarn/sbin/yarn-daemon.sh", 71 | 'args' => '--config ${CONF_DIR} start proxyserver', 72 | 'confdir' => '${HADOOP_CONF_DIR}', 73 | 'user' => 'yarn', 74 | 'home' => "#{hadoop_lib_dir}/hadoop", 75 | 'pidfile' => '${YARN_PID_DIR}/yarn-yarn-proxyserver.pid', 76 | 'logfile' => "${YARN_LOG_DIR}/#{pkg}.log", 77 | } 78 | end 79 | 80 | service pkg do 81 | status_command "service #{pkg} status" 82 | supports [restart: true, reload: false, status: true] 83 | action :nothing 84 | end 85 | -------------------------------------------------------------------------------- /recipes/storm_supervisor.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: storm_supervisor 4 | # 5 | # Copyright © 2015 VAHNA 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License.
18 | # 19 | require 'yaml' 20 | 21 | include_recipe 'hadoop::storm' 22 | 23 | pkg = 'storm-supervisor' 24 | 25 | storm_conf_dir = "/etc/storm/#{node['storm']['conf_dir']}" 26 | 27 | storm_log_dir = 28 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_log_dir') 29 | node['storm']['storm_env']['storm_log_dir'] 30 | else 31 | '/var/log/storm' 32 | end 33 | 34 | storm_home_dir = 35 | if node['storm'].key?('storm_env') && node['storm']['storm_env'].key?('storm_home') 36 | node['storm']['storm_env']['storm_home'] 37 | else 38 | "#{hadoop_lib_dir}/storm" 39 | end 40 | 41 | template "/etc/init.d/#{pkg}" do 42 | source 'hadoop-init.erb' 43 | mode '0755' 44 | owner 'root' 45 | group 'root' 46 | action :create 47 | variables options: { 48 | 'desc' => 'Storm Supervisor Server', 49 | 'name' => pkg, 50 | 'process' => 'java', 51 | 'binary' => "#{storm_home_dir}/bin/storm", 52 | 'confdir' => storm_conf_dir, 53 | 'args' => 'supervisor &', 54 | 'user' => 'storm', 55 | 'home' => storm_home_dir, 56 | 'pidfile' => "${STORM_PID_DIR}/#{pkg}.pid", 57 | 'logfile' => "${STORM_LOG_DIR}/#{pkg}.log", 58 | } 59 | end 60 | 61 | # Start storm-env.sh 62 | template "#{storm_conf_dir}/#{pkg}-env.sh" do 63 | source 'generic-env.sh.erb' 64 | mode '0755' 65 | owner 'root' 66 | group 'root' 67 | action :create 68 | variables options: node['storm']['storm_env'] 69 | only_if { node['storm'].key?('storm_env') && !node['storm']['storm_env'].empty? } 70 | end # End storm-env.sh 71 | 72 | # Create /etc/default configuration 73 | template "/etc/default/#{pkg}" do 74 | source 'generic-env.sh.erb' 75 | mode '0644' 76 | owner 'root' 77 | group 'root' 78 | action :create 79 | variables options: { 80 | 'storm_home' => storm_home_dir, 81 | 'storm_pid_dir' => '/var/run/storm', 82 | 'storm_log_dir' => storm_log_dir, 83 | 'storm_conf_dir' => storm_conf_dir, 84 | } 85 | end 86 | 87 | service pkg do 88 | status_command "service #{pkg} status" 89 | supports [restart: true, reload: false, status: true] 90 | action :nothing 91 | end 92 | -------------------------------------------------------------------------------- /recipes/_hbase_checkconfig.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: _hbase_checkconfig 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License.
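#
# For illustration (values borrowed from this cookbook's hbase_master spec), a
# minimal attribute set that satisfies the checks below:
#   default['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] = '4096'
#   default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase'
#   default['hbase']['hbase_site']['hbase.zookeeper.quorum'] = 'localhost'
#   default['hbase']['hbase_site']['hbase.cluster.distributed'] = 'true'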
18 | # 19 | 20 | # We need dfs.datanode.max.transfer.threads >= 4096 21 | # http://hbase.apache.org/book/configuration.html#hadoop 22 | if node['hadoop'].key?('hdfs_site') && node['hadoop']['hdfs_site'].key?('dfs.datanode.max.transfer.threads') && 23 | node['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'].to_i >= 4096 24 | Chef::Log.info("Set dfs.datanode.max.transfer.threads to #{node['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads']}") 25 | elsif node['hadoop'].key?('hdfs_site') && node['hadoop']['hdfs_site'].key?('dfs.datanode.max.xcievers') && 26 | node['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'].to_i >= 4096 27 | Chef::Log.info("Set dfs.datanode.max.transfer.threads to #{node['hadoop']['hdfs_site']['dfs.datanode.max.xcievers']}") 28 | Chef::Log.warn('dfs.datanode.max.xcievers is deprecated, use dfs.datanode.max.transfer.threads, instead') 29 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] = node['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'] 30 | else 31 | Chef::Application.fatal!("You *must* set node['hadoop']['hdfs_site']['dfs.datanode.max.transfer.threads'] >= 4096 for HBase") 32 | end 33 | 34 | # HBase needs hbase.rootdir and hbase.zookeeper.quorum in distributed mode 35 | if node['hbase'].key?('hbase_site') && node['hbase']['hbase_site'].key?('hbase.cluster.distributed') && 36 | node['hbase']['hbase_site']['hbase.cluster.distributed'].to_s == 'true' 37 | if node['hbase'].key?('hbase_site') && node['hbase']['hbase_site'].key?('hbase.rootdir') && node['hbase']['hbase_site'].key?('hbase.zookeeper.quorum') 38 | Chef::Log.info("HBase root: #{node['hbase']['hbase_site']['hbase.rootdir']}") 39 | Chef::Log.info("HBase ZooKeeper Quorum: #{node['hbase']['hbase_site']['hbase.zookeeper.quorum']}") 40 | else 41 | Chef::Application.fatal!("You *must* set node['hbase']['hbase_site']['hbase.rootdir'] and node['hbase']['hbase_site']['hbase.zookeeper.quorum'] in distributed mode") 42 | end 43 | end 44 | 45 | # If using JAAS, make sure it's configured fully 46 | check_deprecated_jaas_config('hbase') 47 | check_jaas_config('hbase') 48 | -------------------------------------------------------------------------------- /recipes/hadoop_hdfs_zkfc.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_hdfs_zkfc 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | include_recipe 'hadoop::_hadoop_hdfs_ha_checkconfig' 22 | include_recipe 'hadoop::zookeeper' 23 | pkg = 'hadoop-hdfs-zkfc' 24 | 25 | hadoop_log_dir = 26 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_log_dir') 27 | node['hadoop']['hadoop_env']['hadoop_log_dir'] 28 | elsif hdp22? || iop? 29 | '/var/log/hadoop/hdfs' 30 | else 31 | '/var/log/hadoop-hdfs' 32 | end 33 | 34 | hadoop_pid_dir = 35 | if hdp22? || iop? 
36 | '/var/run/hadoop/hdfs' 37 | else 38 | '/var/run/hadoop-hdfs' 39 | end 40 | 41 | # Create /etc/default configuration 42 | template "/etc/default/#{pkg}" do 43 | source 'generic-env.sh.erb' 44 | mode '0644' 45 | owner 'root' 46 | group 'root' 47 | action :create 48 | variables options: { 49 | 'hadoop_pid_dir' => hadoop_pid_dir, 50 | 'hadoop_log_dir' => hadoop_log_dir, 51 | 'hadoop_namenode_user' => 'hdfs', 52 | 'hadoop_secondarynamenode_user' => 'hdfs', 53 | 'hadoop_datanode_user' => 'hdfs', 54 | 'hadoop_ident_string' => 'hdfs', 55 | 'hadoop_privileged_nfs_user' => 'hdfs', 56 | 'hadoop_privileged_nfs_pid_dir' => hadoop_pid_dir, 57 | 'hadoop_privileged_nfs_log_dir' => hadoop_log_dir, 58 | 'hadoop_secure_dn_user' => 'hdfs', 59 | 'hadoop_secure_dn_pid_dir' => hadoop_pid_dir, 60 | 'hadoop_secure_dn_log_dir' => hadoop_log_dir, 61 | } 62 | end 63 | 64 | template "/etc/init.d/#{pkg}" do 65 | source 'hadoop-init.erb' 66 | mode '0755' 67 | owner 'root' 68 | group 'root' 69 | action :create 70 | variables options: { 71 | 'desc' => 'Hadoop HDFS ZooKeeper Failover Controller', 72 | 'name' => pkg, 73 | 'process' => 'java', 74 | 'binary' => "#{hadoop_lib_dir}/hadoop/sbin/hadoop-daemon.sh", 75 | 'args' => '--config ${CONF_DIR} start zkfc', 76 | 'confdir' => '${HADOOP_CONF_DIR}', 77 | 'user' => 'hdfs', 78 | 'home' => "#{hadoop_lib_dir}/hadoop", 79 | 'pidfile' => "${HADOOP_PID_DIR}/#{pkg}.pid", 80 | 'logfile' => "${HADOOP_LOG_DIR}/#{pkg}.log", 81 | } 82 | end 83 | 84 | service pkg do 85 | status_command "service #{pkg} status" 86 | supports [restart: true, reload: false, status: true] 87 | action :nothing 88 | end 89 | -------------------------------------------------------------------------------- /recipes/spark_worker.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: spark_worker 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::spark' 21 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 22 | pkg = 'spark-worker' 23 | 24 | worker_dir = 25 | if node['spark'].key?('spark_env') && node['spark']['spark_env'].key?('spark_worker_dir') 26 | node['spark']['spark_env']['spark_worker_dir'] 27 | else 28 | '/var/run/spark/work' 29 | end 30 | 31 | directory worker_dir do 32 | mode '0755' 33 | owner 'spark' 34 | group 'spark' 35 | recursive true 36 | action :create 37 | end 38 | 39 | spark_log_dir = 40 | if node['spark'].key?('spark_env') && node['spark']['spark_env'].key?('spark_log_dir') 41 | node['spark']['spark_env']['spark_log_dir'] 42 | else 43 | '/var/log/spark' 44 | end 45 | 46 | eventlog_dir = 47 | if node['spark']['spark_defaults'].key?('spark.eventLog.dir') 48 | node['spark']['spark_defaults']['spark.eventLog.dir'] 49 | else 50 | '/user/spark/applicationHistory' 51 | end 52 | 53 | # Create /etc/default configuration 54 | template "/etc/default/#{pkg}" do 55 | source 'generic-env.sh.erb' 56 | mode '0644' 57 | owner 'root' 58 | group 'root' 59 | action :create 60 | variables options: { 61 | 'spark_home' => "#{hadoop_lib_dir}/spark", 62 | 'spark_pid_dir' => '/var/run/spark', 63 | 'spark_log_dir' => spark_log_dir, 64 | 'spark_ident_string' => 'spark', 65 | 'spark_history_server_log_dir' => eventlog_dir, 66 | 'spark_history_opts' => '$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=${SPARK_HISTORY_SERVER_LOG_DIR}', 67 | 'spark_conf_dir' => '/etc/spark/conf', 68 | } 69 | end 70 | 71 | template "/etc/init.d/#{pkg}" do 72 | source 'hadoop-init.erb' 73 | mode '0755' 74 | owner 'root' 75 | group 'root' 76 | action :create 77 | variables options: { 78 | 'desc' => 'Spark Worker', 79 | 'name' => pkg, 80 | 'process' => 'java', 81 | 'binary' => "#{hadoop_lib_dir}/spark/bin/spark-class", 82 | 'args' => 'org.apache.spark.deploy.worker.Worker spark://${STANDALONE_SPARK_MASTER_HOST}:${SPARK_MASTER_PORT} > ${LOG_FILE} 2>&1 < /dev/null &', 83 | 'confdir' => '${SPARK_CONF_DIR}', 84 | 'user' => 'spark', 85 | 'home' => "#{hadoop_lib_dir}/spark", 86 | 'pidfile' => "${SPARK_PID_DIR}/#{pkg}.pid", 87 | 'logfile' => "${SPARK_LOG_DIR}/#{pkg}.log", 88 | } 89 | end 90 | 91 | service pkg do 92 | status_command "service #{pkg} status" 93 | supports [restart: true, reload: false, status: true] 94 | action :nothing 95 | end 96 | -------------------------------------------------------------------------------- /recipes/spark_historyserver.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: spark_historyserver 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
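#
# For illustration only (hypothetical value): spark.eventLog.dir defaults to
# hdfs:///user/spark/applicationHistory below; overriding it also changes the
# directory prepared by the hdfs-spark-eventlog-dir execute resource, e.g.
#   default['spark']['spark_defaults']['spark.eventLog.dir'] = 'hdfs:///apps/spark/history'
# Both execute resources are registered with action :nothing and are expected
# to be triggered by notifications elsewhere.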
18 | # 19 | 20 | include_recipe 'hadoop::spark' 21 | pkg = 'spark-history-server' 22 | 23 | dfs = node['hadoop']['core_site']['fs.defaultFS'] 24 | 25 | execute 'hdfs-spark-userdir' do 26 | command "hdfs dfs -mkdir -p #{dfs}/user/spark && hdfs dfs -chown -R spark:spark #{dfs}/user/spark" 27 | user 'hdfs' 28 | group 'hdfs' 29 | timeout 300 30 | action :nothing 31 | end 32 | 33 | eventlog_dir = 34 | if node['spark']['spark_defaults'].key?('spark.eventLog.dir') 35 | node['spark']['spark_defaults']['spark.eventLog.dir'] 36 | else 37 | 'hdfs:///user/spark/applicationHistory' 38 | end 39 | 40 | execute 'hdfs-spark-eventlog-dir' do 41 | command "hdfs dfs -mkdir -p #{eventlog_dir} && hdfs dfs -chown -R spark:spark #{eventlog_dir} && hdfs dfs -chmod 1777 #{eventlog_dir}" 42 | user 'hdfs' 43 | group 'hdfs' 44 | timeout 300 45 | action :nothing 46 | end 47 | 48 | spark_log_dir = 49 | if node['spark'].key?('spark_env') && node['spark']['spark_env'].key?('spark_log_dir') 50 | node['spark']['spark_env']['spark_log_dir'] 51 | else 52 | '/var/log/spark' 53 | end 54 | 55 | # Create /etc/default configuration 56 | template "/etc/default/#{pkg}" do 57 | source 'generic-env.sh.erb' 58 | mode '0644' 59 | owner 'root' 60 | group 'root' 61 | action :create 62 | variables options: { 63 | 'spark_home' => "#{hadoop_lib_dir}/spark", 64 | 'spark_pid_dir' => '/var/run/spark', 65 | 'spark_log_dir' => spark_log_dir, 66 | 'spark_ident_string' => 'spark', 67 | 'spark_history_server_log_dir' => eventlog_dir, 68 | 'spark_history_opts' => '$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=${SPARK_HISTORY_SERVER_LOG_DIR}', 69 | 'spark_conf_dir' => '/etc/spark/conf', 70 | } 71 | end 72 | 73 | template "/etc/init.d/#{pkg}" do 74 | source 'hadoop-init.erb' 75 | mode '0755' 76 | owner 'root' 77 | group 'root' 78 | action :create 79 | variables options: { 80 | 'desc' => 'Spark History Server', 81 | 'name' => pkg, 82 | 'process' => 'java', 83 | 'binary' => "#{hadoop_lib_dir}/spark/bin/spark-class", 84 | 'args' => 'org.apache.spark.deploy.history.HistoryServer > ${LOG_FILE} 2>&1 < /dev/null &', 85 | 'confdir' => '${SPARK_CONF_DIR}', 86 | 'user' => 'spark', 87 | 'home' => "#{hadoop_lib_dir}/spark", 88 | 'pidfile' => "${SPARK_PID_DIR}/#{pkg}.pid", 89 | 'logfile' => "${SPARK_LOG_DIR}/#{pkg}.log", 90 | } 91 | end 92 | 93 | service pkg do 94 | status_command "service #{pkg} status" 95 | supports [restart: true, reload: false, status: true] 96 | action :nothing 97 | end 98 | -------------------------------------------------------------------------------- /spec/unit/recipes/repo_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::repo' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | end.converge(described_recipe) 9 | end 10 | 11 | %w(Updates-HDP-2.x HDP-UTILS-1.1.0.21).each do |repo| 12 | it "add #{repo} yum_repository" do 13 | expect(chef_run).to add_yum_repository(repo) 14 | end 15 | end 16 | end 17 | 18 | context 'using HDP GA release' do 19 | let(:chef_run) do 20 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 21 | node.automatic['domain'] = 'example.com' 22 | node.override['hadoop']['distribution_version'] = '2.2.0.0' 23 | end.converge(described_recipe) 24 | end 25 | 26 | %w(HDP-2.x HDP-UTILS-1.1.0.21).each do |repo| 27 | it "add #{repo} yum_repository" do 28 | expect(chef_run).to 
add_yum_repository(repo) 29 | end 30 | end 31 | end 32 | 33 | context 'using CDH 5' do 34 | let(:chef_run) do 35 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 36 | node.automatic['domain'] = 'example.com' 37 | node.override['hadoop']['distribution'] = 'cdh' 38 | node.override['hadoop']['distribution_version'] = '5.4.2' 39 | end.converge(described_recipe) 40 | end 41 | 42 | it 'adds cloudera-cdh5 yum_repository' do 43 | expect(chef_run).to add_yum_repository('cloudera-cdh5') 44 | end 45 | end 46 | 47 | context 'using IOP' do 48 | let(:chef_run) do 49 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 50 | node.automatic['domain'] = 'example.com' 51 | node.override['hadoop']['distribution'] = 'iop' 52 | node.override['hadoop']['distribution_version'] = '4.1.0.0' 53 | end.converge(described_recipe) 54 | end 55 | 56 | %w(IOP-4.1.x IOP-UTILS-1.2.0.0).each do |repo| 57 | it "add #{repo} yum_repository" do 58 | expect(chef_run).to add_yum_repository(repo) 59 | end 60 | end 61 | end 62 | 63 | context 'on Ubuntu 14.04' do 64 | let(:chef_run) do 65 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 66 | node.automatic['domain'] = 'example.com' 67 | end.converge(described_recipe) 68 | end 69 | 70 | %w(hdp hdp-utils).each do |repo| 71 | it "add #{repo} apt_repository" do 72 | expect(chef_run).to add_apt_repository(repo) 73 | end 74 | end 75 | 76 | it 'add hdp apt_preference' do 77 | expect(chef_run).to add_apt_preference('hdp') 78 | end 79 | end 80 | 81 | context 'using CDH 5' do 82 | let(:chef_run) do 83 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 84 | node.automatic['domain'] = 'example.com' 85 | node.override['hadoop']['distribution'] = 'cdh' 86 | end.converge(described_recipe) 87 | end 88 | 89 | it 'adds cloudera-cdh5 apt_repository' do 90 | expect(chef_run).to add_apt_repository('cloudera-cdh5') 91 | end 92 | 93 | it 'add cloudera-cdh5 apt_preference' do 94 | expect(chef_run).to add_apt_preference('cloudera-cdh5') 95 | end 96 | end 97 | end 98 | -------------------------------------------------------------------------------- /spec/unit/recipes/hbase_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hbase' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'] = '4096' 9 | node.default['hbase']['hadoop_metrics']['foo'] = 'bar' 10 | node.default['hbase']['hbase_site']['hbase.rootdir'] = 'hdfs://localhost:8020/hbase' 11 | node.default['hbase']['hbase_env']['hbase_log_dir'] = '/data/log/hbase' 12 | node.default['hbase']['log4j']['log4j.threshold'] = 'ALL' 13 | node.default['hadoop']['distribution'] = 'hdp' 14 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 15 | stub_command(/test -L /).and_return(false) 16 | stub_command(/update-alternatives --display /).and_return(false) 17 | end.converge(described_recipe) 18 | end 19 | 20 | it 'installs hbase package' do 21 | expect(chef_run).to install_package('hbase_2_3_4_7_4') 22 | end 23 | 24 | it 'creates hbase conf_dir' do 25 | expect(chef_run).to create_directory('/etc/hbase/conf.chef').with( 26 | user: 'root', 27 | group: 'root' 28 | ) 29 | end 30 | 31 | %w( 32 | hadoop-metrics.properties 33 | hbase-env.sh 34 | hbase-policy.xml 35 | hbase-site.xml 36 | log4j.properties 37 | 
).each do |file| 38 | it "creates #{file} from template" do 39 | expect(chef_run).to create_template("/etc/hbase/conf.chef/#{file}") 40 | end 41 | end 42 | 43 | it 'creates /etc/default/hbase from template' do 44 | expect(chef_run).to create_template('/etc/default/hbase') 45 | end 46 | 47 | it 'creates hbase HBASE_LOG_DIR' do 48 | expect(chef_run).to create_directory('/data/log/hbase').with( 49 | mode: '0755', 50 | user: 'hbase', 51 | group: 'hbase' 52 | ) 53 | end 54 | 55 | it 'deletes /var/log/hbase' do 56 | expect(chef_run).to delete_directory('/var/log/hbase') 57 | end 58 | 59 | it 'creates /var/log/hbase symlink' do 60 | link = chef_run.link('/var/log/hbase') 61 | expect(link).to link_to('/data/log/hbase') 62 | end 63 | 64 | it 'sets hbase limits' do 65 | expect(chef_run).to create_ulimit_domain('hbase') 66 | end 67 | 68 | it 'deletes /etc/hbase/conf directory' do 69 | expect(chef_run).to delete_directory('/etc/hbase/conf') 70 | end 71 | 72 | it 'runs execute[update hbase-conf alternatives]' do 73 | expect(chef_run).to run_execute('update hbase-conf alternatives') 74 | end 75 | end 76 | 77 | context 'on Ubuntu 14.04' do 78 | let(:chef_run) do 79 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 80 | node.automatic['domain'] = 'example.com' 81 | node.default['hadoop']['hdfs_site']['dfs.datanode.max.xcievers'] = '4096' 82 | stub_command(/test -L /).and_return(false) 83 | stub_command(/update-alternatives --display /).and_return(false) 84 | end.converge(described_recipe) 85 | end 86 | 87 | it 'installs hbase package' do 88 | expect(chef_run).to install_package('hbase') 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /spec/unit/recipes/oozie_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::oozie' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['oozie']['oozie_env']['oozie_log_dir'] = '/data/log/oozie' 9 | node.default['oozie']['oozie_site']['example_property'] = 'test' 10 | node.default['hadoop']['distribution'] = 'hdp' 11 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 12 | stub_command(/test -L /).and_return(false) 13 | stub_command(/update-alternatives --display /).and_return(false) 14 | end.converge(described_recipe) 15 | end 16 | name = 'oozie' 17 | pkg = 'oozie_2_3_4_7_4' 18 | 19 | it "install #{name} package" do 20 | expect(chef_run).to install_package(pkg) 21 | end 22 | 23 | it "creates #{name} service resource, but does not run it" do 24 | expect(chef_run.service(name)).to do_nothing 25 | end 26 | 27 | it 'creates oozie conf_dir' do 28 | expect(chef_run).to create_directory('/etc/oozie/conf.chef').with( 29 | user: 'root', 30 | group: 'root' 31 | ) 32 | end 33 | 34 | %w( 35 | oozie-env.sh 36 | oozie-site.xml 37 | ).each do |file| 38 | it "creates #{file} from template" do 39 | expect(chef_run).to create_template("/etc/oozie/conf.chef/#{file}") 40 | end 41 | end 42 | 43 | it 'install unzip package' do 44 | expect(chef_run).to install_package('unzip') 45 | end 46 | 47 | it 'creates ext-2.2.zip file' do 48 | expect(chef_run).to create_remote_file_if_missing('/var/lib/oozie/ext-2.2.zip') 49 | end 50 | 51 | it 'does not run script[extract extjs into Oozie data directory]' do 52 | expect(chef_run).not_to run_script('extract extjs into Oozie data 
directory') 53 | end 54 | 55 | it 'deletes /var/log/oozie' do 56 | expect(chef_run).to delete_directory('/var/log/oozie') 57 | end 58 | 59 | it 'creates /data/log/oozie' do 60 | expect(chef_run).to create_directory('/data/log/oozie').with( 61 | mode: '0755' 62 | ) 63 | end 64 | 65 | it 'creates /var/log/oozie symlink' do 66 | link = chef_run.link('/var/log/oozie') 67 | expect(link).to link_to('/data/log/oozie') 68 | end 69 | 70 | it 'runs execute[update oozie-conf alternatives]' do 71 | expect(chef_run).to run_execute('update oozie-conf alternatives') 72 | end 73 | 74 | context 'using CDH' do 75 | let(:chef_run) do 76 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 77 | node.automatic['domain'] = 'example.com' 78 | node.override['hadoop']['distribution'] = 'cdh' 79 | node.override['hadoop']['distribution_version'] = '5.7.0' 80 | stub_command(/update-alternatives --display /).and_return(false) 81 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 82 | stub_command(/test -L /).and_return(false) 83 | end.converge(described_recipe) 84 | end 85 | cdhpkg = 'oozie' 86 | 87 | it "install #{cdhpkg} package" do 88 | expect(chef_run).to install_package(cdhpkg) 89 | end 90 | end 91 | end 92 | end 93 | -------------------------------------------------------------------------------- /attributes/kms.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Attribute:: kms 4 | # 5 | # Copyright © 2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
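#
# For illustration only (hypothetical value): any of these defaults can be
# narrowed in a wrapper cookbook or role, e.g.
#   override['hadoop_kms']['kms_acls']['hadoop.kms.acl.DELETE'] = 'kmsadmin'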
18 | # 19 | 20 | # KMS ACLs 21 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.CREATE'] = '*' 22 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.DELETE'] = '*' 23 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.ROLLOVER'] = '*' 24 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.GET'] = '*' 25 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.GET_KEYS'] = '*' 26 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.GET_METADATA'] = '*' 27 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.SET_KEY_MATERIAL'] = '*' 28 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.GENERATE_EEK'] = '*' 29 | default['hadoop_kms']['kms_acls']['hadoop.kms.acl.DECRYPT_EEK'] = '*' 30 | default['hadoop_kms']['kms_acls']['default.key.acl.MANAGEMENT'] = '*' 31 | default['hadoop_kms']['kms_acls']['default.key.acl.GENERATE_EEK'] = '*' 32 | default['hadoop_kms']['kms_acls']['default.key.acl.DECRYPT_EEK'] = '*' 33 | default['hadoop_kms']['kms_acls']['default.key.acl.READ'] = '*' 34 | 35 | # Default kms-site.xml 36 | default['hadoop_kms']['kms_site']['hadoop.kms.cache.enable'] = 'true' 37 | 38 | # Default Logging options 39 | default['hadoop_kms']['log4j']['log4j.appender.kms'] = 'org.apache.log4j.DailyRollingFileAppender' 40 | default['hadoop_kms']['log4j']['log4j.appender.kms.DatePattern'] = '.yyyy-MM-dd' 41 | default['hadoop_kms']['log4j']['log4j.appender.kms.File'] = '${kms.log.dir}/kms.log' 42 | default['hadoop_kms']['log4j']['log4j.appender.kms.Append'] = 'true' 43 | default['hadoop_kms']['log4j']['log4j.appender.kms.layout'] = 'org.apache.log4j.PatternLayout' 44 | default['hadoop_kms']['log4j']['log4j.appender.kms.layout.ConversionPattern'] = '%d{ISO8601} %-5p %c{1} - %m%n' 45 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit'] = 'org.apache.log4j.DailyRollingFileAppender' 46 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit.DatePattern'] = '.yyyy-MM-dd' 47 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit.File'] = '${kms.log.dir}/kms-audit.log' 48 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit.Append'] = 'true' 49 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit.layout'] = 'org.apache.log4j.PatternLayout' 50 | default['hadoop_kms']['log4j']['log4j.appender.kms-audit.layout.ConversionPattern'] = '%d{ISO8601} %m%n' 51 | default['hadoop_kms']['log4j']['log4j.logger.kms-audit'] = 'INFO, kms-audit' 52 | default['hadoop_kms']['log4j']['log4j.additivity.kms-audit'] = 'false' 53 | default['hadoop_kms']['log4j']['log4j.rootLogger'] = 'ALL, kms' 54 | default['hadoop_kms']['log4j']['log4j.logger.org.apache.hadoop.conf'] = 'ERROR' 55 | default['hadoop_kms']['log4j']['log4j.logger.org.apache.hadoop'] = 'INFO' 56 | default['hadoop_kms']['log4j']['log4j.logger.com.sun.jersey.server.wadl.generators.WadlGeneratorJAXBGrammarGenerator'] = 'OFF' 57 | -------------------------------------------------------------------------------- /recipes/tez.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: tez 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' if node['hadoop']['distribution'] == 'hdp' 21 | 22 | package hadoop_package('tez') do 23 | action :install 24 | only_if { node['hadoop']['distribution'] == 'hdp' } 25 | end 26 | 27 | # Copy tez library into HDFS 28 | dfs = node['hadoop']['core_site']['fs.defaultFS'] 29 | dest = 30 | if hdp22? 31 | "#{dfs}/hdp/apps/#{hdp_version}/tez" 32 | else 33 | "#{dfs}/apps/tez" 34 | end 35 | src = 36 | if hdp22? 37 | "#{hadoop_lib_dir}/tez/lib/tez.tar.gz" 38 | else 39 | "#{hadoop_lib_dir}/tez/*" 40 | end 41 | execute 'tez-hdfs-appdir' do 42 | command <<-EOS 43 | hdfs dfs -mkdir -p #{dest} && \ 44 | hdfs dfs -put #{src} #{dest} && \ 45 | hdfs dfs -chown -R hdfs:users #{dest} && \ 46 | hdfs dfs -chmod 555 #{dest} && \ 47 | hdfs dfs -chmod 444 #{dest}/* 48 | EOS 49 | timeout 300 50 | user 'hdfs' 51 | group 'hdfs' 52 | not_if "hdfs dfs -test -d #{dest}", user: 'hdfs' 53 | action :nothing 54 | end 55 | 56 | tez_conf_dir = "/etc/tez/#{node['tez']['conf_dir']}" 57 | 58 | directory tez_conf_dir do 59 | mode '0755' 60 | owner 'root' 61 | group 'root' 62 | action :create 63 | recursive true 64 | end 65 | 66 | # Setup tez-site.xml 67 | if node['tez'].key?('tez_site') 68 | my_vars = { options: node['tez']['tez_site'] } 69 | 70 | template "#{tez_conf_dir}/tez-site.xml" do 71 | source 'generic-site.xml.erb' 72 | mode '0644' 73 | owner 'root' 74 | group 'root' 75 | action :create 76 | variables my_vars 77 | end 78 | end # End tez-site.xml 79 | 80 | # Setup tez-env.sh 81 | if node['tez'].key?('tez_env') 82 | my_vars = { options: node['tez']['tez_env'] } 83 | 84 | template "#{tez_conf_dir}/tez-env.sh" do 85 | source 'generic-env.sh.erb' 86 | mode '0755' 87 | owner 'root' 88 | group 'root' 89 | action :create 90 | variables my_vars 91 | end 92 | end # End tez-env.sh 93 | 94 | # Update alternatives to point to our configuration 95 | execute 'update tez-conf alternatives' do 96 | command "update-alternatives --install /etc/tez/conf tez-conf /etc/tez/#{node['tez']['conf_dir']} 50" 97 | not_if "update-alternatives --display tez-conf | grep best | awk '{print $5}' | grep /etc/tez/#{node['tez']['conf_dir']}" 98 | end 99 | 100 | if node.recipe?('hadoop::hive') && node['hive']['hive_site']['hive.execution.engine'] == 'tez' 101 | execute 'hive-hdfs-appdir' do 102 | command <<-EOS 103 | hdfs dfs -mkdir -p #{dfs}/apps/hive/install && \ 104 | hdfs dfs -copyFromLocal #{hadoop_lib_dir}/hive/lib/hive-exec-* #{dfs}/apps/hive/install/ 105 | EOS 106 | timeout 300 107 | user 'hdfs' 108 | group 'hdfs' 109 | not_if "hdfs dfs -test -d #{dfs}/apps/hive/install", user: 'hdfs' 110 | action :nothing 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /recipes/hadoop_hdfs_journalnode.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_hdfs_journalnode 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 
6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/HDFSHighAvailabilityWithQJM.html 21 | 22 | include_recipe 'hadoop::default' 23 | include_recipe 'hadoop::_hadoop_hdfs_checkconfig' 24 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 25 | pkg = 'hadoop-hdfs-journalnode' 26 | 27 | dfs_jn_edits_dirs = 28 | if node['hadoop']['hdfs_site'].key?('dfs.journalnode.edits.dir') 29 | node['hadoop']['hdfs_site']['dfs.journalnode.edits.dir'] 30 | else 31 | Chef::Application.fatal!("JournalNode requires node['hadoop']['hdfs_site']['dfs.journalnode.edits.dir'] to be set") 32 | end 33 | 34 | dfs_jn_edits_dirs.split(',').each do |dir| 35 | directory dir.gsub('file://', '') do 36 | mode '0755' 37 | owner 'hdfs' 38 | group 'hdfs' 39 | action :create 40 | recursive true 41 | end 42 | end 43 | 44 | hadoop_log_dir = 45 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_log_dir') 46 | node['hadoop']['hadoop_env']['hadoop_log_dir'] 47 | elsif hdp22? || iop? 48 | '/var/log/hadoop/hdfs' 49 | else 50 | '/var/log/hadoop-hdfs' 51 | end 52 | 53 | hadoop_pid_dir = 54 | if hdp22? || iop? 55 | '/var/run/hadoop/hdfs' 56 | else 57 | '/var/run/hadoop-hdfs' 58 | end 59 | 60 | # Create /etc/default configuration 61 | template "/etc/default/#{pkg}" do 62 | source 'generic-env.sh.erb' 63 | mode '0644' 64 | owner 'root' 65 | group 'root' 66 | action :create 67 | variables options: { 68 | 'hadoop_pid_dir' => hadoop_pid_dir, 69 | 'hadoop_log_dir' => hadoop_log_dir, 70 | 'hadoop_namenode_user' => 'hdfs', 71 | 'hadoop_secondarynamenode_user' => 'hdfs', 72 | 'hadoop_datanode_user' => 'hdfs', 73 | 'hadoop_ident_string' => 'hdfs', 74 | 'hadoop_privileged_nfs_user' => 'hdfs', 75 | 'hadoop_privileged_nfs_pid_dir' => hadoop_pid_dir, 76 | 'hadoop_privileged_nfs_log_dir' => hadoop_log_dir, 77 | 'hadoop_secure_dn_user' => 'hdfs', 78 | 'hadoop_secure_dn_pid_dir' => hadoop_pid_dir, 79 | 'hadoop_secure_dn_log_dir' => hadoop_log_dir, 80 | } 81 | end 82 | 83 | template "/etc/init.d/#{pkg}" do 84 | source 'hadoop-init.erb' 85 | mode '0755' 86 | owner 'root' 87 | group 'root' 88 | action :create 89 | variables options: { 90 | 'desc' => 'Hadoop HDFS JournalNode', 91 | 'name' => pkg, 92 | 'process' => 'java', 93 | 'binary' => "#{hadoop_lib_dir}/hadoop/sbin/hadoop-daemon.sh", 94 | 'args' => '--config ${CONF_DIR} start journalnode', 95 | 'confdir' => '${HADOOP_CONF_DIR}', 96 | 'user' => 'hdfs', 97 | 'home' => "#{hadoop_lib_dir}/hadoop", 98 | 'pidfile' => "${HADOOP_PID_DIR}/#{pkg}.pid", 99 | 'logfile' => "${HADOOP_LOG_DIR}/#{pkg}.log", 100 | } 101 | end 102 | 103 | service pkg do 104 | status_command "service #{pkg} status" 105 | supports [restart: true, reload: false, status: true] 106 | action :nothing 107 | end 108 | -------------------------------------------------------------------------------- /recipes/hadoop_yarn_nodemanager.rb: 
-------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_yarn_nodemanager 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 22 | pkg = 'hadoop-yarn-nodemanager' 23 | 24 | # Ensure permissions for secure Hadoop... this *should* be no-op 25 | file "#{hadoop_lib_dir}/hadoop-yarn/bin/container-executor" do 26 | owner 'root' 27 | group 'yarn' 28 | mode '6050' 29 | end 30 | 31 | yarn_log_dir = 32 | if node['hadoop'].key?('yarn_env') && node['hadoop']['yarn_env'].key?('yarn_log_dir') 33 | node['hadoop']['yarn_env']['yarn_log_dir'] 34 | elsif hdp22? || iop? 35 | '/var/log/hadoop/yarn' 36 | else 37 | '/var/log/hadoop-yarn' 38 | end 39 | yarn_pid_dir = 40 | if hdp22? || iop? 41 | '/var/run/hadoop/yarn' 42 | else 43 | '/var/run/hadoop-yarn' 44 | end 45 | nm_local_dirs = 46 | if node['hadoop'].key?('yarn_site') && node['hadoop']['yarn_site'].key?('yarn.nodemanager.local-dirs') 47 | node['hadoop']['yarn_site']['yarn.nodemanager.local-dirs'] 48 | else 49 | 'file:///tmp/hadoop-yarn/nm-local-dir' 50 | end 51 | nm_log_dirs = 52 | if node['hadoop'].key?('yarn_site') && node['hadoop']['yarn_site'].key?('yarn.nodemanager.log-dirs') 53 | node['hadoop']['yarn_site']['yarn.nodemanager.log-dirs'] 54 | else 55 | "#{yarn_log_dir}/userlogs" 56 | end 57 | 58 | node.default['hadoop']['yarn_site']['yarn.nodemanager.local-dirs'] = nm_local_dirs 59 | node.default['hadoop']['yarn_site']['yarn.nodemanager.log-dirs'] = nm_log_dirs 60 | 61 | %w(yarn.nodemanager.local-dirs yarn.nodemanager.log-dirs).each do |opt| 62 | node['hadoop']['yarn_site'][opt].split(',').each do |dir| 63 | directory dir.gsub('file://', '') do 64 | owner 'yarn' 65 | group 'yarn' 66 | mode '0755' 67 | action :create 68 | recursive true 69 | end 70 | end 71 | end 72 | 73 | # Create /etc/default configuration 74 | template "/etc/default/#{pkg}" do 75 | source 'generic-env.sh.erb' 76 | mode '0644' 77 | owner 'root' 78 | group 'root' 79 | action :create 80 | variables options: { 81 | 'yarn_pid_dir' => yarn_pid_dir, 82 | 'yarn_log_dir' => yarn_log_dir, 83 | 'yarn_ident_string' => 'yarn', 84 | 'yarn_conf_dir' => '/etc/hadoop/conf', 85 | } 86 | end 87 | 88 | template "/etc/init.d/#{pkg}" do 89 | source 'hadoop-init.erb' 90 | mode '0755' 91 | owner 'root' 92 | group 'root' 93 | action :create 94 | variables options: { 95 | 'desc' => 'Hadoop YARN NodeManager', 96 | 'name' => pkg, 97 | 'process' => 'java', 98 | 'binary' => "#{hadoop_lib_dir}/hadoop-yarn/sbin/yarn-daemon.sh", 99 | 'args' => '--config ${CONF_DIR} start nodemanager', 100 | 'confdir' => '${HADOOP_CONF_DIR}', 101 | 'user' => 'yarn', 102 | 'home' => "#{hadoop_lib_dir}/hadoop", 103 | 'pidfile' => '${YARN_PID_DIR}/yarn-yarn-nodemanager.pid', 104 | 'logfile' => "${YARN_LOG_DIR}/#{pkg}.log", 105 | } 106 | end 
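# Worked example (hypothetical paths): with the default of
# 'file:///tmp/hadoop-yarn/nm-local-dir', the yarn.nodemanager.*-dirs loop above
# strips the 'file://' scheme and creates /tmp/hadoop-yarn/nm-local-dir owned by
# yarn:yarn with mode 0755; a comma-separated value such as
# 'file:///data/1/yarn/local,file:///data/2/yarn/local' yields one directory
# resource per entry.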
107 | 108 | service pkg do 109 | status_command "service #{pkg} status" 110 | supports [restart: true, reload: false, status: true] 111 | action :nothing 112 | end 113 | -------------------------------------------------------------------------------- /recipes/hadoop_hdfs_secondarynamenode.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_hdfs_secondarynamenode 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | include_recipe 'hadoop::_hadoop_hdfs_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | pkg = 'hadoop-hdfs-secondarynamenode' 24 | 25 | fs_checkpoint_dirs = 26 | hadoop_config('hadoop', 'hdfs_site', 'dfs.namenode.checkpoint.dir', 'fs.checkpoint.dir', 'file:///tmp/hadoop-hdfs/dfs/namesecondary') 27 | 28 | fs_checkpoint_edits_dirs = 29 | hadoop_config('hadoop', 'hdfs_site', 'dfs.namenode.checkpoint.edits.dir', 'fs.checkpoint.edits.dir', fs_checkpoint_dirs) 30 | 31 | node.default['hadoop']['hdfs_site']['dfs.namenode.checkpoint.dir'] = fs_checkpoint_dirs 32 | node.default['hadoop']['hdfs_site']['dfs.namenode.checkpoint.edits.dir'] = fs_checkpoint_edits_dirs 33 | 34 | snn_dirs = 35 | if fs_checkpoint_dirs == fs_checkpoint_edits_dirs 36 | [fs_checkpoint_dirs] 37 | else 38 | [fs_checkpoint_dirs, fs_checkpoint_edits_dirs] 39 | end 40 | 41 | snn_dirs.each do |dirs| 42 | dirs.split(',').each do |dir| 43 | directory dir.gsub('file://', '') do 44 | mode '0700' 45 | owner 'hdfs' 46 | group 'hdfs' 47 | action :create 48 | recursive true 49 | end 50 | end 51 | end 52 | 53 | hadoop_log_dir = 54 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_log_dir') 55 | node['hadoop']['hadoop_env']['hadoop_log_dir'] 56 | elsif hdp22? || iop? 57 | '/var/log/hadoop/hdfs' 58 | else 59 | '/var/log/hadoop-hdfs' 60 | end 61 | 62 | hadoop_pid_dir = 63 | if hdp22? || iop? 
64 | '/var/run/hadoop/hdfs' 65 | else 66 | '/var/run/hadoop-hdfs' 67 | end 68 | 69 | # Create /etc/default configuration 70 | template "/etc/default/#{pkg}" do 71 | source 'generic-env.sh.erb' 72 | mode '0644' 73 | owner 'root' 74 | group 'root' 75 | action :create 76 | variables options: { 77 | 'hadoop_pid_dir' => hadoop_pid_dir, 78 | 'hadoop_log_dir' => hadoop_log_dir, 79 | 'hadoop_namenode_user' => 'hdfs', 80 | 'hadoop_secondarynamenode_user' => 'hdfs', 81 | 'hadoop_datanode_user' => 'hdfs', 82 | 'hadoop_ident_string' => 'hdfs', 83 | 'hadoop_privileged_nfs_user' => 'hdfs', 84 | 'hadoop_privileged_nfs_pid_dir' => hadoop_pid_dir, 85 | 'hadoop_privileged_nfs_log_dir' => hadoop_log_dir, 86 | 'hadoop_secure_dn_user' => 'hdfs', 87 | 'hadoop_secure_dn_pid_dir' => hadoop_pid_dir, 88 | 'hadoop_secure_dn_log_dir' => hadoop_log_dir, 89 | } 90 | end 91 | 92 | template "/etc/init.d/#{pkg}" do 93 | source 'hadoop-init.erb' 94 | mode '0755' 95 | owner 'root' 96 | group 'root' 97 | action :create 98 | variables options: { 99 | 'desc' => 'Hadoop HDFS SecondaryNameNode', 100 | 'name' => pkg, 101 | 'process' => 'java', 102 | 'binary' => "#{hadoop_lib_dir}/hadoop/sbin/hadoop-daemon.sh", 103 | 'args' => '--config ${CONF_DIR} start secondarynamenode', 104 | 'confdir' => '${HADOOP_CONF_DIR}', 105 | 'user' => 'hdfs', 106 | 'home' => "#{hadoop_lib_dir}/hadoop", 107 | 'pidfile' => "${HADOOP_PID_DIR}/#{pkg}.pid", 108 | 'logfile' => "${HADOOP_LOG_DIR}/#{pkg}.log", 109 | } 110 | end 111 | 112 | service pkg do 113 | status_command "service #{pkg} status" 114 | supports [restart: true, reload: false, status: true] 115 | action :nothing 116 | end 117 | -------------------------------------------------------------------------------- /recipes/hadoop_kms.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_kms 4 | # 5 | # Copyright © 2016 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | # This service is only supported on CDH, so far 23 | package 'hadoop-kms' do 24 | action :install 25 | only_if { node['hadoop']['distribution'] == 'cdh' } 26 | end 27 | 28 | # Configs: hadoop-kms-conf = /etc/hadoop-kms/conf 29 | 30 | hadoop_kms_conf_dir = "/etc/hadoop-kms/#{node['hadoop_kms']['conf_dir']}" 31 | 32 | directory hadoop_kms_conf_dir do 33 | mode '0755' 34 | owner 'root' 35 | group 'root' 36 | action :create 37 | recursive true 38 | end 39 | 40 | # Hadoop KMS doesn't read Hadoop's config, but we may need core-site, so we merge hadoop['core_site'], allowing overrides 41 | merge1 = node['hadoop']['core_site'] || {} 42 | merge2 = node['hadoop_kms']['core_site'] || {} 43 | node.default['hadoop_kms']['core_site'] = merge1.merge(merge2) 44 | 45 | # Setup core-site.xml kms-acls.xml kms-site.xml 46 | %w(core_site kms_acls kms_site).each do |sitefile| 47 | template "#{hadoop_kms_conf_dir}/#{sitefile.tr('_', '-')}.xml" do 48 | source 'generic-site.xml.erb' 49 | mode '0644' 50 | owner 'root' 51 | group 'root' 52 | action :create 53 | variables options: node['hadoop_kms'][sitefile] 54 | only_if { node['hadoop_kms'].key?(sitefile) && !node['hadoop_kms'][sitefile].empty? } 55 | end 56 | end # End core-site.xml kms-acls.xml kms-site.xml 57 | 58 | hadoop_kms_log_dir = 59 | if node['hadoop_kms'].key?('kms_env') && node['hadoop_kms']['kms_env'].key?('kms_log') 60 | node['hadoop_kms']['kms_env']['kms_log'] 61 | else 62 | '/var/log/hadoop-kms' 63 | end 64 | 65 | directory hadoop_kms_log_dir do 66 | owner 'kms' 67 | group 'kms' 68 | mode '0755' 69 | action :create 70 | recursive true 71 | only_if { node['hadoop_kms'].key?('kms_env') && node['hadoop_kms']['kms_env'].key?('kms_log') } 72 | end 73 | 74 | unless hadoop_kms_log_dir == '/var/log/hadoop-kms' 75 | # Delete default directory, if we aren't set to it 76 | directory '/var/log/hadoop-kms' do 77 | action :delete 78 | recursive true 79 | not_if 'test -L /var/log/hadoop-kms' 80 | end 81 | # symlink 82 | link '/var/log/hadoop-kms' do 83 | to hadoop_kms_log_dir 84 | end 85 | end 86 | 87 | # Setup kms-env.sh 88 | template "#{hadoop_kms_conf_dir}/kms-env.sh" do 89 | source 'generic-env.sh.erb' 90 | mode '0755' 91 | owner 'hdfs' 92 | group 'hdfs' 93 | action :create 94 | variables options: node['hadoop_kms']['kms_env'] 95 | only_if { node['hadoop_kms'].key?('kms_env') && !node['hadoop_kms']['kms_env'].empty? } 96 | end # End kms-env.sh 97 | 98 | # Setup kms-log4j.properties 99 | template "#{hadoop_kms_conf_dir}/kms-log4j.properties" do 100 | source 'generic.properties.erb' 101 | mode '0644' 102 | owner 'root' 103 | group 'root' 104 | action :create 105 | variables properties: node['hadoop_kms']['log4j'] 106 | only_if { node['hadoop_kms'].key?('log4j') && !node['hadoop_kms']['log4j'].empty? 
} 107 | end # End kms-log4j.properties 108 | 109 | # Update alternatives to point to our configuration 110 | execute 'update hadoop-kms-conf alternatives' do 111 | command "update-alternatives --install /etc/hadoop-kms/conf hadoop-kms-conf /etc/hadoop-kms/#{node['hadoop_kms']['conf_dir']} 50" 112 | not_if "update-alternatives --display hadoop-kms-conf | grep best | awk '{print $5}' | grep /etc/hadoop-kms/#{node['hadoop_kms']['conf_dir']}" 113 | end 114 | -------------------------------------------------------------------------------- /spec/unit/recipes/_compression_libs_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::_compression_libs' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.override['hadoop']['distribution'] = 'hdp' 8 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 9 | end.converge(described_recipe) 10 | end 11 | 12 | %w(snappy snappy-devel lzo lzo-devel hadooplzo hadooplzo-native).each do |pkg| 13 | it "installs #{pkg} package" do 14 | expect(chef_run).to install_package(pkg) 15 | end 16 | end 17 | 18 | context 'using HDP 2.1.15.0' do 19 | let(:chef_run) do 20 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 21 | node.default['hadoop']['distribution'] = 'hdp' 22 | node.default['hadoop']['distribution_version'] = '2.1.15.0' 23 | end.converge(described_recipe) 24 | end 25 | 26 | %w(snappy snappy-devel).each do |pkg| 27 | it "installs #{pkg} package" do 28 | expect(chef_run).to install_package(pkg) 29 | end 30 | end 31 | 32 | %w(lzo lzo-devel hadooplzo hadooplzo-native).each do |pkg| 33 | it "does not install #{pkg} package" do 34 | expect(chef_run).not_to install_package(pkg) 35 | end 36 | end 37 | end 38 | 39 | context 'using CDH' do 40 | let(:chef_run) do 41 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 42 | node.override['hadoop']['distribution'] = 'cdh' 43 | end.converge(described_recipe) 44 | end 45 | 46 | %w(snappy snappy-devel).each do |pkg| 47 | it "installs #{pkg} package" do 48 | expect(chef_run).to install_package(pkg) 49 | end 50 | end 51 | 52 | %w(lzo lzo-devel hadooplzo hadooplzo-native).each do |pkg| 53 | it "does not install #{pkg} package" do 54 | expect(chef_run).not_to install_package(pkg) 55 | end 56 | end 57 | end 58 | end 59 | 60 | context 'on Ubuntu 14.04' do 61 | let(:chef_run) do 62 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 63 | node.default['hadoop']['distribution'] = 'hdp' 64 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 65 | end.converge(described_recipe) 66 | end 67 | 68 | %w(libsnappy1 libsnappy-dev liblzo2-2 liblzo2-dev hadooplzo).each do |pkg| 69 | it "installs #{pkg} package" do 70 | expect(chef_run).to install_package(pkg) 71 | end 72 | end 73 | 74 | context 'using HDP 2.1.15.0' do 75 | let(:chef_run) do 76 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 77 | node.default['hadoop']['distribution'] = 'hdp' 78 | node.default['hadoop']['distribution_version'] = '2.1.15.0' 79 | end.converge(described_recipe) 80 | end 81 | 82 | %w(libsnappy1 libsnappy-dev).each do |pkg| 83 | it "installs #{pkg} package" do 84 | expect(chef_run).to install_package(pkg) 85 | end 86 | end 87 | 88 | %w(liblzo2-2 liblzo2-dev hadooplzo).each do |pkg| 89 | it "does not install #{pkg} package" do 90 | expect(chef_run).not_to install_package(pkg) 91 | end 92 | end 93 | 
end 94 | 95 | context 'using CDH' do 96 | let(:chef_run) do 97 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 98 | node.override['hadoop']['distribution'] = 'cdh' 99 | end.converge(described_recipe) 100 | end 101 | 102 | %w(libsnappy1 libsnappy-dev).each do |pkg| 103 | it "installs #{pkg} package" do 104 | expect(chef_run).to install_package(pkg) 105 | end 106 | end 107 | 108 | %w(liblzo2-2 liblzo2-dev hadooplzo).each do |pkg| 109 | it "does not install #{pkg} package" do 110 | expect(chef_run).not_to install_package(pkg) 111 | end 112 | end 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /recipes/hadoop_hdfs_datanode.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_hdfs_datanode 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | include_recipe 'hadoop::_hadoop_hdfs_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | pkg = 'hadoop-hdfs-datanode' 24 | 25 | dfs_data_dirs = 26 | hadoop_config('hadoop', 'hdfs_site', 'dfs.datanode.data.dir', 'dfs.data.dir', 'file:///tmp/hadoop-hdfs/dfs/data') 27 | 28 | dfs_data_dir_perm = 29 | hadoop_config('hadoop', 'hdfs_site', 'dfs.datanode.data.dir.perm', 'dfs.data.dir.perm', '700') 30 | 31 | node.default['hadoop']['hdfs_site']['dfs.datanode.data.dir'] = dfs_data_dirs 32 | node.default['hadoop']['hdfs_site']['dfs.datanode.data.dir.perm'] = dfs_data_dir_perm 33 | 34 | dfs_data_dirs.split(',').each do |dir| 35 | directory dir.gsub('file://', '') do 36 | mode dfs_data_dir_perm 37 | owner 'hdfs' 38 | group 'hdfs' 39 | action :create 40 | recursive true 41 | end 42 | end 43 | 44 | if node['hadoop']['hdfs_site'].key?('dfs.domain.socket.path') # ~FC023 45 | directory ::File.dirname(node['hadoop']['hdfs_site']['dfs.domain.socket.path']).gsub('file://', '') do 46 | mode '0750' 47 | owner 'hdfs' 48 | group 'hadoop' 49 | action :create 50 | recursive true 51 | end 52 | end 53 | 54 | hadoop_log_dir = 55 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_log_dir') 56 | node['hadoop']['hadoop_env']['hadoop_log_dir'] 57 | elsif hdp22? || iop? 58 | '/var/log/hadoop/hdfs' 59 | else 60 | '/var/log/hadoop-hdfs' 61 | end 62 | 63 | hadoop_pid_dir = 64 | if hdp22? || iop? 65 | '/var/run/hadoop/hdfs' 66 | else 67 | '/var/run/hadoop-hdfs' 68 | end 69 | 70 | target_user = 71 | if hadoop_kerberos? 
72 | 'root' 73 | else 74 | 'hdfs' 75 | end 76 | 77 | # Create /etc/default configuration 78 | template "/etc/default/#{pkg}" do 79 | source 'generic-env.sh.erb' 80 | mode '0644' 81 | owner 'root' 82 | group 'root' 83 | action :create 84 | variables options: { 85 | 'hadoop_pid_dir' => hadoop_pid_dir, 86 | 'hadoop_log_dir' => hadoop_log_dir, 87 | 'hadoop_namenode_user' => 'hdfs', 88 | 'hadoop_secondarynamenode_user' => 'hdfs', 89 | 'hadoop_datanode_user' => 'hdfs', 90 | 'hadoop_ident_string' => target_user, 91 | 'hadoop_privileged_nfs_user' => 'hdfs', 92 | 'hadoop_privileged_nfs_pid_dir' => hadoop_pid_dir, 93 | 'hadoop_privileged_nfs_log_dir' => hadoop_log_dir, 94 | 'hadoop_secure_dn_user' => 'hdfs', 95 | 'hadoop_secure_dn_pid_dir' => hadoop_pid_dir, 96 | 'hadoop_secure_dn_log_dir' => hadoop_log_dir, 97 | } 98 | end 99 | 100 | template "/etc/init.d/#{pkg}" do 101 | source 'hadoop-init.erb' 102 | mode '0755' 103 | owner 'root' 104 | group 'root' 105 | action :create 106 | variables options: { 107 | 'desc' => 'Hadoop HDFS DataNode', 108 | 'name' => pkg, 109 | 'process' => 'java', 110 | 'binary' => "#{hadoop_lib_dir}/hadoop/sbin/hadoop-daemon.sh", 111 | 'args' => '--config ${CONF_DIR} start datanode', 112 | 'confdir' => '${HADOOP_CONF_DIR}', 113 | 'user' => target_user, 114 | 'home' => "#{hadoop_lib_dir}/hadoop", 115 | 'pidfile' => "${HADOOP_PID_DIR}/#{pkg}.pid", 116 | 'logfile' => "${HADOOP_LOG_DIR}/#{pkg}.log", 117 | } 118 | end 119 | 120 | service pkg do 121 | status_command "service #{pkg} status" 122 | supports [restart: true, reload: false, status: true] 123 | action :nothing 124 | end 125 | -------------------------------------------------------------------------------- /spec/unit/recipes/hive_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hive' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hive']['hive_site']['hive.exec.local.scratchdir'] = '/tmp/hive/scratch' 9 | node.default['hive']['hive_env']['hive_log_dir'] = '/data/log/hive' 10 | node.default['hadoop']['distribution'] = 'hdp' 11 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 12 | stub_command(/test -L /).and_return(false) 13 | stub_command(/update-alternatives --display /).and_return(false) 14 | end.converge(described_recipe) 15 | end 16 | 17 | it 'installs hive package' do 18 | expect(chef_run).to install_package('hive_2_3_4_7_4') 19 | end 20 | 21 | %w(/etc/hive/conf.chef /var/lib/hive).each do |dir| 22 | it "creates directory #{dir}" do 23 | expect(chef_run).to create_directory(dir) 24 | end 25 | end 26 | 27 | it 'does not execute execute[hive-hdfs-homedir]' do 28 | expect(chef_run).not_to run_execute('hive-hdfs-homedir') 29 | end 30 | 31 | it 'executes execute[update hive-conf alternatives]' do 32 | expect(chef_run).to run_execute('update hive-conf alternatives') 33 | end 34 | 35 | it 'creates hive HIVE_LOG_DIR' do 36 | expect(chef_run).to create_directory('/data/log/hive').with( 37 | mode: '0755', 38 | user: 'hive', 39 | group: 'hive' 40 | ) 41 | end 42 | 43 | it 'deletes /var/log/hive' do 44 | expect(chef_run).to delete_directory('/var/log/hive') 45 | end 46 | 47 | it 'creates /var/log/hive symlink' do 48 | link = chef_run.link('/var/log/hive') 49 | expect(link).to link_to('/data/log/hive') 50 | end 51 | 52 | %w( 53 | /etc/hive/conf.chef/hive-site.xml 54 | 
/etc/hive/conf.chef/hive-env.sh 55 | ).each do |template| 56 | it "creates #{template} template" do 57 | expect(chef_run).to create_template(template) 58 | end 59 | end 60 | 61 | it 'deletes /etc/hive/conf directory' do 62 | expect(chef_run).to delete_directory('/etc/hive/conf') 63 | end 64 | 65 | it 'creates /tmp/hive/scratch directory' do 66 | expect(chef_run).to create_directory('/tmp/hive/scratch') 67 | end 68 | 69 | context 'using default hive.exec.local.scratchdir' do 70 | let(:chef_run) do 71 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 72 | node.automatic['domain'] = 'example.com' 73 | node.default['hive']['hive_env']['hive_log_dir'] = '/data/log/hive' 74 | stub_command(/test -L /).and_return(false) 75 | stub_command(/update-alternatives --display /).and_return(false) 76 | end.converge(described_recipe) 77 | end 78 | 79 | it 'does not create /tmp/hive directory' do 80 | expect(chef_run).not_to create_directory('/tmp/hive') 81 | end 82 | end 83 | 84 | context 'using /tmp/hive for hive.exec.local.scratchdir' do 85 | let(:chef_run) do 86 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 87 | node.default['hive']['hive_site']['hive.exec.local.scratchdir'] = '/tmp/hive' 88 | node.automatic['domain'] = 'example.com' 89 | stub_command(/test -L /).and_return(false) 90 | stub_command(/update-alternatives --display /).and_return(false) 91 | end.converge(described_recipe) 92 | end 93 | 94 | it 'creates /tmp/hive directory' do 95 | expect(chef_run).to create_directory('/tmp/hive') 96 | end 97 | end 98 | end 99 | 100 | context 'on Ubuntu 14.04' do 101 | let(:chef_run) do 102 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 103 | node.automatic['domain'] = 'example.com' 104 | stub_command(/test -L /).and_return(false) 105 | stub_command(/update-alternatives --display /).and_return(false) 106 | end.converge(described_recipe) 107 | end 108 | 109 | it 'installs hive package' do 110 | expect(chef_run).to install_package('hive') 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /spec/unit/recipes/spark_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::spark' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['spark']['spark_env']['spark_log_dir'] = '/data/log/spark' 9 | node.default['spark']['spark_site']['spark.eventLog.enabled'] = false 10 | node.default['spark']['metrics']['something.something'] = 'dark.side' 11 | node.default['spark']['log4j']['root.logger'] = 'Console' 12 | node.default['hadoop']['distribution'] = 'hdp' 13 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 14 | stub_command(/test -L /).and_return(false) 15 | stub_command(/update-alternatives --display /).and_return(false) 16 | end.converge(described_recipe) 17 | end 18 | 19 | it 'installs spark package' do 20 | expect(chef_run).to install_package('spark_2_3_4_7_4') 21 | end 22 | 23 | it 'installs spark-python package' do 24 | expect(chef_run).to install_package('spark_2_3_4_7_4-python') 25 | end 26 | 27 | it 'installs libgfortran package' do 28 | expect(chef_run).to install_package('libgfortran') 29 | end 30 | 31 | it 'creates spark conf_dir' do 32 | expect(chef_run).to create_directory('/etc/spark/conf.chef').with( 33 | user: 'root', 34 | group: 'root' 35 
| ) 36 | end 37 | 38 | it 'deletes /var/log/spark directory' do 39 | expect(chef_run).to delete_directory('/var/log/spark') 40 | end 41 | 42 | it 'creates /data/log/spark directory' do 43 | expect(chef_run).to create_directory('/data/log/spark').with( 44 | mode: '0755' 45 | ) 46 | end 47 | 48 | it 'creates /var/log/spark symlink' do 49 | link = chef_run.link('/var/log/spark') 50 | expect(link).to link_to('/data/log/spark') 51 | end 52 | 53 | %w( 54 | spark-env.sh 55 | spark-defaults.conf 56 | metrics.properties 57 | log4j.properties 58 | ).each do |file| 59 | it "creates #{file} from template" do 60 | expect(chef_run).to create_template("/etc/spark/conf.chef/#{file}") 61 | end 62 | end 63 | 64 | it 'deletes /etc/spark/conf directory' do 65 | expect(chef_run).to delete_directory('/etc/spark/conf') 66 | end 67 | 68 | it 'runs execute[update spark-conf alternatives]' do 69 | expect(chef_run).to run_execute('update spark-conf alternatives') 70 | end 71 | end 72 | 73 | context 'using HDP 2.1 and Spark release tarball' do 74 | let(:chef_run) do 75 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 76 | node.automatic['domain'] = 'example.com' 77 | node.default['spark']['release']['install'] = true 78 | node.default['spark']['spark_env']['spark_log_dir'] = '/data/log/spark' 79 | node.default['spark']['spark_site']['spark.eventLog.enabled'] = false 80 | stub_command(/test -L /).and_return(false) 81 | stub_command(/update-alternatives --display /).and_return(false) 82 | end.converge(described_recipe) 83 | end 84 | 85 | it 'does not install spark-core package' do 86 | expect(chef_run).not_to install_package('spark-core') 87 | end 88 | end 89 | 90 | context 'using CDH 5 on Ubuntu 14.04' do 91 | let(:chef_run) do 92 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 93 | node.automatic['domain'] = 'example.com' 94 | node.override['hadoop']['distribution'] = 'cdh' 95 | node.default['hadoop']['distribution_version'] = '5.3.2' 96 | stub_command(/test -L /).and_return(false) 97 | stub_command(/update-alternatives --display /).and_return(false) 98 | end.converge(described_recipe) 99 | end 100 | 101 | it 'installs spark-core package' do 102 | expect(chef_run).to install_package('spark-core') 103 | end 104 | 105 | it 'installs spark-python package' do 106 | expect(chef_run).to install_package('spark-python') 107 | end 108 | 109 | it 'installs libgfortran3 package' do 110 | expect(chef_run).to install_package('libgfortran3') 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /spec/unit/recipes/hive2_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hive2' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['hive2']['hive_site']['hive.exec.local.scratchdir'] = '/tmp/hive2/scratch' 9 | node.default['hive2']['hive_env']['hive_log_dir'] = '/data/log/hive2' 10 | node.default['hadoop']['distribution'] = 'hdp' 11 | node.default['hadoop']['distribution_version'] = '2.6.0.3' 12 | stub_command(/test -L /).and_return(false) 13 | stub_command(/update-alternatives --display /).and_return(false) 14 | end.converge(described_recipe) 15 | end 16 | 17 | it 'installs hive2 package' do 18 | expect(chef_run).to install_package('hive2_2_6_0_3_8') 19 | end 20 | 21 | %w(/etc/hive2/conf.chef 
/var/lib/hive2).each do |dir| 22 | it "creates directory #{dir}" do 23 | expect(chef_run).to create_directory(dir) 24 | end 25 | end 26 | 27 | it 'does not execute execute[hive2-hdfs-homedir]' do 28 | expect(chef_run).not_to run_execute('hive2-hdfs-homedir') 29 | end 30 | 31 | it 'executes execute[update hive2-conf alternatives]' do 32 | expect(chef_run).to run_execute('update hive2-conf alternatives') 33 | end 34 | 35 | it 'creates hive2 HIVE_LOG_DIR' do 36 | expect(chef_run).to create_directory('/data/log/hive2').with( 37 | mode: '0755', 38 | user: 'hive', 39 | group: 'hive' 40 | ) 41 | end 42 | 43 | it 'deletes /var/log/hive2' do 44 | expect(chef_run).to delete_directory('/var/log/hive2') 45 | end 46 | 47 | it 'creates /var/log/hive2 symlink' do 48 | link = chef_run.link('/var/log/hive2') 49 | expect(link).to link_to('/data/log/hive2') 50 | end 51 | 52 | %w( 53 | /etc/hive2/conf.chef/hive-site.xml 54 | /etc/hive2/conf.chef/hive-env.sh 55 | ).each do |template| 56 | it "creates #{template} template" do 57 | expect(chef_run).to create_template(template) 58 | end 59 | end 60 | 61 | it 'deletes /etc/hive2/conf directory' do 62 | expect(chef_run).to delete_directory('/etc/hive2/conf') 63 | end 64 | 65 | it 'creates /tmp/hive2/scratch directory' do 66 | expect(chef_run).to create_directory('/tmp/hive2/scratch') 67 | end 68 | 69 | context 'using default hive2.exec.local.scratchdir' do 70 | let(:chef_run) do 71 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 72 | node.automatic['domain'] = 'example.com' 73 | node.default['hive2']['hive_env']['hive_log_dir'] = '/data/log/hive2' 74 | stub_command(/test -L /).and_return(false) 75 | stub_command(/update-alternatives --display /).and_return(false) 76 | end.converge(described_recipe) 77 | end 78 | 79 | it 'does not create /tmp/hive2 directory' do 80 | expect(chef_run).not_to create_directory('/tmp/hive2') 81 | end 82 | end 83 | 84 | context 'using /tmp/hive2 for hive.exec.local.scratchdir' do 85 | let(:chef_run) do 86 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 87 | node.default['hive2']['hive_site']['hive.exec.local.scratchdir'] = '/tmp/hive2' 88 | node.automatic['domain'] = 'example.com' 89 | stub_command(/test -L /).and_return(false) 90 | stub_command(/update-alternatives --display /).and_return(false) 91 | end.converge(described_recipe) 92 | end 93 | 94 | it 'creates /tmp/hive2 directory' do 95 | expect(chef_run).to create_directory('/tmp/hive2') 96 | end 97 | end 98 | end 99 | 100 | context 'on Ubuntu 14.04' do 101 | let(:chef_run) do 102 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 103 | node.automatic['domain'] = 'example.com' 104 | stub_command(/test -L /).and_return(false) 105 | stub_command(/update-alternatives --display /).and_return(false) 106 | end.converge(described_recipe) 107 | end 108 | 109 | it 'installs hive2 package' do 110 | expect(chef_run).to install_package('hive2') 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /spec/unit/recipes/spark2_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::spark2' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | node.default['spark2']['spark_env']['spark_log_dir'] = '/data/log/spark2' 9 | 
node.default['spark2']['spark_site']['spark.eventLog.enabled'] = false 10 | node.default['spark2']['metrics']['something.something'] = 'dark.side' 11 | node.default['spark2']['log4j']['root.logger'] = 'Console' 12 | node.default['hadoop']['distribution'] = 'hdp' 13 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 14 | stub_command(/test -L /).and_return(false) 15 | stub_command(/update-alternatives --display /).and_return(false) 16 | end.converge(described_recipe) 17 | end 18 | 19 | it 'installs spark2 package' do 20 | expect(chef_run).to install_package('spark2_2_3_4_7_4') 21 | end 22 | 23 | it 'installs spark2-python package' do 24 | expect(chef_run).to install_package('spark2_2_3_4_7_4-python') 25 | end 26 | 27 | it 'installs libgfortran package' do 28 | expect(chef_run).to install_package('libgfortran') 29 | end 30 | 31 | it 'creates spark2 conf_dir' do 32 | expect(chef_run).to create_directory('/etc/spark2/conf.chef').with( 33 | user: 'root', 34 | group: 'root' 35 | ) 36 | end 37 | 38 | it 'deletes /var/log/spark2 directory' do 39 | expect(chef_run).to delete_directory('/var/log/spark2') 40 | end 41 | 42 | it 'creates /data/log/spark2 directory' do 43 | expect(chef_run).to create_directory('/data/log/spark2').with( 44 | mode: '0755' 45 | ) 46 | end 47 | 48 | it 'creates /var/log/spark2 symlink' do 49 | link = chef_run.link('/var/log/spark2') 50 | expect(link).to link_to('/data/log/spark2') 51 | end 52 | 53 | %w( 54 | spark-env.sh 55 | spark-defaults.conf 56 | metrics.properties 57 | log4j.properties 58 | ).each do |file| 59 | it "creates #{file} from template" do 60 | expect(chef_run).to create_template("/etc/spark2/conf.chef/#{file}") 61 | end 62 | end 63 | 64 | it 'deletes /etc/spark2/conf directory' do 65 | expect(chef_run).to delete_directory('/etc/spark2/conf') 66 | end 67 | 68 | it 'runs execute[update spark2-conf alternatives]' do 69 | expect(chef_run).to run_execute('update spark2-conf alternatives') 70 | end 71 | end 72 | 73 | context 'using HDP 2.1 and Spark release tarball' do 74 | let(:chef_run) do 75 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 76 | node.automatic['domain'] = 'example.com' 77 | node.default['spark2']['release']['install'] = true 78 | node.default['spark2']['spark_env']['spark_log_dir'] = '/data/log/spark2' 79 | node.default['spark2']['spark_site']['spark.eventLog.enabled'] = false 80 | stub_command(/test -L /).and_return(false) 81 | stub_command(/update-alternatives --display /).and_return(false) 82 | end.converge(described_recipe) 83 | end 84 | 85 | it 'does not install spark2-core package' do 86 | expect(chef_run).not_to install_package('spark2-core') 87 | end 88 | end 89 | 90 | context 'using CDH 5 on Ubuntu 14.04' do 91 | let(:chef_run) do 92 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 93 | node.automatic['domain'] = 'example.com' 94 | node.override['hadoop']['distribution'] = 'cdh' 95 | node.default['hadoop']['distribution_version'] = '5.3.2' 96 | stub_command(/test -L /).and_return(false) 97 | stub_command(/update-alternatives --display /).and_return(false) 98 | end.converge(described_recipe) 99 | end 100 | 101 | it 'installs spark2-core package' do 102 | expect(chef_run).to install_package('spark2-core') 103 | end 104 | 105 | it 'installs spark2-python package' do 106 | expect(chef_run).to install_package('spark2-python') 107 | end 108 | 109 | it 'installs libgfortran3 package' do 110 | expect(chef_run).to install_package('libgfortran3') 111 | end 112 | end 113 | end 114 | 
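# A note on the stub_command pattern repeated in these specs (explanatory
# sketch, not part of the cookbook): the recipes use string guards such as
# not_if 'test -L /var/log/spark2' and
# not_if "update-alternatives --display spark2-conf | grep best | ...".
# ChefSpec does not execute string guards and fails the converge with a
# CommandNotStubbed error when it meets an unstubbed one, so every example
# group repeats the same stubs. A new context (hypothetical platform/version
# shown) would follow the same shape:
#
#   let(:chef_run) do
#     ChefSpec::SoloRunner.new(platform: 'centos', version: 7.4) do |node|
#       node.automatic['domain'] = 'example.com'
#       stub_command(/test -L /).and_return(false)
#       stub_command(/update-alternatives --display /).and_return(false)
#     end.converge(described_recipe)
#   end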
-------------------------------------------------------------------------------- /recipes/hadoop_mapreduce_historyserver.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_mapreduce_historyserver 4 | # 5 | # Copyright © 2013-2015 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | pkg = 'hadoop-mapreduce-historyserver' 22 | 23 | am_staging_dir = 24 | if node['hadoop'].key?('mapred_site') && node['hadoop']['mapred_site'].key?('yarn.app.mapreduce.am.staging-dir') 25 | node['hadoop']['mapred_site']['yarn.app.mapreduce.am.staging-dir'] 26 | else 27 | '/tmp/hadoop-yarn/staging' 28 | end 29 | 30 | jhs_intermediate_done_dir = 31 | if node['hadoop'].key?('mapred_site') && node['hadoop']['mapred_site'].key?('mapreduce.jobhistory.intermediate-done-dir') 32 | node['hadoop']['mapred_site']['mapreduce.jobhistory.intermediate-done-dir'] 33 | else 34 | "#{am_staging_dir}/history/done_intermediate" 35 | end 36 | 37 | jhs_done_dir = 38 | if node['hadoop'].key?('mapred_site') && node['hadoop']['mapred_site'].key?('mapreduce.jobhistory.done-dir') 39 | node['hadoop']['mapred_site']['mapreduce.jobhistory.done-dir'] 40 | else 41 | "#{am_staging_dir}/history/done" 42 | end 43 | 44 | execute 'mapreduce-jobhistory-intermediate-done-dir' do 45 | command "hdfs dfs -mkdir -p #{jhs_intermediate_done_dir} && hdfs dfs -chown mapred:hadoop #{jhs_intermediate_done_dir} && hdfs dfs -chmod 1777 #{jhs_intermediate_done_dir}" 46 | timeout 300 47 | user 'hdfs' 48 | group 'hdfs' 49 | not_if "hdfs dfs -test -d #{jhs_intermediate_done_dir}", user: 'hdfs' 50 | action :nothing 51 | end 52 | 53 | execute 'mapreduce-jobhistory-done-dir' do 54 | command "hdfs dfs -mkdir -p #{jhs_done_dir} && hdfs dfs -chown mapred:hadoop #{jhs_done_dir} && hdfs dfs -chmod 1777 #{jhs_done_dir}" 55 | timeout 300 56 | user 'hdfs' 57 | group 'hdfs' 58 | not_if "hdfs dfs -test -d #{jhs_done_dir}", user: 'hdfs' 59 | action :nothing 60 | end 61 | 62 | # Default HADOOP_MAPRED_LOG_DIR 63 | hadoop_log_dir = 64 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_mapred_log_dir') 65 | node['hadoop']['hadoop_env']['hadoop_mapred_log_dir'] 66 | elsif hdp22? || iop? 67 | '/var/log/hadoop/mapreduce' 68 | else 69 | '/var/log/hadoop-mapreduce' 70 | end 71 | 72 | hadoop_pid_dir = 73 | if hdp22? || iop? 
74 | '/var/run/hadoop/mapreduce' 75 | else 76 | '/var/run/hadoop-mapreduce' 77 | end 78 | 79 | # Create /etc/default configuration 80 | template "/etc/default/#{pkg}" do 81 | source 'generic-env.sh.erb' 82 | mode '0644' 83 | owner 'root' 84 | group 'root' 85 | action :create 86 | variables options: { 87 | 'hadoop_mapred_pid_dir' => hadoop_pid_dir, 88 | 'hadoop_mapred_log_dir' => hadoop_log_dir, 89 | 'hadoop_mapred_ident_string' => 'mapred', 90 | 'hadoop_mapred_home' => "#{hadoop_lib_dir}/hadoop-mapreduce", 91 | 'hadoop_log_dir' => hadoop_log_dir, 92 | } 93 | end 94 | 95 | template "/etc/init.d/#{pkg}" do 96 | source 'hadoop-init.erb' 97 | mode '0755' 98 | owner 'root' 99 | group 'root' 100 | action :create 101 | variables options: { 102 | 'desc' => 'Hadoop MapReduce JobHistory Server', 103 | 'name' => pkg, 104 | 'process' => 'java', 105 | 'binary' => "#{hadoop_lib_dir}/hadoop-mapreduce/sbin/mr-jobhistory-daemon.sh", 106 | 'args' => '--config ${CONF_DIR} start historyserver', 107 | 'confdir' => '${HADOOP_CONF_DIR}', 108 | 'user' => 'mapred', 109 | 'home' => "#{hadoop_lib_dir}/hadoop", 110 | 'pidfile' => '${HADOOP_MAPRED_PID_DIR}/mapred-mapred-historyserver.pid', 111 | 'logfile' => "${HADOOP_MAPRED_LOG_DIR}/#{pkg}.log", 112 | } 113 | end 114 | 115 | service pkg do 116 | status_command "service #{pkg} status" 117 | supports [restart: true, reload: false, status: true] 118 | action :nothing 119 | end 120 | -------------------------------------------------------------------------------- /recipes/hadoop_hdfs_namenode.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hadoop_hdfs_namenode 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::default' 21 | include_recipe 'hadoop::_hadoop_hdfs_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | pkg = 'hadoop-hdfs-namenode' 24 | 25 | dfs_name_dirs = 26 | hadoop_config('hadoop', 'hdfs_site', 'dfs.namenode.name.dir', 'dfs.name.dir', 'file:///tmp/hadoop-hdfs/dfs/name') 27 | 28 | node.default['hadoop']['hdfs_site']['dfs.namenode.name.dir'] = dfs_name_dirs 29 | 30 | dfs_name_dirs.split(',').each do |dir| 31 | directory dir.gsub('file://', '') do 32 | mode '0700' 33 | owner 'hdfs' 34 | group 'hdfs' 35 | action :create 36 | recursive true 37 | end 38 | end 39 | 40 | # Are we using automatic failover HA? 
41 | if node['hadoop'].key?('hdfs_site') && node['hadoop']['hdfs_site'].key?('dfs.ha.automatic-failover.enabled') && 42 | node['hadoop']['hdfs_site']['dfs.ha.automatic-failover.enabled'].to_s == 'true' 43 | include_recipe 'hadoop::_hadoop_hdfs_ha_checkconfig' 44 | include_recipe 'hadoop::hadoop_hdfs_zkfc' 45 | end 46 | 47 | execute 'hdfs-namenode-bootstrap-standby' do 48 | command 'hdfs namenode -bootstrapStandby' 49 | action :nothing 50 | group 'hdfs' 51 | user 'hdfs' 52 | end 53 | 54 | execute 'hdfs-namenode-initialize-sharededits' do 55 | command 'hdfs namenode -initializeSharedEdits' 56 | action :nothing 57 | group 'hdfs' 58 | user 'hdfs' 59 | end 60 | 61 | execute 'hdfs-namenode-format' do 62 | command 'hdfs namenode -format -nonInteractive' + (node['hadoop']['force_format'] ? ' -force' : '') 63 | action :nothing 64 | group 'hdfs' 65 | user 'hdfs' 66 | end 67 | 68 | hadoop_log_dir = 69 | if node['hadoop'].key?('hadoop_env') && node['hadoop']['hadoop_env'].key?('hadoop_log_dir') 70 | node['hadoop']['hadoop_env']['hadoop_log_dir'] 71 | elsif hdp22? || iop? 72 | '/var/log/hadoop/hdfs' 73 | else 74 | '/var/log/hadoop-hdfs' 75 | end 76 | 77 | hadoop_pid_dir = 78 | if hdp22? || iop? 79 | '/var/run/hadoop/hdfs' 80 | else 81 | '/var/run/hadoop-hdfs' 82 | end 83 | 84 | # Create /etc/default configuration 85 | template "/etc/default/#{pkg}" do 86 | source 'generic-env.sh.erb' 87 | mode '0644' 88 | owner 'root' 89 | group 'root' 90 | action :create 91 | variables options: { 92 | 'hadoop_pid_dir' => hadoop_pid_dir, 93 | 'hadoop_log_dir' => hadoop_log_dir, 94 | 'hadoop_namenode_user' => 'hdfs', 95 | 'hadoop_secondarynamenode_user' => 'hdfs', 96 | 'hadoop_datanode_user' => 'hdfs', 97 | 'hadoop_ident_string' => 'hdfs', 98 | 'hadoop_privileged_nfs_user' => 'hdfs', 99 | 'hadoop_privileged_nfs_pid_dir' => hadoop_pid_dir, 100 | 'hadoop_privileged_nfs_log_dir' => hadoop_log_dir, 101 | 'hadoop_secure_dn_user' => 'hdfs', 102 | 'hadoop_secure_dn_pid_dir' => hadoop_pid_dir, 103 | 'hadoop_secure_dn_log_dir' => hadoop_log_dir, 104 | } 105 | end 106 | 107 | template "/etc/init.d/#{pkg}" do 108 | source 'hadoop-init.erb' 109 | mode '0755' 110 | owner 'root' 111 | group 'root' 112 | action :create 113 | variables options: { 114 | 'desc' => 'Hadoop HDFS NameNode', 115 | 'name' => pkg, 116 | 'process' => 'java', 117 | 'binary' => "#{hadoop_lib_dir}/hadoop/sbin/hadoop-daemon.sh", 118 | 'args' => '--config ${CONF_DIR} start namenode', 119 | 'confdir' => '${HADOOP_CONF_DIR}', 120 | 'user' => 'hdfs', 121 | 'home' => "#{hadoop_lib_dir}/hadoop", 122 | 'pidfile' => "${HADOOP_PID_DIR}/#{pkg}.pid", 123 | 'logfile' => "${HADOOP_LOG_DIR}/#{pkg}.log", 124 | } 125 | end 126 | 127 | service pkg do 128 | status_command "service #{pkg} status" 129 | supports [restart: true, reload: false, status: true] 130 | action :nothing 131 | end 132 | -------------------------------------------------------------------------------- /recipes/hbase_master.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook:: hadoop 3 | # Recipe:: hbase_master 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::hbase' 21 | include_recipe 'hadoop::_hbase_checkconfig' 22 | include_recipe 'hadoop::_system_tuning' if node['hadoop']['system_tuning_enabled'] 23 | pkg = 'hbase-master' 24 | 25 | # HBase can use a local directory or an HDFS directory for its rootdir... 26 | # if HDFS, create execute block with action :nothing 27 | # else create the local directory when file:// 28 | ### TODO: do not create resources via conditionals, use guards 29 | if node['hbase'].key?('hbase_site') && node['hbase']['hbase_site'].key?('hbase.rootdir') && 30 | node['hbase']['hbase_site']['hbase.rootdir'] =~ %r{^hdfs://} || (node['hbase']['hbase_site']['hbase.rootdir'] =~ %r{^/} && 31 | node['hbase']['hbase_site']['hbase.cluster.distributed'].to_s == 'true') 32 | execute 'hbase-hdfs-rootdir' do 33 | command "hdfs dfs -mkdir -p #{node['hbase']['hbase_site']['hbase.rootdir']} && hdfs dfs -chown hbase #{node['hbase']['hbase_site']['hbase.rootdir']}" 34 | timeout 300 35 | user 'hdfs' 36 | group 'hdfs' 37 | not_if "hdfs dfs -test -d #{node['hbase']['hbase_site']['hbase.rootdir']}", user: 'hdfs' 38 | action :nothing 39 | end 40 | elsif node['hbase']['hbase_site']['hbase.rootdir'] =~ %r{^/|^file://} 41 | directory node['hbase']['hbase_site']['hbase.rootdir'].gsub('file://', '') do 42 | owner 'hbase' 43 | group 'hbase' 44 | mode '0700' 45 | action :create 46 | recursive true 47 | end 48 | else 49 | Chef::Application.fatal!("node['hbase']['hbase_site']['hbase.rootdir'] must be set to file:// or hdfs:// locations") 50 | end 51 | 52 | # https://hbase.apache.org/book/hbase.secure.bulkload.html 53 | bulkload_dir = 54 | if node['hbase']['hbase_site'].key?('hbase.bulkload.staging.dir') 55 | node['hbase']['hbase_site']['hbase.bulkload.staging.dir'] 56 | else 57 | '/tmp/hbase-staging' 58 | end 59 | 60 | node.default['hbase']['hbase_site']['hbase.bulkload.staging.dir'] = bulkload_dir 61 | 62 | execute 'hbase-bulkload-stagingdir' do 63 | command "hdfs dfs -mkdir -p #{bulkload_dir} && hdfs dfs -chown hbase:hbase #{bulkload_dir} && hdfs dfs -chmod 711 #{bulkload_dir}" 64 | timeout 300 65 | user 'hdfs' 66 | group 'hdfs' 67 | not_if "hdfs dfs -test -d #{bulkload_dir}", user: 'hdfs' 68 | action :nothing 69 | end 70 | 71 | hbase_log_dir = 72 | if node['hbase'].key?('hbase_env') && node['hbase']['hbase_env'].key?('hbase_log_dir') 73 | node['hbase']['hbase_env']['hbase_log_dir'] 74 | else 75 | '/var/log/hbase' 76 | end 77 | 78 | # Create /etc/default configuration 79 | template "/etc/default/#{pkg}" do 80 | source 'generic-env.sh.erb' 81 | mode '0644' 82 | owner 'root' 83 | group 'root' 84 | action :create 85 | variables options: { 86 | 'hbase_home' => "#{hadoop_lib_dir}/hbase", 87 | 'hbase_pid_dir' => '/var/run/hbase', 88 | 'hbase_log_dir' => hbase_log_dir, 89 | 'hbase_ident_string' => 'hbase', 90 | 'hbase_conf_dir' => '/etc/hbase/conf', 91 | } 92 | end 93 | 94 | template "/etc/init.d/#{pkg}" do 95 | source 'hadoop-init.erb' 96 | mode '0755' 97 | owner 'root' 98 | group 'root' 99 | action :create 100 | variables options: { 101 | 'desc' => 'HBase Master', 102 | 
'name' => pkg, 103 | 'process' => 'java', 104 | 'binary' => "#{hadoop_lib_dir}/hbase/bin/hbase-daemon.sh", 105 | 'args' => '--config ${CONF_DIR} start master', 106 | 'confdir' => '${HBASE_CONF_DIR}', 107 | 'user' => 'hbase', 108 | 'home' => "#{hadoop_lib_dir}/hbase", 109 | 'pidfile' => "${HBASE_PID_DIR}/hbase-#{pkg}.pid", 110 | 'logfile' => "${HBASE_LOG_DIR}/#{pkg}.log", 111 | } 112 | end 113 | 114 | service pkg do 115 | status_command "service #{pkg} status" 116 | supports [restart: true, reload: false, status: true] 117 | action :nothing 118 | end 119 | -------------------------------------------------------------------------------- /spec/unit/recipes/hive_metastore_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'hadoop::hive_metastore' do 4 | context 'on CentOS 6.9' do 5 | let(:chef_run) do 6 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 7 | node.automatic['domain'] = 'example.com' 8 | stub_command(/test -L /).and_return(false) 9 | stub_command(/update-alternatives --display /).and_return(false) 10 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 11 | end.converge(described_recipe) 12 | end 13 | pkg = 'hive-metastore' 14 | 15 | %w(mysql-connector-java postgresql-jdbc).each do |p| 16 | it "does not install #{p} package" do 17 | expect(chef_run).not_to install_package(p) 18 | end 19 | end 20 | 21 | %W( 22 | /etc/default/#{pkg} 23 | /etc/init.d/#{pkg} 24 | ).each do |file| 25 | it "creates #{file} from template" do 26 | expect(chef_run).to create_template(file) 27 | end 28 | end 29 | 30 | it "creates #{pkg} service resource, but does not run it" do 31 | expect(chef_run.service(pkg)).to do_nothing 32 | end 33 | 34 | it 'does not run execute[hive-hdfs-warehousedir]' do 35 | expect(chef_run.execute('hive-hdfs-warehousedir')).to do_nothing 36 | end 37 | 38 | context 'using MySQL on HDP 2.3' do 39 | let(:chef_run) do 40 | ChefSpec::SoloRunner.new(platform: 'centos', version: 6.9) do |node| 41 | node.default['hadoop']['distribution'] = 'hdp' 42 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 43 | node.override['hive']['hive_site']['javax.jdo.option.ConnectionURL'] = 'jdbc:mysql:localhost/hive' 44 | node.automatic['domain'] = 'example.com' 45 | node.default['hive']['hive_env']['hive_log_dir'] = '/data/log/hive' 46 | node.default['hive']['hive_site']['hive.exec.local.scratchdir'] = '/tmp/hive/scratch' 47 | stub_command(/test -L /).and_return(false) 48 | stub_command(/update-alternatives --display /).and_return(false) 49 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 50 | end.converge(described_recipe) 51 | end 52 | 53 | it 'creates mysql-connector-java.jar symlink' do 54 | link = chef_run.link('/usr/hdp/2.3.4.7-4/hive/lib/mysql-connector-java.jar') 55 | expect(link).to link_to('/usr/share/java/mysql-connector-java.jar') 56 | end 57 | end 58 | end 59 | 60 | context 'using Ubuntu 14.04' do 61 | context 'using PostgreSQL' do 62 | let(:chef_run) do 63 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 64 | node.default['hadoop']['distribution'] = 'hdp' 65 | node.default['hadoop']['distribution_version'] = '2.3.4.7' 66 | node.override['hive']['hive_site']['javax.jdo.option.ConnectionURL'] = 'jdbc:postgresql:localhost/hive' 67 | node.automatic['domain'] = 'example.com' 68 | stub_command(/test -L /).and_return(false) 69 | stub_command(/update-alternatives --display /).and_return(false) 70 
| stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 71 | end.converge(described_recipe) 72 | end 73 | 74 | it 'creates postgresql-jdbc4.jar symlink' do 75 | link = chef_run.link('/usr/hdp/2.3.4.7-4/hive/lib/postgresql-jdbc4.jar') 76 | expect(link).to link_to('/usr/share/java/postgresql-jdbc4.jar') 77 | end 78 | end 79 | 80 | context 'using PostgreSQL on HDP 2.1.15.0' do 81 | let(:chef_run) do 82 | ChefSpec::SoloRunner.new(platform: 'ubuntu', version: 14.04) do |node| 83 | node.default['hadoop']['distribution'] = 'hdp' 84 | node.default['hadoop']['distribution_version'] = '2.1.15.0' 85 | node.override['hive']['hive_site']['javax.jdo.option.ConnectionURL'] = 'jdbc:postgresql:localhost/hive' 86 | node.automatic['domain'] = 'example.com' 87 | stub_command(/test -L /).and_return(false) 88 | stub_command(/update-alternatives --display /).and_return(false) 89 | stub_command(%r{/sys/kernel/mm/(.*)transparent_hugepage/defrag}).and_return(false) 90 | end.converge(described_recipe) 91 | end 92 | 93 | it 'creates postgresql-jdbc4.jar symlink' do 94 | link = chef_run.link('/usr/lib/hive/lib/postgresql-jdbc4.jar') 95 | expect(link).to link_to('/usr/share/java/postgresql-jdbc4.jar') 96 | end 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /recipes/spark2.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Cookbook Name:: hadoop 3 | # Recipe:: spark2 4 | # 5 | # Copyright © 2013-2017 Cask Data, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | include_recipe 'hadoop::repo' 21 | 22 | pkg = 23 | if node['hadoop']['distribution'] == 'cdh' 24 | 'spark2-core' 25 | elsif iop? 
26 | hadoop_package('spark2-core') 27 | else 28 | hadoop_package('spark2') 29 | end 30 | 31 | package pkg do 32 | action :install 33 | end 34 | 35 | package hadoop_package('spark2-python') do 36 | action :install 37 | end 38 | 39 | # Spark MLib requires this 40 | fortran_libs = 41 | if node['platform_family'] == 'debian' 42 | 'libgfortran3' 43 | else 44 | 'libgfortran' 45 | end 46 | package fortran_libs do 47 | action :install 48 | end 49 | 50 | spark2_conf_dir = "/etc/spark2/#{node['spark2']['conf_dir']}" 51 | 52 | directory spark2_conf_dir do 53 | owner 'root' 54 | group 'root' 55 | mode '0755' 56 | recursive true 57 | action :create 58 | end 59 | 60 | # TODO: /etc/spark2/conf.dist/fairscheduler.xml.template 61 | 62 | spark2_log_dir = 63 | if node['spark2'].key?('spark_env') && node['spark2']['spark_env'].key?('spark_log_dir') 64 | node['spark2']['spark_env']['spark_log_dir'] 65 | else 66 | '/var/log/spark2' 67 | end 68 | 69 | directory spark2_log_dir do 70 | owner 'spark' 71 | group 'spark' 72 | mode '0755' 73 | recursive true 74 | action :create 75 | only_if { node['spark2'].key?('spark_env') && node['spark2']['spark_env'].key?('spark_log_dir') } 76 | end 77 | 78 | unless spark2_log_dir == '/var/log/spark2' 79 | # Delete default directory, if we aren't set to it 80 | directory '/var/log/spark2' do 81 | action :delete 82 | recursive true 83 | not_if 'test -L /var/log/spark2' 84 | end 85 | # symlink 86 | link '/var/log/spark2' do 87 | to spark2_log_dir 88 | end 89 | end 90 | 91 | # Start spark-env.sh 92 | template "#{spark2_conf_dir}/spark-env.sh" do 93 | source 'generic-env.sh.erb' 94 | mode '0755' 95 | owner 'root' 96 | group 'root' 97 | action :create 98 | variables options: node['spark2']['spark_env'] 99 | only_if { node['spark2'].key?('spark_env') && !node['spark2']['spark_env'].empty? } 100 | end # End spark-env.sh 101 | 102 | # Start spark-defaults.conf 103 | template "#{spark2_conf_dir}/spark-defaults.conf" do 104 | source 'generic.properties.erb' 105 | mode '0644' 106 | owner 'root' 107 | group 'root' 108 | variables properties: node['spark2']['spark_defaults'] 109 | only_if { node['spark2'].key?('spark_defaults') && !node['spark2']['spark_defaults'].empty? } 110 | end # End spark-defaults.conf 111 | 112 | # Setup metrics.properties log4j.properties 113 | %w(metrics log4j).each do |propfile| 114 | template "#{spark2_conf_dir}/#{propfile.tr('_', '-')}.properties" do 115 | source 'generic.properties.erb' 116 | mode '0644' 117 | owner 'root' 118 | group 'root' 119 | action :create 120 | variables properties: node['spark2'][propfile] 121 | only_if { node['spark2'].key?(propfile) && !node['spark2'][propfile].empty? 
} 122 | end 123 | end # End metrics.properties log4j.properties 124 | 125 | # Another Hortonworks mess to clean up, their packages force-install blank configs here 126 | directory '/etc/spark2/conf' do 127 | action :delete 128 | recursive true 129 | not_if 'test -L /etc/spark2/conf' 130 | end 131 | 132 | # Update alternatives to point to our configuration 133 | execute 'update spark2-conf alternatives' do 134 | command "update-alternatives --install /etc/spark2/conf spark2-conf /etc/spark2/#{node['spark2']['conf_dir']} 50" 135 | not_if "update-alternatives --display spark2-conf | grep best | awk '{print $5}' | grep /etc/spark2/#{node['spark2']['conf_dir']}" 136 | end 137 | 138 | # Export spark2 environment variables 139 | template '/etc/profile.d/spark2.sh' do 140 | source 'generic-env.sh.erb' 141 | mode '0755' 142 | owner 'root' 143 | group 'root' 144 | variables options: { 'spark2_conf_dir' => "/etc/spark2/#{node['spark2']['conf_dir']}" } 145 | end 146 | --------------------------------------------------------------------------------
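# How these recipes are typically driven (a sketch under assumptions, not a
# documented contract of the cookbook): every service resource above is declared
# with action :nothing, so converging a recipe installs packages and lays down
# configs and init scripts without starting anything. A wrapper recipe is left
# to enable/start the daemons it wants and to fire the one-shot execute
# resources (hdfs-namenode-format, hbase-hdfs-rootdir, the jobhistory directory
# setup) on first converge, e.g. via edit_resource or notifications:
#
#   include_recipe 'hadoop::hadoop_hdfs_namenode'
#
#   edit_resource(:service, 'hadoop-hdfs-namenode') do
#     action [:enable, :start]
#   end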