├── .gitignore ├── README.md ├── Vagrantfile ├── manifests ├── datanode.pp ├── master-single.pp └── master.pp ├── modules ├── avahi │ ├── files │ │ ├── avahi-daemon.conf │ │ └── hosts │ └── manifests │ │ └── init.pp ├── base │ ├── files │ │ ├── id_rsa │ │ ├── id_rsa.pub │ │ ├── motd │ │ └── ssh_config │ └── manifests │ │ └── init.pp ├── cascading │ ├── files │ │ ├── ccsdk.sh │ │ ├── dotcascading │ │ └── sbt │ └── manifests │ │ └── init.pp ├── hadoop │ ├── files │ │ ├── core-site.xml │ │ ├── hadoop-env.sh │ │ ├── hdfs-site-single.xml │ │ ├── hdfs-site.xml │ │ ├── mapred-site.xml │ │ ├── masters │ │ ├── prepare-cluster.sh │ │ ├── slaves │ │ ├── slaves-single │ │ ├── start-all.sh │ │ ├── stop-all.sh │ │ ├── verifier │ │ ├── yarn-env.sh │ │ └── yarn-site.xml │ ├── manifests │ │ └── init.pp │ └── templates │ │ └── hadoop-path.sh.erb └── hbase │ ├── files │ ├── hbase-env.sh │ ├── hbase-site.xml │ └── regionservers │ ├── manifests │ └── init.pp │ └── templates │ └── hbase-path.sh.erb └── single-node └── Vagrantfile /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[a-z] 2 | .vagrant 3 | employees.tgz 4 | hadoop-*.tar.gz 5 | hadoop-*.tar.gz.mds 6 | hbase-*.tar.gz 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vagrant + Cascading + Hadoop 2 Cluster 2 | 3 | Clone this project to create a 4 node [Apache Hadoop](http://hadoop.apache.org) cluster with the [Cascading 4 | SDK](http://www.cascading.org/sdk/) pre-installed. 5 | 6 | The Cascading 2.7 SDK includes Cascading and many of its sub-projects: 7 | 8 | * [Lingual](http://www.cascading.org/lingual/) - ANSI SQL Command Shell and JDBC Driver 9 | * [Pattern](http://www.cascading.org/pattern/) - Machine Learning 10 | * [Cascalog](http://cascalog.org) - Clojure DSL over Cascding 11 | * [Scalding](https://github.com/twitter/scalding) - Scala DSL over Cascading 12 | * [Multitool](http://www.cascading.org/multitool/) - Command line tool for managing large files 13 | * [Load](http://www.cascading.org/load/) - Command line tool for load testing Hadoop 14 | 15 | To make getting started as easy as possible this setup also includes build tools used by parts of the SDK: 16 | 17 | * [gradle](http://www.gradle.org/) - build tool used by Cascading and its 18 | related projects 19 | * [leiningen 2](http://leiningen.org/) - a popular build tool in the clojure 20 | community, which is used in the [cascalog](http://cascalog.org/) tutorial 21 | included in the SDK 22 | * [sbt](http://www.scala-sbt.org/) - a popular build tool in the scala community, which is 23 | used in the [scalding](https://github.com/twitter/scalding/wiki) tutorial included in the SDK 24 | 25 | This work is based on: http://cscarioni.blogspot.co.uk/2012/09/setting-up-hadoop-virtual-cluster-with.html 26 | 27 | ## Deploying the cluster 28 | 29 | First install either [Virtual Box](http://virtualbox.org) (free) or [VMware Fusion/Workstation](http://www.vmware.com/products/fusion/) (paid) and [Vagrant](http://vagrantup.com/) for your platform. If using VMware Fusion/Workstation, you will also need a VMware + Vagrant [license](https://www.vagrantup.com/vmware). 30 | 31 | Then simply clone this repository, change into the directory and bring the cluster up. 32 | 33 | $ vagrant up 34 | 35 | This will set up 4 machines - `master`, `hadoop1`, `hadoop2` and `hadoop3`. Each of them will have two CPUs and .5GB of 36 | RAM. 
If this is too much for your machine, adjust the `Vagrantfile`.
37 | 
38 | The machines will be provisioned using [Puppet](http://puppetlabs.com/). All of them will have hadoop
39 | (apache-hadoop-2.6.0) installed, ssh will be configured, and local name resolution will work.
40 | 
41 | Hadoop is installed in `/opt/hadoop-2.6.0` and all tools are in the `PATH`.
42 | 
43 | The `master` machine acts as the namenode and the yarn resource manager; the 3 others are data nodes and run node
44 | managers.
45 | 
46 | ### Networking
47 | 
48 | The cluster uses [zeroconf](http://en.wikipedia.org/wiki/Zero-configuration_networking) (a.k.a. Bonjour) for name
49 | resolution. This means that you never have to remember any IP nor will you have to fiddle with your `/etc/hosts` file.
50 | 
51 | Name resolution works from the host to all VMs and between all VMs as well. If you are using Linux, make sure you have
52 | `avahi-daemon` installed and running. On a Mac everything should just work (TM) without doing anything. Windows
53 | users have to install [Bonjour for Windows](http://support.apple.com/kb/dl999) before starting the cluster.
54 | 
55 | The network used is `192.168.7.0/24`. If that causes any problems, change the `Vagrantfile` and
56 | `modules/avahi/files/hosts` files to something that works for you. Since everything else is name based, no other change
57 | is required.
58 | 
59 | ### Starting the cluster
60 | 
61 | This cluster uses the `ssh-into-all-the-boxes-and-start-things-up` approach, which is fine for testing.
62 | 
63 | Once all machines are up and provisioned, the cluster can be started. Log into the master, format HDFS and start the
64 | cluster.
65 | 
66 | $ vagrant ssh master
67 | $ (master) sudo prepare-cluster.sh
68 | $ (master) sudo start-all.sh
69 | 
70 | After a little while, all daemons will be running and you will have a fully working hadoop cluster. Note that the
71 | `prepare-cluster.sh` step is a one-time action.
72 | 
73 | ### Stopping the cluster
74 | 
75 | If you want to shut down your cluster, but want to keep it around for later use, shut down all the services and tell
76 | Vagrant to stop the machines like this:
77 | 
78 | $ vagrant ssh master
79 | $ (master) sudo stop-all.sh
80 | $ exit or Ctrl-D
81 | $ vagrant halt
82 | 
83 | When you want to use your cluster again, simply do this:
84 | 
85 | $ vagrant up
86 | $ vagrant ssh master
87 | $ (master) sudo start-all.sh
88 | 
89 | 
90 | ### Getting rid of the cluster
91 | 
92 | If you don't need the cluster anymore and want to get your disk space back, do this:
93 | 
94 | $ vagrant destroy -f
95 | 
96 | This will only delete the VMs; all local files in the directory stay untouched and can be reused if you decide to
97 | start up a new cluster.
98 | 
99 | ## Interacting with the cluster
100 | 
101 | ### Web interface
102 | 
103 | You can access all services of the cluster with your web browser.
104 | 
105 | * namenode: http://master.local:50070/dfshealth.jsp
106 | * application master: http://master.local:8088/cluster
107 | * job history server: http://master.local:19888/jobhistory
108 | 
109 | ### Command line
110 | 
111 | To interact with the cluster on the command line, log into the master and use the `hadoop` command.
112 | 
113 | $ vagrant ssh master
114 | $ (master) hadoop fs -ls /
115 | $ ...
116 | 
117 | You can access the host file system from the `/vagrant` directory, which means that you can drop your hadoop job in
118 | there and run it on your own fully distributed hadoop cluster.
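For example, assuming you have packaged your job into a jar called `my-job.jar` (the jar name, main class and HDFS paths below are placeholders) and copied it into the project directory on your host, submitting it from the master could look like this:

$ vagrant ssh master
$ (master) hadoop jar /vagrant/my-job.jar com.example.MyJob /user/vagrant/input /user/vagrant/output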
119 | 
120 | ## Performance
121 | 
122 | Since this is a fully virtualized environment running on your computer, it will not be super-fast. This is not the goal
123 | of this setup. The goal is to have a fully distributed cluster for testing and troubleshooting.
124 | 
125 | To avoid overloading the host machine, each tasktracker has a hard limit of 1 map task and 1 reduce task at a time.
126 | 
127 | ## Cascading SDK
128 | 
129 | Puppet will download the latest [Cascading SDK](http://www.cascading.org/sdk/) 2.7-wip build and put all SDK tools in
130 | the `PATH`. The SDK itself can be found in `/opt/CascadingSDK`.
131 | 
132 | ### Driven
133 | 
134 | The SDK allows you to install the [Driven plugin for Cascading](http://cascading.io/driven), by simply running
135 | `install-driven-plugin`. This will install the plugin for the vagrant user in `/home/vagrant/.cascading/.driven-plugin`.
136 | 
137 | Installing the plugin will cause every Cascading-based application to send telemetry to `https://driven.cascading.io`.
138 | If you no longer want this to happen, you can simply delete the installation directory of the plugin mentioned above.
139 | 
140 | For more information about Driven, please read the [Driven
141 | documentation](http://docs.cascading.io/driven/1.0/getting-started/index.html).
142 | 
143 | ## HBase
144 | 
145 | This version of the cluster also contains [Apache HBase](http://hbase.apache.org). The layout on disk is similar to
146 | Hadoop. The distribution is in `/opt/hbase-<version>`. You can start the HBase cluster like so:
147 | 
148 | $ (master) sudo start-hbase.sh
149 | 
150 | The Hadoop cluster must be running before you issue this command, since HBase requires HDFS to be up and running.
151 | 
152 | The cluster is shut down like so:
153 | 
154 | $ (master) sudo stop-hbase.sh
155 | 
156 | The setup is fully distributed. `hadoop1`, `hadoop2` and `hadoop3` are running a
157 | [zookeeper](http://zookeeper.apache.org) instance and a region-server each. The HBase master is running on the `master`
158 | VM.
159 | 
160 | The web interface of the HBase master is http://master.local:60010.
161 | 
162 | ## Hacking & Troubleshooting & Tips & Tricks
163 | 
164 | ### Getting help
165 | 
166 | If something is not working right, join the [Cascading
167 | mailing list](https://groups.google.com/forum/#!forum/cascading-user) and post your problem there.
168 | 
169 | ## Single Node setup
170 | 
171 | If your computer is not capable of running 4 VMs at a time, you can still benefit from this setup. The `single-node`
172 | directory contains an alternative `Vagrantfile`, which only starts the `master` and deploys everything on it.
173 | 
174 | The interaction and the start and stop sequences work the same way as in the multi-VM cluster, except that it isn't fully
175 | distributed. This slimmed-down version of the setup also does not include HBase.
176 | 
177 | To run the single node setup, run `vagrant up` in the `single-node` directory instead of the root directory. Everything
178 | else stays the same.
179 | 
180 | ## Hacking & Troubleshooting
181 | 
182 | ### File sharing
183 | 
184 | Vagrant makes it easy to share files between the VMs of the cluster and your host machine. The project directory is
185 | mounted under `/vagrant`, which enables you to get files from or to your host by simply copying them into that
186 | directory.
187 | 
188 | ### Storage locations
189 | 
190 | The namenode stores the `fsimage` in `/srv/hadoop/namenode`. The datanodes store all data in
191 | `/srv/hadoop/datanode`.
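If you want to see what actually ends up on disk, a quick sketch (using the paths configured in `hdfs-site.xml`) is to peek at one of the datanodes and then ask HDFS for a report from the master:

$ vagrant ssh hadoop1
$ (hadoop1) ls /srv/hadoop/datanode
$ (hadoop1) exit
$ vagrant ssh master
$ (master) hdfs dfsadmin -report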
192 | 193 | ### Resetting the cluster 194 | 195 | Sometimes, when experimenting too much, your cluster might not start anymore. If that is the case, you can easily reset 196 | it like so. 197 | 198 | $ for host in master hadoop1 hadoop2 hadoop3; do vagrant ssh $host --command 'sudo rm -rf /srv/hadoop' ; done 199 | $ vagrant provision 200 | 201 | After those two commands your cluster is in the same state as when you started it for the first time. You can now 202 | reformat the namenode and restart all services. 203 | 204 | ### Puppet 205 | 206 | If you change any of the puppet modules, you can simply apply the changes with vagrants built-in provisioner. 207 | 208 | $ vagrant provision 209 | 210 | ### Hadoop download 211 | 212 | In order to save bandwidth and time we download hadoop only once and store it in the `/vagrant` directory, so that the 213 | other vms can reuse it. If the download fails for some reason, delete the tarball and rerun `vagrant provision`. 214 | 215 | We are also downloading a file containing checksums for the tarball. They are verified, before the cluster is started. 216 | If something went wrong during the download, you will see the `verify_tarball` part of puppet fail. If that is the case, 217 | delete the tarball and the checksum file (`.mds`) and rerun `vagrant provision`. 218 | 219 | ## Wishlist 220 | 221 | - have a way to configure the names/ips in only one file 222 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | VAGRANTFILE_API_VERSION = "2" 5 | 6 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 7 | config.vm.box = "cascading-hadoop-base" 8 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 9 | 10 | config.vm.provider :virtualbox do |vb| 11 | vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] 12 | end 13 | 14 | config.vm.provider "vmware_fusion" do |v, override| 15 | override.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box" 16 | v.vmx["memsize"] = "512" 17 | v.vmx["numvcpus"] = "1" 18 | end 19 | 20 | config.vm.define :hadoop1 do |hadoop1| 21 | hadoop1.vm.network "private_network", ip: "192.168.7.12" 22 | hadoop1.vm.hostname = "hadoop1.local" 23 | 24 | config.vm.provision :puppet do |puppet| 25 | puppet.manifest_file = "datanode.pp" 26 | puppet.module_path = "modules" 27 | end 28 | end 29 | 30 | config.vm.define :hadoop2 do |hadoop2| 31 | hadoop2.vm.network "private_network", ip: "192.168.7.13" 32 | hadoop2.vm.hostname = "hadoop2.local" 33 | 34 | config.vm.provision :puppet do |puppet| 35 | puppet.manifest_file = "datanode.pp" 36 | puppet.module_path = "modules" 37 | end 38 | end 39 | 40 | config.vm.define :hadoop3 do |hadoop3| 41 | hadoop3.vm.network "private_network", ip: "192.168.7.14" 42 | hadoop3.vm.hostname = "hadoop3.local" 43 | 44 | config.vm.provision :puppet do |puppet| 45 | puppet.manifest_file = "datanode.pp" 46 | puppet.module_path = "modules" 47 | end 48 | end 49 | 50 | config.vm.define :master, primary: true do |master| 51 | master.vm.network "private_network", ip: "192.168.7.10" 52 | master.vm.hostname = "master.local" 53 | 54 | config.vm.provision :puppet do |puppet| 55 | puppet.manifest_file = "master.pp" 56 | puppet.module_path = "modules" 57 | end 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /manifests/datanode.pp: 
-------------------------------------------------------------------------------- 1 | include base 2 | include hadoop 3 | include hbase 4 | include avahi 5 | -------------------------------------------------------------------------------- /manifests/master-single.pp: -------------------------------------------------------------------------------- 1 | include base 2 | 3 | class{ 'hadoop': 4 | slaves_file => "puppet:///modules/hadoop/slaves-single", 5 | hdfs_site_file => "puppet:///modules/hadoop/hdfs-site-single.xml" 6 | } 7 | 8 | #include hbase 9 | include avahi 10 | include cascading 11 | -------------------------------------------------------------------------------- /manifests/master.pp: -------------------------------------------------------------------------------- 1 | include base 2 | include hadoop 3 | include hbase 4 | include avahi 5 | include cascading 6 | -------------------------------------------------------------------------------- /modules/avahi/files/avahi-daemon.conf: -------------------------------------------------------------------------------- 1 | # This file is part of avahi. 2 | # 3 | # avahi is free software; you can redistribute it and/or modify it 4 | # under the terms of the GNU Lesser General Public License as 5 | # published by the Free Software Foundation; either version 2 of the 6 | # License, or (at your option) any later version. 7 | # 8 | # avahi is distributed in the hope that it will be useful, but WITHOUT 9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 10 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 11 | # License for more details. 12 | # 13 | # You should have received a copy of the GNU Lesser General Public 14 | # License along with avahi; if not, write to the Free Software 15 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 16 | # USA. 17 | 18 | # See avahi-daemon.conf(5) for more information on this configuration 19 | # file! 
20 | 21 | [server] 22 | #host-name=foo 23 | #domain-name=cascading 24 | #browse-domains=cascading 25 | use-ipv4=yes 26 | use-ipv6=no 27 | #allow-interfaces=eth0 28 | #deny-interfaces=eth1 29 | #check-response-ttl=no 30 | #use-iff-running=no 31 | #enable-dbus=yes 32 | #disallow-other-stacks=no 33 | #allow-point-to-point=no 34 | #cache-entries-max=4096 35 | #clients-max=4096 36 | #objects-per-client-max=1024 37 | #entries-per-entry-group-max=32 38 | 39 | [wide-area] 40 | enable-wide-area=yes 41 | 42 | [publish] 43 | #disable-publishing=no 44 | #disable-user-service-publishing=no 45 | #add-service-cookie=no 46 | #publish-addresses=yes 47 | #publish-hinfo=yes 48 | #publish-workstation=yes 49 | #publish-domain=yes 50 | #publish-dns-servers=192.168.50.1, 192.168.50.2 51 | #publish-resolv-conf-dns-servers=yes 52 | #publish-aaaa-on-ipv4=yes 53 | #publish-a-on-ipv6=no 54 | 55 | [reflector] 56 | #enable-reflector=no 57 | #reflect-ipv=no 58 | 59 | [rlimits] 60 | #rlimit-as= 61 | rlimit-core=0 62 | rlimit-data=4194304 63 | rlimit-fsize=0 64 | rlimit-nofile=768 65 | rlimit-stack=4194304 66 | rlimit-nproc=3 67 | 68 | -------------------------------------------------------------------------------- /modules/avahi/files/hosts: -------------------------------------------------------------------------------- 1 | 127.0.0.1 localhost 2 | 192.168.7.10 master.local master 3 | 192.168.7.11 backup.local backup 4 | 192.168.7.12 hadoop1.local hadoop1 5 | 192.168.7.13 hadoop2.local hadoop2 6 | 192.168.7.14 hadoop3.local hadoop3 7 | -------------------------------------------------------------------------------- /modules/avahi/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class avahi{ 2 | package { "avahi-daemon": 3 | ensure => "installed", 4 | require => Exec['apt-get update'] 5 | } 6 | 7 | file { "/etc/avahi/avahi-daemon.conf": 8 | source => "puppet:///modules/avahi/avahi-daemon.conf", 9 | owner => root, 10 | group => root, 11 | notify => Service["avahi-daemon"], 12 | require => Package["avahi-daemon"] 13 | } 14 | 15 | service{ "avahi-daemon": 16 | ensure => "running", 17 | enable => true, 18 | require => File['/etc/avahi/avahi-daemon.conf'] 19 | } 20 | 21 | file{ "/etc/hosts": 22 | source => "puppet:///modules/avahi/hosts", 23 | owner => root, 24 | group => root, 25 | } 26 | 27 | file{ "/etc/avahi/hosts": 28 | source => "puppet:///modules/avahi/hosts", 29 | owner => root, 30 | group => root, 31 | notify => Service["avahi-daemon"] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/base/files/id_rsa: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEAnh3QT1Rrkj1Tjvp/eI/2OX/FShiAM42g3t9IbvQT3rQPghha 3 | bM8OJw5IckbVuSfc4Rirs5nAqiE07jOgRqR2sfWSj+5UUYxvC4d/NLAhgP+vdRI0 4 | lZkfJKy7xmSEMXshyqQwSxJnKSQQecqEsrhaSvuEYZiS3+yJmQO5rcYYHTxoXunK 5 | 1GEWy7pDYL+tbrQf4W63vbYDV9oIhLqwbI/QiOcoTzzYKuoj/GQTsgl4FDX9+2wH 6 | uwI0FDFeRuiTGdXJRHjTzflMuWK36I68d8B3xuNDbsbjKxSEKOagmGt6vf0lTsbC 7 | SC1rfoWdm28XRnEDnfQF2BTTaMD2smNAXQJEqwIDAQABAoIBADkNyPzXGXnLZztQ 8 | aG64g8B7ulTiQmBX2pSRPTHMIN9bWb03zGk2zYSKQtn5dWU6XRlcLZjBAiRhlxfc 9 | yRi0f5/3XMtS1FoYsnkWi3AXnf9OQ6Ga0B6rAfHZ4mfUHV89bxQP6t0r5s2RleBc 10 | k0VqACrOVxZgV581WvE7xuRNdqYVdNJ2gHYCKJJhn+zboDMZxxqEMUuzAWuwV84S 11 | 3LGGMGzkzMG0x7h6zKCuL3z8kkQYf5FNZz5YthAobGYHSf8U8Ck7/Wuk0Rric5us 12 | pZaLTLClU4fivoZ3Bj2GJtUrQhfLidtVFdy2KlxzuqsMQwqxLlW+NrZxS/mooe1P 13 | 
q1dWcgECgYEAzwu/KuSmFguDGh1uwDOixn1ujYZdqJQ7ada7cNDxJTHynr0d2sQ8 14 | B7Y2mjkc1GbiWAtjl8URhjar4lwWNAUBL9CULqHaZ7wuKcFr0JYZof6wD1oNpfnz 15 | ZpAyq2SV8qVkrrqa38TlyQC29E+cpDM69NhNuH9WebuJKLVkFxLd8GMCgYEAw4Bs 16 | lcFIWvFLv3MrX2NRJvDeb1O0W9FIq9xNEzgq8iXhjahGYQZZh7YgIr92WNgqYqCQ 17 | T/aiKyvTz5fQz/GB/OSxTcsZu8Tf35SZuk9IOqHmQRoGa4L/B4zKbXV0+giDUGYc 18 | HCvacA9SZRDbTcKfyhtsqZ2X30YcI6lSCKadeRkCgYBQfZqWDEndLCCNmH+jzO8t 19 | BzphXqkujmICpW49T51P/nj56T+f+sXzoYzcq/NfR5ga7mKLcBt99+cemvouwvtR 20 | IV9aOaX0yh2QMhe+AXx5k8/SSAoiSmerqcdpBjOdH1ti6jynXZ6PY+28UO7ujtQO 21 | t8CPR0HRFW0fHFWK+tS5TQKBgQCjVQri/o0VtZxkQbMNyWsGTpx3hAYd47aEJtSZ 22 | 5wYyMiXD9L9/1g88i5NDlJQqW8bliWGn/NS1nrMiL6L757iC8QmUhcdlEXE8WOkq 23 | N4yPt60n3LdaYSHyS9TfKA9nyW7z36tCefzQPGzJFREzYROghdL9zjURC4D85jws 24 | EA9mGQKBgQCLIt42pkAaN8eqJ7IBF+3CAckstkq14qzKB8UGdEmuak4bcxJLIvcv 25 | rdGQ+fdtitmMU2fx3YC4s3Z8GoLFzP/Esjb4eSGt0pbhSxYIcHhLCmjmHxnVcZ3S 26 | 6z2Pj3asryK7XxoZiODvZJcS1TbaOp1e2YQyxTEL5fwy4fNr4V+bIQ== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /modules/base/files/id_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCeHdBPVGuSPVOO+n94j/Y5f8VKGIAzjaDe30hu9BPetA+CGFpszw4nDkhyRtW5J9zhGKuzmcCqITTuM6BGpHax9ZKP7lRRjG8Lh380sCGA/691EjSVmR8krLvGZIQxeyHKpDBLEmcpJBB5yoSyuFpK+4RhmJLf7ImZA7mtxhgdPGhe6crUYRbLukNgv61utB/hbre9tgNX2giEurBsj9CI5yhPPNgq6iP8ZBOyCXgUNf37bAe7AjQUMV5G6JMZ1clEeNPN+Uy5Yrfojrx3wHfG40NuxuMrFIQo5qCYa3q9/SVOxsJILWt+hZ2bbxdGcQOd9AXYFNNowPayY0BdAkSr cscarioni@Carlos-MacBook-Air.local 2 | -------------------------------------------------------------------------------- /modules/base/files/motd: -------------------------------------------------------------------------------- 1 | |o 2 | ,---.,---.,---.,---.,---.,---|.,---.,---. 
3 | | ,---|`---.| ,---|| ||| || | 4 | `---'`---^`---'`---'`---^`---'`` '`---| 5 | `---' 6 | -------------------------------------------------------------------------------- /modules/base/files/ssh_config: -------------------------------------------------------------------------------- 1 | UserKnownHostsFile=/dev/null 2 | StrictHostKeyChecking=no 3 | -------------------------------------------------------------------------------- /modules/base/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class base{ 2 | group { "puppet": 3 | ensure => "present", 4 | } 5 | exec { 'apt-get update': 6 | command => '/usr/bin/apt-get update', 7 | } 8 | 9 | package { "openjdk-6-jdk" : 10 | ensure => present, 11 | require => Exec['apt-get update'] 12 | } 13 | 14 | file { "/root/.ssh": 15 | ensure => "directory", 16 | } 17 | 18 | file { 19 | "/root/.ssh/config": 20 | source => "puppet:///modules/base/ssh_config", 21 | mode => 600, 22 | owner => root, 23 | group => root, 24 | } 25 | 26 | file { 27 | "/root/.ssh/id_rsa": 28 | source => "puppet:///modules/base/id_rsa", 29 | mode => 600, 30 | owner => root, 31 | group => root, 32 | } 33 | 34 | file { 35 | "/root/.ssh/id_rsa.pub": 36 | source => "puppet:///modules/base/id_rsa.pub", 37 | mode => 644, 38 | owner => root, 39 | group => root, 40 | } 41 | 42 | ssh_authorized_key { "ssh_key": 43 | ensure => "present", 44 | key => "AAAAB3NzaC1yc2EAAAADAQABAAABAQCeHdBPVGuSPVOO+n94j/Y5f8VKGIAzjaDe30hu9BPetA+CGFpszw4nDkhyRtW5J9zhGKuzmcCqITTuM6BGpHax9ZKP7lRRjG8Lh380sCGA/691EjSVmR8krLvGZIQxeyHKpDBLEmcpJBB5yoSyuFpK+4RhmJLf7ImZA7mtxhgdPGhe6crUYRbLukNgv61utB/hbre9tgNX2giEurBsj9CI5yhPPNgq6iP8ZBOyCXgUNf37bAe7AjQUMV5G6JMZ1clEeNPN+Uy5Yrfojrx3wHfG40NuxuMrFIQo5qCYa3q9/SVOxsJILWt+hZ2bbxdGcQOd9AXYFNNowPayY0BdAkSr", 45 | type => "ssh-rsa", 46 | user => "root", 47 | require => File['/root/.ssh/id_rsa.pub'] 48 | } 49 | 50 | 51 | file { "/etc/motd": 52 | source => "puppet:///modules/base/motd", 53 | mode => 644, 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /modules/cascading/files/ccsdk.sh: -------------------------------------------------------------------------------- 1 | export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 2 | export CASCADING_SDK_HOME=/opt/CascadingSDK 3 | 4 | . 
$CASCADING_SDK_HOME/etc/setenv.sh 5 | 6 | export PATH=$PATH:/opt/tools/bin 7 | 8 | export GRADLE_HOME=/opt/tools/gradle-1.10 9 | export PATH=$GRADLE_HOME/bin:$PATH 10 | -------------------------------------------------------------------------------- /modules/cascading/files/dotcascading: -------------------------------------------------------------------------------- 1 | # set the cascading platform to hadoop2-mr1 2 | cascading.platform.name=hadoop2-mr1 3 | -------------------------------------------------------------------------------- /modules/cascading/files/sbt: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -Dsbt.log.noformat=true -Xms512M -Xmx1536M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=384M -jar `dirname $0`/sbt-launch.jar "$@" 3 | -------------------------------------------------------------------------------- /modules/cascading/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class cascading{ 2 | 3 | file { ["/opt/tools/", "/opt/tools/bin"]: 4 | ensure => "directory" 5 | } 6 | 7 | exec { "download_sdk": 8 | command => "wget -P /tmp -i http://files.cascading.org/sdk/2.7/latest.txt", 9 | path => $path, 10 | # S3 can be slow at times hence a longer timeout 11 | timeout => 1800, 12 | unless => "ls /opt | grep CascadingSDK", 13 | require => Package["openjdk-6-jdk"] 14 | } 15 | 16 | exec { "unpack_sdk" : 17 | command => "tar xf /tmp/Cascading*tgz -C /opt && mv /opt/Cascading*SDK* /opt/CascadingSDK", 18 | path => $path, 19 | unless => "ls /opt | grep CascadingSDK", 20 | require => Exec["download_sdk"] 21 | } 22 | 23 | exec { "sdk_permissions" : 24 | command => "chown -R vagrant /opt/CascadingSDK", 25 | path => $path, 26 | require => Exec["unpack_sdk"] 27 | } 28 | 29 | 30 | file { "/etc/profile.d/ccsdk.sh": 31 | source => "puppet:///modules/cascading/ccsdk.sh", 32 | owner => root, 33 | group => root, 34 | } 35 | 36 | file { "/opt/tools/bin/sbt": 37 | source => "puppet:///modules/cascading/sbt", 38 | owner => root, 39 | group => root, 40 | mode => 755, 41 | } 42 | 43 | file { "/home/vagrant/.cascading/default.properties": 44 | source => "puppet:///modules/cascading/dotcascading", 45 | owner => vagrant, 46 | group => vagrant, 47 | } 48 | 49 | file { "/home/vagrant/.cascading/": 50 | ensure => "directory", 51 | owner => vagrant, 52 | group => vagrant, 53 | } 54 | 55 | exec { "download_leiningen": 56 | command => "wget -q https://raw.github.com/technomancy/leiningen/stable/bin/lein -O /opt/tools/bin/lein && chmod +x /opt/tools/bin/lein", 57 | path => $path, 58 | creates => "/opt/tools/bin/lein", 59 | require => File["/opt/tools/bin"], 60 | } 61 | 62 | exec { "download_sbt_jar": 63 | command => "wget -q http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch//0.13.1/sbt-launch.jar -O /opt/tools/bin/sbt-launch.jar", 64 | path => $path, 65 | creates => "/opt/tools/bin/sbt-launch.jar", 66 | require => File["/opt/tools/bin"], 67 | } 68 | 69 | exec { "download_gradle": 70 | command => "wget -q http://services.gradle.org/distributions/gradle-1.10-bin.zip -O /tmp/gradle.zip && unzip -o /tmp/gradle.zip -d /opt/tools", 71 | path => $path, 72 | creates => "/opt/tools/gradle-1.10", 73 | require => Package["unzip"] 74 | } 75 | 76 | package { "unzip": 77 | ensure => "installed" 78 | } 79 | 80 | package { "curl": 81 | ensure => "installed" 82 | } 83 | } 84 | -------------------------------------------------------------------------------- 
/modules/hadoop/files/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | fs.default.name 6 | hdfs://master.local:9000 7 | The name of the default file system. A URI whose scheme and authority determine the FileSystem implementation. 8 | 9 | 10 | -------------------------------------------------------------------------------- /modules/hadoop/files/hadoop-env.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2011 The Apache Software Foundation 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Set Hadoop-specific environment variables here. 20 | 21 | # The only required environment variable is JAVA_HOME. All others are 22 | # optional. When running a distributed configuration it is best to 23 | # set JAVA_HOME in this file, so that it is correctly defined on 24 | # remote nodes. 25 | 26 | # The java implementation to use. 27 | export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 28 | 29 | # The jsvc implementation to use. Jsvc is required to run secure datanodes. 30 | #export JSVC_HOME=${JSVC_HOME} 31 | 32 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} 33 | 34 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 35 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do 36 | if [ "$HADOOP_CLASSPATH" ]; then 37 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f 38 | else 39 | export HADOOP_CLASSPATH=$f 40 | fi 41 | done 42 | 43 | # The maximum amount of heap to use, in MB. Default is 1000. 44 | #export HADOOP_HEAPSIZE= 45 | #export HADOOP_NAMENODE_INIT_HEAPSIZE="" 46 | 47 | # Extra Java runtime options. Empty by default. 48 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" 49 | 50 | # Command specific options appended to HADOOP_OPTS when specified 51 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS" 52 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS" 53 | 54 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" 55 | 56 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc) 57 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" 58 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" 59 | 60 | # On secure datanodes, user to run the datanode as after dropping privileges 61 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} 62 | 63 | # Where log files are stored. 
$HADOOP_HOME/logs by default. 64 | #export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER 65 | 66 | # Where log files are stored in the secure data environment. 67 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} 68 | 69 | # The directory where pid files are stored. /tmp by default. 70 | # NOTE: this should be set to a directory that can only be written to by 71 | # the user that will run the hadoop daemons. Otherwise there is the 72 | # potential for a symlink attack. 73 | export HADOOP_PID_DIR=${HADOOP_PID_DIR} 74 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} 75 | 76 | # A string representing this instance of hadoop. $USER by default. 77 | export HADOOP_IDENT_STRING=$USER 78 | -------------------------------------------------------------------------------- /modules/hadoop/files/hdfs-site-single.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dfs.replication 6 | 1 7 | The actual number of replications can be specified when the file is created. 8 | 9 | 10 | dfs.permissions 11 | false 12 | 13 | If "true", enable permission checking in HDFS. 14 | If "false", permission checking is turned off, 15 | but all other behavior is unchanged. 16 | Switching from one parameter value to the other does not change the mode, 17 | owner or group of files or directories. 18 | 19 | 20 | 21 | dfs.data.dir 22 | /srv/hadoop/datanode 23 | 24 | 25 | dfs.name.dir 26 | /srv/hadoop/namenode 27 | 28 | 29 | -------------------------------------------------------------------------------- /modules/hadoop/files/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dfs.replication 6 | 3 7 | The actual number of replications can be specified when the file is created. 8 | 9 | 10 | dfs.permissions 11 | false 12 | 13 | If "true", enable permission checking in HDFS. 14 | If "false", permission checking is turned off, 15 | but all other behavior is unchanged. 16 | Switching from one parameter value to the other does not change the mode, 17 | owner or group of files or directories. 18 | 19 | 20 | 21 | dfs.data.dir 22 | /srv/hadoop/datanode 23 | 24 | 25 | dfs.name.dir 26 | /srv/hadoop/namenode 27 | 28 | 29 | -------------------------------------------------------------------------------- /modules/hadoop/files/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | mapred.job.tracker 6 | master.local:9001 7 | The host and port that the MapReduce job tracker runs at. 8 | 9 | 10 | mapred.tasktracker.map.tasks.maximum 11 | 1 12 | 13 | 14 | mapred.tasktracker.reduce.tasks.maximum 15 | 1 16 | 17 | 18 | mapreduce.jobhistory.address 19 | master.local:10020 20 | 21 | 22 | mapreduce.jobhistory.webapp.address 23 | master.local:19888 24 | 25 | 26 | mapreduce.framework.name 27 | yarn 28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/hadoop/files/masters: -------------------------------------------------------------------------------- 1 | master.local 2 | -------------------------------------------------------------------------------- /modules/hadoop/files/prepare-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | . 
/etc/profile 4 | 5 | export HDFS_USER=hdfs 6 | 7 | su - $HDFS_USER -c "$HADOOP_PREFIX/bin/hdfs namenode -format" 8 | 9 | -------------------------------------------------------------------------------- /modules/hadoop/files/slaves: -------------------------------------------------------------------------------- 1 | hadoop1.local 2 | hadoop2.local 3 | hadoop3.local 4 | -------------------------------------------------------------------------------- /modules/hadoop/files/slaves-single: -------------------------------------------------------------------------------- 1 | master.local 2 | -------------------------------------------------------------------------------- /modules/hadoop/files/start-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | . /etc/profile 4 | 5 | set +x 6 | 7 | export HDFS_USER=hdfs 8 | export YARN_USER=yarn 9 | export HISTORY_SERVER_USER=mapred 10 | 11 | su - $HDFS_USER -c "$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode" 12 | 13 | for slave in $(cat $HADOOP_CONF_DIR/slaves); do 14 | ssh $slave "su - $HDFS_USER -c \"$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode\""; 15 | done 16 | 17 | su - $YARN_USER -c "$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager" 18 | 19 | for slave in $(cat $HADOOP_CONF_DIR/slaves); do 20 | ssh $slave "su - $YARN_USER -c \"$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager\"" 21 | done 22 | 23 | # XXX is optional and it is totally unclear from the docs, that the correct 24 | # config is, to get this running 25 | su - $YARN_USER -c "$HADOOP_YARN_HOME/sbin/yarn-daemon.sh start proxyserver --config $HADOOP_CONF_DIR" 26 | 27 | # give the namenode some time to come up 28 | sleep 20 29 | # work around https://issues.apache.org/jira/browse/HADOOP-9923 30 | NEEDS_STAGING_DIR=$(hadoop fs -test -d /tmp/hadoop-yarn/staging > /dev/null 2>&1) 31 | if $NEEDS_STAGING_DIR; then 32 | su - $YARN_USER -c "hadoop fs -mkdir -p /tmp/hadoop-yarn/staging/history/done" 33 | su - $YARN_USER -c "hadoop fs -chmod -R 777 /tmp/" 34 | fi 35 | 36 | su - $HISTORY_SERVER_USER -c "$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR" 37 | 38 | su - vagrant -c "$HADOOP_PREFIX/bin/hadoop fs -mkdir -p /user/vagrant" 39 | -------------------------------------------------------------------------------- /modules/hadoop/files/stop-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | . 
/etc/profile 4 | 5 | export HDFS_USER=hdfs 6 | export YARN_USER=yarn 7 | export HISTORY_SERVER_USER=mapred 8 | 9 | su - $HISTORY_SERVER_USER -c "$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR" 10 | 11 | su - $YARN_USER -c "$HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_DIR" 12 | 13 | for slave in $(cat $HADOOP_CONF_DIR/slaves); do 14 | ssh $slave "su - $YARN_USER -c \"$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager\"" 15 | done 16 | 17 | su - $YARN_USER -c "$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager" 18 | 19 | for slave in $(cat $HADOOP_CONF_DIR/slaves); do 20 | ssh $slave "su - $HDFS_USER -c \"$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode\""; 21 | done 22 | 23 | su - $HDFS_USER -c "$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode" 24 | -------------------------------------------------------------------------------- /modules/hadoop/files/verifier: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | md5sum --check <(head -n 1 $1 |\ 3 | tr -d '\n' | sed -e "s/MD5 = //g;s/ //g" |\ 4 | awk -F: '{print tolower($2), "", "/vagrant/"$1}') 5 | -------------------------------------------------------------------------------- /modules/hadoop/files/yarn-env.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # User for YARN daemons 17 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} 18 | 19 | # resolve links - $0 may be a softlink 20 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" 21 | 22 | # some Java parameters 23 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 24 | if [ "$JAVA_HOME" != "" ]; then 25 | #echo "run java in $JAVA_HOME" 26 | JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | if [ "$JAVA_HOME" = "" ]; then 30 | echo "Error: JAVA_HOME is not set." 31 | exit 1 32 | fi 33 | 34 | JAVA=$JAVA_HOME/bin/java 35 | JAVA_HEAP_MAX=-Xmx1000m 36 | 37 | # For setting YARN specific HEAP sizes please use this 38 | # Parameter and set appropriately 39 | # YARN_HEAPSIZE=1000 40 | 41 | # check envvars which might override default args 42 | if [ "$YARN_HEAPSIZE" != "" ]; then 43 | JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" 44 | fi 45 | 46 | # Resource Manager specific parameters 47 | 48 | # Specify the max Heapsize for the ResourceManager using a numerical value 49 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 50 | # the value to 1000. 
51 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 52 | # and/or YARN_RESOURCEMANAGER_OPTS. 53 | # If not specified, the default value will be picked from either YARN_HEAPMAX 54 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 55 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000 56 | 57 | # Specify the JVM options to be used when starting the ResourceManager. 58 | # These options will be appended to the options specified as YARN_OPTS 59 | # and therefore may override any similar flags set in YARN_OPTS 60 | #export YARN_RESOURCEMANAGER_OPTS= 61 | 62 | # Node Manager specific parameters 63 | 64 | # Specify the max Heapsize for the NodeManager using a numerical value 65 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 66 | # the value to 1000. 67 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 68 | # and/or YARN_NODEMANAGER_OPTS. 69 | # If not specified, the default value will be picked from either YARN_HEAPMAX 70 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 71 | #export YARN_NODEMANAGER_HEAPSIZE=1000 72 | 73 | # Specify the JVM options to be used when starting the NodeManager. 74 | # These options will be appended to the options specified as YARN_OPTS 75 | # and therefore may override any similar flags set in YARN_OPTS 76 | #export YARN_NODEMANAGER_OPTS= 77 | 78 | # so that filenames w/ spaces are handled correctly in loops below 79 | IFS= 80 | 81 | 82 | # default log directory & file 83 | if [ "$YARN_LOG_DIR" = "" ]; then 84 | YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" 85 | fi 86 | if [ "$YARN_LOGFILE" = "" ]; then 87 | YARN_LOGFILE='yarn.log' 88 | fi 89 | 90 | # default policy file for service-level authorization 91 | if [ "$YARN_POLICYFILE" = "" ]; then 92 | YARN_POLICYFILE="hadoop-policy.xml" 93 | fi 94 | 95 | # restore ordinary behaviour 96 | unset IFS 97 | 98 | 99 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" 100 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" 101 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" 102 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" 103 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" 104 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" 105 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 106 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 107 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then 108 | YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" 109 | fi 110 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" 111 | 112 | 113 | -------------------------------------------------------------------------------- /modules/hadoop/files/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | yarn.resourcemanager.address 6 | master.local:8032 7 | 8 | 9 | yarn.resourcemanager.scheduler.address 10 | master.local:8030 11 | 12 | 13 | yarn.resourcemanager.resource-tracker.address 14 | master.local:8031 15 | 16 | 17 | yarn.resourcemanager.admin.address 18 | master.local:8033 19 | 20 | 21 | yarn.acl.enable 22 | false 23 | 24 | 25 | yarn.nodemanager.aux-services 26 | mapreduce_shuffle 27 | shuffle service that needs to be set for Map Reduce to run 28 | 29 | 30 | yarn.web-proxy.address 31 | master.local:8100 32 | 33 | 34 | yarn.log-aggregation-enable 35 | true 36 | 37 | 38 | yarn.log.server.url 39 | http://master.local:19888/jobhistory/logs/ 40 | 
41 | 45 | 46 | -------------------------------------------------------------------------------- /modules/hadoop/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class hadoop($slaves_file = undef, $hdfs_site_file = undef) { 2 | 3 | $hadoop_version = "2.6.0" 4 | $hadoop_home = "/opt/hadoop-${hadoop_version}" 5 | $hadoop_tarball = "hadoop-${hadoop_version}.tar.gz" 6 | $hadoop_tarball_checksums = "${hadoop_tarball}.mds" 7 | $hadoop_conf_dir = "${hadoop_home}/etc/hadoop" 8 | $hadoop_logs_basedir = "$hadoop_home/logs" 9 | $hadoop_log_dir = "${hadoop_logs_basedir}/hadoop" 10 | $yarn_log_dir = "${hadoop_logs_basedir}/yarn" 11 | $mapred_log_dir = "${hadoop_logs_basedir}/mapred" 12 | 13 | 14 | if $slaves_file == undef { 15 | $_slaves_file = "puppet:///modules/hadoop/slaves" 16 | } 17 | else { 18 | $_slaves_file = $slaves_file 19 | } 20 | if $hdfs_site_file == undef { 21 | $_hdfs_site_file = "puppet:///modules/hadoop/hdfs-site.xml" 22 | } 23 | else { 24 | $_hdfs_site_file = $hdfs_site_file 25 | } 26 | 27 | file { ["/srv/hadoop/", "/srv/hadoop/namenode", "/srv/hadoop/datanode/"]: 28 | ensure => "directory", 29 | owner => "hdfs", 30 | group => "hadoop" 31 | } 32 | 33 | exec { "download_grrr": 34 | command => "wget --no-check-certificate http://raw.github.com/fs111/grrrr/master/grrr -O /tmp/grrr && chmod +x /tmp/grrr", 35 | path => $path, 36 | creates => "/tmp/grrr", 37 | } 38 | 39 | exec { "download_hadoop": 40 | command => "/tmp/grrr /hadoop/common/hadoop-${hadoop_version}/$hadoop_tarball -O /vagrant/$hadoop_tarball --read-timeout=5 --tries=0", 41 | timeout => 1800, 42 | path => $path, 43 | creates => "/vagrant/$hadoop_tarball", 44 | require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] 45 | } 46 | 47 | exec { "download_checksum": 48 | command => "/tmp/grrr /hadoop/common/hadoop-${hadoop_version}/$hadoop_tarball_checksums -O /vagrant/$hadoop_tarball_checksums --read-timeout=5 --tries=0", 49 | timeout => 1800, 50 | path => $path, 51 | unless => "ls /vagrant | grep ${hadoop_tarball_checksums}", 52 | require => Exec["download_grrr"], 53 | } 54 | 55 | file { "/tmp/verifier": 56 | source => "puppet:///modules/hadoop/verifier", 57 | mode => 755, 58 | owner => root, 59 | group => root, 60 | } 61 | 62 | #exec{ "verify_tarball": 63 | # command => "/tmp/verifier /vagrant/${hadoop_tarball_checksums}", 64 | # path => $path, 65 | # require => [File["/tmp/verifier"], Exec["download_hadoop"], Exec["download_checksum"]] 66 | #} 67 | 68 | exec { "unpack_hadoop" : 69 | command => "tar xf /vagrant/${hadoop_tarball} -C /opt", 70 | path => $path, 71 | creates => "${hadoop_home}", 72 | require => Exec["download_hadoop"] 73 | } 74 | 75 | exec { "hadoop_conf_permissions" : 76 | command => "chown -R vagrant ${hadoop_conf_dir}", 77 | path => $path, 78 | require => Exec["unpack_hadoop"] 79 | } 80 | 81 | file{ $hadoop_logs_basedir: 82 | ensure => "directory", 83 | group => "hadoop", 84 | require => Exec["unpack_hadoop"] 85 | } 86 | 87 | file {$hadoop_log_dir: 88 | ensure => "directory", 89 | owner => "hdfs", 90 | group => "hadoop", 91 | require => File[$hadoop_logs_basedir] 92 | } 93 | 94 | file {$yarn_log_dir: 95 | ensure => "directory", 96 | owner => "yarn", 97 | group => "hadoop", 98 | require => File[$hadoop_logs_basedir] 99 | } 100 | 101 | file {$mapred_log_dir: 102 | ensure => "directory", 103 | owner => "mapred", 104 | group => "hadoop", 105 | require => File[$hadoop_logs_basedir] 106 | } 107 | 108 | file { "${hadoop_conf_dir}": 109 | ensure => "directory", 
110 | require => Exec["unpack_hadoop"] 111 | } 112 | 113 | file { 114 | "${hadoop_conf_dir}/slaves": 115 | source => $_slaves_file, 116 | mode => 644, 117 | owner => vagrant, 118 | group => root, 119 | require => File["${hadoop_conf_dir}"] 120 | } 121 | 122 | file { 123 | "${hadoop_home}/bin/start-all.sh": 124 | source => "puppet:///modules/hadoop/start-all.sh", 125 | mode => 755, 126 | owner => vagrant, 127 | group => root, 128 | require => Exec["unpack_hadoop"] 129 | } 130 | 131 | file { 132 | "${hadoop_home}/bin/prepare-cluster.sh": 133 | source => "puppet:///modules/hadoop/prepare-cluster.sh", 134 | mode => 755, 135 | owner => vagrant, 136 | group => root, 137 | require => Exec["unpack_hadoop"] 138 | } 139 | file { 140 | "${hadoop_home}/bin/stop-all.sh": 141 | source => "puppet:///modules/hadoop/stop-all.sh", 142 | mode => 755, 143 | owner => vagrant, 144 | group => root, 145 | require => Exec["unpack_hadoop"] 146 | } 147 | 148 | file { 149 | "${hadoop_conf_dir}/masters": 150 | source => "puppet:///modules/hadoop/masters", 151 | mode => 644, 152 | owner => vagrant, 153 | group => root, 154 | require => File["${hadoop_conf_dir}"] 155 | } 156 | 157 | file { 158 | "${hadoop_conf_dir}/core-site.xml": 159 | source => "puppet:///modules/hadoop/core-site.xml", 160 | mode => 644, 161 | owner => vagrant, 162 | group => root, 163 | require => File["${hadoop_conf_dir}"] 164 | } 165 | 166 | file { 167 | "${hadoop_conf_dir}/mapred-site.xml": 168 | source => "puppet:///modules/hadoop/mapred-site.xml", 169 | mode => 644, 170 | owner => vagrant, 171 | group => root, 172 | require => File["${hadoop_conf_dir}"] 173 | } 174 | 175 | file { 176 | "${hadoop_conf_dir}/hdfs-site.xml": 177 | source => $_hdfs_site_file, 178 | mode => 644, 179 | owner => vagrant, 180 | group => root, 181 | require => File["${hadoop_conf_dir}"] 182 | } 183 | 184 | file { 185 | "${hadoop_conf_dir}/yarn-site.xml": 186 | source => "puppet:///modules/hadoop/yarn-site.xml", 187 | mode => 644, 188 | owner => vagrant, 189 | group => root, 190 | require => File["${hadoop_conf_dir}"] 191 | } 192 | 193 | file { 194 | "${hadoop_conf_dir}/hadoop-env.sh": 195 | source => "puppet:///modules/hadoop/hadoop-env.sh", 196 | mode => 644, 197 | owner => vagrant, 198 | group => root, 199 | require => File["${hadoop_conf_dir}"] 200 | } 201 | 202 | file { 203 | "${hadoop_conf_dir}/yarn-env.sh": 204 | source => "puppet:///modules/hadoop/yarn-env.sh", 205 | mode => 644, 206 | owner => vagrant, 207 | group => root, 208 | require => File["${hadoop_conf_dir}"] 209 | } 210 | 211 | file { "/etc/profile.d/hadoop-path.sh": 212 | content => template("hadoop/hadoop-path.sh.erb"), 213 | owner => vagrant, 214 | group => root, 215 | } 216 | group { "hadoop": 217 | ensure => "present", 218 | } 219 | user { "hdfs": 220 | ensure => "present", 221 | managehome => "true", 222 | groups => "hadoop" 223 | } 224 | user { "yarn": 225 | ensure => "present", 226 | managehome => "true", 227 | groups => "hadoop" 228 | } 229 | user { "mapred": 230 | ensure => "present", 231 | managehome => "true", 232 | groups => "hadoop" 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /modules/hadoop/templates/hadoop-path.sh.erb: -------------------------------------------------------------------------------- 1 | export HADOOP_HOME_WARN_SUPPRESS="true" 2 | export HADOOP_HOME=<%=hadoop_home%> 3 | export HADOOP_YARN_HOME=$HADOOP_HOME 4 | export HADOOP_PREFIX=$HADOOP_HOME 5 | export HADOOP_CONF_DIR=<%=hadoop_conf_dir%> 6 | export 
YARN_CONF_DIR=<%=hadoop_conf_dir%> 7 | export PATH=$HADOOP_HOME/bin:$PATH 8 | export YARN_LOG_DIR=<%=yarn_log_dir%> 9 | export HADOOP_LOG_DIR=<%=hadoop_log_dir%> 10 | export HADOOP_MAPRED_LOG_DIR=<%=mapred_log_dir%> 11 | -------------------------------------------------------------------------------- /modules/hbase/files/hbase-env.sh: -------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Copyright 2007 The Apache Software Foundation 4 | # * 5 | # * Licensed to the Apache Software Foundation (ASF) under one 6 | # * or more contributor license agreements. See the NOTICE file 7 | # * distributed with this work for additional information 8 | # * regarding copyright ownership. The ASF licenses this file 9 | # * to you under the Apache License, Version 2.0 (the 10 | # * "License"); you may not use this file except in compliance 11 | # * with the License. You may obtain a copy of the License at 12 | # * 13 | # * http://www.apache.org/licenses/LICENSE-2.0 14 | # * 15 | # * Unless required by applicable law or agreed to in writing, software 16 | # * distributed under the License is distributed on an "AS IS" BASIS, 17 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # * See the License for the specific language governing permissions and 19 | # * limitations under the License. 20 | # */ 21 | 22 | # Set environment variables here. 23 | 24 | # This script sets variables multiple times over the course of starting an hbase process, 25 | # so try to keep things idempotent unless you want to take an even deeper look 26 | # into the startup scripts (bin/hbase, etc.) 27 | 28 | # The java implementation to use. Java 1.6 required. 29 | # export JAVA_HOME=/usr/java/jdk1.6.0/ 30 | export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 31 | # Extra Java CLASSPATH elements. Optional. 32 | # export HBASE_CLASSPATH= 33 | 34 | # The maximum amount of heap to use, in MB. Default is 1000. 35 | # export HBASE_HEAPSIZE=1000 36 | 37 | # Extra Java runtime options. 38 | # Below are what we set by default. May only work with SUN JVM. 39 | # For more on why as well as other possible settings, 40 | # see http://wiki.apache.org/hadoop/PerformanceTuning 41 | export HBASE_OPTS="-XX:+UseConcMarkSweepGC" 42 | 43 | # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes. 44 | 45 | # This enables basic gc logging to the .out file. 46 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 47 | 48 | # This enables basic gc logging to its own file. 49 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 50 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 51 | 52 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 53 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 54 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 55 | 56 | # Uncomment one of the below three options to enable java garbage collection logging for the client processes. 57 | 58 | # This enables basic gc logging to the .out file. 59 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 60 | 61 | # This enables basic gc logging to its own file. 
62 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 63 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 64 | 65 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 66 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 67 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 68 | 69 | # Uncomment below if you intend to use the EXPERIMENTAL off heap cache. 70 | # export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize=" 71 | # Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value. 72 | 73 | 74 | # Uncomment and adjust to enable JMX exporting 75 | # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. 76 | # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html 77 | # 78 | # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" 79 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101" 80 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" 81 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" 82 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" 83 | 84 | # File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. 85 | # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers 86 | 87 | # File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default. 88 | # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters 89 | 90 | # Extra ssh options. Empty by default. 91 | # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" 92 | 93 | # Where log files are stored. $HBASE_HOME/logs by default. 94 | # export HBASE_LOG_DIR=${HBASE_HOME}/logs 95 | 96 | # Enable remote JDWP debugging of major HBase processes. Meant for Core Developers 97 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070" 98 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071" 99 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072" 100 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073" 101 | 102 | # A string representing this instance of hbase. $USER by default. 103 | # export HBASE_IDENT_STRING=$USER 104 | 105 | # The scheduling priority for daemon processes. See 'man nice'. 106 | # export HBASE_NICENESS=10 107 | 108 | # The directory where pid files are stored. /tmp by default. 109 | # export HBASE_PID_DIR=/var/hadoop/pids 110 | 111 | # Seconds to sleep between slave commands. Unset by default. This 112 | # can be useful in large clusters, where, e.g., slave rsyncs can 113 | # otherwise arrive faster than the master can service them. 
114 | # export HBASE_SLAVE_SLEEP=0.1 115 | 116 | # Tell HBase whether it should manage it's own instance of Zookeeper or not. 117 | export HBASE_MANAGES_ZK=true 118 | -------------------------------------------------------------------------------- /modules/hbase/files/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hbase.zookeeper.quorum 6 | hadoop1.local,hadoop2.local,hadoop3.local 7 | 8 | 9 | hbase.zookeeper.property.dataDir 10 | /srv/zookeeper 11 | Property from ZooKeeper's config zoo.cfg. The directory where the snapshot is stored. 12 | 13 | 14 | hbase.rootdir 15 | hdfs://master.local:9000/hbase 16 | The directory shared by RegionServers. 17 | 18 | 19 | hbase.cluster.distributed 20 | true 21 | The mode the cluster will be in. Possible values are 22 | false: standalone and pseudo-distributed setups with managed Zookeeper 23 | true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /modules/hbase/files/regionservers: -------------------------------------------------------------------------------- 1 | hadoop1.local 2 | hadoop2.local 3 | hadoop3.local 4 | -------------------------------------------------------------------------------- /modules/hbase/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class hbase { 2 | $hbase_version = "0.98.24" 3 | $hbase_platform = "hadoop2" 4 | $hbase_home = "/opt/hbase-${hbase_version}-${hbase_platform}" 5 | $hbase_tarball = "hbase-${hbase_version}-${hbase_platform}-bin.tar.gz" 6 | 7 | file { "/srv/zookeeper": 8 | ensure => "directory" 9 | } 10 | 11 | exec { "download_hbase": 12 | command => "/tmp/grrr /hbase/${hbase_version}/$hbase_tarball -O /vagrant/$hbase_tarball --read-timeout=5 --tries=0", 13 | timeout => 1800, 14 | path => $path, 15 | creates => "/vagrant/$hbase_tarball", 16 | require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] 17 | } 18 | 19 | exec { "unpack_hbase" : 20 | command => "tar xf /vagrant/${hbase_tarball} -C /opt", 21 | path => $path, 22 | creates => "${hbase_home}", 23 | require => Exec["download_hbase"] 24 | } 25 | 26 | file { 27 | "${hbase_home}/conf/regionservers": 28 | source => "puppet:///modules/hbase/regionservers", 29 | mode => 644, 30 | owner => root, 31 | group => root, 32 | require => Exec["unpack_hbase"] 33 | } 34 | 35 | file { 36 | "${hbase_home}/conf/hbase-site.xml": 37 | source => "puppet:///modules/hbase/hbase-site.xml", 38 | mode => 644, 39 | owner => root, 40 | group => root, 41 | require => Exec["unpack_hbase"] 42 | } 43 | 44 | file { 45 | "${hbase_home}/conf/hbase-env.sh": 46 | source => "puppet:///modules/hbase/hbase-env.sh", 47 | mode => 644, 48 | owner => root, 49 | group => root, 50 | require => Exec["unpack_hbase"] 51 | } 52 | 53 | file { "/etc/profile.d/hbase-path.sh": 54 | content => template("hbase/hbase-path.sh.erb"), 55 | owner => root, 56 | group => root, 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /modules/hbase/templates/hbase-path.sh.erb: -------------------------------------------------------------------------------- 1 | export HBASE_HOME=<%=hbase_home%> 2 | export HBASE_CONF_DIR=$HBASE_HOME/conf 3 | export PATH=$HBASE_HOME/bin:$PATH 4 | -------------------------------------------------------------------------------- /single-node/Vagrantfile: 
-------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | VAGRANTFILE_API_VERSION = "2" 5 | 6 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 7 | config.vm.box = "cascading-hadoop-base" 8 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 9 | 10 | config.vm.provider :virtualbox do |vb| 11 | vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "1536"] 12 | end 13 | 14 | config.vm.define :master do |master| 15 | master.vm.network "private_network", ip: "192.168.7.10" 16 | master.vm.hostname = "master.local" 17 | 18 | config.vm.provision :puppet do |puppet| 19 | puppet.manifest_file = "master-single.pp" 20 | puppet.module_path = "../modules" 21 | puppet.manifests_path = "../manifests" 22 | end 23 | end 24 | end 25 | --------------------------------------------------------------------------------