├── roles ├── cdh5-base │ ├── files │ │ ├── oozie │ │ │ └── sshkeys │ │ │ │ ├── id_rsa │ │ │ │ ├── id_rsa.pub │ │ │ │ └── authorized_keys │ │ ├── java.sh │ │ ├── cloudera-cdh5b2.repo │ │ └── cloudera-gplextras5b2.repo │ ├── templates │ │ ├── hadoop │ │ │ ├── slaves.j2 │ │ │ ├── container-executor.cfg.j2 │ │ │ ├── configuration.xsl.j2 │ │ │ ├── hadoop-metrics2.properties.j2 │ │ │ ├── core-site.xml.j2 │ │ │ ├── ssl-server.xml.example.j2 │ │ │ ├── ssl-client.xml.example.j2 │ │ │ ├── mapred-site.xml.j2 │ │ │ ├── hadoop-metrics.properties.j2 │ │ │ ├── yarn-site.xml.j2 │ │ │ ├── capacity-scheduler.xml.j2 │ │ │ ├── mapred-queues.xml.template.j2 │ │ │ ├── yarn-env.sh.j2 │ │ │ ├── hdfs-site.xml.j2 │ │ │ ├── hadoop-policy.xml.j2 │ │ │ └── log4j.properties.j2 │ │ ├── hbase │ │ │ ├── regionservers.j2 │ │ │ ├── hadoop-metrics2-hbase.properties.j2 │ │ │ ├── hbase-site.xml.j2 │ │ │ ├── hbase-policy.xml.j2 │ │ │ ├── log4j.properties.j2 │ │ │ └── hbase-env.sh.j2 │ │ ├── hosts.j2 │ │ ├── zoo.cfg.j2 │ │ └── limits.conf.j2 │ └── tasks │ │ ├── main.yml │ │ ├── misc.yml │ │ ├── oozie.yml │ │ ├── hbase.yml │ │ └── base.yml ├── cdh5-spark-base │ ├── templates │ │ ├── slaves.j2 │ │ ├── fairscheduler.xml.template.j2 │ │ ├── log4j.properties.template.j2 │ │ ├── spark-env.sh.template.j2 │ │ ├── spark-env.sh.j2 │ │ └── metrics.properties.template.j2 │ └── tasks │ │ └── main.yml ├── cdh5-zookeeperserver │ ├── handlers │ │ └── main.yml │ ├── templates │ │ └── zoo.cfg.j2 │ └── tasks │ │ └── main.yml ├── cdh5-spark-master │ └── tasks │ │ └── main.yml ├── cdh5-spark-worker │ └── tasks │ │ └── main.yml ├── cdh5-oozie │ ├── templates │ │ ├── oozie_db_init.sql.j2 │ │ ├── adminusers.txt.j2 │ │ ├── action-conf │ │ │ └── hive.xml.j2 │ │ ├── hadoop-config.xml.j2 │ │ ├── hadoop-conf │ │ │ └── core-site.xml.j2 │ │ ├── oozie-env.sh.j2 │ │ ├── oozie-log4j.properties.j2 │ │ └── oozie-site.xml.j2 │ └── tasks │ │ └── main.yml ├── cdh5-httpfs │ └── tasks │ │ └── main.yml ├── cdh5-pig │ ├── templates │ │ ├── 
register.sh.j2 │ │ ├── build.properties.j2 │ │ ├── log4j.properties.j2 │ │ └── pig.properties.j2 │ └── tasks │ │ └── main.yml ├── cdh5-hbase-regionserver │ └── tasks │ │ └── main.yml ├── cdh5-hive │ ├── templates │ │ ├── hive_metastore_init.sql.j2 │ │ ├── hive-server2.j2 │ │ ├── hive-env.sh.template.j2 │ │ ├── hive-exec-log4j.properties.j2 │ │ ├── hive-site.xml.j2 │ │ └── hive-log4j.properties.j2 │ └── tasks │ │ └── main.yml ├── cdh5-journalnode │ └── tasks │ │ └── main.yml ├── cdh5-hbase-master │ └── tasks │ │ └── main.yml ├── cdh5-slave │ └── tasks │ │ └── main.yml ├── cdh5-namenode-primary │ └── tasks │ │ └── main.yml ├── cdh5-namenode-backup │ └── tasks │ │ └── main.yml └── cdh5-resourcemanager │ └── tasks │ └── main.yml ├── README.md ├── hosts.cdh5 ├── cdh5.yml └── group_vars └── cdh5-all /roles/cdh5-base/files/oozie/sshkeys/id_rsa: -------------------------------------------------------------------------------- 1 | id_rsa 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/slaves.j2: -------------------------------------------------------------------------------- 1 | localhost 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/oozie/sshkeys/id_rsa.pub: -------------------------------------------------------------------------------- 1 | id_rsa.pub 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/regionservers.j2: -------------------------------------------------------------------------------- 1 | localhost 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/oozie/sshkeys/authorized_keys: -------------------------------------------------------------------------------- 1 | authorized_keys 2 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/slaves.j2: 
-------------------------------------------------------------------------------- 1 | # A Spark Worker will be started on each of the machines listed below. 2 | localhost -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include: misc.yml 3 | - include: base.yml 4 | - include: hbase.yml 5 | - include: oozie.yml 6 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: restart zookeeper server 3 | service: name=zookeeper-server state=restarted 4 | -------------------------------------------------------------------------------- /roles/cdh5-spark-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: start spark master services 3 | service: name={{ item }} state=started 4 | with_items: 5 | - spark-master 6 | tags: 7 | - cdh5-spark-master 8 | -------------------------------------------------------------------------------- /roles/cdh5-spark-worker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: start spark worker services 3 | service: name={{ item }} state=started 4 | with_items: 5 | - spark-worker 6 | tags: 7 | - cdh5-spark-worker 8 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/java.sh: -------------------------------------------------------------------------------- 1 | export JAVA_HOME=/usr/java/jdk1.6.0_45 2 | export JRE_HOME=$JAVA_HOME/jre 3 | export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH 4 | export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH 5 | -------------------------------------------------------------------------------- 
/roles/cdh5-base/templates/hosts.j2: -------------------------------------------------------------------------------- 1 | 127.0.0.1 localhost localhost.localdomain 2 | {% for host in groups['cdh5-all'] %} 3 | {{ hostvars[host]["ansible_default_ipv4"]["address"] }} {{ host }}.{{ tl_domain }} {{ host }} {{ host }}.localdomain 4 | {% endfor %} 5 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie_db_init.sql.j2: -------------------------------------------------------------------------------- 1 | CREATE DATABASE oozie; 2 | GRANT ALL PRIVILEGES ON oozie.* TO 'oozie'@'localhost' IDENTIFIED BY '{{ oozie_db_passwd }}'; 3 | GRANT ALL PRIVILEGES ON oozie.* TO 'oozie'@'%' IDENTIFIED BY '{{ oozie_db_passwd }}'; 4 | -------------------------------------------------------------------------------- /roles/cdh5-httpfs/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: installl httpfs pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-httpfs 6 | tags: 7 | - cdh5-httpfs 8 | 9 | - name: start httpfs service 10 | service: name=hadoop-httpfs state=started 11 | tags: 12 | - cdh5-httpfs 13 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/container-executor.cfg.j2: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group 2 | banned.users=#comma separated list of users who can not run applications 3 | min.user.id=1000#Prevent other super-users 4 | allowed.system.users=##comma separated list of system users who CAN run applications 5 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/cloudera-cdh5b2.repo: 
-------------------------------------------------------------------------------- 1 | [cloudera-cdh5b2] 2 | # Packages for Cloudera's Distribution for Hadoop, Version 5.0.0b2, on RedHat or CentOS 6 x86_64 3 | name=Cloudera's Distribution for Hadoop, Version 5.0.0b2 4 | baseurl=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/5.0.0b2/ 5 | gpgkey=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera 6 | gpgcheck=1 7 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/cloudera-gplextras5b2.repo: -------------------------------------------------------------------------------- 1 | [cloudera-gplextras5b2] 2 | # Packages for Cloudera's GPLExtras, Version 5.0.0b2, on RedHat or CentOS 6 x86_64 3 | name=Cloudera's GPLExtras, Version 5.0.0b2 4 | baseurl=http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/5.0.0b2/ 5 | gpgkey=http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/RPM-GPG-KEY-cloudera 6 | gpgcheck=1 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Ansible playbook of CDH5 2 | 3 | HDFS, HBase, Hive, HTTPFS, Oozie, Pig, Zookeeper, Spark 4 | 5 | #### Steps: 6 | 7 | ``` 8 | 1. Specify the hosts in hosts.cdh5 9 | 2. Specify the configuration values in group_vars/cdh5-all 10 | 3. Make sure all hosts can be logged into as root or normal user with sudo permissions 11 | 4. 
Run 'ansible-playbook cdh5.yml -i hosts.cdh5 -u heydevops --sudo -k' 12 | ``` 13 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/register.sh.j2: -------------------------------------------------------------------------------- 1 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 2 | REGISTER /usr/lib/pig/datafu-1.1.0-cdh5.0.0-beta-2.jar 3 | REGISTER /usr/lib/zookeeper/zookeeper-3.4.5-cdh5.0.0-beta-2.jar 4 | REGISTER /usr/lib/hbase/hbase-server-0.96.1.1-cdh5.0.0-beta-2.jar 5 | REGISTER /usr/lib/hbase/hbase-client-0.96.1.1-cdh5.0.0-beta-2.jar 6 | REGISTER /usr/lib/hbase/hbase-common-0.96.1.1-cdh5.0.0-beta-2.jar 7 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/fairscheduler.xml.template.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | FAIR 5 | 1 6 | 2 7 | 8 | 9 | FIFO 10 | 2 11 | 3 12 | 13 | 14 | -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/misc.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install the libselinux-python package 3 | yum: name=libselinux-python state=installed 4 | tags: 5 | - disable_selinux 6 | 7 | - name: disable SELinux in conf file 8 | selinux: state=disabled 9 | register: selinux_conf 10 | tags: 11 | - disable_selinux 12 | 13 | - name: disable SELinux in command line 14 | shell: setenforce 0 15 | when: selinux_conf|changed 16 | tags: 17 | - disable_selinux 18 | -------------------------------------------------------------------------------- /roles/cdh5-hbase-regionserver/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase regionserver pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase-regionserver 6 | tags: 7 | - cdh5-hbase 8 | - 
cdh5-hbase-regionserver 9 | 10 | - name: start hbase regionserver 11 | service: name={{ item }} state=started 12 | with_items: 13 | - hbase-regionserver 14 | tags: 15 | - cdh5-hbase 16 | - cdh5-hbase-regionserver 17 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive_metastore_init.sql.j2: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS metastore; 2 | USE metastore; 3 | SOURCE /usr/lib/hive/scripts/metastore/upgrade/mysql/hive-schema-0.12.0.mysql.sql; 4 | 5 | {% for host in hive_mysql_hosts %} 6 | CREATE USER 'hive'@'{{ host }}' IDENTIFIED BY '{{ hive_mysql_passwd }}'; 7 | REVOKE ALL PRIVILEGES, GRANT OPTION FROM 'hive'@'{{ host }}'; 8 | GRANT SELECT,INSERT,UPDATE,DELETE,LOCK TABLES,EXECUTE ON metastore.* TO 'hive'@'{{ host }}'; 9 | {% endfor %} 10 | FLUSH PRIVILEGES; 11 | -------------------------------------------------------------------------------- /roles/cdh5-journalnode/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install journalnode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-journalnode 6 | tags: 7 | - cdh5-journalnode 8 | 9 | - name: create folder for journaling 10 | file: path={{ dfs_journalnode_edits_dir }} state=directory owner=hdfs group=hdfs mode=0755 11 | tags: 12 | - cdh5-journalnode 13 | 14 | - name: start journalnode services 15 | service: name=hadoop-hdfs-journalnode state=started 16 | tags: 17 | - cdh5-journalnode 18 | -------------------------------------------------------------------------------- /roles/cdh5-pig/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install pig pkgs 3 | yum: name={{ item }}-{{ version['pig'] }} state=present 4 | with_items: 5 | - pig 6 | tags: 7 | - cdh5-pig 8 | 9 | - name: install datafu pkg 
10 | yum: name=pig-udf-datafu state=present 11 | tags: 12 | - cdh5-pig 13 | 14 | - name: copy pig configuration files 15 | template: src={{ item }}.j2 dest=/etc/pig/conf/{{ item }} owner=root group=root mode=0644 16 | with_items: 17 | - build.properties 18 | - log4j.properties 19 | - pig.properties 20 | - register.sh 21 | tags: 22 | - cdh5-pig 23 | - cdh5-pig-conf 24 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | dataDir={{ zookeeper_datadir }} 11 | # the port at which the clients will connect 12 | clientPort=2181 13 | maxClientCnxns=0 14 | {% for host in groups['cdh5-zookeeperserver'] %} 15 | server.{{ hostvars[host].zoo_id }}={{ host }}.{{ tl_domain }}:2888:3888 16 | {% endfor %} 17 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/log4j.properties.template.j2: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=WARN 10 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 11 | 
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 12 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | dataDir={{ zookeeper_datadir }} 11 | # the port at which the clients will connect 12 | clientPort=2181 13 | maxClientCnxns=0 14 | {% for host in groups['cdh5-zookeeperserver'] %} 15 | server.{{ hostvars[host].zoo_id }}={{ host }}.{{ tl_domain }}:2888:3888 16 | {% endfor %} 17 | -------------------------------------------------------------------------------- /roles/cdh5-hbase-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase master pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase-master 6 | - hbase-thrift 7 | - hbase-rest 8 | tags: 9 | - cdh5-hbase 10 | - cdh5-hbase-master 11 | 12 | - name: create hbase directory in hdfs 13 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /hbase; sudo -u hdfs hadoop fs -chown hbase /hbase && touch {{ item }} 14 | with_items: 15 | - /var/hadoop/.status/hbase_dir.created 16 | tags: 17 | - cdh5-hbase 18 | - cdh5-hbase-master 19 | 20 | - name: start hbase master services 21 | service: name={{ item }} state=started 22 | with_items: 23 | - hbase-master 24 | - hbase-thrift 25 | - hbase-rest 26 | tags: 27 | - cdh5-hbase 28 | - cdh5-hbase-master 29 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/build.properties.j2: 
-------------------------------------------------------------------------------- 1 | # 2 | #Fri Feb 07 12:46:21 PST 2014 3 | hadoop-test.version=2.2.0-mr1-cdh5.0.0-beta-2 4 | parquet-pig-bundle.version=1.2.5-cdh5.0.0-beta-2 5 | snappy.version=1.0.4.1 6 | zookeeper.version=3.4.5-cdh5.0.0-beta-2 7 | protobuf-java.version=2.5.0 8 | slf4j-api.version=1.7.5 9 | hbase95.version=0.96.1.1-cdh5.0.0-beta-2 10 | pig.version=0.12.0-cdh5.0.0-beta-2 11 | version=0.12.0-cdh5.0.0-beta-2 12 | slf4j-log4j12.version=1.7.5 13 | hadoop-hdfs.version=2.2.0-cdh5.0.0-beta-2 14 | avro.version=1.7.5-cdh5.0.0-beta-2 15 | reactor.repo=https\://repository.cloudera.com/content/repositories/snapshots 16 | hadoop-mapreduce.version=2.2.0-cdh5.0.0-beta-2 17 | hadoop-common.version=2.2.0-cdh5.0.0-beta-2 18 | hadoop-core.version=2.2.0-mr1-cdh5.0.0-beta-2 19 | hadoopversion=23 20 | commons-lang.version=2.6 21 | hbaseversion=95 22 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hadoop-client for spark 3 | yum: name={{ item }}-{{ version['hadoop']}} state=present 4 | with_items: 5 | - hadoop-client 6 | tags: 7 | - cdh5-spark-base 8 | 9 | - name: install spark pkgs 10 | yum: name={{ item }}-{{ version['spark'] }} state=present 11 | with_items: 12 | - spark-core 13 | - spark-master 14 | - spark-worker 15 | - spark-python 16 | tags: 17 | - cdh5-spark-base 18 | 19 | - name: copy spark configuration files 20 | template: src={{ item }}.j2 dest=/etc/spark/conf/{{ item }} owner=spark group=spark mode=0644 21 | with_items: 22 | - fairscheduler.xml.template 23 | - log4j.properties.template 24 | - metrics.properties.template 25 | - slaves 26 | - spark-env.sh 27 | - spark-env.sh.template 28 | tags: 29 | - cdh5-spark-base 30 | - cdh5-spark-base-conf 31 | -------------------------------------------------------------------------------- 
/roles/cdh5-base/tasks/oozie.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: create oozie group 3 | group: name=oozie state=present 4 | tags: 5 | - cdh5-base 6 | - cdh5-oozie 7 | - cdh5-oozie-user 8 | 9 | - name: create oozie user 10 | user: name=oozie group=oozie createhome=yes state=present 11 | tags: 12 | - cdh5-base 13 | - cdh5-oozie 14 | - cdh5-oozie-user 15 | 16 | - name: create .ssh directory for oozie user 17 | file: path=/home/oozie/.ssh state=directory owner=oozie group=oozie mode=0700 18 | tags: 19 | - cdh5-base 20 | - cdh5-oozie 21 | - cdh5-oozie-user 22 | 23 | - name: copy the sshkeys for oozie user 24 | copy: src=oozie/sshkeys/{{ item }} dest=/home/oozie/.ssh/{{ item }} owner=oozie group=oozie mode=0600 25 | with_items: 26 | - authorized_keys 27 | - id_rsa 28 | - id_rsa.pub 29 | tags: 30 | - cdh5-base 31 | - cdh5-oozie 32 | - cdh5-oozie-user 33 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/adminusers.txt.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | # Admin Users, one user by line 20 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install zookeeper server pkgs 3 | yum: name={{ item }}-{{ version['zookeeper'] }} state=present 4 | with_items: 5 | - zookeeper-server 6 | tags: 7 | - cdh5-zookeeper 8 | - cdh5-zookeeperserver 9 | 10 | - name: create zookeeper_datadir 11 | file: path={{ zookeeper_datadir }} state=directory owner=zookeeper group=zookeeper mode=0755 12 | tags: 13 | - cdh5-zookeeper 14 | - cdh5-zookeeperserver 15 | 16 | - name: init zookeeper server 17 | shell: service zookeeper-server init --myid={{ zoo_id }} creates={{ zookeeper_datadir }}/myid 18 | tags: 19 | - cdh5-zookeeper 20 | - cdh5-zookeeperserver 21 | 22 | - name: create zookeeper cfg 23 | template: src=zoo.cfg.j2 dest=/etc/zookeeper/conf/zoo.cfg owner=zookeeper group=zookeeper mode=0644 24 | notify: restart zookeeper server 25 | tags: 26 | - cdh5-zookeeper 27 | - cdh5-zookeeperserver 28 | 29 | - name: start zookeeper server 30 | service: name=zookeeper-server state=started 31 | tags: 32 | - cdh5-zookeeper 33 | - cdh5-zookeeperserver 34 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-server2.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # The port for Hive server2 daemon to listen to. 17 | # Unfortunately, there is no way to specify the interfaces 18 | # to which the daemon binds. 19 | # 20 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 21 | -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/hbase.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase base pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase 6 | tags: 7 | - cdh5-base 8 | - cdh5-hbase 9 | 10 | - name: install ntp pkgs 11 | yum: name=ntp state=present 12 | tags: 13 | - cdh5-base 14 | - cdh5-hbase 15 | - cdh5-ntp 16 | 17 | - name: start ntpd service 18 | service: name=ntpd state=started 19 | tags: 20 | - cdh5-base 21 | - cdh5-hbase 22 | - cdh5-ntp 23 | 24 | - name: copy the limits.conf 25 | template: src=limits.conf.j2 dest=/etc/security/limits.conf owner=root group=root mode=0644 26 | tags: 27 | - cdh5-base 28 | - cdh5-hbase 29 | 30 | - name: copy the hbase configuration files 31 | template: src=hbase/{{ item }}.j2 dest=/etc/hbase/conf/{{ item }} owner=hbase group=hbase mode=0664 32 | with_items: 33 | - hbase-site.xml 34 | - hbase-env.sh 35 | - hadoop-metrics2-hbase.properties 36 | - hbase-policy.xml 37 | - log4j.properties 38 | - regionservers 39 | register: copy_hbase_conf 40 | tags: 41 | - cdh5-base 42 | - cdh5-hbase 43 | - cdh5-hbase-conf 44 | 
-------------------------------------------------------------------------------- /roles/cdh5-slave/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install datanode nodemanager and mapreduce pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-yarn-nodemanager 6 | - hadoop-hdfs-datanode 7 | - hadoop-mapreduce 8 | tags: 9 | - cdh5-slave 10 | 11 | - name: create the data directory for the slave nodes to store the data 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_datanode_data_dir }}" 15 | tags: 16 | - cdh5-slave 17 | 18 | - name: create the local storage directories for use by YARN 19 | file: path={{ item }} state=directory owner=yarn group=yarn mode=0755 20 | with_items: 21 | - "{{ yarn_nodemanager_local_dirs }}" 22 | - "{{ yarn_nodemanager_log_dirs }}" 23 | tags: 24 | - cdh5-slave 25 | 26 | - name: start hadoop datanode service 27 | service: name={{ item }} state=started 28 | with_items: 29 | - hadoop-hdfs-datanode 30 | tags: 31 | - cdh5-slave 32 | 33 | - name: start nodemanager services 34 | service: name={{ item }} state=started 35 | with_items: 36 | - hadoop-yarn-nodemanager 37 | tags: 38 | - cdh5-slave 39 | -------------------------------------------------------------------------------- /hosts.cdh5: -------------------------------------------------------------------------------- 1 | [cdh5-all:children] 2 | cdh5-namenode 3 | cdh5-journalnode 4 | cdh5-zookeeperserver 5 | cdh5-resourcemanager 6 | cdh5-httpfs 7 | cdh5-hbase 8 | cdh5-slave 9 | cdh5-hive 10 | cdh5-oozie 11 | cdh5-pig 12 | cdh5-spark 13 | 14 | [cdh5-namenode:children] 15 | cdh5-namenode-primary 16 | cdh5-namenode-backup 17 | 18 | [cdh5-namenode-primary] 19 | idc1-hnn1 20 | 21 | [cdh5-namenode-backup] 22 | idc1-hnn2 23 | 24 | [cdh5-journalnode] 25 | idc1-hjn1 26 | idc1-hjn2 27 | idc1-hjn3 28 | 29 | [cdh5-zookeeperserver] 30 | 
idc1-hjn1 zoo_id=1 31 | idc1-hjn2 zoo_id=2 32 | idc1-hjn3 zoo_id=3 33 | 34 | [cdh5-resourcemanager] 35 | idc1-hrm1 36 | 37 | [cdh5-httpfs] 38 | idc1-hnn2 39 | 40 | [cdh5-hbase:children] 41 | cdh5-hbase-master 42 | cdh5-hbase-regionserver 43 | 44 | [cdh5-hbase-master] 45 | idc1-hnn2 46 | 47 | [cdh5-hbase-regionserver] 48 | idc1-hdn[1:10] 49 | 50 | [cdh5-slave] 51 | idc1-hdn[1:10] 52 | 53 | [cdh5-hive] 54 | idc1-hrm1 55 | 56 | [cdh5-oozie] 57 | idc1-hrm1 58 | 59 | [cdh5-pig] 60 | idc1-hrm1 61 | 62 | [cdh5-spark:children] 63 | cdh5-spark-master 64 | cdh5-spark-worker 65 | 66 | [cdh5-spark-master] 67 | idc1-hrm1 68 | 69 | [cdh5-spark-worker] 70 | idc1-hdn[1:10] 71 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hadoop-metrics2-hbase.properties.j2: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period 6 | *.period=10 7 | 8 | # Below are some examples of sinks that could be used 9 | # to monitor different hbase daemons. 
10 | 11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink 12 | # hbase.sink.file-all.filename=all.metrics 13 | 14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink 15 | # hbase.sink.file0.context=hmaster 16 | # hbase.sink.file0.filename=master.metrics 17 | 18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink 19 | # hbase.sink.file1.context=thrift-one 20 | # hbase.sink.file1.filename=thrift-one.metrics 21 | 22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink 23 | # hbase.sink.file2.context=thrift-two 24 | # hbase.sink.file2.filename=thrift-one.metrics 25 | 26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink 27 | # hbase.sink.file3.context=rest 28 | # hbase.sink.file3.filename=rest.metrics 29 | -------------------------------------------------------------------------------- /roles/cdh5-namenode-primary/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install namenode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-namenode 6 | - hadoop-hdfs-zkfc 7 | tags: 8 | - cdh5-namenode 9 | - cdh5-namenode-primary 10 | 11 | - name: create the data directory for the namenode metadata 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_namenode_name_dir }}" 15 | tags: 16 | - cdh5-namenode 17 | - cdh5-namenode-primary 18 | 19 | - name: create the dfs hosts exclude file 20 | file: path={{ dfs_hosts_exclude }} owner=hdfs group=hdfs mode=0644 21 | tags: 22 | - cdh5-namenode 23 | - cdh5-namenode-primary 24 | 25 | - name: format the namenode 26 | shell: creates={{ item }} sudo -u hdfs hadoop namenode -format && touch {{ item }} 27 | with_items: 28 | - /var/hadoop/.status/namenode.formatted 29 | tags: 30 | - cdh5-namenode 31 | - cdh5-namenode-primary 32 | 33 | - name: start hadoop namenode services 34 | service: name={{ 
item }} state=started 35 | with_items: 36 | - hadoop-hdfs-namenode 37 | tags: 38 | - cdh5-namenode 39 | - cdh5-namenode-primary 40 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # ***** Set root logger level to DEBUG and its only appender to A. 17 | log4j.logger.org.apache.pig=info, A 18 | 19 | # ***** A is set to be a ConsoleAppender. 20 | log4j.appender.A=org.apache.log4j.ConsoleAppender 21 | # ***** A uses PatternLayout. 
22 | log4j.appender.A.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 24 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/action-conf/hive.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | hadoop.bin.path 23 | /usr/bin/hadoop 24 | 25 | 26 | 27 | hadoop.config.dir 28 | /etc/hadoop/conf 29 | 30 | 31 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/spark-env.sh.template.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file contains environment variables required to run Spark. Copy it as 4 | # spark-env.sh and edit that to configure Spark for your site. 5 | # 6 | # The following variables can be set in this file: 7 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 8 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 9 | # - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that 10 | # we recommend setting app-wide options in the application's driver program. 11 | # Examples of node-specific options : -Dspark.local.dir, GC options 12 | # Examples of app-wide options : -Dspark.serializer 13 | # 14 | # If using the standalone deploy mode, you can also set variables for it here: 15 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 16 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports 17 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 18 | # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 
1000m, 2g) 19 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT 20 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 21 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 22 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/configuration.xsl.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
namevaluedescription
37 | 38 | 39 |
40 |
41 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rest.port 26 | 60050 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.rootdir 34 | hdfs://{{ nameservice_id }}:8020/hbase 35 | 36 | 37 | hbase.zookeeper.quorum 38 | {{ groups['cdh5-zookeeperserver'] | join('.' ~ tl_domain + ',') }}.{{ tl_domain }} 39 | 40 | 41 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/hadoop-config.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/hadoop-conf/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/cdh5-namenode-backup/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install namenode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-namenode 6 | - 
hadoop-hdfs-zkfc 7 | tags: 8 | - cdh5-namenode 9 | - cdh5-namenode-backup 10 | 11 | - name: create the data directory for the namenode metadata 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_namenode_name_dir }}" 15 | tags: 16 | - cdh5-namenode 17 | - cdh5-namenode-backup 18 | 19 | - name: create the dfs hosts exclude file 20 | file: path={{ dfs_hosts_exclude }} owner=hdfs group=hdfs mode=0644 21 | tags: 22 | - cdh5-namenode 23 | - cdh5-namenode-backup 24 | 25 | - name: initialize the backup namenode 26 | shell: creates={{ item }} sudo -u hdfs hadoop namenode -bootstrapStandby && touch {{ item }} 27 | with_items: 28 | - /var/hadoop/.status/namenode.formatted 29 | tags: 30 | - cdh5-namenode 31 | - cdh5-namenode-backup 32 | 33 | - name: start hadoop namenode services 34 | service: name={{ item }} state=started 35 | with_items: 36 | - hadoop-hdfs-namenode 37 | tags: 38 | - cdh5-namenode 39 | - cdh5-namenode-backup 40 | 41 | - name: initialize the zkfc for namenode 42 | shell: creates={{ item }} sudo -u hdfs hdfs zkfc -formatZK && touch {{ item }} 43 | with_items: 44 | - /var/hadoop/.status/zkfc.formatted 45 | tags: 46 | - cdh5-namenode 47 | - cdh5-namenode-backup 48 | 49 | - name: start zkfc for namenodes 50 | service: name=hadoop-hdfs-zkfc state=started 51 | tags: 52 | - cdh5-namenode 53 | - cdh5-namenode-backup 54 | -------------------------------------------------------------------------------- /cdh5.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: cdh5-all 3 | roles: 4 | - cdh5-base 5 | 6 | - hosts: cdh5-zookeeperserver 7 | roles: 8 | - cdh5-zookeeperserver 9 | 10 | - hosts: cdh5-journalnode 11 | roles: 12 | - cdh5-journalnode 13 | 14 | - hosts: cdh5-namenode-primary 15 | roles: 16 | - cdh5-namenode-primary 17 | 18 | - hosts: cdh5-namenode-backup 19 | roles: 20 | - cdh5-namenode-backup 21 | 22 | - hosts: cdh5-namenode-primary 23 | tasks: 24 | - 
name: start zkfc for namenodes 25 | service: name=hadoop-hdfs-zkfc state=started 26 | tags: 27 | - cdh5-namenode-primary 28 | - cdh5-namenode-primary-zkfc 29 | 30 | - hosts: cdh5-httpfs 31 | roles: 32 | - cdh5-httpfs 33 | 34 | - hosts: cdh5-slave 35 | roles: 36 | - cdh5-slave 37 | 38 | - hosts: cdh5-namenode-primary 39 | tasks: 40 | - name: create tmp directory 41 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /tmp; sudo -u hdfs hadoop fs -chmod -R 1777 /tmp && touch {{ item }} 42 | with_items: 43 | - /var/hadoop/.status/tmp_dir.created 44 | tags: 45 | - cdh5-namenode-primary 46 | - cdh5-namenode-primary-tmpdir 47 | 48 | - hosts: cdh5-resourcemanager 49 | roles: 50 | - cdh5-resourcemanager 51 | 52 | - hosts: cdh5-hbase-master 53 | roles: 54 | - cdh5-hbase-master 55 | 56 | - hosts: cdh5-hbase-regionserver 57 | roles: 58 | - cdh5-hbase-regionserver 59 | 60 | - hosts: cdh5-hive 61 | roles: 62 | - cdh5-hive 63 | 64 | - hosts: cdh5-oozie 65 | roles: 66 | - cdh5-oozie 67 | 68 | - hosts: cdh5-pig 69 | roles: 70 | - cdh5-pig 71 | 72 | - hosts: cdh5-spark 73 | roles: 74 | - cdh5-spark-base 75 | 76 | - hosts: cdh5-spark-master 77 | roles: 78 | - cdh5-spark-master 79 | 80 | - hosts: cdh5-spark-worker 81 | roles: 82 | - cdh5-spark-worker 83 | -------------------------------------------------------------------------------- /roles/cdh5-resourcemanager/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install resourcemanager pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-yarn-resourcemanager 6 | tags: 7 | - cdh5-resourcemanager 8 | 9 | - name: install historyserver and proxyserver pkgs 10 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 11 | with_items: 12 | - hadoop-mapreduce-historyserver 13 | - hadoop-yarn-proxyserver 14 | tags: 15 | - cdh5-resourcemanager 16 | - cdh5-historyserver 17 | - cdh5-proxyserver 18 | 19 | - name: create the 
local storage directories for use by YARN 20 | file: path={{ item }} state=directory owner=yarn group=yarn mode=0755 21 | with_items: 22 | - "{{ yarn_nodemanager_local_dirs }}" 23 | - "{{ yarn_nodemanager_log_dirs }}" 24 | tags: 25 | - cdh5-resourcemanager 26 | 27 | - name: create yarn history directory 28 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/history; sudo -u hdfs hadoop fs -chmod -R 1777 /user/history; sudo -u hdfs hadoop fs -chown mapred:hadoop /user/history && touch {{ item }} 29 | with_items: 30 | - /var/hadoop/.status/yarn_history.created 31 | tags: 32 | - cdh5-resourcemanager 33 | 34 | - name: create yarn log directory 35 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn; sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn && touch {{ item }} 36 | with_items: 37 | - /var/hadoop/.status/yarn_log.created 38 | tags: 39 | - cdh5-resourcemanager 40 | 41 | - name: start resourcemanager services 42 | service: name={{ item }} state=started 43 | with_items: 44 | - hadoop-yarn-resourcemanager 45 | - hadoop-mapreduce-historyserver 46 | - hadoop-yarn-proxyserver 47 | tags: 48 | - cdh5-resourcemanager 49 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-metrics2.properties.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # syntax: [prefix].[source|sink].[instance].[options] 19 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 20 | 21 | *.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink 22 | # default sampling period, in seconds 23 | *.period=10 24 | 25 | # The namenode-metrics.out will contain metrics from all context 26 | #namenode.sink.file.filename=namenode-metrics.out 27 | # Specifying a special sampling period for namenode: 28 | #namenode.sink.*.period=8 29 | 30 | #datanode.sink.file.filename=datanode-metrics.out 31 | 32 | # the following example split metrics of different 33 | # context to different sinks (in this case files) 34 | #jobtracker.sink.file_jvm.context=jvm 35 | #jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out 36 | #jobtracker.sink.file_mapred.context=mapred 37 | #jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out 38 | 39 | #tasktracker.sink.file.filename=tasktracker-metrics.out 40 | 41 | #maptask.sink.file.filename=maptask-metrics.out 42 | 43 | #reducetask.sink.file.filename=reducetask-metrics.out 44 | 45 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/limits.conf.j2: -------------------------------------------------------------------------------- 1 | # /etc/security/limits.conf 2 | # 3 | #Each line describes a limit for a user in the form: 4 | # 5 | # 6 | # 7 | #Where: 8 | # can be: 9 | # - an user name 10 | # - a group name, with @group syntax 11 | # - the wildcard *, for default 
entry 12 | # - the wildcard %, can be also used with %group syntax, 13 | # for maxlogin limit 14 | # 15 | # can have the two values: 16 | # - "soft" for enforcing the soft limits 17 | # - "hard" for enforcing hard limits 18 | # 19 | # can be one of the following: 20 | # - core - limits the core file size (KB) 21 | # - data - max data size (KB) 22 | # - fsize - maximum filesize (KB) 23 | # - memlock - max locked-in-memory address space (KB) 24 | # - nofile - max number of open files 25 | # - rss - max resident set size (KB) 26 | # - stack - max stack size (KB) 27 | # - cpu - max CPU time (MIN) 28 | # - nproc - max number of processes 29 | # - as - address space limit (KB) 30 | # - maxlogins - max number of logins for this user 31 | # - maxsyslogins - max number of logins on the system 32 | # - priority - the priority to run user process with 33 | # - locks - max number of file locks the user can hold 34 | # - sigpending - max number of pending signals 35 | # - msgqueue - max memory used by POSIX message queues (bytes) 36 | # - nice - max nice priority allowed to raise to values: [-20, 19] 37 | # - rtprio - max realtime priority 38 | # 39 | # 40 | # 41 | 42 | #* soft core 0 43 | #* hard rss 10000 44 | #@student hard nproc 20 45 | #@faculty soft nproc 20 46 | #@faculty hard nproc 50 47 | #ftp hard nproc 0 48 | #@student - maxlogins 4 49 | 50 | * - nofile 32768 51 | * - nproc 32768 52 | 53 | # End of file 54 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | fs.defaultFS 24 | hdfs://{{ nameservice_id }}/ 25 | 26 | 27 | 28 | 29 | fs.trash.interval 30 | {{ fs_trash_interval }} 31 | 32 | 33 | 34 | 35 | io.compression.codecs 36 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec, 37 | 
org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec, 38 | com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec 39 | 40 | 41 | io.compression.codec.lzo.class 42 | com.hadoop.compression.lzo.LzoCodec 43 | 44 | 45 | 46 | 47 | hadoop.proxyuser.oozie.hosts 48 | * 49 | 50 | 51 | hadoop.proxyuser.oozie.groups 52 | * 53 | 54 | 55 | 56 | 57 | hadoop.proxyuser.httpfs.hosts 58 | * 59 | 60 | 61 | hadoop.proxyuser.httpfs.groups 62 | * 63 | 64 | 65 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/spark-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file contains environment variables required to run Spark. Copy it as 4 | # spark-env.sh and edit that to configure Spark for your site. 5 | # 6 | # The following variables can be set in this file: 7 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 8 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 9 | # - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that 10 | # we recommend setting app-wide options in the application's driver program. 11 | # Examples of node-specific options : -Dspark.local.dir, GC options 12 | # Examples of app-wide options : -Dspark.serializer 13 | # 14 | # If using the standalone deploy mode, you can also set variables for it here: 15 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 16 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports 17 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 18 | # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 
1000m, 2g) 19 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT 20 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 21 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 22 | 23 | ### 24 | ### === IMPORTANT === 25 | ### Change the following to specify a real cluster's Master host 26 | ### 27 | export STANDALONE_SPARK_MASTER_HOST=`hostname` 28 | 29 | #export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST 30 | export SPARK_MASTER_IP={{ groups['cdh5-spark-master'] | join() }}.{{ tl_domain }} 31 | 32 | ### Let's run everything with JVM runtime, instead of Scala 33 | export SPARK_LAUNCH_WITH_SCALA=0 34 | export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib 35 | export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib 36 | export SPARK_MASTER_WEBUI_PORT=18080 37 | export SPARK_MASTER_PORT=7077 38 | export SPARK_WORKER_PORT=7078 39 | export SPARK_WORKER_WEBUI_PORT=18081 40 | export SPARK_WORKER_DIR=/var/run/spark/work 41 | export SPARK_LOG_DIR=/var/log/spark 42 | 43 | if [ -n "$HADOOP_HOME" ]; then 44 | export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native 45 | fi 46 | 47 | ### Comment above 2 lines and uncomment the following if 48 | ### you want to run with scala version, that is included with the package 49 | #export SCALA_HOME=${SCALA_HOME:-/usr/lib/spark/scala} 50 | #export PATH=$PATH:$SCALA_HOME/bin 51 | 52 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-policy.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | 24 | 25 | security.client.protocol.acl 26 | * 27 | ACL for ClientProtocol and AdminProtocol implementations (ie. 28 | clients talking to HRegionServers) 29 | The ACL is a comma-separated list of user and group names. The user and 30 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 31 | A special value of "*" means all users are allowed. 
32 | 33 | 34 | 35 | security.admin.protocol.acl 36 | * 37 | ACL for HMasterInterface protocol implementation (ie. 38 | clients talking to HMaster for admin operations). 39 | The ACL is a comma-separated list of user and group names. The user and 40 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 41 | A special value of "*" means all users are allowed. 42 | 43 | 44 | 45 | security.masterregion.protocol.acl 46 | * 47 | ACL for HMasterRegionInterface protocol implementations 48 | (for HRegionServers communicating with HMaster) 49 | The ACL is a comma-separated list of user and group names. The user and 50 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 51 | A special value of "*" means all users are allowed. 52 | 53 | 54 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-env.sh.template.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Set Hive and Hadoop environment variables here. These variables can be used 18 | # to control the execution of Hive. 
It should be used by admins to configure 19 | # the Hive installation (so that users do not have to set environment variables 20 | # or set command line parameters to get correct behavior). 21 | # 22 | # The hive service being invoked (CLI/HWI etc.) is available via the environment 23 | # variable SERVICE 24 | 25 | 26 | # Hive Client memory usage can be an issue if a large number of clients 27 | # are running at the same time. The flags below have been useful in 28 | # reducing memory usage: 29 | # 30 | # if [ "$SERVICE" = "cli" ]; then 31 | # if [ -z "$DEBUG" ]; then 32 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit" 33 | # else 34 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit" 35 | # fi 36 | # fi 37 | 38 | # The heap size of the jvm stared by hive shell script can be controlled via: 39 | # 40 | # export HADOOP_HEAPSIZE=1024 41 | # 42 | # Larger heap size may be required when running queries over large number of files or partitions. 43 | # By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be 44 | # appropriate for hive server (hwi etc). 45 | 46 | 47 | # Set HADOOP_HOME to point to a specific hadoop install directory 48 | # HADOOP_HOME=${bin}/../../hadoop 49 | 50 | # Hive Configuration Directory can be controlled by: 51 | # export HIVE_CONF_DIR= 52 | 53 | # Folder containing extra ibraries required for hive compilation/execution can be controlled by: 54 | # export HIVE_AUX_JARS_PATH= 55 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/pig.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Pig configuration file. All values can be overwritten by command line arguments. 17 | # see bin/pig -help 18 | 19 | # log4jconf log4j configuration file 20 | # log4jconf=./conf/log4j.properties 21 | 22 | # brief logging (no timestamps) 23 | brief=false 24 | 25 | # clustername, name of the hadoop jobtracker. If no port is defined port 50020 will be used. 26 | #cluster 27 | 28 | #debug level, INFO is default 29 | debug=INFO 30 | 31 | # a file that contains pig script 32 | #file= 33 | 34 | # load jarfile, colon separated 35 | #jar= 36 | 37 | #verbose print all log messages to screen (default to print only INFO and above to screen) 38 | verbose=false 39 | 40 | #exectype local|mapreduce, mapreduce is default 41 | #exectype=mapreduce 42 | # hod realted properties 43 | #ssh.gateway 44 | #hod.expect.root 45 | #hod.expect.uselatest 46 | #hod.command 47 | #hod.config.dir 48 | #hod.param 49 | 50 | 51 | #Do not spill temp files smaller than this size (bytes) 52 | pig.spill.size.threshold=5000000 53 | #EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes) 54 | #This should help reduce the number of files being spilled. 
55 | pig.spill.gc.activation.size=40000000 56 | 57 | 58 | ###################### 59 | # Everything below this line is Yahoo specific. Note that I've made 60 | # (almost) no changes to the lines above to make merging in from Apache 61 | # easier. Any values I don't want from above I override below. 62 | # 63 | # This file is configured for use with HOD on the production clusters. If you 64 | # want to run pig with a static cluster you will need to remove everything 65 | # below this line and set the cluster value (above) to the 66 | # hostname and port of your job tracker. 67 | 68 | exectype=mapreduce 69 | log.file= 70 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/ssl-server.xml.example.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | ssl.server.truststore.location 23 | 24 | Truststore to be used by NN and DN. Must be specified. 25 | 26 | 27 | 28 | 29 | ssl.server.truststore.password 30 | 31 | Optional. Default value is "". 32 | 33 | 34 | 35 | 36 | ssl.server.truststore.type 37 | jks 38 | Optional. The keystore file format, default value is "jks". 39 | 40 | 41 | 42 | 43 | ssl.server.truststore.reload.interval 44 | 10000 45 | Truststore reload check interval, in milliseconds. 46 | Default value is 10000 (10 seconds). 47 | 48 | 49 | 50 | 51 | ssl.server.keystore.location 52 | 53 | Keystore to be used by NN and DN. Must be specified. 54 | 55 | 56 | 57 | 58 | ssl.server.keystore.password 59 | 60 | Must be specified. 61 | 62 | 63 | 64 | 65 | ssl.server.keystore.keypassword 66 | 67 | Must be specified. 68 | 69 | 70 | 71 | 72 | ssl.server.keystore.type 73 | jks 74 | Optional. The keystore file format, default value is "jks". 
75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/ssl-client.xml.example.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | ssl.client.truststore.location 23 | 24 | Truststore to be used by clients like distcp. Must be 25 | specified. 26 | 27 | 28 | 29 | 30 | ssl.client.truststore.password 31 | 32 | Optional. Default value is "". 33 | 34 | 35 | 36 | 37 | ssl.client.truststore.type 38 | jks 39 | Optional. The keystore file format, default value is "jks". 40 | 41 | 42 | 43 | 44 | ssl.client.truststore.reload.interval 45 | 10000 46 | Truststore reload check interval, in milliseconds. 47 | Default value is 10000 (10 seconds). 48 | 49 | 50 | 51 | 52 | ssl.client.keystore.location 53 | 54 | Keystore to be used by clients like distcp. Must be 55 | specified. 56 | 57 | 58 | 59 | 60 | ssl.client.keystore.password 61 | 62 | Optional. Default value is "". 63 | 64 | 65 | 66 | 67 | ssl.client.keystore.keypassword 68 | 69 | Optional. Default value is "". 70 | 71 | 72 | 73 | 74 | ssl.client.keystore.type 75 | jks 76 | Optional. The keystore file format, default value is "jks". 
77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /group_vars/cdh5-all: -------------------------------------------------------------------------------- 1 | --- 2 | version: 3 | hadoop: 2.2.0+cdh5.0.0+1610-0.cdh5b2.p0.51.el6 4 | lzo: 0.4.15+gplextras5.0.0+0-0.gplextras5b2.p0.28.el6 5 | hbase: 0.96.1.1+cdh5.0.0+23-0.cdh5b2.p0.20.el6 6 | hive: 0.12.0+cdh5.0.0+265-0.cdh5b2.p0.33.el6 7 | oozie: 4.0.0+cdh5.0.0+144-0.cdh5b2.p0.21.el6 8 | pig: 0.12.0+cdh5.0.0+20-0.cdh5b2.p0.19.el6 9 | zookeeper: 3.4.5+cdh5.0.0+27-0.cdh5b2.p0.29.el6 10 | spark: 0.9.0-1.cdh5b2.p0.22.el6 11 | 12 | ansible_path: "{{ lookup('env','ANSIBLE_WORK_DIR') }}" 13 | repo_server: 10.100.1.10 14 | 15 | tl_domain: heylinux.com 16 | 17 | # core-site_xml 18 | nameservice_id: mycluster 19 | fs_trash_interval: 1440 20 | 21 | # zoo_cfg 22 | zookeeper_datadir: /var/lib/zookeeper 23 | 24 | # hdfs-site_xml 25 | dfs_permissions_superusergroup: hdfs 26 | dfs_permissions_enabled: 'false' 27 | dfs_replication: 1 28 | dfs_journalnode_edits_dir: /var/hadoop/data/1/dfs/jn 29 | dfs_blocksize: 134217728 30 | dfs_namenode_handler_count: 256 31 | dfs_datanode_handler_count: 32 32 | dfs_datanode_du_reserved: 0 33 | dfs_balance_bandwidthPerSec: 1048576 34 | dfs_hosts_exclude: /etc/hadoop/conf.{{ nameservice_id }}/datanodes.exclude 35 | dfs_datanode_max_transfer_threads: 4096 36 | dfs_datanode_balanced_space_threshold: 10737418240 37 | dfs_datanode_balanced_space_preference_fraction: 0.75 38 | dfs_datanode_max_xcievers: 4096 39 | dfs_checksum_type: CRC32 40 | dfs_namenode_name_dir: 41 | - /var/hadoop/data/1/dfs/nn 42 | - /var/hadoop/data/2/dfs/nn 43 | dfs_datanode_data_dir: 44 | - /var/hadoop/data/1/dfs/dn 45 | - /var/hadoop/data/2/dfs/dn 46 | - /var/hadoop/data/3/dfs/dn 47 | - /var/hadoop/data/4/dfs/dn 48 | 49 | # yarn-site_xml 50 | yarn_nodemanager_local_dirs: 51 | - /var/hadoop/data/1/yarn/local 52 | - /var/hadoop/data/2/yarn/local 53 | - /var/hadoop/data/3/yarn/local 
54 | - /var/hadoop/data/4/yarn/local 55 | yarn_nodemanager_log_dirs: 56 | - /var/hadoop/data/1/yarn/logs 57 | - /var/hadoop/data/2/yarn/logs 58 | - /var/hadoop/data/3/yarn/logs 59 | - /var/hadoop/data/4/yarn/logs 60 | yarn_nodemanager_remote_app_log_dir: 'hdfs://{{ nameservice_id }}/var/log/hadoop-yarn/apps' 61 | yarn_nodemanager_vmem_pmem_ratio: 10 62 | yarn_nodemanager_resource_memory_mb: 24576 63 | yarn_nodemanager_pmem_check_enabled: 'true' 64 | yarn_nodemanager_vmem_check_enabled: 'true' 65 | 66 | # mapred-site_xml 67 | mapreduce_map_memory_mb: 4096 68 | mapreduce_reduce_memory_mb: 8192 69 | mapreduce_map_java_opts: '-Xmx3072m' 70 | mapreduce_reduce_java_opts: '-Xmx6144m' 71 | mapreduce_jobtracker_handler_count: 128 72 | dfs_namenode_handler_count: 128 73 | 74 | # hive-site_xml 75 | hive_mysql_hosts: 76 | - "%" 77 | - "127.0.0.1" 78 | - "localhost" 79 | hive_mysql_passwd: mypasswd 80 | 81 | # oozie-site_xml 82 | oozie_db_passwd: mypasswd 83 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | mapreduce.framework.name 24 | yarn 25 | 26 | 27 | 28 | 29 | mapreduce.jobhistory.address 30 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:10020 31 | 32 | 33 | mapreduce.jobhistory.webapp.address 34 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:19888 35 | 36 | 37 | 38 | 39 | yarn.app.mapreduce.am.staging-dir 40 | /user 41 | 42 | 43 | 44 | 45 | mapreduce.map.memory.mb 46 | {{ mapreduce_map_memory_mb }} 47 | 48 | 49 | mapreduce.reduce.memory.mb 50 | {{ mapreduce_reduce_memory_mb }} 51 | 52 | 53 | mapreduce.map.java.opts 54 | {{ mapreduce_map_java_opts }} 55 | 56 | 57 | mapreduce.reduce.java.opts 58 | {{ mapreduce_reduce_java_opts }} 59 | 60 | 61 | 62 | mapreduce.jobtracker.handler.count 63 | {{ mapreduce_jobtracker_handler_count }} 64 | 65 
| 66 | dfs.namenode.handler.count 67 | {{ dfs_namenode_handler_count }} 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-metrics.properties.j2: -------------------------------------------------------------------------------- 1 | # Configuration of the "dfs" context for null 2 | dfs.class=org.apache.hadoop.metrics.spi.NullContext 3 | 4 | # Configuration of the "dfs" context for file 5 | #dfs.class=org.apache.hadoop.metrics.file.FileContext 6 | #dfs.period=10 7 | #dfs.fileName=/tmp/dfsmetrics.log 8 | 9 | # Configuration of the "dfs" context for ganglia 10 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 11 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext 12 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 13 | # dfs.period=10 14 | # dfs.servers=localhost:8649 15 | 16 | 17 | # Configuration of the "mapred" context for null 18 | mapred.class=org.apache.hadoop.metrics.spi.NullContext 19 | 20 | # Configuration of the "mapred" context for file 21 | #mapred.class=org.apache.hadoop.metrics.file.FileContext 22 | #mapred.period=10 23 | #mapred.fileName=/tmp/mrmetrics.log 24 | 25 | # Configuration of the "mapred" context for ganglia 26 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 27 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext 28 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 29 | # mapred.period=10 30 | # mapred.servers=localhost:8649 31 | 32 | 33 | # Configuration of the "jvm" context for null 34 | #jvm.class=org.apache.hadoop.metrics.spi.NullContext 35 | 36 | # Configuration of the "jvm" context for file 37 | #jvm.class=org.apache.hadoop.metrics.file.FileContext 38 | #jvm.period=10 39 | #jvm.fileName=/tmp/jvmmetrics.log 40 | 41 | # Configuration of the "jvm" context for ganglia 42 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext 43 | # 
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 44 | # jvm.period=10 45 | # jvm.servers=localhost:8649 46 | 47 | # Configuration of the "rpc" context for null 48 | rpc.class=org.apache.hadoop.metrics.spi.NullContext 49 | 50 | # Configuration of the "rpc" context for file 51 | #rpc.class=org.apache.hadoop.metrics.file.FileContext 52 | #rpc.period=10 53 | #rpc.fileName=/tmp/rpcmetrics.log 54 | 55 | # Configuration of the "rpc" context for ganglia 56 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext 57 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 58 | # rpc.period=10 59 | # rpc.servers=localhost:8649 60 | 61 | 62 | # Configuration of the "ugi" context for null 63 | ugi.class=org.apache.hadoop.metrics.spi.NullContext 64 | 65 | # Configuration of the "ugi" context for file 66 | #ugi.class=org.apache.hadoop.metrics.file.FileContext 67 | #ugi.period=10 68 | #ugi.fileName=/tmp/ugimetrics.log 69 | 70 | # Configuration of the "ugi" context for ganglia 71 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext 72 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 73 | # ugi.period=10 74 | # ugi.servers=localhost:8649 75 | 76 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-exec-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,FA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.query.id=hadoop 22 | hive.log.file=${hive.query.id}.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hive.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshhold=${hive.log.threshold} 29 | 30 | # 31 | # File Appender 32 | # 33 | 34 | log4j.appender.FA=org.apache.log4j.FileAppender 35 | log4j.appender.FA.File=${hive.log.dir}/${hive.log.file} 36 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 37 | 38 | # Pattern format: Date LogLevel LoggerName LogMessage 39 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 40 | # Debugging Pattern format 41 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | 49 | log4j.appender.console=org.apache.log4j.ConsoleAppender 50 | log4j.appender.console.target=System.err 51 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 52 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 53 | 54 | #custom logging levels 55 | #log4j.logger.xxx=DEBUG 56 | 57 | # 58 | # Event Counter Appender 59 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 
60 | # 61 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 62 | 63 | 64 | log4j.category.DataNucleus=ERROR,FA 65 | log4j.category.Datastore=ERROR,FA 66 | log4j.category.Datastore.Schema=ERROR,FA 67 | log4j.category.JPOX.Datastore=ERROR,FA 68 | log4j.category.JPOX.Plugin=ERROR,FA 69 | log4j.category.JPOX.MetaData=ERROR,FA 70 | log4j.category.JPOX.Query=ERROR,FA 71 | log4j.category.JPOX.General=ERROR,FA 72 | log4j.category.JPOX.Enhancer=ERROR,FA 73 | 74 | 75 | # Silence useless ZK logs 76 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,FA 77 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,FA 78 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Set Oozie specific environment variables here. 
21 | 22 | 23 | export OOZIE_DATA=/var/lib/oozie 24 | export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat 25 | export CATALINA_TMPDIR=/var/lib/oozie 26 | export CATALINA_PID=/var/run/oozie/oozie.pid 27 | export CATALINA_BASE=/var/lib/oozie/tomcat-deployment 28 | 29 | # Settings for the Embedded Tomcat that runs Oozie 30 | # Java System properties for Oozie should be specified in this variable 31 | # 32 | export OOZIE_HTTPS_PORT=11443 33 | export OOZIE_HTTPS_KEYSTORE_PASS=password 34 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.port=${OOZIE_HTTPS_PORT}" 35 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.keystore.pass=${OOZIE_HTTPS_KEYSTORE_PASS}" 36 | export CATALINA_OPTS="$CATALINA_OPTS -Xmx1024m" 37 | 38 | # Oozie configuration file to load from Oozie configuration directory 39 | # 40 | # export OOZIE_CONFIG_FILE=oozie-site.xml 41 | export OOZIE_CONFIG=/etc/oozie/conf 42 | 43 | # Oozie logs directory 44 | # 45 | # export OOZIE_LOG=${OOZIE_HOME}/logs 46 | export OOZIE_LOG=/var/log/oozie 47 | 48 | # Oozie Log4J configuration file to load from Oozie configuration directory 49 | # 50 | # export OOZIE_LOG4J_FILE=oozie-log4j.properties 51 | 52 | # Reload interval of the Log4J configuration file, in seconds 53 | # 54 | # export OOZIE_LOG4J_RELOAD=10 55 | 56 | # The port Oozie server runs 57 | # 58 | # export OOZIE_HTTP_PORT=11000 59 | 60 | # The port Oozie server runs if using SSL (HTTPS) 61 | # 62 | # export OOZIE_HTTPS_PORT=11443 63 | 64 | # The host name Oozie server runs on 65 | # 66 | # export OOZIE_HTTP_HOSTNAME=`hostname -f` 67 | 68 | # The base URL for callback URLs to Oozie 69 | # 70 | # export OOZIE_BASE_URL="http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie" 71 | 72 | # The location of the keystore for the Oozie server if using SSL (HTTPS) 73 | # 74 | # export OOZIE_HTTPS_KEYSTORE_FILE=${HOME}/.keystore 75 | 76 | # The password of the keystore for the Oozie server if using SSL (HTTPS) 77 | # 78 | # export OOZIE_HTTPS_KEYSTORE_PASS=password 79 | 
80 | # The Oozie Instance ID 81 | # 82 | # export OOZIE_INSTANCE_ID="${OOZIE_HTTP_HOSTNAME}" 83 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/yarn-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | yarn.nodemanager.aux-services 24 | mapreduce_shuffle 25 | 26 | 27 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 28 | org.apache.hadoop.mapred.ShuffleHandler 29 | 30 | 31 | yarn.resourcemanager.hostname 32 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }} 33 | 34 | 35 | yarn.application.classpath 36 | 37 | $HADOOP_CONF_DIR, 38 | $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*, 39 | $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*, 40 | $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*, 41 | $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/* 42 | 43 | 44 | 45 | 46 | 47 | yarn.nodemanager.local-dirs 48 | {{ yarn_nodemanager_local_dirs | join(',') }} 49 | 50 | 51 | yarn.nodemanager.log-dirs 52 | {{ yarn_nodemanager_log_dirs | join(',') }} 53 | 54 | 55 | yarn.log-aggregation-enable 56 | true 57 | 58 | 59 | yarn.nodemanager.remote-app-log-dir 60 | {{ yarn_nodemanager_remote_app_log_dir }} 61 | 62 | 63 | yarn.log.server.url 64 | http://{{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:19888/jobhistory/logs/ 65 | 66 | 67 | 68 | 69 | yarn.web-proxy.address 70 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:8100 71 | 72 | 73 | 74 | 75 | yarn.nodemanager.vmem-pmem-ratio 76 | {{ yarn_nodemanager_vmem_pmem_ratio }} 77 | 78 | 79 | yarn.nodemanager.resource.memory-mb 80 | {{ yarn_nodemanager_resource_memory_mb }} 81 | 82 | 83 | yarn.nodemanager.pmem-check-enabled 84 | {{ yarn_nodemanager_pmem_check_enabled }} 85 | 86 | 87 | yarn.nodemanager.vmem-check-enabled 88 | {{ yarn_nodemanager_vmem_check_enabled }} 89 | 90 | 91 | 92 | 
-------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | javax.jdo.option.ConnectionURL 32 | jdbc:mysql://localhost/metastore 33 | 34 | 35 | javax.jdo.option.ConnectionDriverName 36 | com.mysql.jdbc.Driver 37 | 38 | 39 | javax.jdo.option.ConnectionUserName 40 | hive 41 | 42 | 43 | javax.jdo.option.ConnectionPassword 44 | {{ hive_mysql_passwd }} 45 | 46 | 47 | 48 | datanucleus.autoCreateSchema 49 | false 50 | 51 | 52 | datanucleus.fixedDatastore 53 | true 54 | 55 | 56 | datanucleus.autoStartMechanism 57 | SchemaTable 58 | 59 | 60 | 61 | hive.metastore.uris 62 | thrift://{{ groups['cdh5-hive'] | join() }}.{{ tl_domain }}:9083 63 | 64 | 65 | 66 | hive.support.concurrency 67 | true 68 | 69 | 70 | hive.zookeeper.quorum 71 | {{ groups['cdh5-zookeeperserver'] | join('.' ~ tl_domain + ',') }}.{{ tl_domain }} 72 | 73 | 74 | 75 | hive.aux.jars.path 76 | file:///usr/lib/hive/lib/zookeeper.jar, 77 | file:///usr/lib/hive/lib/hive-hbase-handler.jar, 78 | file:///usr/lib/hive/lib/guava-11.0.2.jar, 79 | file:///usr/lib/hive/lib/hbase-client.jar, 80 | file:///usr/lib/hive/lib/hbase-common.jar, 81 | file:///usr/lib/hive/lib/hbase-hadoop-compat.jar, 82 | file:///usr/lib/hive/lib/hbase-hadoop2-compat.jar, 83 | file:///usr/lib/hive/lib/hbase-protocol.jar, 84 | file:///usr/lib/hive/lib/hbase-server.jar, 85 | file:///usr/lib/hive/lib/htrace-core.jar 86 | 87 | 88 | 89 | hbase.zookeeper.quorum 90 | {{ groups['cdh5-zookeeperserver'] | join('.' 
~ tl_domain + ',') }}.{{ tl_domain }} 91 | 92 | 93 | -------------------------------------------------------------------------------- /roles/cdh5-hive/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hive server pkgs 3 | yum: name={{ item }}-{{ version['hive'] }} state=present 4 | with_items: 5 | - hive 6 | - hive-metastore 7 | - hive-server2 8 | - hive-hbase 9 | - hive-jdbc 10 | tags: 11 | - cdh5-hive 12 | 13 | - name: copy the hive configuration files 14 | template: src={{ item }}.j2 dest=/etc/hive/conf/{{ item }} owner=hive group=hive mode=0664 15 | with_items: 16 | - hive-site.xml 17 | - hive-env.sh.template 18 | - hive-default.xml.template 19 | - hive-exec-log4j.properties 20 | - hive-log4j.properties 21 | - hive-server2 22 | register: copy_hive_conf 23 | tags: 24 | - cdh5-hive 25 | - cdh5-hive-conf 26 | 27 | - name: copy the hive default env conf 28 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} owner=hive group=hive mode=0644 29 | with_items: 30 | - hive-server2 31 | tags: 32 | - cdh5-hive 33 | - cdh5-hive-conf 34 | 35 | - name: install mysql server and connector for hive metastore 36 | yum: name={{ item }} state=present 37 | with_items: 38 | - mysql-server 39 | - mysql-connector-java 40 | tags: 41 | - cdh5-hive 42 | - cdh5-hive-mysqlserver 43 | 44 | - name: start mysql server for hive metastore 45 | service: name=mysqld state=started 46 | tags: 47 | - cdh5-hive 48 | - cdh5-hive-mysqlserver 49 | 50 | - name: create symbolically link for mysql connector 51 | file: src=/usr/share/java/mysql-connector-java.jar dest=/usr/lib/hive/lib/mysql-connector-java.jar owner=hive group=hive state=link 52 | tags: 53 | - cdh5-hive 54 | - cdh5-hive-mysqlserver 55 | 56 | - name: create needed directories for hive 57 | file: name={{ item }} state=directory owner=hive group=hive mode=0700 58 | with_items: 59 | - /var/hive 60 | - /var/hive/deploy 61 | - /var/hive/.status 62 | tags: 63 | - 
cdh5-hive 64 | 65 | - name: copy hive_metastore_init.sql to /var/hive/deploy 66 | template: src={{ item }}.j2 dest=/var/hive/deploy/{{ item }} owner=hive group=hive mode=0644 67 | with_items: 68 | - hive_metastore_init.sql 69 | tags: 70 | - cdh5-hive 71 | - cdh5-hive-conf 72 | - cdh5-hive-mysqlserver 73 | 74 | - name: set the password for mysql root user 75 | shell: creates={{ item }} mysqladmin -u root password '{{ hive_mysql_passwd }}' && touch {{ item }} 76 | with_items: 77 | - /var/hive/.status/password.reset 78 | tags: 79 | - cdh5-hive 80 | - cdh5-hive-mysqlserver 81 | 82 | - name: create the database for hive metastore 83 | shell: creates={{ item }} mysql -uroot -p'{{ hive_mysql_passwd }}' < /var/hive/deploy/hive_metastore_init.sql && touch {{ item }} 84 | with_items: 85 | - /var/hive/.status/metastore.created 86 | tags: 87 | - cdh5-hive 88 | - cdh5-hive-mysqlserver 89 | 90 | - name: start metastore service 91 | service: name=hive-metastore state=started 92 | tags: 93 | - cdh5-hive 94 | 95 | - name: create warehouse directory 96 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/hive/warehouse; sudo -u hdfs hadoop fs -chown -R hive /user/hive; sudo -u hdfs hadoop fs -chmod -R 1777 /user/hive && touch {{ item }} 97 | with_items: 98 | - /var/hive/.status/warehouse.created 99 | tags: 100 | - cdh5-hive 101 | 102 | - name: change the permissions of .hivehistory 103 | file: path=/var/lib/hive/.hivehistory owner=hive group=hive mode=0666 104 | tags: 105 | - cdh5-hive 106 | 107 | - name: start hive server services 108 | service: name={{ item }} state=started 109 | with_items: 110 | - hive-server2 111 | tags: 112 | - cdh5-hive 113 | - cdh5-hive-server2 114 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | 
hbase.root.logger=INFO,console 3 | hbase.security.logger=INFO,console 4 | hbase.log.dir=. 5 | hbase.log.file=hbase.log 6 | 7 | # Define the root logger to the system property "hbase.root.logger". 8 | log4j.rootLogger=${hbase.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 28 | 29 | # Rolling File Appender properties 30 | hbase.log.maxfilesize=256MB 31 | hbase.log.maxbackupindex=20 32 | 33 | # Rolling File Appender 34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file} 36 | 37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize} 38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex} 39 | 40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 42 | 43 | # 44 | # Security audit appender 45 | # 46 | hbase.security.log.file=SecurityAuth.audit 47 | hbase.security.log.maxfilesize=256MB 48 | hbase.security.log.maxbackupindex=20 49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file} 51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize} 52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex} 53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 55 
| log4j.category.SecurityLogger=${hbase.security.logger} 56 | log4j.additivity.SecurityLogger=false 57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE 58 | 59 | # 60 | # Null Appender 61 | # 62 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 63 | 64 | # 65 | # console 66 | # Add "console" to rootlogger above if you want to use this 67 | # 68 | log4j.appender.console=org.apache.log4j.ConsoleAppender 69 | log4j.appender.console.target=System.err 70 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 72 | 73 | # Custom Logging levels 74 | 75 | log4j.logger.org.apache.zookeeper=INFO 76 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 77 | log4j.logger.org.apache.hadoop.hbase=DEBUG 78 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug. 79 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO 80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO 81 | #log4j.logger.org.apache.hadoop.dfs=DEBUG 82 | # Set this class to log INFO only otherwise its OTT 83 | # Enable this to get detailed connection error/retry logging. 84 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE 85 | 86 | 87 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output) 88 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG 89 | 90 | # Uncomment the below if you want to remove logging of client region caching' 91 | # and scan of .META. 
messages 92 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO 93 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO 94 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,DRFA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.log.file=hive.log 22 | 23 | # Define the root logger to the system property "hadoop.root.logger". 24 | log4j.rootLogger=${hive.root.logger}, EventCounter 25 | 26 | # Logging Threshold 27 | log4j.threshold=${hive.log.threshold} 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | # Use the PidDailyerRollingFileAppend class instead if you want to use separate log files 33 | # for different CLI session. 
34 | # 35 | # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender 36 | 37 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 38 | 39 | log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} 40 | 41 | # Rollver at midnight 42 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 43 | 44 | # 30-day backup 45 | #log4j.appender.DRFA.MaxBackupIndex=30 46 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 47 | 48 | # Pattern format: Date LogLevel LoggerName LogMessage 49 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 50 | # Debugging Pattern format 51 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 52 | 53 | 54 | # 55 | # console 56 | # Add "console" to rootlogger above if you want to use this 57 | # 58 | 59 | log4j.appender.console=org.apache.log4j.ConsoleAppender 60 | log4j.appender.console.target=System.err 61 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 62 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 63 | log4j.appender.console.encoding=UTF-8 64 | 65 | #custom logging levels 66 | #log4j.logger.xxx=DEBUG 67 | 68 | # 69 | # Event Counter Appender 70 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 
71 | # 72 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 73 | 74 | 75 | log4j.category.DataNucleus=ERROR,DRFA 76 | log4j.category.Datastore=ERROR,DRFA 77 | log4j.category.Datastore.Schema=ERROR,DRFA 78 | log4j.category.JPOX.Datastore=ERROR,DRFA 79 | log4j.category.JPOX.Plugin=ERROR,DRFA 80 | log4j.category.JPOX.MetaData=ERROR,DRFA 81 | log4j.category.JPOX.Query=ERROR,DRFA 82 | log4j.category.JPOX.General=ERROR,DRFA 83 | log4j.category.JPOX.Enhancer=ERROR,DRFA 84 | 85 | 86 | # Silence useless ZK logs 87 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA 88 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA 89 | 90 | #custom logging levels 91 | log4j.logger.org.apache.hadoop.hive.ql.parse.SemanticAnalyzer=INFO 92 | log4j.logger.org.apache.hadoop.hive.ql.Driver=INFO 93 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.ExecDriver=INFO 94 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapRedTask=INFO 95 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask=INFO 96 | log4j.logger.org.apache.hadoop.hive.ql.exec.Task=INFO 97 | 98 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/capacity-scheduler.xml.j2: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | yarn.scheduler.capacity.maximum-applications 18 | 10000 19 | 20 | Maximum number of applications that can be pending and running. 21 | 22 | 23 | 24 | 25 | yarn.scheduler.capacity.maximum-am-resource-percent 26 | 0.1 27 | 28 | Maximum percent of resources in the cluster which can be used to run 29 | application masters i.e. controls number of concurrent running 30 | applications. 31 | 32 | 33 | 34 | 35 | yarn.scheduler.capacity.resource-calculator 36 | org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator 37 | 38 | The ResourceCalculator implementation to be used to compare 39 | Resources in the scheduler. 
40 | The default i.e. DefaultResourceCalculator only uses Memory while 41 | DominantResourceCalculator uses dominant-resource to compare 42 | multi-dimensional resources such as Memory, CPU etc. 43 | 44 | 45 | 46 | 47 | yarn.scheduler.capacity.root.queues 48 | default 49 | 50 | The queues at the this level (root is the root queue). 51 | 52 | 53 | 54 | 55 | yarn.scheduler.capacity.root.default.capacity 56 | 100 57 | Default queue target capacity. 58 | 59 | 60 | 61 | yarn.scheduler.capacity.root.default.user-limit-factor 62 | 1 63 | 64 | Default queue user limit a percentage from 0.0 to 1.0. 65 | 66 | 67 | 68 | 69 | yarn.scheduler.capacity.root.default.maximum-capacity 70 | 100 71 | 72 | The maximum capacity of the default queue. 73 | 74 | 75 | 76 | 77 | yarn.scheduler.capacity.root.default.state 78 | RUNNING 79 | 80 | The state of the default queue. State can be one of RUNNING or STOPPED. 81 | 82 | 83 | 84 | 85 | yarn.scheduler.capacity.root.default.acl_submit_applications 86 | * 87 | 88 | The ACL of who can submit jobs to the default queue. 89 | 90 | 91 | 92 | 93 | yarn.scheduler.capacity.root.default.acl_administer_queue 94 | * 95 | 96 | The ACL of who can administer jobs on the default queue. 97 | 98 | 99 | 100 | 101 | yarn.scheduler.capacity.node-locality-delay 102 | 40 103 | 104 | Number of missed scheduling opportunities after which the CapacityScheduler 105 | attempts to schedule rack-local containers. 106 | Typically this should be set to number of nodes in the cluster, By default is setting 107 | approximately number of nodes in one rack which is 40. 
108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/mapred-queues.xml.template.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | default 31 | 32 | 34 | 35 | 36 | 37 | 39 | running 40 | 41 | 54 | 55 | 56 | 73 | 74 | 75 | 76 | 92 | 93 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install oozie pkgs 3 | yum: name={{ item }}-{{ version['oozie'] }} state=present 4 | with_items: 5 | - oozie 6 | - oozie-client 7 | tags: 8 | - cdh5-oozie 9 | 10 | - name: create alternatives for YARN without SSL 11 | shell: creates=/etc/alternatives/oozie-tomcat-conf alternatives --install {{ item }} oozie-tomcat-conf {{ item }} 50; alternatives --set oozie-tomcat-conf {{ item }} 12 | with_items: 13 | - /etc/oozie/tomcat-conf.http 14 | tags: 15 | - cdh5-oozie 16 | - cdh5-oozie-conf 17 | 18 | - name: create the oozie configuration dirs 19 | file: path=/etc/oozie/conf/{{ item }} state=directory owner=oozie group=oozie mode=0755 20 | with_items: 21 | - action-conf 22 | - hadoop-conf 23 | tags: 24 | - cdh5-oozie 25 | - cdh5-oozie-conf 26 | 27 | - name: copy the oozie configuration files 28 | template: src={{ item }}.j2 dest=/etc/oozie/conf/{{ item }} owner=oozie group=oozie mode=0664 29 | with_items: 30 | - oozie-site.xml 31 | - adminusers.txt 32 | - hadoop-config.xml 33 | - oozie-default.xml 34 | - oozie-env.sh 35 | - oozie-log4j.properties 36 | - action-conf/hive.xml 37 | - hadoop-conf/core-site.xml 38 | register: copy_oozie_conf 39 | tags: 40 | - cdh5-oozie 41 | - cdh5-oozie-conf 42 | 43 | - name: create needed directories for oozie 44 | file: name={{ item }} state=directory owner=oozie group=oozie mode=0700 45 | with_items: 46 | - /var/oozie 47 | - 
/var/oozie/deploy 48 | - /var/oozie/.status 49 | tags: 50 | - cdh5-oozie 51 | 52 | - name: copy the oozie_db_init.sql to /var/oozie/deploy 53 | template: src={{ item }}.j2 dest=/var/oozie/deploy/{{ item }} owner=oozie group=oozie mode=0664 54 | with_items: 55 | - oozie_db_init.sql 56 | tags: 57 | - cdh5-oozie 58 | - cdh5-oozie-conf 59 | 60 | - name: create database for oozie 61 | shell: creates={{ item }} mysql -uroot -p'{{ hive_mysql_passwd }}' < /var/oozie/deploy/oozie_db_init.sql && touch {{ item }} 62 | with_items: 63 | - /var/oozie/.status/oozie_db.created 64 | tags: 65 | - cdh5-oozie 66 | - cdh5-oozie-db 67 | 68 | - name: download ext zip file from cloudera site 69 | get_url: url=http://archive.cloudera.com/gplextras/misc/ext-2.2.zip dest=/var/oozie/deploy/ext-2.2.zip mode=0644 70 | tags: 71 | - cdh5-oozie 72 | - cdh5-oozie-lib 73 | 74 | - name: extract the ext zip file to /var/lib/oozie 75 | shell: creates=/var/lib/oozie/ext-2.2 unzip /var/oozie/deploy/ext-2.2.zip -d /var/lib/oozie/ 76 | tags: 77 | - cdh5-oozie 78 | - cdh5-oozie-lib 79 | 80 | - name: create directories in hdfs 81 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/oozie; sudo -u hdfs hadoop fs -chown -R oozie /user/oozie && touch {{ item }} 82 | with_items: 83 | - /var/oozie/.status/oozie_dir.created 84 | tags: 85 | - cdh5-oozie 86 | - cdh5-oozie-lib 87 | 88 | - name: create sharelib for oozie in hdfs 89 | shell: creates={{ item }} sudo oozie-setup sharelib create -fs hdfs://{{ nameservice_id }}:8020 -locallib /usr/lib/oozie/oozie-sharelib-yarn.tar.gz && touch {{ item }} 90 | with_items: 91 | - /var/oozie/.status/sharelib.created 92 | tags: 93 | - cdh5-oozie 94 | - cdh5-oozie-lib 95 | 96 | - name: install mysql connector 97 | yum: name={{ item }} state=present 98 | with_items: 99 | - mysql-connector-java 100 | tags: 101 | - cdh5-oozie 102 | - cdh5-oozie-lib 103 | 104 | - name: create symbolically link for mysql connector 105 | file: src=/usr/share/java/mysql-connector-java.jar 
dest=/var/lib/oozie/mysql-connector-java.jar owner=oozie group=oozie state=link 106 | tags: 107 | - cdh5-oozie 108 | - cdh5-oozie-lib 109 | 110 | - name: create symbolically links for hadoop-lzo 111 | file: src=/usr/lib/hadoop/lib/{{ item }} dest=/var/lib/oozie/{{ item }} owner=oozie group=oozie state=link 112 | with_items: 113 | - hadoop-lzo-0.4.15-gplextras5.0.0-beta-2-SNAPSHOT.jar 114 | - hadoop-lzo.jar 115 | tags: 116 | - cdh5-oozie 117 | - cdh5-oozie-lib 118 | 119 | - name: create symbolically links for hadoop-lzo 120 | file: src=/usr/lib/hadoop/lib/native/{{ item }} dest=/var/lib/oozie/{{ item }} owner=oozie group=oozie state=link 121 | with_items: 122 | - libgplcompression.a 123 | - libgplcompression.la 124 | - libgplcompression.lai 125 | - libgplcompression.so 126 | - libgplcompression.so.0 127 | - libgplcompression.so.0.0.0 128 | tags: 129 | - cdh5-oozie 130 | - cdh5-oozie-lib 131 | 132 | - name: start oozie 133 | service: name=oozie state=started 134 | tags: 135 | - cdh5-oozie 136 | - cdh5-oozie-service 137 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/yarn-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # User for YARN daemons 17 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} 18 | 19 | # resolve links - $0 may be a softlink 20 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" 21 | 22 | # some Java parameters 23 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 24 | if [ "$JAVA_HOME" != "" ]; then 25 | #echo "run java in $JAVA_HOME" 26 | JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | if [ "$JAVA_HOME" = "" ]; then 30 | echo "Error: JAVA_HOME is not set." 31 | exit 1 32 | fi 33 | 34 | JAVA=$JAVA_HOME/bin/java 35 | JAVA_HEAP_MAX=-Xmx1000m 36 | 37 | # For setting YARN specific HEAP sizes please use this 38 | # Parameter and set appropriately 39 | # YARN_HEAPSIZE=1000 40 | 41 | # check envvars which might override default args 42 | if [ "$YARN_HEAPSIZE" != "" ]; then 43 | JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" 44 | fi 45 | 46 | # Resource Manager specific parameters 47 | 48 | # Specify the max Heapsize for the ResourceManager using a numerical value 49 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 50 | # the value to 1000. 51 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 52 | # and/or YARN_RESOURCEMANAGER_OPTS. 53 | # If not specified, the default value will be picked from either YARN_HEAPMAX 54 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 55 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000 56 | 57 | # Specify the JVM options to be used when starting the ResourceManager. 58 | # These options will be appended to the options specified as YARN_OPTS 59 | # and therefore may override any similar flags set in YARN_OPTS 60 | #export YARN_RESOURCEMANAGER_OPTS= 61 | 62 | # Node Manager specific parameters 63 | 64 | # Specify the max Heapsize for the NodeManager using a numerical value 65 | # in the scale of MB. 
For example, to specify an jvm option of -Xmx1000m, set 66 | # the value to 1000. 67 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 68 | # and/or YARN_NODEMANAGER_OPTS. 69 | # If not specified, the default value will be picked from either YARN_HEAPMAX 70 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 71 | #export YARN_NODEMANAGER_HEAPSIZE=1000 72 | 73 | # Specify the JVM options to be used when starting the NodeManager. 74 | # These options will be appended to the options specified as YARN_OPTS 75 | # and therefore may override any similar flags set in YARN_OPTS 76 | #export YARN_NODEMANAGER_OPTS= 77 | 78 | # so that filenames w/ spaces are handled correctly in loops below 79 | IFS= 80 | 81 | 82 | # default log directory & file 83 | if [ "$YARN_LOG_DIR" = "" ]; then 84 | YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" 85 | fi 86 | if [ "$YARN_LOGFILE" = "" ]; then 87 | YARN_LOGFILE='yarn.log' 88 | fi 89 | 90 | # default policy file for service-level authorization 91 | if [ "$YARN_POLICYFILE" = "" ]; then 92 | YARN_POLICYFILE="hadoop-policy.xml" 93 | fi 94 | 95 | # restore ordinary behaviour 96 | unset IFS 97 | 98 | 99 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" 100 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" 101 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" 102 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" 103 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" 104 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" 105 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 106 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 107 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then 108 | YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" 109 | fi 110 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" 111 | 112 | 113 | 
-------------------------------------------------------------------------------- /roles/cdh5-base/tasks/base.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: create cdh5 repo 3 | copy: src={{ item }} dest=/etc/yum.repos.d/{{ item }} owner=root group=root mode=0644 4 | with_items: 5 | - cloudera-cdh5b2.repo 6 | - cloudera-gplextras5b2.repo 7 | tags: 8 | - cdh5-base 9 | 10 | - name: add cdh5 repo key 11 | get_url: url=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera dest=/etc/pki/rpm-gpg/RPM-GPG-KEY-cloudera owner=root group=root mode=0644 12 | tags: 13 | - cdh5-base 14 | 15 | - name: install needed rpms for hadoop 16 | yum: name={{ item }} state=present 17 | with_items: 18 | - gcc 19 | - gcc-c++ 20 | - make 21 | tags: 22 | - cdh5-base 23 | 24 | - name: create the hosts file for all machines 25 | template: src=hosts.j2 dest=/etc/hosts owner=root group=root mode=0644 26 | tags: 27 | - cdh5-base 28 | - cdh5-base-hosts 29 | 30 | - name: install hadoop base pkgs 31 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 32 | with_items: 33 | - hadoop 34 | - hadoop-mapreduce 35 | - hadoop-yarn 36 | - hadoop-hdfs 37 | tags: 38 | - cdh5-base 39 | 40 | - name: install hadoop lzo pkgs 41 | yum: name={{ item }}-{{ version['lzo'] }} state=present 42 | with_items: 43 | - hadoop-lzo 44 | tags: 45 | - cdh5-base 46 | 47 | - name: create all needed hadoop directories 48 | file: path={{ item }} state=directory owner=root group=root mode=0755 49 | with_items: 50 | - /var/hadoop 51 | - /var/hadoop/data 52 | - /var/hadoop/.status 53 | - /root/deploy 54 | - /root/deploy/hadoop 55 | tags: 56 | - cdh5-base 57 | 58 | - name: download jdk-6u45-linux-x64-rpm.bin 59 | get_url: url=http://{{ repo_server }}/repo/misc/jdk-6u45-linux-x64-rpm.bin dest=/root/deploy/hadoop/jdk-6u45-linux-x64-rpm.bin mode=0755 60 | tags: 61 | - cdh5-base 62 | - cdh5-jdk 63 | 64 | - name: remove openjdk pkgs 65 | yum: name={{ item }}
state=absent 66 | with_items: 67 | - java-1.6.0-openjdk-devel 68 | - java-1.6.0-openjdk 69 | tags: 70 | - cdh5-base 71 | - cdh5-jdk 72 | 73 | - name: run jdk-6u45-linux-x64-rpm.bin 74 | shell: /root/deploy/hadoop/jdk-6u45-linux-x64-rpm.bin creates=/usr/java/jdk1.6.0_45 75 | tags: 76 | - cdh5-base 77 | - cdh5-jdk 78 | 79 | - name: create java env profile 80 | copy: src=java.sh dest=/etc/profile.d/java.sh owner=root group=root mode=0644 81 | register: java_env_profile 82 | tags: 83 | - cdh5-base 84 | - cdh5-jdk 85 | 86 | - name: source the java env profile 87 | shell: source /etc/profile.d/java.sh 88 | when: java_env_profile|changed 89 | tags: 90 | - cdh5-base 91 | - cdh5-jdk 92 | 93 | - name: install zookeeper base pkgs 94 | yum: name={{ item }}-{{ version['zookeeper'] }} state=present 95 | with_items: 96 | - zookeeper 97 | tags: 98 | - cdh5-base 99 | - cdh5-zookeeper 100 | 101 | - name: create zookeeper cfgs 102 | template: src=zoo.cfg.j2 dest=/etc/zookeeper/conf/zoo.cfg owner=zookeeper group=zookeeper mode=0644 103 | tags: 104 | - cdh5-base 105 | - cdh5-zookeeper 106 | 107 | - name: create /etc/hadoop/conf.{{ nameservice_id }} 108 | file: path=/etc/hadoop/conf.{{ nameservice_id }} state=directory owner=root group=root mode=0755 109 | register: create_hadoop_conf 110 | tags: 111 | - cdh5-base 112 | - cdh5-base-conf 113 | 114 | - name: create alternatives for hadoop-conf 115 | shell: alternatives --install /etc/hadoop/conf hadoop-conf {{ item }} 50 116 | with_items: 117 | - /etc/hadoop/conf.{{ nameservice_id }} 118 | when: create_hadoop_conf|changed 119 | tags: 120 | - cdh5-base 121 | - cdh5-base-conf 122 | 123 | - name: create alternatives for hadoop-conf 124 | shell: alternatives --set hadoop-conf {{ item }} 125 | with_items: 126 | - /etc/hadoop/conf.{{ nameservice_id }} 127 | when: create_hadoop_conf|changed 128 | tags: 129 | - cdh5-base 130 | - cdh5-base-conf 131 | 132 | - name: copy the hadoop configuration files 133 | template: src=hadoop/{{ item }}.j2 
dest=/etc/hadoop/conf.{{ nameservice_id }}/{{ item }} owner=hdfs group=hadoop mode=0664 134 | with_items: 135 | - core-site.xml 136 | - hdfs-site.xml 137 | - mapred-site.xml 138 | - yarn-site.xml 139 | - yarn-env.sh 140 | - slaves 141 | - capacity-scheduler.xml 142 | - configuration.xsl 143 | - container-executor.cfg 144 | - hadoop-metrics2.properties 145 | - hadoop-metrics.properties 146 | - hadoop-policy.xml 147 | - log4j.properties 148 | - mapred-queues.xml.template 149 | - ssl-client.xml.example 150 | - ssl-server.xml.example 151 | register: copy_hadoop_conf 152 | tags: 153 | - cdh5-base 154 | - cdh5-base-conf 155 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, software 22 | # distributed under the License is distributed on an "AS IS" BASIS, 23 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
24 | # See the License for the specific language governing permissions and 25 | # limitations under the License. See accompanying LICENSE file. 26 | # 27 | 28 | # If the Java System property 'oozie.log.dir' is not defined at Oozie start up time 29 | # XLogService sets its value to '${oozie.home}/logs' 30 | 31 | # The appender that Oozie uses must be named 'oozie' (i.e. log4j.appender.oozie) 32 | 33 | # Using the RollingFileAppender with the OozieRollingPolicy will roll the log file every hour and retain up to MaxHistory number of 34 | # log files. If FileNamePattern ends with ".gz" it will create gzip files. 35 | log4j.appender.oozie=org.apache.log4j.rolling.RollingFileAppender 36 | log4j.appender.oozie.RollingPolicy=org.apache.oozie.util.OozieRollingPolicy 37 | log4j.appender.oozie.File=${oozie.log.dir}/oozie.log 38 | log4j.appender.oozie.Append=true 39 | log4j.appender.oozie.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.oozie.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - SERVER[${oozie.instance.id}] %m%n 41 | # The FileNamePattern must end with "-%d{yyyy-MM-dd-HH}.gz" or "-%d{yyyy-MM-dd-HH}" and also start with the 42 | # value of log4j.appender.oozie.File 43 | log4j.appender.oozie.RollingPolicy.FileNamePattern=${log4j.appender.oozie.File}-%d{yyyy-MM-dd-HH} 44 | # The MaxHistory controls how many log files will be retained (720 hours / 24 hours per day = 30 days); -1 to disable 45 | log4j.appender.oozie.RollingPolicy.MaxHistory=720 46 | 47 | # Uncomment the below two lines to use the DailyRollingFileAppender instead 48 | # The DatePattern must end with either "dd" or "HH" 49 | #log4j.appender.oozie=org.apache.log4j.DailyRollingFileAppender 50 | #log4j.appender.oozie.DatePattern='.'yyyy-MM-dd-HH 51 | 52 | log4j.appender.oozieops=org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.oozieops.DatePattern='.'yyyy-MM-dd 54 | log4j.appender.oozieops.File=${oozie.log.dir}/oozie-ops.log 55 | log4j.appender.oozieops.Append=true 56 | 
log4j.appender.oozieops.layout=org.apache.log4j.PatternLayout 57 | log4j.appender.oozieops.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 58 | 59 | log4j.appender.oozieinstrumentation=org.apache.log4j.DailyRollingFileAppender 60 | log4j.appender.oozieinstrumentation.DatePattern='.'yyyy-MM-dd 61 | log4j.appender.oozieinstrumentation.File=${oozie.log.dir}/oozie-instrumentation.log 62 | log4j.appender.oozieinstrumentation.Append=true 63 | log4j.appender.oozieinstrumentation.layout=org.apache.log4j.PatternLayout 64 | log4j.appender.oozieinstrumentation.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 65 | 66 | log4j.appender.oozieaudit=org.apache.log4j.DailyRollingFileAppender 67 | log4j.appender.oozieaudit.DatePattern='.'yyyy-MM-dd 68 | log4j.appender.oozieaudit.File=${oozie.log.dir}/oozie-audit.log 69 | log4j.appender.oozieaudit.Append=true 70 | log4j.appender.oozieaudit.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.oozieaudit.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 72 | 73 | log4j.appender.openjpa=org.apache.log4j.DailyRollingFileAppender 74 | log4j.appender.openjpa.DatePattern='.'yyyy-MM-dd 75 | log4j.appender.openjpa.File=${oozie.log.dir}/oozie-jpa.log 76 | log4j.appender.openjpa.Append=true 77 | log4j.appender.openjpa.layout=org.apache.log4j.PatternLayout 78 | log4j.appender.openjpa.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 79 | 80 | log4j.logger.openjpa=INFO, openjpa 81 | log4j.logger.oozieops=INFO, oozieops 82 | log4j.logger.oozieinstrumentation=ALL, oozieinstrumentation 83 | log4j.logger.oozieaudit=ALL, oozieaudit 84 | log4j.logger.org.apache.oozie=INFO, oozie 85 | log4j.logger.org.apache.hadoop=WARN, oozie 86 | log4j.logger.org.mortbay=WARN, oozie 87 | log4j.logger.org.hsqldb=WARN, oozie 88 | log4j.logger.org.apache.hadoop.security.authentication.server=WARN, oozie 89 | -------------------------------------------------------------------------------- 
/roles/cdh5-spark-base/templates/metrics.properties.template.j2: -------------------------------------------------------------------------------- 1 | # syntax: [instance].sink|source.[name].[options]=[value] 2 | 3 | # This file configures Spark's internal metrics system. The metrics system is 4 | # divided into instances which correspond to internal components. 5 | # Each instance can be configured to report its metrics to one or more sinks. 6 | # Accepted values for [instance] are "master", "worker", "executor", "driver", 7 | # and "applications". A wild card "*" can be used as an instance name, in 8 | # which case all instances will inherit the supplied property. 9 | # 10 | # Within an instance, a "source" specifies a particular set of grouped metrics. 11 | # there are two kinds of sources: 12 | # 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will 13 | # collect a Spark component's internal state. Each instance is paired with a 14 | # Spark source that is added automatically. 15 | # 2. Common sources, like JvmSource, which will collect low level state. 16 | # These can be added through configuration options and are then loaded 17 | # using reflection. 18 | # 19 | # A "sink" specifies where metrics are delivered to. Each instance can be 20 | # assigned one or more sinks. 21 | # 22 | # The sink|source field specifies whether the property relates to a sink or 23 | # source. 24 | # 25 | # The [name] field specifies the name of source or sink. 26 | # 27 | # The [options] field is the specific property of this source or sink. The 28 | # source or sink is responsible for parsing this property. 29 | # 30 | # Notes: 31 | # 1. To add a new sink, set the "class" option to a fully qualified class 32 | # name (see examples below). 33 | # 2. Some sinks involve a polling period. The minimum allowed polling period 34 | # is 1 second. 35 | # 3. Wild card properties can be overridden by more specific properties. 
36 | # For example, master.sink.console.period takes precedence over 37 | # *.sink.console.period. 38 | # 4. A metrics specific configuration 39 | # "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be 40 | # added to Java properties using -Dspark.metrics.conf=xxx if you want to 41 | # customize metrics system. You can also put the file in ${SPARK_HOME}/conf 42 | # and it will be loaded automatically. 43 | # 5. MetricsServlet is added by default as a sink in master, worker and client 44 | # driver, you can send http request "/metrics/json" to get a snapshot of all the 45 | # registered metrics in json format. For master, requests "/metrics/master/json" and 46 | # "/metrics/applications/json" can be sent separately to get metrics snapshot of 47 | # instance master and applications. MetricsServlet may not be configured by self. 48 | # 49 | 50 | ## List of available sinks and their properties. 51 | 52 | # org.apache.spark.metrics.sink.ConsoleSink 53 | # Name: Default: Description: 54 | # period 10 Poll period 55 | # unit seconds Units of poll period 56 | 57 | # org.apache.spark.metrics.sink.CSVSink 58 | # Name: Default: Description: 59 | # period 10 Poll period 60 | # unit seconds Units of poll period 61 | # directory /tmp Where to store CSV files 62 | 63 | # org.apache.spark.metrics.sink.GangliaSink 64 | # Name: Default: Description: 65 | # host NONE Hostname or multicast group of Ganglia server 66 | # port NONE Port of Ganglia server(s) 67 | # period 10 Poll period 68 | # unit seconds Units of poll period 69 | # ttl 1 TTL of messages sent by Ganglia 70 | # mode multicast Ganglia network mode ('unicast' or 'multicast') 71 | 72 | # org.apache.spark.metrics.sink.JmxSink 73 | 74 | # org.apache.spark.metrics.sink.MetricsServlet 75 | # Name: Default: Description: 76 | # path VARIES* Path prefix from the web server root 77 | # sample false Whether to show entire set of samples for histograms ('false' or 'true') 78 | # 79 | # * Default path is /metrics/json
for all instances except the master. The master has two paths: 80 | # /metrics/applications/json # App information 81 | # /metrics/master/json # Master information 82 | 83 | # org.apache.spark.metrics.sink.GraphiteSink 84 | # Name: Default: Description: 85 | # host NONE Hostname of Graphite server 86 | # port NONE Port of Graphite server 87 | # period 10 Poll period 88 | # unit seconds Units of poll period 89 | # prefix EMPTY STRING Prefix to prepend to metric name 90 | 91 | ## Examples 92 | # Enable JmxSink for all instances by class name 93 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 94 | 95 | # Enable ConsoleSink for all instances by class name 96 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink 97 | 98 | # Polling period for ConsoleSink 99 | #*.sink.console.period=10 100 | 101 | #*.sink.console.unit=seconds 102 | 103 | # Master instance overlap polling period 104 | #master.sink.console.period=15 105 | 106 | #master.sink.console.unit=seconds 107 | 108 | # Enable CsvSink for all instances 109 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink 110 | 111 | # Polling period for CsvSink 112 | #*.sink.csv.period=1 113 | 114 | #*.sink.csv.unit=minutes 115 | 116 | # Polling directory for CsvSink 117 | #*.sink.csv.directory=/tmp/ 118 | 119 | # Worker instance overlap polling period 120 | #worker.sink.csv.period=10 121 | 122 | #worker.sink.csv.unit=minutes 123 | 124 | # Enable jvm source for instance master, worker, driver and executor 125 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 126 | 127 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 128 | 129 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 130 | 131 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 132 | 133 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hdfs-site.xml.j2:
-------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | dfs.nameservices 24 | {{ nameservice_id }} 25 | 26 | 27 | 28 | 29 | dfs.replication 30 | {{ dfs_replication }} 31 | 32 | 33 | 34 | 35 | dfs.namenode.name.dir 36 | {{ dfs_namenode_name_dir | join(',') }} 37 | 38 | 39 | 40 | dfs.datanode.data.dir 41 | {{ dfs_datanode_data_dir | join(',') }} 42 | 43 | 44 | 45 | 46 | dfs.permissions.superusergroup 47 | {{ dfs_permissions_superusergroup }} 48 | 49 | 50 | 51 | 52 | dfs.permissions.enabled 53 | {{ dfs_permissions_enabled }} 54 | 55 | 56 | 57 | 58 | dfs.ha.namenodes.{{ nameservice_id }} 59 | {{ groups['cdh5-namenode'] | join(',') }} 60 | 61 | 62 | {% for host in groups['cdh5-namenode'] %} 63 | 64 | dfs.namenode.rpc-address.{{ nameservice_id }}.{{ host }} 65 | {{ host }}.{{ tl_domain }}:8020 66 | 67 | {% endfor %} 68 | 69 | {% for host in groups['cdh5-namenode'] %} 70 | 71 | dfs.namenode.http-address.{{ nameservice_id }}.{{ host }} 72 | {{ host }}.{{ tl_domain }}:50070 73 | 74 | {% endfor %} 75 | 76 | 77 | 78 | dfs.namenode.shared.edits.dir 79 | qjournal://{{ groups['cdh5-journalnode'] | join('.' ~ tl_domain + ':8485' + ';') }}.{{ tl_domain }}:8485/{{ nameservice_id }} 80 | 81 | 82 | 83 | dfs.journalnode.edits.dir 84 | {{ dfs_journalnode_edits_dir }} 85 | 86 | 87 | 88 | 89 | dfs.client.failover.proxy.provider.{{ nameservice_id }} 90 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 91 | 92 | 93 | 94 | 95 | dfs.ha.fencing.methods 96 | shell(/bin/true) 97 | 98 | 99 | 100 | 101 | dfs.ha.automatic-failover.enabled 102 | true 103 | 104 | 105 | ha.zookeeper.quorum 106 | {{ groups['cdh5-zookeeperserver'] | join('.' 
~ tl_domain + ':2181' + ',') }}.{{ tl_domain }}:2181 107 | 108 | 109 | 110 | 111 | dfs.blocksize 112 | {{ dfs_blocksize }} 113 | 114 | 115 | 116 | 117 | dfs.namenode.handler.count 118 | {{ dfs_namenode_handler_count }} 119 | 120 | 121 | 122 | dfs.datanode.handler.count 123 | {{ dfs_datanode_handler_count }} 124 | 125 | 126 | 127 | 128 | dfs.datanode.du.reserved 129 | {{ dfs_datanode_du_reserved }} 130 | 131 | 132 | 133 | 134 | dfs.balance.bandwidthPerSec 135 | {{ dfs_balance_bandwidthPerSec }} 136 | 137 | 138 | 139 | 140 | dfs.hosts.exclude 141 | {{ dfs_hosts_exclude }} 142 | 143 | 144 | 145 | 146 | dfs.datanode.max.transfer.threads 147 | {{ dfs_datanode_max_transfer_threads }} 148 | 149 | 150 | 151 | 152 | dfs.datanode.fsdataset.volume.choosing.policy 153 | org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy 154 | 155 | 156 | dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold 157 | {{ dfs_datanode_balanced_space_threshold }} 158 | 159 | 160 | dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction 161 | {{ dfs_datanode_balanced_space_preference_fraction }} 162 | 163 | 164 | 165 | 166 | dfs.datanode.max.xcievers 167 | {{ dfs_datanode_max_xcievers }} 168 | 169 | 170 | 171 | 172 | dfs.webhdfs.enabled 173 | true 174 | 175 | 176 | 177 | dfs.checksum.type 178 | {{ dfs_checksum_type }} 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-env.sh.j2: -------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Copyright 2007 The Apache Software Foundation 4 | # * 5 | # * Licensed to the Apache Software Foundation (ASF) under one 6 | # * or more contributor license agreements. See the NOTICE file 7 | # * distributed with this work for additional information 8 | # * regarding copyright ownership. 
The ASF licenses this file 9 | # * to you under the Apache License, Version 2.0 (the 10 | # * "License"); you may not use this file except in compliance 11 | # * with the License. You may obtain a copy of the License at 12 | # * 13 | # * http://www.apache.org/licenses/LICENSE-2.0 14 | # * 15 | # * Unless required by applicable law or agreed to in writing, software 16 | # * distributed under the License is distributed on an "AS IS" BASIS, 17 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # * See the License for the specific language governing permissions and 19 | # * limitations under the License. 20 | # */ 21 | 22 | # Set environment variables here. 23 | 24 | # This script sets variables multiple times over the course of starting an hbase process, 25 | # so try to keep things idempotent unless you want to take an even deeper look 26 | # into the startup scripts (bin/hbase, etc.) 27 | 28 | # The java implementation to use. Java 1.6 required. 29 | # export JAVA_HOME=/usr/java/jdk1.6.0/ 30 | 31 | # Extra Java CLASSPATH elements. Optional. 32 | # export HBASE_CLASSPATH= 33 | 34 | # The maximum amount of heap to use, in MB. Default is 1000. 35 | # export HBASE_HEAPSIZE=1000 36 | 37 | # Extra Java runtime options. 38 | # Below are what we set by default. May only work with SUN JVM. 39 | # For more on why as well as other possible settings, 40 | # see http://wiki.apache.org/hadoop/PerformanceTuning 41 | export HBASE_OPTS="-XX:+UseConcMarkSweepGC" 42 | 43 | # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes. 44 | 45 | # This enables basic gc logging to the .out file. 46 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 47 | 48 | # This enables basic gc logging to its own file. 49 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 
50 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 51 | 52 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 53 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 54 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 55 | 56 | # Uncomment one of the below three options to enable java garbage collection logging for the client processes. 57 | 58 | # This enables basic gc logging to the .out file. 59 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 60 | 61 | # This enables basic gc logging to its own file. 62 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 63 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 64 | 65 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 66 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 67 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 68 | 69 | # Uncomment below if you intend to use the EXPERIMENTAL off heap cache. 70 | # export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize=" 71 | # Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value. 72 | 73 | 74 | # Uncomment and adjust to enable JMX exporting 75 | # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. 
76 | # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html 77 | # 78 | # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" 79 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101" 80 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" 81 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" 82 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" 83 | # export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105" 84 | 85 | # File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. 86 | # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers 87 | 88 | # Uncomment and adjust to keep all the Region Server pages mapped to be memory resident 89 | #HBASE_REGIONSERVER_MLOCK=true 90 | #HBASE_REGIONSERVER_UID="hbase" 91 | 92 | # File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default. 93 | # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters 94 | 95 | # Extra ssh options. Empty by default. 96 | # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" 97 | 98 | # Where log files are stored. $HBASE_HOME/logs by default. 99 | # export HBASE_LOG_DIR=${HBASE_HOME}/logs 100 | 101 | # Enable remote JDWP debugging of major HBase processes. 
Meant for Core Developers 102 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070" 103 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071" 104 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072" 105 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073" 106 | 107 | # A string representing this instance of hbase. $USER by default. 108 | # export HBASE_IDENT_STRING=$USER 109 | 110 | # The scheduling priority for daemon processes. See 'man nice'. 111 | # export HBASE_NICENESS=10 112 | 113 | # The directory where pid files are stored. /tmp by default. 114 | # export HBASE_PID_DIR=/var/hadoop/pids 115 | 116 | # Seconds to sleep between slave commands. Unset by default. This 117 | # can be useful in large clusters, where, e.g., slave rsyncs can 118 | # otherwise arrive faster than the master can service them. 119 | # export HBASE_SLAVE_SLEEP=0.1 120 | 121 | # Tell HBase whether it should manage it's own instance of Zookeeper or not. 122 | # export HBASE_MANAGES_ZK=true 123 | 124 | # The default log rolling policy is RFA, where the log file is rolled as per the size defined for the 125 | # RFA appender. Please refer to the log4j.properties file to see more details on this appender. 126 | # In case one needs to do log rolling on a date change, one should set the environment property 127 | # HBASE_ROOT_LOGGER to ",DRFA". 128 | # For example: 129 | # HBASE_ROOT_LOGGER=INFO,DRFA 130 | # The reason for changing default to RFA is to avoid the boundary case of filling out disk space as 131 | # DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context. 
132 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 25 | 26 | 27 | oozie.service.ActionService.executor.ext.classes 28 | 29 | org.apache.oozie.action.email.EmailActionExecutor, 30 | org.apache.oozie.action.hadoop.HiveActionExecutor, 31 | org.apache.oozie.action.hadoop.ShellActionExecutor, 32 | org.apache.oozie.action.hadoop.SqoopActionExecutor, 33 | org.apache.oozie.action.hadoop.DistcpActionExecutor 34 | 35 | 36 | 37 | 38 | oozie.service.SchemaService.wf.ext.schemas 39 | 40 | shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd, 41 | hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd, 42 | sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd, 43 | oozie-sla-0.1.xsd,oozie-sla-0.2.xsd 44 | 45 | 46 | 47 | 48 | oozie.system.id 49 | oozie-${user.name} 50 | 51 | 52 | 53 | oozie.systemmode 54 | NORMAL 55 | 56 | 57 | oozie.service.AuthorizationService.security.enabled 58 | false 59 | 60 | 61 | 62 | oozie.service.PurgeService.older.than 63 | 30 64 | 65 | 66 | oozie.service.PurgeService.purge.interval 67 | 3600 68 | 69 | 70 | oozie.service.CallableQueueService.queue.size 71 | 10000 72 | 73 | 74 | oozie.service.CallableQueueService.threads 75 | 10 76 | 77 | 78 | oozie.service.CallableQueueService.callable.concurrency 79 | 3 80 | 81 | 82 | oozie.service.coord.normal.default.timeout 83 | 84 | 120 85 | 86 | 87 | 88 | oozie.db.schema.name 89 | oozie 90 | 91 | 92 | oozie.service.JPAService.create.db.schema 93 | true 94 | 95 | 96 | 97 | oozie.service.JPAService.jdbc.driver 98 | com.mysql.jdbc.Driver 99 | 100 | 101 | oozie.service.JPAService.jdbc.url 102 | jdbc:mysql://localhost:3306/oozie 103 | 104 | 105 | oozie.service.JPAService.jdbc.username 106 
| oozie 107 | 108 | 109 | oozie.service.JPAService.jdbc.password 110 | {{ oozie_db_passwd }} 111 | 112 | 113 | 114 | oozie.service.JPAService.pool.max.active.conn 115 | 10 116 | 117 | 118 | oozie.service.HadoopAccessorService.kerberos.enabled 119 | false 120 | 121 | 122 | local.realm 123 | LOCALHOST 124 | 125 | 126 | 127 | oozie.service.HadoopAccessorService.keytab.file 128 | ${user.home}/oozie.keytab 129 | 130 | 131 | oozie.service.HadoopAccessorService.kerberos.principal 132 | ${user.name}/localhost@${local.realm} 133 | 134 | 135 | oozie.service.HadoopAccessorService.jobTracker.whitelist 136 | 137 | 138 | 139 | oozie.service.HadoopAccessorService.nameNode.whitelist 140 | 141 | 142 | 143 | 144 | oozie.service.HadoopAccessorService.hadoop.configurations 145 | *=/etc/hadoop/conf 146 | 147 | 148 | oozie.service.WorkflowAppService.system.libpath 149 | /user/${user.name}/share/lib 150 | 151 | 152 | 153 | use.system.libpath.for.mapreduce.and.pig.jobs 154 | false 155 | 156 | 157 | 158 | oozie.authentication.type 159 | simple 160 | 161 | 162 | oozie.authentication.token.validity 163 | 36000 164 | 165 | 166 | oozie.authentication.signature.secret 167 | oozie 168 | 169 | 170 | 171 | oozie.authentication.cookie.domain 172 | 173 | 174 | 175 | 176 | oozie.authentication.simple.anonymous.allowed 177 | true 178 | 179 | 180 | 181 | oozie.authentication.kerberos.principal 182 | HTTP/localhost@${local.realm} 183 | 184 | 185 | oozie.authentication.kerberos.keytab 186 | ${oozie.service.HadoopAccessorService.keytab.file} 187 | 188 | 189 | oozie.authentication.kerberos.name.rules 190 | DEFAULT 191 | 192 | 193 | 194 | 195 | 207 | 208 | 209 | 210 | 211 | oozie.service.ProxyUserService.proxyuser.hue.hosts 212 | * 213 | 214 | 215 | oozie.service.ProxyUserService.proxyuser.hue.groups 216 | * 217 | 218 | 219 | 220 | oozie.action.mapreduce.uber.jar.enable 221 | true 222 | 223 | 224 | oozie.service.HadoopAccessorService.supported.filesystems 225 | hdfs,viewfs 226 | 227 | 228 | 
-------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-policy.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 24 | 25 | 26 | 27 | 28 | 29 | security.client.protocol.acl 30 | * 31 | ACL for ClientProtocol, which is used by user code 32 | via the DistributedFileSystem. 33 | The ACL is a comma-separated list of user and group names. The user and 34 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 35 | A special value of "*" means all users are allowed. 36 | 37 | 38 | 39 | security.client.datanode.protocol.acl 40 | * 41 | ACL for ClientDatanodeProtocol, the client-to-datanode protocol 42 | for block recovery. 43 | The ACL is a comma-separated list of user and group names. The user and 44 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 45 | A special value of "*" means all users are allowed. 46 | 47 | 48 | 49 | security.datanode.protocol.acl 50 | * 51 | ACL for DatanodeProtocol, which is used by datanodes to 52 | communicate with the namenode. 53 | The ACL is a comma-separated list of user and group names. The user and 54 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 55 | A special value of "*" means all users are allowed. 56 | 57 | 58 | 59 | security.inter.datanode.protocol.acl 60 | * 61 | ACL for InterDatanodeProtocol, the inter-datanode protocol 62 | for updating generation timestamp. 63 | The ACL is a comma-separated list of user and group names. The user and 64 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 65 | A special value of "*" means all users are allowed. 66 | 67 | 68 | 69 | security.namenode.protocol.acl 70 | * 71 | ACL for NamenodeProtocol, the protocol used by the secondary 72 | namenode to communicate with the namenode. 73 | The ACL is a comma-separated list of user and group names. The user and 74 | group list is separated by a blank. 
For e.g. "alice,bob users,wheel". 75 | A special value of "*" means all users are allowed. 76 | 77 | 78 | 79 | security.admin.operations.protocol.acl 80 | * 81 | ACL for AdminOperationsProtocol. Used for admin commands. 82 | The ACL is a comma-separated list of user and group names. The user and 83 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 84 | A special value of "*" means all users are allowed. 85 | 86 | 87 | 88 | security.refresh.usertogroups.mappings.protocol.acl 89 | * 90 | ACL for RefreshUserMappingsProtocol. Used to refresh 91 | users mappings. The ACL is a comma-separated list of user and 92 | group names. The user and group list is separated by a blank. For 93 | e.g. "alice,bob users,wheel". A special value of "*" means all 94 | users are allowed. 95 | 96 | 97 | 98 | security.refresh.policy.protocol.acl 99 | * 100 | ACL for RefreshAuthorizationPolicyProtocol, used by the 101 | dfsadmin and mradmin commands to refresh the security policy in-effect. 102 | The ACL is a comma-separated list of user and group names. The user and 103 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 104 | A special value of "*" means all users are allowed. 105 | 106 | 107 | 108 | security.ha.service.protocol.acl 109 | * 110 | ACL for HAService protocol used by HAAdmin to manage the 111 | active and stand-by states of namenode. 112 | 113 | 114 | 115 | security.zkfc.protocol.acl 116 | * 117 | ACL for access to the ZK Failover Controller 118 | 119 | 120 | 121 | 122 | security.qjournal.service.protocol.acl 123 | * 124 | ACL for QJournalProtocol, used by the NN to communicate with 125 | JNs when using the QuorumJournalManager for edit logs. 126 | 127 | 128 | 129 | security.mrhs.client.protocol.acl 130 | * 131 | ACL for HSClientProtocol, used by job clients to 132 | communciate with the MR History Server job status etc. 133 | The ACL is a comma-separated list of user and group names. 
The user and 134 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 135 | A special value of "*" means all users are allowed. 136 | 137 | 138 | 139 | 140 | 141 | security.resourcetracker.protocol.acl 142 | * 143 | ACL for ResourceTrackerProtocol, used by the 144 | ResourceManager and NodeManager to communicate with each other. 145 | The ACL is a comma-separated list of user and group names. The user and 146 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 147 | A special value of "*" means all users are allowed. 148 | 149 | 150 | 151 | security.resourcemanager-administration.protocol.acl 152 | * 153 | ACL for ResourceManagerAdministrationProtocol, for admin commands. 154 | The ACL is a comma-separated list of user and group names. The user and 155 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 156 | A special value of "*" means all users are allowed. 157 | 158 | 159 | 160 | security.applicationclient.protocol.acl 161 | * 162 | ACL for ApplicationClientProtocol, used by the ResourceManager 163 | and applications submission clients to communicate with each other. 164 | The ACL is a comma-separated list of user and group names. The user and 165 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 166 | A special value of "*" means all users are allowed. 167 | 168 | 169 | 170 | security.applicationmaster.protocol.acl 171 | * 172 | ACL for ApplicationMasterProtocol, used by the ResourceManager 173 | and ApplicationMasters to communicate with each other. 174 | The ACL is a comma-separated list of user and group names. The user and 175 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 176 | A special value of "*" means all users are allowed. 177 | 178 | 179 | 180 | security.containermanagement.protocol.acl 181 | * 182 | ACL for ContainerManagementProtocol protocol, used by the NodeManager 183 | and ApplicationMasters to communicate with each other. 
184 | The ACL is a comma-separated list of user and group names. The user and 185 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 186 | A special value of "*" means all users are allowed. 187 | 188 | 189 | 190 | security.resourcelocalizer.protocol.acl 191 | * 192 | ACL for ResourceLocalizer protocol, used by the NodeManager 193 | and ResourceLocalizer to communicate with each other. 194 | The ACL is a comma-separated list of user and group names. The user and 195 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 196 | A special value of "*" means all users are allowed. 197 | 198 | 199 | 200 | security.job.task.protocol.acl 201 | * 202 | ACL for TaskUmbilicalProtocol, used by the map and reduce 203 | tasks to communicate with the parent tasktracker. 204 | The ACL is a comma-separated list of user and group names. The user and 205 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 206 | A special value of "*" means all users are allowed. 207 | 208 | 209 | 210 | security.job.client.protocol.acl 211 | * 212 | ACL for MRClientProtocol, used by job clients to 213 | communciate with the MR ApplicationMaster to query job status etc. 214 | The ACL is a comma-separated list of user and group names. The user and 215 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 216 | A special value of "*" means all users are allowed. 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Copyright 2011 The Apache Software Foundation 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Define some default values that can be overridden by system properties 20 | hadoop.root.logger=INFO,console 21 | hadoop.log.dir=. 22 | hadoop.log.file=hadoop.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshold=ALL 29 | 30 | # Null Appender 31 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 32 | 33 | # 34 | # Rolling File Appender - cap space usage at 5gb. 
35 | # 36 | hadoop.log.maxfilesize=256MB 37 | hadoop.log.maxbackupindex=20 38 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 39 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 40 | 41 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} 42 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} 43 | 44 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 45 | 46 | # Pattern format: Date LogLevel LoggerName LogMessage 47 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 48 | # Debugging Pattern format 49 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 50 | 51 | 52 | # 53 | # Daily Rolling File Appender 54 | # 55 | 56 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 57 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 58 | 59 | # Rollver at midnight 60 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 61 | 62 | # 30-day backup 63 | #log4j.appender.DRFA.MaxBackupIndex=30 64 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 65 | 66 | # Pattern format: Date LogLevel LoggerName LogMessage 67 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 68 | # Debugging Pattern format 69 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 70 | 71 | 72 | # 73 | # console 74 | # Add "console" to rootlogger above if you want to use this 75 | # 76 | 77 | log4j.appender.console=org.apache.log4j.ConsoleAppender 78 | log4j.appender.console.target=System.err 79 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 80 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 81 | 82 | # 83 | # TaskLog Appender 84 | # 85 | 86 | #Default values 87 | hadoop.tasklog.taskid=null 88 | hadoop.tasklog.iscleanup=false 89 | hadoop.tasklog.noKeepSplits=4 90 | hadoop.tasklog.totalLogFileSize=100 91 | hadoop.tasklog.purgeLogSplits=true 92 | 
hadoop.tasklog.logsRetainHours=12 93 | 94 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 95 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} 96 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} 97 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} 98 | 99 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout 100 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 101 | 102 | # 103 | # HDFS block state change log from block manager 104 | # 105 | # Uncomment the following to suppress normal block state change 106 | # messages from BlockManager in NameNode. 107 | #log4j.logger.BlockStateChange=WARN 108 | 109 | # 110 | #Security appender 111 | # 112 | hadoop.security.logger=INFO,NullAppender 113 | hadoop.security.log.maxfilesize=256MB 114 | hadoop.security.log.maxbackupindex=20 115 | log4j.category.SecurityLogger=${hadoop.security.logger} 116 | hadoop.security.log.file=SecurityAuth-${user.name}.audit 117 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 118 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 119 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 120 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 121 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} 122 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} 123 | 124 | # 125 | # Daily Rolling Security appender 126 | # 127 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 128 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 129 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 130 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 131 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 132 | 133 | # 134 | # hadoop configuration logging 135 | # 136 | 137 | # Uncomment the following line to turn off configuration deprecation warnings. 
138 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN 139 | 140 | # 141 | # hdfs audit logging 142 | # 143 | hdfs.audit.logger=INFO,NullAppender 144 | hdfs.audit.log.maxfilesize=256MB 145 | hdfs.audit.log.maxbackupindex=20 146 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 147 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 148 | log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender 149 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log 150 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout 151 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 152 | log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} 153 | log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} 154 | 155 | # 156 | # mapred audit logging 157 | # 158 | mapred.audit.logger=INFO,NullAppender 159 | mapred.audit.log.maxfilesize=256MB 160 | mapred.audit.log.maxbackupindex=20 161 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} 162 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false 163 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender 164 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log 165 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout 166 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 167 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} 168 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} 169 | 170 | # Custom Logging levels 171 | 172 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG 173 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG 174 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG 175 | 176 | # Jets3t library 177 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 178 | 179 | # 180 | 
# Event Counter Appender 181 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 182 | # 183 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 184 | 185 | # 186 | # Job Summary Appender 187 | # 188 | # Use following logger to send summary to separate file defined by 189 | # hadoop.mapreduce.jobsummary.log.file : 190 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA 191 | # 192 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} 193 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log 194 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB 195 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20 196 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender 197 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} 198 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} 199 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} 200 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout 201 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 202 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} 203 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false 204 | 205 | # 206 | # Yarn ResourceManager Application Summary Log 207 | # 208 | # Set the ResourceManager summary log filename 209 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log 210 | # Set the ResourceManager summary log level and appender 211 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} 212 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY 213 | 214 | # To enable AppSummaryLogging for the RM, 215 | # set yarn.server.resourcemanager.appsummary.logger to 216 | # ,RMSUMMARY in hadoop-env.sh 217 | 218 | # Appender for ResourceManager Application Summary Log 219 | # Requires the following 
properties to be set 220 | # - hadoop.log.dir (Hadoop Log directory) 221 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) 222 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) 223 | 224 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} 225 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false 226 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender 227 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} 228 | log4j.appender.RMSUMMARY.MaxFileSize=256MB 229 | log4j.appender.RMSUMMARY.MaxBackupIndex=20 230 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout 231 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 232 | 233 | # HS audit log configs 234 | #mapreduce.hs.audit.logger=INFO,HSAUDIT 235 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger} 236 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false 237 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender 238 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log 239 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout 240 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 241 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd 242 | 243 | # Http Server Request Logs 244 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog 245 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 246 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log 247 | #log4j.appender.namenoderequestlog.RetainDays=3 248 | 249 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog 250 | 
#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 251 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log 252 | #log4j.appender.datanoderequestlog.RetainDays=3 253 | 254 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog 255 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 256 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log 257 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3 258 | 259 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog 260 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 261 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log 262 | #log4j.appender.jobhistoryrequestlog.RetainDays=3 263 | 264 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog 265 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 266 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log 267 | #log4j.appender.nodemanagerrequestlog.RetainDays=3 268 | --------------------------------------------------------------------------------