├── roles ├── cdh5-base │ ├── files │ │ ├── oozie │ │ │ └── sshkeys │ │ │ │ ├── id_rsa │ │ │ │ ├── id_rsa.pub │ │ │ │ └── authorized_keys │ │ ├── java.sh │ │ ├── cloudera-cdh5b2.repo │ │ └── cloudera-gplextras5b2.repo │ ├── templates │ │ ├── hadoop │ │ │ ├── slaves.j2 │ │ │ ├── container-executor.cfg.j2 │ │ │ ├── configuration.xsl.j2 │ │ │ ├── hadoop-metrics2.properties.j2 │ │ │ ├── core-site.xml.j2 │ │ │ ├── ssl-server.xml.example.j2 │ │ │ ├── ssl-client.xml.example.j2 │ │ │ ├── mapred-site.xml.j2 │ │ │ ├── hadoop-metrics.properties.j2 │ │ │ ├── yarn-site.xml.j2 │ │ │ ├── capacity-scheduler.xml.j2 │ │ │ ├── mapred-queues.xml.template.j2 │ │ │ ├── yarn-env.sh.j2 │ │ │ ├── hdfs-site.xml.j2 │ │ │ ├── hadoop-policy.xml.j2 │ │ │ └── log4j.properties.j2 │ │ ├── hbase │ │ │ ├── regionservers.j2 │ │ │ ├── hadoop-metrics2-hbase.properties.j2 │ │ │ ├── hbase-site.xml.j2 │ │ │ ├── hbase-policy.xml.j2 │ │ │ ├── log4j.properties.j2 │ │ │ └── hbase-env.sh.j2 │ │ ├── hosts.j2 │ │ ├── zoo.cfg.j2 │ │ └── limits.conf.j2 │ └── tasks │ │ ├── main.yml │ │ ├── misc.yml │ │ ├── oozie.yml │ │ ├── hbase.yml │ │ └── base.yml ├── cdh5-spark-base │ ├── templates │ │ ├── slaves.j2 │ │ ├── fairscheduler.xml.template.j2 │ │ ├── log4j.properties.template.j2 │ │ ├── spark-env.sh.template.j2 │ │ ├── spark-env.sh.j2 │ │ └── metrics.properties.template.j2 │ └── tasks │ │ └── main.yml ├── cdh5-zookeeperserver │ ├── handlers │ │ └── main.yml │ ├── templates │ │ └── zoo.cfg.j2 │ └── tasks │ │ └── main.yml ├── cdh5-spark-master │ └── tasks │ │ └── main.yml ├── cdh5-spark-worker │ └── tasks │ │ └── main.yml ├── cdh5-oozie │ ├── templates │ │ ├── oozie_db_init.sql.j2 │ │ ├── adminusers.txt.j2 │ │ ├── action-conf │ │ │ └── hive.xml.j2 │ │ ├── hadoop-config.xml.j2 │ │ ├── hadoop-conf │ │ │ └── core-site.xml.j2 │ │ ├── oozie-env.sh.j2 │ │ ├── oozie-log4j.properties.j2 │ │ └── oozie-site.xml.j2 │ └── tasks │ │ └── main.yml ├── cdh5-httpfs │ └── tasks │ │ └── main.yml ├── cdh5-pig │ ├── templates │ │ ├── 
register.sh.j2 │ │ ├── build.properties.j2 │ │ ├── log4j.properties.j2 │ │ └── pig.properties.j2 │ └── tasks │ │ └── main.yml ├── cdh5-hbase-regionserver │ └── tasks │ │ └── main.yml ├── cdh5-hive │ ├── templates │ │ ├── hive_metastore_init.sql.j2 │ │ ├── hive-server2.j2 │ │ ├── hive-env.sh.template.j2 │ │ ├── hive-exec-log4j.properties.j2 │ │ ├── hive-site.xml.j2 │ │ └── hive-log4j.properties.j2 │ └── tasks │ │ └── main.yml ├── cdh5-journalnode │ └── tasks │ │ └── main.yml ├── cdh5-hbase-master │ └── tasks │ │ └── main.yml ├── cdh5-slave │ └── tasks │ │ └── main.yml ├── cdh5-namenode-primary │ └── tasks │ │ └── main.yml ├── cdh5-namenode-backup │ └── tasks │ │ └── main.yml └── cdh5-resourcemanager │ └── tasks │ └── main.yml ├── README.md ├── hosts.cdh5 ├── cdh5.yml └── group_vars └── cdh5-all /roles/cdh5-base/files/oozie/sshkeys/id_rsa: -------------------------------------------------------------------------------- 1 | id_rsa 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/slaves.j2: -------------------------------------------------------------------------------- 1 | localhost 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/oozie/sshkeys/id_rsa.pub: -------------------------------------------------------------------------------- 1 | id_rsa.pub 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/regionservers.j2: -------------------------------------------------------------------------------- 1 | localhost 2 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/oozie/sshkeys/authorized_keys: -------------------------------------------------------------------------------- 1 | authorized_keys 2 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/slaves.j2: 
-------------------------------------------------------------------------------- 1 | # A Spark Worker will be started on each of the machines listed below. 2 | localhost -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include: misc.yml 3 | - include: base.yml 4 | - include: hbase.yml 5 | - include: oozie.yml 6 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: restart zookeeper server 3 | service: name=zookeeper-server state=restarted 4 | -------------------------------------------------------------------------------- /roles/cdh5-spark-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: start spark master services 3 | service: name={{ item }} state=started 4 | with_items: 5 | - spark-master 6 | tags: 7 | - cdh5-spark-master 8 | -------------------------------------------------------------------------------- /roles/cdh5-spark-worker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: start spark worker services 3 | service: name={{ item }} state=started 4 | with_items: 5 | - spark-worker 6 | tags: 7 | - cdh5-spark-worker 8 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/java.sh: -------------------------------------------------------------------------------- 1 | export JAVA_HOME=/usr/java/jdk1.6.0_45 2 | export JRE_HOME=$JAVA_HOME/jre 3 | export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH 4 | export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH 5 | -------------------------------------------------------------------------------- 
/roles/cdh5-base/templates/hosts.j2: -------------------------------------------------------------------------------- 1 | 127.0.0.1 localhost localhost.localdomain 2 | {% for host in groups['cdh5-all'] %} 3 | {{ hostvars[host]["ansible_default_ipv4"]["address"] }} {{ host }}.{{ tl_domain }} {{ host }} {{ host }}.localdomain 4 | {% endfor %} 5 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie_db_init.sql.j2: -------------------------------------------------------------------------------- 1 | CREATE DATABASE oozie; 2 | GRANT ALL PRIVILEGES ON oozie.* TO 'oozie'@'localhost' IDENTIFIED BY '{{ oozie_db_passwd }}'; 3 | GRANT ALL PRIVILEGES ON oozie.* TO 'oozie'@'%' IDENTIFIED BY '{{ oozie_db_passwd }}'; 4 | -------------------------------------------------------------------------------- /roles/cdh5-httpfs/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: installl httpfs pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-httpfs 6 | tags: 7 | - cdh5-httpfs 8 | 9 | - name: start httpfs service 10 | service: name=hadoop-httpfs state=started 11 | tags: 12 | - cdh5-httpfs 13 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/container-executor.cfg.j2: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group 2 | banned.users=#comma separated list of users who can not run applications 3 | min.user.id=1000#Prevent other super-users 4 | allowed.system.users=##comma separated list of system users who CAN run applications 5 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/cloudera-cdh5b2.repo: 
-------------------------------------------------------------------------------- 1 | [cloudera-cdh5b2] 2 | # Packages for Cloudera's Distribution for Hadoop, Version 5.0.0b2, on RedHat or CentOS 6 x86_64 3 | name=Cloudera's Distribution for Hadoop, Version 5.0.0b2 4 | baseurl=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/5.0.0b2/ 5 | gpgkey=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera 6 | gpgcheck=1 7 | -------------------------------------------------------------------------------- /roles/cdh5-base/files/cloudera-gplextras5b2.repo: -------------------------------------------------------------------------------- 1 | [cloudera-gplextras5b2] 2 | # Packages for Cloudera's GPLExtras, Version 5.0.0b2, on RedHat or CentOS 6 x86_64 3 | name=Cloudera's GPLExtras, Version 5.0.0b2 4 | baseurl=http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/5.0.0b2/ 5 | gpgkey=http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/RPM-GPG-KEY-cloudera 6 | gpgcheck=1 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Ansible playbook of CDH5 2 | 3 | HDFS, HBase, Hive, HTTPFS, Oozie, Pig, Zookeeper, Spark 4 | 5 | #### Steps: 6 | 7 | ``` 8 | 1. Specify the hosts in hosts.cdh5 9 | 2. Specify the configuration values in group_vars/cdh5-all 10 | 3. Make sure all hosts can be logged into as root or normal user with sudo permissions 11 | 4. 
Run 'ansible-playbook cdh5.yml -i hosts.cdh5 -u heydevops --sudo -k' 12 | ``` 13 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/register.sh.j2: -------------------------------------------------------------------------------- 1 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 2 | REGISTER /usr/lib/pig/datafu-1.1.0-cdh5.0.0-beta-2.jar 3 | REGISTER /usr/lib/zookeeper/zookeeper-3.4.5-cdh5.0.0-beta-2.jar 4 | REGISTER /usr/lib/hbase/hbase-server-0.96.1.1-cdh5.0.0-beta-2.jar 5 | REGISTER /usr/lib/hbase/hbase-client-0.96.1.1-cdh5.0.0-beta-2.jar 6 | REGISTER /usr/lib/hbase/hbase-common-0.96.1.1-cdh5.0.0-beta-2.jar 7 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/fairscheduler.xml.template.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | FAIR 5 | 1 6 | 2 7 | 8 | 9 | FIFO 10 | 2 11 | 3 12 | 13 | 14 | -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/misc.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install the libselinux-python package 3 | yum: name=libselinux-python state=installed 4 | tags: 5 | - disable_selinux 6 | 7 | - name: disable SELinux in conf file 8 | selinux: state=disabled 9 | register: selinux_conf 10 | tags: 11 | - disable_selinux 12 | 13 | - name: disable SELinux in command line 14 | shell: setenforce 0 15 | when: selinux_conf|changed 16 | tags: 17 | - disable_selinux 18 | -------------------------------------------------------------------------------- /roles/cdh5-hbase-regionserver/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase regionserver pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase-regionserver 6 | tags: 7 | - cdh5-hbase 8 | - 
cdh5-hbase-regionserver 9 | 10 | - name: start hbase regionserver 11 | service: name={{ item }} state=started 12 | with_items: 13 | - hbase-regionserver 14 | tags: 15 | - cdh5-hbase 16 | - cdh5-hbase-regionserver 17 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive_metastore_init.sql.j2: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS metastore; 2 | USE metastore; 3 | SOURCE /usr/lib/hive/scripts/metastore/upgrade/mysql/hive-schema-0.12.0.mysql.sql; 4 | 5 | {% for host in hive_mysql_hosts %} 6 | CREATE USER 'hive'@'{{ host }}' IDENTIFIED BY '{{ hive_mysql_passwd }}'; 7 | REVOKE ALL PRIVILEGES, GRANT OPTION FROM 'hive'@'{{ host }}'; 8 | GRANT SELECT,INSERT,UPDATE,DELETE,LOCK TABLES,EXECUTE ON metastore.* TO 'hive'@'{{ host }}'; 9 | {% endfor %} 10 | FLUSH PRIVILEGES; 11 | -------------------------------------------------------------------------------- /roles/cdh5-journalnode/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install journalnode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-journalnode 6 | tags: 7 | - cdh5-journalnode 8 | 9 | - name: create folder for journaling 10 | file: path={{ dfs_journalnode_edits_dir }} state=directory owner=hdfs group=hdfs mode=0755 11 | tags: 12 | - cdh5-journalnode 13 | 14 | - name: start journalnode services 15 | service: name=hadoop-hdfs-journalnode state=started 16 | tags: 17 | - cdh5-journalnode 18 | -------------------------------------------------------------------------------- /roles/cdh5-pig/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install pig pkgs 3 | yum: name={{ item }}-{{ version['pig'] }} state=present 4 | with_items: 5 | - pig 6 | tags: 7 | - cdh5-pig 8 | 9 | - name: install datafu pkg 
10 | yum: name=pig-udf-datafu state=present 11 | tags: 12 | - cdh5-pig 13 | 14 | - name: copy pig configuration files 15 | template: src={{ item }}.j2 dest=/etc/pig/conf/{{ item }} owner=root group=root mode=0644 16 | with_items: 17 | - build.properties 18 | - log4j.properties 19 | - pig.properties 20 | - register.sh 21 | tags: 22 | - cdh5-pig 23 | - cdh5-pig-conf 24 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | dataDir={{ zookeeper_datadir }} 11 | # the port at which the clients will connect 12 | clientPort=2181 13 | maxClientCnxns=0 14 | {% for host in groups['cdh5-zookeeperserver'] %} 15 | server.{{ hostvars[host].zoo_id }}={{ host }}.{{ tl_domain }}:2888:3888 16 | {% endfor %} 17 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/log4j.properties.template.j2: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=WARN 10 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 11 | 
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 12 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | dataDir={{ zookeeper_datadir }} 11 | # the port at which the clients will connect 12 | clientPort=2181 13 | maxClientCnxns=0 14 | {% for host in groups['cdh5-zookeeperserver'] %} 15 | server.{{ hostvars[host].zoo_id }}={{ host }}.{{ tl_domain }}:2888:3888 16 | {% endfor %} 17 | -------------------------------------------------------------------------------- /roles/cdh5-hbase-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase master pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase-master 6 | - hbase-thrift 7 | - hbase-rest 8 | tags: 9 | - cdh5-hbase 10 | - cdh5-hbase-master 11 | 12 | - name: create hbase directory in hdfs 13 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /hbase; sudo -u hdfs hadoop fs -chown hbase /hbase && touch {{ item }} 14 | with_items: 15 | - /var/hadoop/.status/hbase_dir.created 16 | tags: 17 | - cdh5-hbase 18 | - cdh5-hbase-master 19 | 20 | - name: start hbase master services 21 | service: name={{ item }} state=started 22 | with_items: 23 | - hbase-master 24 | - hbase-thrift 25 | - hbase-rest 26 | tags: 27 | - cdh5-hbase 28 | - cdh5-hbase-master 29 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/build.properties.j2: 
-------------------------------------------------------------------------------- 1 | # 2 | #Fri Feb 07 12:46:21 PST 2014 3 | hadoop-test.version=2.2.0-mr1-cdh5.0.0-beta-2 4 | parquet-pig-bundle.version=1.2.5-cdh5.0.0-beta-2 5 | snappy.version=1.0.4.1 6 | zookeeper.version=3.4.5-cdh5.0.0-beta-2 7 | protobuf-java.version=2.5.0 8 | slf4j-api.version=1.7.5 9 | hbase95.version=0.96.1.1-cdh5.0.0-beta-2 10 | pig.version=0.12.0-cdh5.0.0-beta-2 11 | version=0.12.0-cdh5.0.0-beta-2 12 | slf4j-log4j12.version=1.7.5 13 | hadoop-hdfs.version=2.2.0-cdh5.0.0-beta-2 14 | avro.version=1.7.5-cdh5.0.0-beta-2 15 | reactor.repo=https\://repository.cloudera.com/content/repositories/snapshots 16 | hadoop-mapreduce.version=2.2.0-cdh5.0.0-beta-2 17 | hadoop-common.version=2.2.0-cdh5.0.0-beta-2 18 | hadoop-core.version=2.2.0-mr1-cdh5.0.0-beta-2 19 | hadoopversion=23 20 | commons-lang.version=2.6 21 | hbaseversion=95 22 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hadoop-client for spark 3 | yum: name={{ item }}-{{ version['hadoop']}} state=present 4 | with_items: 5 | - hadoop-client 6 | tags: 7 | - cdh5-spark-base 8 | 9 | - name: install spark pkgs 10 | yum: name={{ item }}-{{ version['spark'] }} state=present 11 | with_items: 12 | - spark-core 13 | - spark-master 14 | - spark-worker 15 | - spark-python 16 | tags: 17 | - cdh5-spark-base 18 | 19 | - name: copy spark configuration files 20 | template: src={{ item }}.j2 dest=/etc/spark/conf/{{ item }} owner=spark group=spark mode=0644 21 | with_items: 22 | - fairscheduler.xml.template 23 | - log4j.properties.template 24 | - metrics.properties.template 25 | - slaves 26 | - spark-env.sh 27 | - spark-env.sh.template 28 | tags: 29 | - cdh5-spark-base 30 | - cdh5-spark-base-conf 31 | -------------------------------------------------------------------------------- 
/roles/cdh5-base/tasks/oozie.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: create oozie group 3 | group: name=oozie state=present 4 | tags: 5 | - cdh5-base 6 | - cdh5-oozie 7 | - cdh5-oozie-user 8 | 9 | - name: create oozie user 10 | user: name=oozie group=oozie createhome=yes state=present 11 | tags: 12 | - cdh5-base 13 | - cdh5-oozie 14 | - cdh5-oozie-user 15 | 16 | - name: create .ssh directory for oozie user 17 | file: path=/home/oozie/.ssh state=directory owner=oozie group=oozie mode=0700 18 | tags: 19 | - cdh5-base 20 | - cdh5-oozie 21 | - cdh5-oozie-user 22 | 23 | - name: copy the sshkeys for oozie user 24 | copy: src=oozie/sshkeys/{{ item }} dest=/home/oozie/.ssh/{{ item }} owner=oozie group=oozie mode=0600 25 | with_items: 26 | - authorized_keys 27 | - id_rsa 28 | - id_rsa.pub 29 | tags: 30 | - cdh5-base 31 | - cdh5-oozie 32 | - cdh5-oozie-user 33 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/adminusers.txt.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | # Admin Users, one user by line 20 | -------------------------------------------------------------------------------- /roles/cdh5-zookeeperserver/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install zookeeper server pkgs 3 | yum: name={{ item }}-{{ version['zookeeper'] }} state=present 4 | with_items: 5 | - zookeeper-server 6 | tags: 7 | - cdh5-zookeeper 8 | - cdh5-zookeeperserver 9 | 10 | - name: create zookeeper_datadir 11 | file: path={{ zookeeper_datadir }} state=directory owner=zookeeper group=zookeeper mode=0755 12 | tags: 13 | - cdh5-zookeeper 14 | - cdh5-zookeeperserver 15 | 16 | - name: init zookeeper server 17 | shell: service zookeeper-server init --myid={{ zoo_id }} creates={{ zookeeper_datadir }}/myid 18 | tags: 19 | - cdh5-zookeeper 20 | - cdh5-zookeeperserver 21 | 22 | - name: create zookeeper cfg 23 | template: src=zoo.cfg.j2 dest=/etc/zookeeper/conf/zoo.cfg owner=zookeeper group=zookeeper mode=0644 24 | notify: restart zookeeper server 25 | tags: 26 | - cdh5-zookeeper 27 | - cdh5-zookeeperserver 28 | 29 | - name: start zookeeper server 30 | service: name=zookeeper-server state=started 31 | tags: 32 | - cdh5-zookeeper 33 | - cdh5-zookeeperserver 34 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-server2.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # The port for Hive server2 daemon to listen to. 17 | # Unfortunately, there is no way to specify the interfaces 18 | # to which the daemon binds. 19 | # 20 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 21 | -------------------------------------------------------------------------------- /roles/cdh5-base/tasks/hbase.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hbase base pkgs 3 | yum: name={{ item }}-{{ version['hbase'] }} state=present 4 | with_items: 5 | - hbase 6 | tags: 7 | - cdh5-base 8 | - cdh5-hbase 9 | 10 | - name: install ntp pkgs 11 | yum: name=ntp state=present 12 | tags: 13 | - cdh5-base 14 | - cdh5-hbase 15 | - cdh5-ntp 16 | 17 | - name: start ntpd service 18 | service: name=ntpd state=started 19 | tags: 20 | - cdh5-base 21 | - cdh5-hbase 22 | - cdh5-ntp 23 | 24 | - name: copy the limits.conf 25 | template: src=limits.conf.j2 dest=/etc/security/limits.conf owner=root group=root mode=0644 26 | tags: 27 | - cdh5-base 28 | - cdh5-hbase 29 | 30 | - name: copy the hbase configuration files 31 | template: src=hbase/{{ item }}.j2 dest=/etc/hbase/conf/{{ item }} owner=hbase group=hbase mode=0664 32 | with_items: 33 | - hbase-site.xml 34 | - hbase-env.sh 35 | - hadoop-metrics2-hbase.properties 36 | - hbase-policy.xml 37 | - log4j.properties 38 | - regionservers 39 | register: copy_hbase_conf 40 | tags: 41 | - cdh5-base 42 | - cdh5-hbase 43 | - cdh5-hbase-conf 44 | 
-------------------------------------------------------------------------------- /roles/cdh5-slave/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install datanode nodemanager and mapreduce pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-yarn-nodemanager 6 | - hadoop-hdfs-datanode 7 | - hadoop-mapreduce 8 | tags: 9 | - cdh5-slave 10 | 11 | - name: create the data directory for the slave nodes to store the data 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_datanode_data_dir }}" 15 | tags: 16 | - cdh5-slave 17 | 18 | - name: create the local storage directories for use by YARN 19 | file: path={{ item }} state=directory owner=yarn group=yarn mode=0755 20 | with_items: 21 | - "{{ yarn_nodemanager_local_dirs }}" 22 | - "{{ yarn_nodemanager_log_dirs }}" 23 | tags: 24 | - cdh5-slave 25 | 26 | - name: start hadoop datanode service 27 | service: name={{ item }} state=started 28 | with_items: 29 | - hadoop-hdfs-datanode 30 | tags: 31 | - cdh5-slave 32 | 33 | - name: start nodemanager services 34 | service: name={{ item }} state=started 35 | with_items: 36 | - hadoop-yarn-nodemanager 37 | tags: 38 | - cdh5-slave 39 | -------------------------------------------------------------------------------- /hosts.cdh5: -------------------------------------------------------------------------------- 1 | [cdh5-all:children] 2 | cdh5-namenode 3 | cdh5-journalnode 4 | cdh5-zookeeperserver 5 | cdh5-resourcemanager 6 | cdh5-httpfs 7 | cdh5-hbase 8 | cdh5-slave 9 | cdh5-hive 10 | cdh5-oozie 11 | cdh5-pig 12 | cdh5-spark 13 | 14 | [cdh5-namenode:children] 15 | cdh5-namenode-primary 16 | cdh5-namenode-backup 17 | 18 | [cdh5-namenode-primary] 19 | idc1-hnn1 20 | 21 | [cdh5-namenode-backup] 22 | idc1-hnn2 23 | 24 | [cdh5-journalnode] 25 | idc1-hjn1 26 | idc1-hjn2 27 | idc1-hjn3 28 | 29 | [cdh5-zookeeperserver] 30 | 
idc1-hjn1 zoo_id=1 31 | idc1-hjn2 zoo_id=2 32 | idc1-hjn3 zoo_id=3 33 | 34 | [cdh5-resourcemanager] 35 | idc1-hrm1 36 | 37 | [cdh5-httpfs] 38 | idc1-hnn2 39 | 40 | [cdh5-hbase:children] 41 | cdh5-hbase-master 42 | cdh5-hbase-regionserver 43 | 44 | [cdh5-hbase-master] 45 | idc1-hnn2 46 | 47 | [cdh5-hbase-regionserver] 48 | idc1-hdn[1:10] 49 | 50 | [cdh5-slave] 51 | idc1-hdn[1:10] 52 | 53 | [cdh5-hive] 54 | idc1-hrm1 55 | 56 | [cdh5-oozie] 57 | idc1-hrm1 58 | 59 | [cdh5-pig] 60 | idc1-hrm1 61 | 62 | [cdh5-spark:children] 63 | cdh5-spark-master 64 | cdh5-spark-worker 65 | 66 | [cdh5-spark-master] 67 | idc1-hrm1 68 | 69 | [cdh5-spark-worker] 70 | idc1-hdn[1:10] 71 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hadoop-metrics2-hbase.properties.j2: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period 6 | *.period=10 7 | 8 | # Below are some examples of sinks that could be used 9 | # to monitor different hbase daemons. 
10 | 11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink 12 | # hbase.sink.file-all.filename=all.metrics 13 | 14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink 15 | # hbase.sink.file0.context=hmaster 16 | # hbase.sink.file0.filename=master.metrics 17 | 18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink 19 | # hbase.sink.file1.context=thrift-one 20 | # hbase.sink.file1.filename=thrift-one.metrics 21 | 22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink 23 | # hbase.sink.file2.context=thrift-two 24 | # hbase.sink.file2.filename=thrift-one.metrics 25 | 26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink 27 | # hbase.sink.file3.context=rest 28 | # hbase.sink.file3.filename=rest.metrics 29 | -------------------------------------------------------------------------------- /roles/cdh5-namenode-primary/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install namenode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-namenode 6 | - hadoop-hdfs-zkfc 7 | tags: 8 | - cdh5-namenode 9 | - cdh5-namenode-primary 10 | 11 | - name: create the data directory for the namenode metadata 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_namenode_name_dir }}" 15 | tags: 16 | - cdh5-namenode 17 | - cdh5-namenode-primary 18 | 19 | - name: create the dfs hosts exclude file 20 | file: path={{ dfs_hosts_exclude }} owner=hdfs group=hdfs mode=0644 21 | tags: 22 | - cdh5-namenode 23 | - cdh5-namenode-primary 24 | 25 | - name: format the namenode 26 | shell: creates={{ item }} sudo -u hdfs hadoop namenode -format && touch {{ item }} 27 | with_items: 28 | - /var/hadoop/.status/namenode.formatted 29 | tags: 30 | - cdh5-namenode 31 | - cdh5-namenode-primary 32 | 33 | - name: start hadoop namenode services 34 | service: name={{ 
item }} state=started 35 | with_items: 36 | - hadoop-hdfs-namenode 37 | tags: 38 | - cdh5-namenode 39 | - cdh5-namenode-primary 40 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # ***** Set root logger level to DEBUG and its only appender to A. 17 | log4j.logger.org.apache.pig=info, A 18 | 19 | # ***** A is set to be a ConsoleAppender. 20 | log4j.appender.A=org.apache.log4j.ConsoleAppender 21 | # ***** A uses PatternLayout. 
22 | log4j.appender.A.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 24 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/action-conf/hive.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | hadoop.bin.path 23 | /usr/bin/hadoop 24 | 25 | 26 | 27 | hadoop.config.dir 28 | /etc/hadoop/conf 29 | 30 | 31 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/spark-env.sh.template.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file contains environment variables required to run Spark. Copy it as 4 | # spark-env.sh and edit that to configure Spark for your site. 5 | # 6 | # The following variables can be set in this file: 7 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 8 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 9 | # - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that 10 | # we recommend setting app-wide options in the application's driver program. 11 | # Examples of node-specific options : -Dspark.local.dir, GC options 12 | # Examples of app-wide options : -Dspark.serializer 13 | # 14 | # If using the standalone deploy mode, you can also set variables for it here: 15 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 16 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports 17 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 18 | # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 
1000m, 2g) 19 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT 20 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 21 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 22 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/configuration.xsl.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
namevaluedescription
37 | 38 | 39 |
40 |
41 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rest.port 26 | 60050 27 | 28 | 29 | hbase.cluster.distributed 30 | true 31 | 32 | 33 | hbase.rootdir 34 | hdfs://{{ nameservice_id }}:8020/hbase 35 | 36 | 37 | hbase.zookeeper.quorum 38 | {{ groups['cdh5-zookeeperserver'] | join('.' ~ tl_domain + ',') }}.{{ tl_domain }} 39 | 40 | 41 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/hadoop-config.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/hadoop-conf/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/cdh5-namenode-backup/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install namenode pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-hdfs-namenode 6 | - 
hadoop-hdfs-zkfc 7 | tags: 8 | - cdh5-namenode 9 | - cdh5-namenode-backup 10 | 11 | - name: create the data directory for the namenode metadata 12 | file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700 13 | with_items: 14 | - "{{ dfs_namenode_name_dir }}" 15 | tags: 16 | - cdh5-namenode 17 | - cdh5-namenode-backup 18 | 19 | - name: create the dfs hosts exclude file 20 | file: path={{ dfs_hosts_exclude }} owner=hdfs group=hdfs mode=0644 21 | tags: 22 | - cdh5-namenode 23 | - cdh5-namenode-backup 24 | 25 | - name: initialize the backup namenode 26 | shell: creates={{ item }} sudo -u hdfs hadoop namenode -bootstrapStandby && touch {{ item }} 27 | with_items: 28 | - /var/hadoop/.status/namenode.formatted 29 | tags: 30 | - cdh5-namenode 31 | - cdh5-namenode-backup 32 | 33 | - name: start hadoop namenode services 34 | service: name={{ item }} state=started 35 | with_items: 36 | - hadoop-hdfs-namenode 37 | tags: 38 | - cdh5-namenode 39 | - cdh5-namenode-backup 40 | 41 | - name: initialize the zkfc for namenode 42 | shell: creates={{ item }} sudo -u hdfs hdfs zkfc -formatZK && touch {{ item }} 43 | with_items: 44 | - /var/hadoop/.status/zkfc.formatted 45 | tags: 46 | - cdh5-namenode 47 | - cdh5-namenode-backup 48 | 49 | - name: start zkfc for namenodes 50 | service: name=hadoop-hdfs-zkfc state=started 51 | tags: 52 | - cdh5-namenode 53 | - cdh5-namenode-backup 54 | -------------------------------------------------------------------------------- /cdh5.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: cdh5-all 3 | roles: 4 | - cdh5-base 5 | 6 | - hosts: cdh5-zookeeperserver 7 | roles: 8 | - cdh5-zookeeperserver 9 | 10 | - hosts: cdh5-journalnode 11 | roles: 12 | - cdh5-journalnode 13 | 14 | - hosts: cdh5-namenode-primary 15 | roles: 16 | - cdh5-namenode-primary 17 | 18 | - hosts: cdh5-namenode-backup 19 | roles: 20 | - cdh5-namenode-backup 21 | 22 | - hosts: cdh5-namenode-primary 23 | tasks: 24 | - 
name: start zkfc for namenodes 25 | service: name=hadoop-hdfs-zkfc state=started 26 | tags: 27 | - cdh5-namenode-primary 28 | - cdh5-namenode-primary-zkfc 29 | 30 | - hosts: cdh5-httpfs 31 | roles: 32 | - cdh5-httpfs 33 | 34 | - hosts: cdh5-slave 35 | roles: 36 | - cdh5-slave 37 | 38 | - hosts: cdh5-namenode-primary 39 | tasks: 40 | - name: create tmp directory 41 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /tmp; sudo -u hdfs hadoop fs -chmod -R 1777 /tmp && touch {{ item }} 42 | with_items: 43 | - /var/hadoop/.status/tmp_dir.created 44 | tags: 45 | - cdh5-namenode-primary 46 | - cdh5-namenode-primary-tmpdir 47 | 48 | - hosts: cdh5-resourcemanager 49 | roles: 50 | - cdh5-resourcemanager 51 | 52 | - hosts: cdh5-hbase-master 53 | roles: 54 | - cdh5-hbase-master 55 | 56 | - hosts: cdh5-hbase-regionserver 57 | roles: 58 | - cdh5-hbase-regionserver 59 | 60 | - hosts: cdh5-hive 61 | roles: 62 | - cdh5-hive 63 | 64 | - hosts: cdh5-oozie 65 | roles: 66 | - cdh5-oozie 67 | 68 | - hosts: cdh5-pig 69 | roles: 70 | - cdh5-pig 71 | 72 | - hosts: cdh5-spark 73 | roles: 74 | - cdh5-spark-base 75 | 76 | - hosts: cdh5-spark-master 77 | roles: 78 | - cdh5-spark-master 79 | 80 | - hosts: cdh5-spark-worker 81 | roles: 82 | - cdh5-spark-worker 83 | -------------------------------------------------------------------------------- /roles/cdh5-resourcemanager/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install resourcemanager pkgs 3 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 4 | with_items: 5 | - hadoop-yarn-resourcemanager 6 | tags: 7 | - cdh5-resourcemanager 8 | 9 | - name: install historyserver and proxyserver pkgs 10 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 11 | with_items: 12 | - hadoop-mapreduce-historyserver 13 | - hadoop-yarn-proxyserver 14 | tags: 15 | - cdh5-resourcemanager 16 | - cdh5-historyserver 17 | - cdh5-proxyserver 18 | 19 | - name: create the 
local storage directories for use by YARN 20 | file: path={{ item }} state=directory owner=yarn group=yarn mode=0755 21 | with_items: 22 | - "{{ yarn_nodemanager_local_dirs }}" 23 | - "{{ yarn_nodemanager_log_dirs }}" 24 | tags: 25 | - cdh5-resourcemanager 26 | 27 | - name: create yarn history directory 28 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/history; sudo -u hdfs hadoop fs -chmod -R 1777 /user/history; sudo -u hdfs hadoop fs -chown mapred:hadoop /user/history && touch {{ item }} 29 | with_items: 30 | - /var/hadoop/.status/yarn_history.created 31 | tags: 32 | - cdh5-resourcemanager 33 | 34 | - name: create yarn log directory 35 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn; sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn && touch {{ item }} 36 | with_items: 37 | - /var/hadoop/.status/yarn_log.created 38 | tags: 39 | - cdh5-resourcemanager 40 | 41 | - name: start resourcemanager services 42 | service: name={{ item }} state=started 43 | with_items: 44 | - hadoop-yarn-resourcemanager 45 | - hadoop-mapreduce-historyserver 46 | - hadoop-yarn-proxyserver 47 | tags: 48 | - cdh5-resourcemanager 49 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-metrics2.properties.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # syntax: [prefix].[source|sink].[instance].[options] 19 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 20 | 21 | *.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink 22 | # default sampling period, in seconds 23 | *.period=10 24 | 25 | # The namenode-metrics.out will contain metrics from all context 26 | #namenode.sink.file.filename=namenode-metrics.out 27 | # Specifying a special sampling period for namenode: 28 | #namenode.sink.*.period=8 29 | 30 | #datanode.sink.file.filename=datanode-metrics.out 31 | 32 | # the following example split metrics of different 33 | # context to different sinks (in this case files) 34 | #jobtracker.sink.file_jvm.context=jvm 35 | #jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out 36 | #jobtracker.sink.file_mapred.context=mapred 37 | #jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out 38 | 39 | #tasktracker.sink.file.filename=tasktracker-metrics.out 40 | 41 | #maptask.sink.file.filename=maptask-metrics.out 42 | 43 | #reducetask.sink.file.filename=reducetask-metrics.out 44 | 45 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/limits.conf.j2: -------------------------------------------------------------------------------- 1 | # /etc/security/limits.conf 2 | # 3 | #Each line describes a limit for a user in the form: 4 | # 5 | # 6 | # 7 | #Where: 8 | # can be: 9 | # - an user name 10 | # - a group name, with @group syntax 11 | # - the wildcard *, for default 
entry 12 | # - the wildcard %, can be also used with %group syntax, 13 | # for maxlogin limit 14 | # 15 | # can have the two values: 16 | # - "soft" for enforcing the soft limits 17 | # - "hard" for enforcing hard limits 18 | # 19 | # can be one of the following: 20 | # - core - limits the core file size (KB) 21 | # - data - max data size (KB) 22 | # - fsize - maximum filesize (KB) 23 | # - memlock - max locked-in-memory address space (KB) 24 | # - nofile - max number of open files 25 | # - rss - max resident set size (KB) 26 | # - stack - max stack size (KB) 27 | # - cpu - max CPU time (MIN) 28 | # - nproc - max number of processes 29 | # - as - address space limit (KB) 30 | # - maxlogins - max number of logins for this user 31 | # - maxsyslogins - max number of logins on the system 32 | # - priority - the priority to run user process with 33 | # - locks - max number of file locks the user can hold 34 | # - sigpending - max number of pending signals 35 | # - msgqueue - max memory used by POSIX message queues (bytes) 36 | # - nice - max nice priority allowed to raise to values: [-20, 19] 37 | # - rtprio - max realtime priority 38 | # 39 | # 40 | # 41 | 42 | #* soft core 0 43 | #* hard rss 10000 44 | #@student hard nproc 20 45 | #@faculty soft nproc 20 46 | #@faculty hard nproc 50 47 | #ftp hard nproc 0 48 | #@student - maxlogins 4 49 | 50 | * - nofile 32768 51 | * - nproc 32768 52 | 53 | # End of file 54 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | fs.defaultFS 24 | hdfs://{{ nameservice_id }}/ 25 | 26 | 27 | 28 | 29 | fs.trash.interval 30 | {{ fs_trash_interval }} 31 | 32 | 33 | 34 | 35 | io.compression.codecs 36 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec, 37 | 
org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec, 38 | com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec 39 | 40 | 41 | io.compression.codec.lzo.class 42 | com.hadoop.compression.lzo.LzoCodec 43 | 44 | 45 | 46 | 47 | hadoop.proxyuser.oozie.hosts 48 | * 49 | 50 | 51 | hadoop.proxyuser.oozie.groups 52 | * 53 | 54 | 55 | 56 | 57 | hadoop.proxyuser.httpfs.hosts 58 | * 59 | 60 | 61 | hadoop.proxyuser.httpfs.groups 62 | * 63 | 64 | 65 | -------------------------------------------------------------------------------- /roles/cdh5-spark-base/templates/spark-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file contains environment variables required to run Spark. Copy it as 4 | # spark-env.sh and edit that to configure Spark for your site. 5 | # 6 | # The following variables can be set in this file: 7 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 8 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 9 | # - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that 10 | # we recommend setting app-wide options in the application's driver program. 11 | # Examples of node-specific options : -Dspark.local.dir, GC options 12 | # Examples of app-wide options : -Dspark.serializer 13 | # 14 | # If using the standalone deploy mode, you can also set variables for it here: 15 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 16 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports 17 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 18 | # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 
1000m, 2g) 19 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT 20 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 21 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 22 | 23 | ### 24 | ### === IMPORTANT === 25 | ### Change the following to specify a real cluster's Master host 26 | ### 27 | export STANDALONE_SPARK_MASTER_HOST=`hostname` 28 | 29 | #export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST 30 | export SPARK_MASTER_IP={{ groups['cdh5-spark-master'] | join() }}.{{ tl_domain }} 31 | 32 | ### Let's run everything with JVM runtime, instead of Scala 33 | export SPARK_LAUNCH_WITH_SCALA=0 34 | export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib 35 | export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib 36 | export SPARK_MASTER_WEBUI_PORT=18080 37 | export SPARK_MASTER_PORT=7077 38 | export SPARK_WORKER_PORT=7078 39 | export SPARK_WORKER_WEBUI_PORT=18081 40 | export SPARK_WORKER_DIR=/var/run/spark/work 41 | export SPARK_LOG_DIR=/var/log/spark 42 | 43 | if [ -n "$HADOOP_HOME" ]; then 44 | export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native 45 | fi 46 | 47 | ### Comment above 2 lines and uncomment the following if 48 | ### you want to run with scala version, that is included with the package 49 | #export SCALA_HOME=${SCALA_HOME:-/usr/lib/spark/scala} 50 | #export PATH=$PATH:$SCALA_HOME/bin 51 | 52 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-policy.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | 24 | 25 | security.client.protocol.acl 26 | * 27 | ACL for ClientProtocol and AdminProtocol implementations (ie. 28 | clients talking to HRegionServers) 29 | The ACL is a comma-separated list of user and group names. The user and 30 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 31 | A special value of "*" means all users are allowed. 
32 | 33 | 34 | 35 | security.admin.protocol.acl 36 | * 37 | ACL for HMasterInterface protocol implementation (ie. 38 | clients talking to HMaster for admin operations). 39 | The ACL is a comma-separated list of user and group names. The user and 40 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 41 | A special value of "*" means all users are allowed. 42 | 43 | 44 | 45 | security.masterregion.protocol.acl 46 | * 47 | ACL for HMasterRegionInterface protocol implementations 48 | (for HRegionServers communicating with HMaster) 49 | The ACL is a comma-separated list of user and group names. The user and 50 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 51 | A special value of "*" means all users are allowed. 52 | 53 | 54 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-env.sh.template.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Set Hive and Hadoop environment variables here. These variables can be used 18 | # to control the execution of Hive. 
It should be used by admins to configure 19 | # the Hive installation (so that users do not have to set environment variables 20 | # or set command line parameters to get correct behavior). 21 | # 22 | # The hive service being invoked (CLI/HWI etc.) is available via the environment 23 | # variable SERVICE 24 | 25 | 26 | # Hive Client memory usage can be an issue if a large number of clients 27 | # are running at the same time. The flags below have been useful in 28 | # reducing memory usage: 29 | # 30 | # if [ "$SERVICE" = "cli" ]; then 31 | # if [ -z "$DEBUG" ]; then 32 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit" 33 | # else 34 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit" 35 | # fi 36 | # fi 37 | 38 | # The heap size of the jvm stared by hive shell script can be controlled via: 39 | # 40 | # export HADOOP_HEAPSIZE=1024 41 | # 42 | # Larger heap size may be required when running queries over large number of files or partitions. 43 | # By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be 44 | # appropriate for hive server (hwi etc). 45 | 46 | 47 | # Set HADOOP_HOME to point to a specific hadoop install directory 48 | # HADOOP_HOME=${bin}/../../hadoop 49 | 50 | # Hive Configuration Directory can be controlled by: 51 | # export HIVE_CONF_DIR= 52 | 53 | # Folder containing extra ibraries required for hive compilation/execution can be controlled by: 54 | # export HIVE_AUX_JARS_PATH= 55 | -------------------------------------------------------------------------------- /roles/cdh5-pig/templates/pig.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Pig configuration file. All values can be overwritten by command line arguments. 17 | # see bin/pig -help 18 | 19 | # log4jconf log4j configuration file 20 | # log4jconf=./conf/log4j.properties 21 | 22 | # brief logging (no timestamps) 23 | brief=false 24 | 25 | # clustername, name of the hadoop jobtracker. If no port is defined port 50020 will be used. 26 | #cluster 27 | 28 | #debug level, INFO is default 29 | debug=INFO 30 | 31 | # a file that contains pig script 32 | #file= 33 | 34 | # load jarfile, colon separated 35 | #jar= 36 | 37 | #verbose print all log messages to screen (default to print only INFO and above to screen) 38 | verbose=false 39 | 40 | #exectype local|mapreduce, mapreduce is default 41 | #exectype=mapreduce 42 | # hod realted properties 43 | #ssh.gateway 44 | #hod.expect.root 45 | #hod.expect.uselatest 46 | #hod.command 47 | #hod.config.dir 48 | #hod.param 49 | 50 | 51 | #Do not spill temp files smaller than this size (bytes) 52 | pig.spill.size.threshold=5000000 53 | #EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes) 54 | #This should help reduce the number of files being spilled. 
55 | pig.spill.gc.activation.size=40000000 56 | 57 | 58 | ###################### 59 | # Everything below this line is Yahoo specific. Note that I've made 60 | # (almost) no changes to the lines above to make merging in from Apache 61 | # easier. Any values I don't want from above I override below. 62 | # 63 | # This file is configured for use with HOD on the production clusters. If you 64 | # want to run pig with a static cluster you will need to remove everything 65 | # below this line and set the cluster value (above) to the 66 | # hostname and port of your job tracker. 67 | 68 | exectype=mapreduce 69 | log.file= 70 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/ssl-server.xml.example.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | ssl.server.truststore.location 23 | 24 | Truststore to be used by NN and DN. Must be specified. 25 | 26 | 27 | 28 | 29 | ssl.server.truststore.password 30 | 31 | Optional. Default value is "". 32 | 33 | 34 | 35 | 36 | ssl.server.truststore.type 37 | jks 38 | Optional. The keystore file format, default value is "jks". 39 | 40 | 41 | 42 | 43 | ssl.server.truststore.reload.interval 44 | 10000 45 | Truststore reload check interval, in milliseconds. 46 | Default value is 10000 (10 seconds). 47 | 48 | 49 | 50 | 51 | ssl.server.keystore.location 52 | 53 | Keystore to be used by NN and DN. Must be specified. 54 | 55 | 56 | 57 | 58 | ssl.server.keystore.password 59 | 60 | Must be specified. 61 | 62 | 63 | 64 | 65 | ssl.server.keystore.keypassword 66 | 67 | Must be specified. 68 | 69 | 70 | 71 | 72 | ssl.server.keystore.type 73 | jks 74 | Optional. The keystore file format, default value is "jks". 
75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/ssl-client.xml.example.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | ssl.client.truststore.location 23 | 24 | Truststore to be used by clients like distcp. Must be 25 | specified. 26 | 27 | 28 | 29 | 30 | ssl.client.truststore.password 31 | 32 | Optional. Default value is "". 33 | 34 | 35 | 36 | 37 | ssl.client.truststore.type 38 | jks 39 | Optional. The keystore file format, default value is "jks". 40 | 41 | 42 | 43 | 44 | ssl.client.truststore.reload.interval 45 | 10000 46 | Truststore reload check interval, in milliseconds. 47 | Default value is 10000 (10 seconds). 48 | 49 | 50 | 51 | 52 | ssl.client.keystore.location 53 | 54 | Keystore to be used by clients like distcp. Must be 55 | specified. 56 | 57 | 58 | 59 | 60 | ssl.client.keystore.password 61 | 62 | Optional. Default value is "". 63 | 64 | 65 | 66 | 67 | ssl.client.keystore.keypassword 68 | 69 | Optional. Default value is "". 70 | 71 | 72 | 73 | 74 | ssl.client.keystore.type 75 | jks 76 | Optional. The keystore file format, default value is "jks". 
77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /group_vars/cdh5-all: -------------------------------------------------------------------------------- 1 | --- 2 | version: 3 | hadoop: 2.2.0+cdh5.0.0+1610-0.cdh5b2.p0.51.el6 4 | lzo: 0.4.15+gplextras5.0.0+0-0.gplextras5b2.p0.28.el6 5 | hbase: 0.96.1.1+cdh5.0.0+23-0.cdh5b2.p0.20.el6 6 | hive: 0.12.0+cdh5.0.0+265-0.cdh5b2.p0.33.el6 7 | oozie: 4.0.0+cdh5.0.0+144-0.cdh5b2.p0.21.el6 8 | pig: 0.12.0+cdh5.0.0+20-0.cdh5b2.p0.19.el6 9 | zookeeper: 3.4.5+cdh5.0.0+27-0.cdh5b2.p0.29.el6 10 | spark: 0.9.0-1.cdh5b2.p0.22.el6 11 | 12 | ansible_path: "{{ lookup('env','ANSIBLE_WORK_DIR') }}" 13 | repo_server: 10.100.1.10 14 | 15 | tl_domain: heylinux.com 16 | 17 | # core-site_xml 18 | nameservice_id: mycluster 19 | fs_trash_interval: 1440 20 | 21 | # zoo_cfg 22 | zookeeper_datadir: /var/lib/zookeeper 23 | 24 | # hdfs-site_xml 25 | dfs_permissions_superusergroup: hdfs 26 | dfs_permissions_enabled: 'false' 27 | dfs_replication: 1 28 | dfs_journalnode_edits_dir: /var/hadoop/data/1/dfs/jn 29 | dfs_blocksize: 134217728 30 | dfs_namenode_handler_count: 256 31 | dfs_datanode_handler_count: 32 32 | dfs_datanode_du_reserved: 0 33 | dfs_balance_bandwidthPerSec: 1048576 34 | dfs_hosts_exclude: /etc/hadoop/conf.{{ nameservice_id }}/datanodes.exclude 35 | dfs_datanode_max_transfer_threads: 4096 36 | dfs_datanode_balanced_space_threshold: 10737418240 37 | dfs_datanode_balanced_space_preference_fraction: 0.75 38 | dfs_datanode_max_xcievers: 4096 39 | dfs_checksum_type: CRC32 40 | dfs_namenode_name_dir: 41 | - /var/hadoop/data/1/dfs/nn 42 | - /var/hadoop/data/2/dfs/nn 43 | dfs_datanode_data_dir: 44 | - /var/hadoop/data/1/dfs/dn 45 | - /var/hadoop/data/2/dfs/dn 46 | - /var/hadoop/data/3/dfs/dn 47 | - /var/hadoop/data/4/dfs/dn 48 | 49 | # yarn-site_xml 50 | yarn_nodemanager_local_dirs: 51 | - /var/hadoop/data/1/yarn/local 52 | - /var/hadoop/data/2/yarn/local 53 | - /var/hadoop/data/3/yarn/local 
54 | - /var/hadoop/data/4/yarn/local 55 | yarn_nodemanager_log_dirs: 56 | - /var/hadoop/data/1/yarn/logs 57 | - /var/hadoop/data/2/yarn/logs 58 | - /var/hadoop/data/3/yarn/logs 59 | - /var/hadoop/data/4/yarn/logs 60 | yarn_nodemanager_remote_app_log_dir: 'hdfs://{{ nameservice_id }}/var/log/hadoop-yarn/apps' 61 | yarn_nodemanager_vmem_pmem_ratio: 10 62 | yarn_nodemanager_resource_memory_mb: 24576 63 | yarn_nodemanager_pmem_check_enabled: 'true' 64 | yarn_nodemanager_vmem_check_enabled: 'true' 65 | 66 | # mapred-site_xml 67 | mapreduce_map_memory_mb: 4096 68 | mapreduce_reduce_memory_mb: 8192 69 | mapreduce_map_java_opts: '-Xmx3072m' 70 | mapreduce_reduce_java_opts: '-Xmx6144m' 71 | mapreduce_jobtracker_handler_count: 128 72 | dfs_namenode_handler_count: 128 73 | 74 | # hive-site_xml 75 | hive_mysql_hosts: 76 | - "%" 77 | - "127.0.0.1" 78 | - "localhost" 79 | hive_mysql_passwd: mypasswd 80 | 81 | # oozie-site_xml 82 | oozie_db_passwd: mypasswd 83 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | mapreduce.framework.name 24 | yarn 25 | 26 | 27 | 28 | 29 | mapreduce.jobhistory.address 30 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:10020 31 | 32 | 33 | mapreduce.jobhistory.webapp.address 34 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:19888 35 | 36 | 37 | 38 | 39 | yarn.app.mapreduce.am.staging-dir 40 | /user 41 | 42 | 43 | 44 | 45 | mapreduce.map.memory.mb 46 | {{ mapreduce_map_memory_mb }} 47 | 48 | 49 | mapreduce.reduce.memory.mb 50 | {{ mapreduce_reduce_memory_mb }} 51 | 52 | 53 | mapreduce.map.java.opts 54 | {{ mapreduce_map_java_opts }} 55 | 56 | 57 | mapreduce.reduce.java.opts 58 | {{ mapreduce_reduce_java_opts }} 59 | 60 | 61 | 62 | mapreduce.jobtracker.handler.count 63 | {{ mapreduce_jobtracker_handler_count }} 64 | 65 
| 66 | dfs.namenode.handler.count 67 | {{ dfs_namenode_handler_count }} 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-metrics.properties.j2: -------------------------------------------------------------------------------- 1 | # Configuration of the "dfs" context for null 2 | dfs.class=org.apache.hadoop.metrics.spi.NullContext 3 | 4 | # Configuration of the "dfs" context for file 5 | #dfs.class=org.apache.hadoop.metrics.file.FileContext 6 | #dfs.period=10 7 | #dfs.fileName=/tmp/dfsmetrics.log 8 | 9 | # Configuration of the "dfs" context for ganglia 10 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 11 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext 12 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 13 | # dfs.period=10 14 | # dfs.servers=localhost:8649 15 | 16 | 17 | # Configuration of the "mapred" context for null 18 | mapred.class=org.apache.hadoop.metrics.spi.NullContext 19 | 20 | # Configuration of the "mapred" context for file 21 | #mapred.class=org.apache.hadoop.metrics.file.FileContext 22 | #mapred.period=10 23 | #mapred.fileName=/tmp/mrmetrics.log 24 | 25 | # Configuration of the "mapred" context for ganglia 26 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 27 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext 28 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 29 | # mapred.period=10 30 | # mapred.servers=localhost:8649 31 | 32 | 33 | # Configuration of the "jvm" context for null 34 | #jvm.class=org.apache.hadoop.metrics.spi.NullContext 35 | 36 | # Configuration of the "jvm" context for file 37 | #jvm.class=org.apache.hadoop.metrics.file.FileContext 38 | #jvm.period=10 39 | #jvm.fileName=/tmp/jvmmetrics.log 40 | 41 | # Configuration of the "jvm" context for ganglia 42 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext 43 | # 
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 44 | # jvm.period=10 45 | # jvm.servers=localhost:8649 46 | 47 | # Configuration of the "rpc" context for null 48 | rpc.class=org.apache.hadoop.metrics.spi.NullContext 49 | 50 | # Configuration of the "rpc" context for file 51 | #rpc.class=org.apache.hadoop.metrics.file.FileContext 52 | #rpc.period=10 53 | #rpc.fileName=/tmp/rpcmetrics.log 54 | 55 | # Configuration of the "rpc" context for ganglia 56 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext 57 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 58 | # rpc.period=10 59 | # rpc.servers=localhost:8649 60 | 61 | 62 | # Configuration of the "ugi" context for null 63 | ugi.class=org.apache.hadoop.metrics.spi.NullContext 64 | 65 | # Configuration of the "ugi" context for file 66 | #ugi.class=org.apache.hadoop.metrics.file.FileContext 67 | #ugi.period=10 68 | #ugi.fileName=/tmp/ugimetrics.log 69 | 70 | # Configuration of the "ugi" context for ganglia 71 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext 72 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 73 | # ugi.period=10 74 | # ugi.servers=localhost:8649 75 | 76 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-exec-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,FA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.query.id=hadoop 22 | hive.log.file=${hive.query.id}.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hive.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshhold=${hive.log.threshold} 29 | 30 | # 31 | # File Appender 32 | # 33 | 34 | log4j.appender.FA=org.apache.log4j.FileAppender 35 | log4j.appender.FA.File=${hive.log.dir}/${hive.log.file} 36 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 37 | 38 | # Pattern format: Date LogLevel LoggerName LogMessage 39 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 40 | # Debugging Pattern format 41 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | 49 | log4j.appender.console=org.apache.log4j.ConsoleAppender 50 | log4j.appender.console.target=System.err 51 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 52 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 53 | 54 | #custom logging levels 55 | #log4j.logger.xxx=DEBUG 56 | 57 | # 58 | # Event Counter Appender 59 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 
60 | # 61 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 62 | 63 | 64 | log4j.category.DataNucleus=ERROR,FA 65 | log4j.category.Datastore=ERROR,FA 66 | log4j.category.Datastore.Schema=ERROR,FA 67 | log4j.category.JPOX.Datastore=ERROR,FA 68 | log4j.category.JPOX.Plugin=ERROR,FA 69 | log4j.category.JPOX.MetaData=ERROR,FA 70 | log4j.category.JPOX.Query=ERROR,FA 71 | log4j.category.JPOX.General=ERROR,FA 72 | log4j.category.JPOX.Enhancer=ERROR,FA 73 | 74 | 75 | # Silence useless ZK logs 76 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,FA 77 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,FA 78 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Set Oozie specific environment variables here. 
21 | 22 | 23 | export OOZIE_DATA=/var/lib/oozie 24 | export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat 25 | export CATALINA_TMPDIR=/var/lib/oozie 26 | export CATALINA_PID=/var/run/oozie/oozie.pid 27 | export CATALINA_BASE=/var/lib/oozie/tomcat-deployment 28 | 29 | # Settings for the Embedded Tomcat that runs Oozie 30 | # Java System properties for Oozie should be specified in this variable 31 | # 32 | export OOZIE_HTTPS_PORT=11443 33 | export OOZIE_HTTPS_KEYSTORE_PASS=password 34 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.port=${OOZIE_HTTPS_PORT}" 35 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.keystore.pass=${OOZIE_HTTPS_KEYSTORE_PASS}" 36 | export CATALINA_OPTS="$CATALINA_OPTS -Xmx1024m" 37 | 38 | # Oozie configuration file to load from Oozie configuration directory 39 | # 40 | # export OOZIE_CONFIG_FILE=oozie-site.xml 41 | export OOZIE_CONFIG=/etc/oozie/conf 42 | 43 | # Oozie logs directory 44 | # 45 | # export OOZIE_LOG=${OOZIE_HOME}/logs 46 | export OOZIE_LOG=/var/log/oozie 47 | 48 | # Oozie Log4J configuration file to load from Oozie configuration directory 49 | # 50 | # export OOZIE_LOG4J_FILE=oozie-log4j.properties 51 | 52 | # Reload interval of the Log4J configuration file, in seconds 53 | # 54 | # export OOZIE_LOG4J_RELOAD=10 55 | 56 | # The port Oozie server runs 57 | # 58 | # export OOZIE_HTTP_PORT=11000 59 | 60 | # The port Oozie server runs if using SSL (HTTPS) 61 | # 62 | # export OOZIE_HTTPS_PORT=11443 63 | 64 | # The host name Oozie server runs on 65 | # 66 | # export OOZIE_HTTP_HOSTNAME=`hostname -f` 67 | 68 | # The base URL for callback URLs to Oozie 69 | # 70 | # export OOZIE_BASE_URL="http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie" 71 | 72 | # The location of the keystore for the Oozie server if using SSL (HTTPS) 73 | # 74 | # export OOZIE_HTTPS_KEYSTORE_FILE=${HOME}/.keystore 75 | 76 | # The password of the keystore for the Oozie server if using SSL (HTTPS) 77 | # 78 | # export OOZIE_HTTPS_KEYSTORE_PASS=password 79 | 
80 | # The Oozie Instance ID 81 | # 82 | # export OOZIE_INSTANCE_ID="${OOZIE_HTTP_HOSTNAME}" 83 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/yarn-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | yarn.nodemanager.aux-services 24 | mapreduce_shuffle 25 | 26 | 27 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 28 | org.apache.hadoop.mapred.ShuffleHandler 29 | 30 | 31 | yarn.resourcemanager.hostname 32 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }} 33 | 34 | 35 | yarn.application.classpath 36 | 37 | $HADOOP_CONF_DIR, 38 | $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*, 39 | $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*, 40 | $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*, 41 | $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/* 42 | 43 | 44 | 45 | 46 | 47 | yarn.nodemanager.local-dirs 48 | {{ yarn_nodemanager_local_dirs | join(',') }} 49 | 50 | 51 | yarn.nodemanager.log-dirs 52 | {{ yarn_nodemanager_log_dirs | join(',') }} 53 | 54 | 55 | yarn.log-aggregation-enable 56 | true 57 | 58 | 59 | yarn.nodemanager.remote-app-log-dir 60 | {{ yarn_nodemanager_remote_app_log_dir }} 61 | 62 | 63 | yarn.log.server.url 64 | http://{{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:19888/jobhistory/logs/ 65 | 66 | 67 | 68 | 69 | yarn.web-proxy.address 70 | {{ groups['cdh5-resourcemanager'] | join() }}.{{ tl_domain }}:8100 71 | 72 | 73 | 74 | 75 | yarn.nodemanager.vmem-pmem-ratio 76 | {{ yarn_nodemanager_vmem_pmem_ratio }} 77 | 78 | 79 | yarn.nodemanager.resource.memory-mb 80 | {{ yarn_nodemanager_resource_memory_mb }} 81 | 82 | 83 | yarn.nodemanager.pmem-check-enabled 84 | {{ yarn_nodemanager_pmem_check_enabled }} 85 | 86 | 87 | yarn.nodemanager.vmem-check-enabled 88 | {{ yarn_nodemanager_vmem_check_enabled }} 89 | 90 | 91 | 92 | 
-------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | javax.jdo.option.ConnectionURL 32 | jdbc:mysql://localhost/metastore 33 | 34 | 35 | javax.jdo.option.ConnectionDriverName 36 | com.mysql.jdbc.Driver 37 | 38 | 39 | javax.jdo.option.ConnectionUserName 40 | hive 41 | 42 | 43 | javax.jdo.option.ConnectionPassword 44 | {{ hive_mysql_passwd }} 45 | 46 | 47 | 48 | datanucleus.autoCreateSchema 49 | false 50 | 51 | 52 | datanucleus.fixedDatastore 53 | true 54 | 55 | 56 | datanucleus.autoStartMechanism 57 | SchemaTable 58 | 59 | 60 | 61 | hive.metastore.uris 62 | thrift://{{ groups['cdh5-hive'] | join() }}.{{ tl_domain }}:9083 63 | 64 | 65 | 66 | hive.support.concurrency 67 | true 68 | 69 | 70 | hive.zookeeper.quorum 71 | {{ groups['cdh5-zookeeperserver'] | join('.' ~ tl_domain + ',') }}.{{ tl_domain }} 72 | 73 | 74 | 75 | hive.aux.jars.path 76 | file:///usr/lib/hive/lib/zookeeper.jar, 77 | file:///usr/lib/hive/lib/hive-hbase-handler.jar, 78 | file:///usr/lib/hive/lib/guava-11.0.2.jar, 79 | file:///usr/lib/hive/lib/hbase-client.jar, 80 | file:///usr/lib/hive/lib/hbase-common.jar, 81 | file:///usr/lib/hive/lib/hbase-hadoop-compat.jar, 82 | file:///usr/lib/hive/lib/hbase-hadoop2-compat.jar, 83 | file:///usr/lib/hive/lib/hbase-protocol.jar, 84 | file:///usr/lib/hive/lib/hbase-server.jar, 85 | file:///usr/lib/hive/lib/htrace-core.jar 86 | 87 | 88 | 89 | hbase.zookeeper.quorum 90 | {{ groups['cdh5-zookeeperserver'] | join('.' 
~ tl_domain + ',') }}.{{ tl_domain }} 91 | 92 | 93 | -------------------------------------------------------------------------------- /roles/cdh5-hive/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install hive server pkgs 3 | yum: name={{ item }}-{{ version['hive'] }} state=present 4 | with_items: 5 | - hive 6 | - hive-metastore 7 | - hive-server2 8 | - hive-hbase 9 | - hive-jdbc 10 | tags: 11 | - cdh5-hive 12 | 13 | - name: copy the hive configuration files 14 | template: src={{ item }}.j2 dest=/etc/hive/conf/{{ item }} owner=hive group=hive mode=0664 15 | with_items: 16 | - hive-site.xml 17 | - hive-env.sh.template 18 | - hive-default.xml.template 19 | - hive-exec-log4j.properties 20 | - hive-log4j.properties 21 | - hive-server2 22 | register: copy_hive_conf 23 | tags: 24 | - cdh5-hive 25 | - cdh5-hive-conf 26 | 27 | - name: copy the hive default env conf 28 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} owner=hive group=hive mode=0644 29 | with_items: 30 | - hive-server2 31 | tags: 32 | - cdh5-hive 33 | - cdh5-hive-conf 34 | 35 | - name: install mysql server and connector for hive metastore 36 | yum: name={{ item }} state=present 37 | with_items: 38 | - mysql-server 39 | - mysql-connector-java 40 | tags: 41 | - cdh5-hive 42 | - cdh5-hive-mysqlserver 43 | 44 | - name: start mysql server for hive metastore 45 | service: name=mysqld state=started 46 | tags: 47 | - cdh5-hive 48 | - cdh5-hive-mysqlserver 49 | 50 | - name: create symbolically link for mysql connector 51 | file: src=/usr/share/java/mysql-connector-java.jar dest=/usr/lib/hive/lib/mysql-connector-java.jar owner=hive group=hive state=link 52 | tags: 53 | - cdh5-hive 54 | - cdh5-hive-mysqlserver 55 | 56 | - name: create needed directories for hive 57 | file: name={{ item }} state=directory owner=hive group=hive mode=0700 58 | with_items: 59 | - /var/hive 60 | - /var/hive/deploy 61 | - /var/hive/.status 62 | tags: 63 | - 
cdh5-hive 64 | 65 | - name: copy hive_metastore_init.sql to /var/hive/deploy 66 | template: src={{ item }}.j2 dest=/var/hive/deploy/{{ item }} owner=hive group=hive mode=0644 67 | with_items: 68 | - hive_metastore_init.sql 69 | tags: 70 | - cdh5-hive 71 | - cdh5-hive-conf 72 | - cdh5-hive-mysqlserver 73 | 74 | - name: set the password for mysql root user 75 | shell: creates={{ item }} mysqladmin -u root password '{{ hive_mysql_passwd }}' && touch {{ item }} 76 | with_items: 77 | - /var/hive/.status/password.reset 78 | tags: 79 | - cdh5-hive 80 | - cdh5-hive-mysqlserver 81 | 82 | - name: create the database for hive metastore 83 | shell: creates={{ item }} mysql -uroot -p'{{ hive_mysql_passwd }}' < /var/hive/deploy/hive_metastore_init.sql && touch {{ item }} 84 | with_items: 85 | - /var/hive/.status/metastore.created 86 | tags: 87 | - cdh5-hive 88 | - cdh5-hive-mysqlserver 89 | 90 | - name: start metastore service 91 | service: name=hive-metastore state=started 92 | tags: 93 | - cdh5-hive 94 | 95 | - name: create warehouse directory 96 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/hive/warehouse; sudo -u hdfs hadoop fs -chown -R hive /user/hive; sudo -u hdfs hadoop fs -chmod -R 1777 /user/hive && touch {{ item }} 97 | with_items: 98 | - /var/hive/.status/warehouse.created 99 | tags: 100 | - cdh5-hive 101 | 102 | - name: change the permissions of .hivehistory 103 | file: path=/var/lib/hive/.hivehistory owner=hive group=hive mode=0666 104 | tags: 105 | - cdh5-hive 106 | 107 | - name: start hive server services 108 | service: name={{ item }} state=started 109 | with_items: 110 | - hive-server2 111 | tags: 112 | - cdh5-hive 113 | - cdh5-hive-server2 114 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | 
hbase.root.logger=INFO,console 3 | hbase.security.logger=INFO,console 4 | hbase.log.dir=. 5 | hbase.log.file=hbase.log 6 | 7 | # Define the root logger to the system property "hbase.root.logger". 8 | log4j.rootLogger=${hbase.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 28 | 29 | # Rolling File Appender properties 30 | hbase.log.maxfilesize=256MB 31 | hbase.log.maxbackupindex=20 32 | 33 | # Rolling File Appender 34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file} 36 | 37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize} 38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex} 39 | 40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 42 | 43 | # 44 | # Security audit appender 45 | # 46 | hbase.security.log.file=SecurityAuth.audit 47 | hbase.security.log.maxfilesize=256MB 48 | hbase.security.log.maxbackupindex=20 49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file} 51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize} 52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex} 53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 55 
| log4j.category.SecurityLogger=${hbase.security.logger} 56 | log4j.additivity.SecurityLogger=false 57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE 58 | 59 | # 60 | # Null Appender 61 | # 62 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 63 | 64 | # 65 | # console 66 | # Add "console" to rootlogger above if you want to use this 67 | # 68 | log4j.appender.console=org.apache.log4j.ConsoleAppender 69 | log4j.appender.console.target=System.err 70 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 72 | 73 | # Custom Logging levels 74 | 75 | log4j.logger.org.apache.zookeeper=INFO 76 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 77 | log4j.logger.org.apache.hadoop.hbase=DEBUG 78 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug. 79 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO 80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO 81 | #log4j.logger.org.apache.hadoop.dfs=DEBUG 82 | # Set this class to log INFO only otherwise its OTT 83 | # Enable this to get detailed connection error/retry logging. 84 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE 85 | 86 | 87 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output) 88 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG 89 | 90 | # Uncomment the below if you want to remove logging of client region caching' 91 | # and scan of .META. 
messages 92 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO 93 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO 94 | -------------------------------------------------------------------------------- /roles/cdh5-hive/templates/hive-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,DRFA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.log.file=hive.log 22 | 23 | # Define the root logger to the system property "hadoop.root.logger". 24 | log4j.rootLogger=${hive.root.logger}, EventCounter 25 | 26 | # Logging Threshold 27 | log4j.threshold=${hive.log.threshold} 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | # Use the PidDailyerRollingFileAppend class instead if you want to use separate log files 33 | # for different CLI session. 
34 | # 35 | # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender 36 | 37 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 38 | 39 | log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} 40 | 41 | # Rollver at midnight 42 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 43 | 44 | # 30-day backup 45 | #log4j.appender.DRFA.MaxBackupIndex=30 46 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 47 | 48 | # Pattern format: Date LogLevel LoggerName LogMessage 49 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 50 | # Debugging Pattern format 51 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 52 | 53 | 54 | # 55 | # console 56 | # Add "console" to rootlogger above if you want to use this 57 | # 58 | 59 | log4j.appender.console=org.apache.log4j.ConsoleAppender 60 | log4j.appender.console.target=System.err 61 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 62 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 63 | log4j.appender.console.encoding=UTF-8 64 | 65 | #custom logging levels 66 | #log4j.logger.xxx=DEBUG 67 | 68 | # 69 | # Event Counter Appender 70 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 
71 | # 72 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 73 | 74 | 75 | log4j.category.DataNucleus=ERROR,DRFA 76 | log4j.category.Datastore=ERROR,DRFA 77 | log4j.category.Datastore.Schema=ERROR,DRFA 78 | log4j.category.JPOX.Datastore=ERROR,DRFA 79 | log4j.category.JPOX.Plugin=ERROR,DRFA 80 | log4j.category.JPOX.MetaData=ERROR,DRFA 81 | log4j.category.JPOX.Query=ERROR,DRFA 82 | log4j.category.JPOX.General=ERROR,DRFA 83 | log4j.category.JPOX.Enhancer=ERROR,DRFA 84 | 85 | 86 | # Silence useless ZK logs 87 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA 88 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA 89 | 90 | #custom logging levels 91 | log4j.logger.org.apache.hadoop.hive.ql.parse.SemanticAnalyzer=INFO 92 | log4j.logger.org.apache.hadoop.hive.ql.Driver=INFO 93 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.ExecDriver=INFO 94 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapRedTask=INFO 95 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask=INFO 96 | log4j.logger.org.apache.hadoop.hive.ql.exec.Task=INFO 97 | 98 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/capacity-scheduler.xml.j2: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | yarn.scheduler.capacity.maximum-applications 18 | 10000 19 | 20 | Maximum number of applications that can be pending and running. 21 | 22 | 23 | 24 | 25 | yarn.scheduler.capacity.maximum-am-resource-percent 26 | 0.1 27 | 28 | Maximum percent of resources in the cluster which can be used to run 29 | application masters i.e. controls number of concurrent running 30 | applications. 31 | 32 | 33 | 34 | 35 | yarn.scheduler.capacity.resource-calculator 36 | org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator 37 | 38 | The ResourceCalculator implementation to be used to compare 39 | Resources in the scheduler. 
40 | The default i.e. DefaultResourceCalculator only uses Memory while 41 | DominantResourceCalculator uses dominant-resource to compare 42 | multi-dimensional resources such as Memory, CPU etc. 43 | 44 | 45 | 46 | 47 | yarn.scheduler.capacity.root.queues 48 | default 49 | 50 | The queues at the this level (root is the root queue). 51 | 52 | 53 | 54 | 55 | yarn.scheduler.capacity.root.default.capacity 56 | 100 57 | Default queue target capacity. 58 | 59 | 60 | 61 | yarn.scheduler.capacity.root.default.user-limit-factor 62 | 1 63 | 64 | Default queue user limit a percentage from 0.0 to 1.0. 65 | 66 | 67 | 68 | 69 | yarn.scheduler.capacity.root.default.maximum-capacity 70 | 100 71 | 72 | The maximum capacity of the default queue. 73 | 74 | 75 | 76 | 77 | yarn.scheduler.capacity.root.default.state 78 | RUNNING 79 | 80 | The state of the default queue. State can be one of RUNNING or STOPPED. 81 | 82 | 83 | 84 | 85 | yarn.scheduler.capacity.root.default.acl_submit_applications 86 | * 87 | 88 | The ACL of who can submit jobs to the default queue. 89 | 90 | 91 | 92 | 93 | yarn.scheduler.capacity.root.default.acl_administer_queue 94 | * 95 | 96 | The ACL of who can administer jobs on the default queue. 97 | 98 | 99 | 100 | 101 | yarn.scheduler.capacity.node-locality-delay 102 | 40 103 | 104 | Number of missed scheduling opportunities after which the CapacityScheduler 105 | attempts to schedule rack-local containers. 106 | Typically this should be set to number of nodes in the cluster, By default is setting 107 | approximately number of nodes in one rack which is 40. 
108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/mapred-queues.xml.template.j2: -------------------------------------------------------------------------------- 1 | 2 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | default 31 | 32 | 34 | 35 | 36 | 37 | 39 | running 40 | 41 | 54 | 55 | 56 | 73 | 74 | 75 | 76 | 92 | 93 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install oozie pkgs 3 | yum: name={{ item }}-{{ version['oozie'] }} state=present 4 | with_items: 5 | - oozie 6 | - oozie-client 7 | tags: 8 | - cdh5-oozie 9 | 10 | - name: create alternatives for YARN without SSL 11 | shell: creates=/etc/alternatives/oozie-tomcat-conf alternatives --install {{ item }} oozie-tomcat-conf {{ item }} 50; alternatives --set oozie-tomcat-conf {{ item }} 12 | with_items: 13 | - /etc/oozie/tomcat-conf.http 14 | tags: 15 | - cdh5-oozie 16 | - cdh5-oozie-conf 17 | 18 | - name: create the oozie configuration dirs 19 | file: path=/etc/oozie/conf/{{ item }} state=directory owner=oozie group=oozie mode=0755 20 | with_items: 21 | - action-conf 22 | - hadoop-conf 23 | tags: 24 | - cdh5-oozie 25 | - cdh5-oozie-conf 26 | 27 | - name: copy the oozie configuration files 28 | template: src={{ item }}.j2 dest=/etc/oozie/conf/{{ item }} owner=oozie group=oozie mode=0664 29 | with_items: 30 | - oozie-site.xml 31 | - adminusers.txt 32 | - hadoop-config.xml 33 | - oozie-default.xml 34 | - oozie-env.sh 35 | - oozie-log4j.properties 36 | - action-conf/hive.xml 37 | - hadoop-conf/core-site.xml 38 | register: copy_oozie_conf 39 | tags: 40 | - cdh5-oozie 41 | - cdh5-oozie-conf 42 | 43 | - name: create needed directories for oozie 44 | file: name={{ item }} state=directory owner=oozie group=oozie mode=0700 45 | with_items: 46 | - /var/oozie 47 | - 
/var/oozie/deploy 48 | - /var/oozie/.status 49 | tags: 50 | - cdh5-oozie 51 | 52 | - name: copy the oozie_db_init.sql to /var/oozie/deploy 53 | template: src={{ item }}.j2 dest=/var/oozie/deploy/{{ item }} owner=oozie group=oozie mode=0664 54 | with_items: 55 | - oozie_db_init.sql 56 | tags: 57 | - cdh5-oozie 58 | - cdh5-oozie-conf 59 | 60 | - name: create database for oozie 61 | shell: creates={{ item }} mysql -uroot -p'{{ hive_mysql_passwd }}' < /var/oozie/deploy/oozie_db_init.sql && touch {{ item }} 62 | with_items: 63 | - /var/oozie/.status/oozie_db.created 64 | tags: 65 | - cdh5-oozie 66 | - cdh5-oozie-db 67 | 68 | - name: download ext zip file from cloudera site 69 | get_url: url=http://archive.cloudera.com/gplextras/misc/ext-2.2.zip dest=/var/oozie/deploy/ext-2.2.zip mode=0644 70 | tags: 71 | - cdh5-oozie 72 | - cdh5-oozie-lib 73 | 74 | - name: extract the ext zip file to /var/lib/oozie 75 | shell: creates=/var/lib/oozie/ext-2.2 unzip /var/oozie/deploy/ext-2.2.zip -d /var/lib/oozie/ 76 | tags: 77 | - cdh5-oozie 78 | - cdh5-oozie-lib 79 | 80 | - name: create directories in hdfs 81 | shell: creates={{ item }} sudo -u hdfs hadoop fs -mkdir -p /user/oozie; sudo -u hdfs hadoop fs -chown -R oozie /user/oozie && touch {{ item }} 82 | with_items: 83 | - /var/oozie/.status/oozie_dir.created 84 | tags: 85 | - cdh5-oozie 86 | - cdh5-oozie-lib 87 | 88 | - name: create sharelib for oozie in hdfs 89 | shell: creates={{ item }} sudo oozie-setup sharelib create -fs hdfs://{{ nameservice_id }}:8020 -locallib /usr/lib/oozie/oozie-sharelib-yarn.tar.gz && touch {{ item }} 90 | with_items: 91 | - /var/oozie/.status/sharelib.created 92 | tags: 93 | - cdh5-oozie 94 | - cdh5-oozie-lib 95 | 96 | - name: install mysql connector 97 | yum: name={{ item }} state=present 98 | with_items: 99 | - mysql-connector-java 100 | tags: 101 | - cdh5-oozie 102 | - cdh5-oozie-lib 103 | 104 | - name: create symbolically link for mysql connector 105 | file: src=/usr/share/java/mysql-connector-java.jar 
dest=/var/lib/oozie/mysql-connector-java.jar owner=oozie group=oozie state=link 106 | tags: 107 | - cdh5-oozie 108 | - cdh5-oozie-lib 109 | 110 | - name: create symbolically links for hadoop-lzo 111 | file: src=/usr/lib/hadoop/lib/{{ item }} dest=/var/lib/oozie/{{ item }} owner=oozie group=oozie state=link 112 | with_items: 113 | - hadoop-lzo-0.4.15-gplextras5.0.0-beta-2-SNAPSHOT.jar 114 | - hadoop-lzo.jar 115 | tags: 116 | - cdh5-oozie 117 | - cdh5-oozie-lib 118 | 119 | - name: create symbolically links for hadoop-lzo 120 | file: src=/usr/lib/hadoop/lib/native/{{ item }} dest=/var/lib/oozie/{{ item }} owner=oozie group=oozie state=link 121 | with_items: 122 | - libgplcompression.a 123 | - libgplcompression.la 124 | - libgplcompression.lai 125 | - libgplcompression.so 126 | - libgplcompression.so.0 127 | - libgplcompression.so.0.0.0 128 | tags: 129 | - cdh5-oozie 130 | - cdh5-oozie-lib 131 | 132 | - name: start oozie 133 | service: name=oozie state=started 134 | tags: 135 | - cdh5-oozie 136 | - cdh5-oozie-service 137 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/yarn-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # User for YARN daemons 17 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} 18 | 19 | # resolve links - $0 may be a softlink 20 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" 21 | 22 | # some Java parameters 23 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 24 | if [ "$JAVA_HOME" != "" ]; then 25 | #echo "run java in $JAVA_HOME" 26 | JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | if [ "$JAVA_HOME" = "" ]; then 30 | echo "Error: JAVA_HOME is not set." 31 | exit 1 32 | fi 33 | 34 | JAVA=$JAVA_HOME/bin/java 35 | JAVA_HEAP_MAX=-Xmx1000m 36 | 37 | # For setting YARN specific HEAP sizes please use this 38 | # Parameter and set appropriately 39 | # YARN_HEAPSIZE=1000 40 | 41 | # check envvars which might override default args 42 | if [ "$YARN_HEAPSIZE" != "" ]; then 43 | JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" 44 | fi 45 | 46 | # Resource Manager specific parameters 47 | 48 | # Specify the max Heapsize for the ResourceManager using a numerical value 49 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 50 | # the value to 1000. 51 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 52 | # and/or YARN_RESOURCEMANAGER_OPTS. 53 | # If not specified, the default value will be picked from either YARN_HEAPMAX 54 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 55 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000 56 | 57 | # Specify the JVM options to be used when starting the ResourceManager. 58 | # These options will be appended to the options specified as YARN_OPTS 59 | # and therefore may override any similar flags set in YARN_OPTS 60 | #export YARN_RESOURCEMANAGER_OPTS= 61 | 62 | # Node Manager specific parameters 63 | 64 | # Specify the max Heapsize for the NodeManager using a numerical value 65 | # in the scale of MB. 
For example, to specify an jvm option of -Xmx1000m, set 66 | # the value to 1000. 67 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 68 | # and/or YARN_NODEMANAGER_OPTS. 69 | # If not specified, the default value will be picked from either YARN_HEAPMAX 70 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 71 | #export YARN_NODEMANAGER_HEAPSIZE=1000 72 | 73 | # Specify the JVM options to be used when starting the NodeManager. 74 | # These options will be appended to the options specified as YARN_OPTS 75 | # and therefore may override any similar flags set in YARN_OPTS 76 | #export YARN_NODEMANAGER_OPTS= 77 | 78 | # so that filenames w/ spaces are handled correctly in loops below 79 | IFS= 80 | 81 | 82 | # default log directory & file 83 | if [ "$YARN_LOG_DIR" = "" ]; then 84 | YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" 85 | fi 86 | if [ "$YARN_LOGFILE" = "" ]; then 87 | YARN_LOGFILE='yarn.log' 88 | fi 89 | 90 | # default policy file for service-level authorization 91 | if [ "$YARN_POLICYFILE" = "" ]; then 92 | YARN_POLICYFILE="hadoop-policy.xml" 93 | fi 94 | 95 | # restore ordinary behaviour 96 | unset IFS 97 | 98 | 99 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" 100 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" 101 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" 102 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" 103 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" 104 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" 105 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 106 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 107 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then 108 | YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" 109 | fi 110 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" 111 | 112 | 113 | 
-------------------------------------------------------------------------------- /roles/cdh5-base/tasks/base.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: create cdh5 repo 3 | copy: src={{ item }} dest=/etc/yum.repos.d/{{ item }} owner=root group=root mode=0644 4 | with_items: 5 | - cloudera-cdh5b2.repo 6 | - cloudera-gplextras5b2.repo 7 | tags: 8 | - cdh5-base 9 | 10 | - name: add cdh5 repo key 11 | get_url: url=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera dest=/etc/pki/rpm-gpg/RPM-GPG-KEY-cloudera owner=root group=root mode=0644 12 | tags: 13 | - cdh5-base 14 | 15 | - name: install needed rpms for hadoop 16 | yum: name={{ item }} state=present 17 | with_items: 18 | - gcc 19 | - gcc-c++ 20 | - make 21 | tags: 22 | - cdh5-base 23 | 24 | - name: create the hosts file for all machines 25 | template: src=hosts.j2 dest=/etc/hosts owner=root group=root mode=0644 26 | tags: 27 | - cdh5-base 28 | - cdh5-base-hosts 29 | 30 | - name: install hadoop base pkgs 31 | yum: name={{ item }}-{{ version['hadoop'] }} state=present 32 | with_items: 33 | - hadoop 34 | - hadoop-mapreduce 35 | - hadoop-yarn 36 | - hadoop-hdfs 37 | tags: 38 | - cdh5-base 39 | 40 | - name: install hadoop lzo pkgs 41 | yum: name={{ item }}-{{ version['lzo'] }} state=present 42 | with_items: 43 | - hadoop-lzo 44 | tags: 45 | - cdh5-base 46 | 47 | - name: create all needed hadoop directories 48 | file: path={{ item }} state=directory owner=root group=root mode=0755 49 | with_items: 50 | - /var/hadoop 51 | - /var/hadoop/data 52 | - /var/hadoop/.status 53 | - /root/deploy 54 | - /root/deploy/hadoop 55 | tags: 56 | - cdh5-base 57 | 58 | - name: download jdk-6u45-linux-x64-rpm.bin 59 | get_url: url=http://{{ repo_server }}/repo/misc/jdk-6u45-linux-x64-rpm.bin dest=/root/deploy/hadoop/jdk-6u45-linux-x64-rpm.bin mode=0755 60 | tags: 61 | - cdh5-base 62 | - cdh5-jdk 63 | 64 | - name: remove openjdk pkgs 65 | yum: name={{ item }}
state=absent 66 | with_items: 67 | - java-1.6.0-openjdk-devel 68 | - java-1.6.0-openjdk 69 | tags: 70 | - cdh5-base 71 | - cdh5-jdk 72 | 73 | - name: run jdk-6u45-linux-x64-rpm.bin 74 | shell: /root/deploy/hadoop/jdk-6u45-linux-x64-rpm.bin creates=/usr/java/jdk1.6.0_45 75 | tags: 76 | - cdh5-base 77 | - cdh5-jdk 78 | 79 | - name: create java env profile 80 | copy: src=java.sh dest=/etc/profile.d/java.sh owner=root group=root mode=0644 81 | register: java_env_profile 82 | tags: 83 | - cdh5-base 84 | - cdh5-jdk 85 | 86 | - name: source the java env profile 87 | shell: source /etc/profile.d/java.sh 88 | when: java_env_profile|changed 89 | tags: 90 | - cdh5-base 91 | - cdh5-jdk 92 | 93 | - name: install zookeeper base pkgs 94 | yum: name={{ item }}-{{ version['zookeeper'] }} state=present 95 | with_items: 96 | - zookeeper 97 | tags: 98 | - cdh5-base 99 | - cdh5-zookeeper 100 | 101 | - name: create zookeeper cfgs 102 | template: src=zoo.cfg.j2 dest=/etc/zookeeper/conf/zoo.cfg owner=zookeeper group=zookeeper mode=0644 103 | tags: 104 | - cdh5-base 105 | - cdh5-zookeeper 106 | 107 | - name: create /etc/hadoop/conf.{{ nameservice_id }} 108 | file: path=/etc/hadoop/conf.{{ nameservice_id }} state=directory owner=root group=root mode=0755 109 | register: create_hadoop_conf 110 | tags: 111 | - cdh5-base 112 | - cdh5-base-conf 113 | 114 | - name: create alternatives for hadoop-conf 115 | shell: alternatives --install /etc/hadoop/conf hadoop-conf {{ item }} 50 116 | with_items: 117 | - /etc/hadoop/conf.{{ nameservice_id }} 118 | when: create_hadoop_conf|changed 119 | tags: 120 | - cdh5-base 121 | - cdh5-base-conf 122 | 123 | - name: create alternatives for hadoop-conf 124 | shell: alternatives --set hadoop-conf {{ item }} 125 | with_items: 126 | - /etc/hadoop/conf.{{ nameservice_id }} 127 | when: create_hadoop_conf|changed 128 | tags: 129 | - cdh5-base 130 | - cdh5-base-conf 131 | 132 | - name: copy the hadoop configuration files 133 | template: src=hadoop/{{ item }}.j2 
dest=/etc/hadoop/conf.{{ nameservice_id }}/{{ item }} owner=hdfs group=hadoop mode=0664 134 | with_items: 135 | - core-site.xml 136 | - hdfs-site.xml 137 | - mapred-site.xml 138 | - yarn-site.xml 139 | - yarn-env.sh 140 | - slaves 141 | - capacity-scheduler.xml 142 | - configuration.xsl 143 | - container-executor.cfg 144 | - hadoop-metrics2.properties 145 | - hadoop-metrics.properties 146 | - hadoop-policy.xml 147 | - log4j.properties 148 | - mapred-queues.xml.template 149 | - ssl-client.xml.example 150 | - ssl-server.xml.example 151 | register: copy_hadoop_conf 152 | tags: 153 | - cdh5-base 154 | - cdh5-base-conf 155 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, software 22 | # distributed under the License is distributed on an "AS IS" BASIS, 23 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
24 | # See the License for the specific language governing permissions and 25 | # limitations under the License. See accompanying LICENSE file. 26 | # 27 | 28 | # If the Java System property 'oozie.log.dir' is not defined at Oozie start up time 29 | # XLogService sets its value to '${oozie.home}/logs' 30 | 31 | # The appender that Oozie uses must be named 'oozie' (i.e. log4j.appender.oozie) 32 | 33 | # Using the RollingFileAppender with the OozieRollingPolicy will roll the log file every hour and retain up to MaxHistory number of 34 | # log files. If FileNamePattern ends with ".gz" it will create gzip files. 35 | log4j.appender.oozie=org.apache.log4j.rolling.RollingFileAppender 36 | log4j.appender.oozie.RollingPolicy=org.apache.oozie.util.OozieRollingPolicy 37 | log4j.appender.oozie.File=${oozie.log.dir}/oozie.log 38 | log4j.appender.oozie.Append=true 39 | log4j.appender.oozie.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.oozie.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - SERVER[${oozie.instance.id}] %m%n 41 | # The FileNamePattern must end with "-%d{yyyy-MM-dd-HH}.gz" or "-%d{yyyy-MM-dd-HH}" and also start with the 42 | # value of log4j.appender.oozie.File 43 | log4j.appender.oozie.RollingPolicy.FileNamePattern=${log4j.appender.oozie.File}-%d{yyyy-MM-dd-HH} 44 | # The MaxHistory controls how many log files will be retained (720 hours / 24 hours per day = 30 days); -1 to disable 45 | log4j.appender.oozie.RollingPolicy.MaxHistory=720 46 | 47 | # Uncomment the below two lines to use the DailyRollingFileAppender instead 48 | # The DatePattern must end with either "dd" or "HH" 49 | #log4j.appender.oozie=org.apache.log4j.DailyRollingFileAppender 50 | #log4j.appender.oozie.DatePattern='.'yyyy-MM-dd-HH 51 | 52 | log4j.appender.oozieops=org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.oozieops.DatePattern='.'yyyy-MM-dd 54 | log4j.appender.oozieops.File=${oozie.log.dir}/oozie-ops.log 55 | log4j.appender.oozieops.Append=true 56 | 
log4j.appender.oozieops.layout=org.apache.log4j.PatternLayout 57 | log4j.appender.oozieops.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 58 | 59 | log4j.appender.oozieinstrumentation=org.apache.log4j.DailyRollingFileAppender 60 | log4j.appender.oozieinstrumentation.DatePattern='.'yyyy-MM-dd 61 | log4j.appender.oozieinstrumentation.File=${oozie.log.dir}/oozie-instrumentation.log 62 | log4j.appender.oozieinstrumentation.Append=true 63 | log4j.appender.oozieinstrumentation.layout=org.apache.log4j.PatternLayout 64 | log4j.appender.oozieinstrumentation.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 65 | 66 | log4j.appender.oozieaudit=org.apache.log4j.DailyRollingFileAppender 67 | log4j.appender.oozieaudit.DatePattern='.'yyyy-MM-dd 68 | log4j.appender.oozieaudit.File=${oozie.log.dir}/oozie-audit.log 69 | log4j.appender.oozieaudit.Append=true 70 | log4j.appender.oozieaudit.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.oozieaudit.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 72 | 73 | log4j.appender.openjpa=org.apache.log4j.DailyRollingFileAppender 74 | log4j.appender.openjpa.DatePattern='.'yyyy-MM-dd 75 | log4j.appender.openjpa.File=${oozie.log.dir}/oozie-jpa.log 76 | log4j.appender.openjpa.Append=true 77 | log4j.appender.openjpa.layout=org.apache.log4j.PatternLayout 78 | log4j.appender.openjpa.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 79 | 80 | log4j.logger.openjpa=INFO, openjpa 81 | log4j.logger.oozieops=INFO, oozieops 82 | log4j.logger.oozieinstrumentation=ALL, oozieinstrumentation 83 | log4j.logger.oozieaudit=ALL, oozieaudit 84 | log4j.logger.org.apache.oozie=INFO, oozie 85 | log4j.logger.org.apache.hadoop=WARN, oozie 86 | log4j.logger.org.mortbay=WARN, oozie 87 | log4j.logger.org.hsqldb=WARN, oozie 88 | log4j.logger.org.apache.hadoop.security.authentication.server=WARN, oozie 89 | -------------------------------------------------------------------------------- 
/roles/cdh5-spark-base/templates/metrics.properties.template.j2: -------------------------------------------------------------------------------- 1 | # syntax: [instance].sink|source.[name].[options]=[value] 2 | 3 | # This file configures Spark's internal metrics system. The metrics system is 4 | # divided into instances which correspond to internal components. 5 | # Each instance can be configured to report its metrics to one or more sinks. 6 | # Accepted values for [instance] are "master", "worker", "executor", "driver", 7 | # and "applications". A wild card "*" can be used as an instance name, in 8 | # which case all instances will inherit the supplied property. 9 | # 10 | # Within an instance, a "source" specifies a particular set of grouped metrics. 11 | # there are two kinds of sources: 12 | # 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will 13 | # collect a Spark component's internal state. Each instance is paired with a 14 | # Spark source that is added automatically. 15 | # 2. Common sources, like JvmSource, which will collect low level state. 16 | # These can be added through configuration options and are then loaded 17 | # using reflection. 18 | # 19 | # A "sink" specifies where metrics are delivered to. Each instance can be 20 | # assigned one or more sinks. 21 | # 22 | # The sink|source field specifies whether the property relates to a sink or 23 | # source. 24 | # 25 | # The [name] field specifies the name of source or sink. 26 | # 27 | # The [options] field is the specific property of this source or sink. The 28 | # source or sink is responsible for parsing this property. 29 | # 30 | # Notes: 31 | # 1. To add a new sink, set the "class" option to a fully qualified class 32 | # name (see examples below). 33 | # 2. Some sinks involve a polling period. The minimum allowed polling period 34 | # is 1 second. 35 | # 3. Wild card properties can be overridden by more specific properties. 
36 | # For example, master.sink.console.period takes precedence over 37 | # *.sink.console.period. 38 | # 4. A metrics specific configuration 39 | # "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be 40 | # added to Java properties using -Dspark.metrics.conf=xxx if you want to 41 | # customize metrics system. You can also put the file in ${SPARK_HOME}/conf 42 | # and it will be loaded automatically. 43 | # 5. MetricsServlet is added by default as a sink in master, worker and client 44 | # driver, you can send http request "/metrics/json" to get a snapshot of all the 45 | # registered metrics in json format. For master, requests "/metrics/master/json" and 46 | # "/metrics/applications/json" can be sent separately to get metrics snapshot of 47 | # instance master and applications. MetricsServlet may not be configured by self. 48 | # 49 | 50 | ## List of available sinks and their properties. 51 | 52 | # org.apache.spark.metrics.sink.ConsoleSink 53 | # Name: Default: Description: 54 | # period 10 Poll period 55 | # unit seconds Units of poll period 56 | 57 | # org.apache.spark.metrics.sink.CSVSink 58 | # Name: Default: Description: 59 | # period 10 Poll period 60 | # unit seconds Units of poll period 61 | # directory /tmp Where to store CSV files 62 | 63 | # org.apache.spark.metrics.sink.GangliaSink 64 | # Name: Default: Description: 65 | # host NONE Hostname or multicast group of Ganglia server 66 | # port NONE Port of Ganglia server(s) 67 | # period 10 Poll period 68 | # unit seconds Units of poll period 69 | # ttl 1 TTL of messages sent by Ganglia 70 | # mode multicast Ganglia network mode ('unicast' or 'multicast') 71 | 72 | # org.apache.spark.metrics.sink.JmxSink 73 | 74 | # org.apache.spark.metrics.sink.MetricsServlet 75 | # Name: Default: Description: 76 | # path VARIES* Path prefix from the web server root 77 | # sample false Whether to show entire set of samples for histograms ('false' or 'true') 78 | # 79 | # * Default path is /metrics/json
for all instances except the master. The master has two paths: 80 | # /metrics/applications/json # App information 81 | # /metrics/master/json # Master information 82 | 83 | # org.apache.spark.metrics.sink.GraphiteSink 84 | # Name: Default: Description: 85 | # host NONE Hostname of Graphite server 86 | # port NONE Port of Graphite server 87 | # period 10 Poll period 88 | # unit seconds Units of poll period 89 | # prefix EMPTY STRING Prefix to prepend to metric name 90 | 91 | ## Examples 92 | # Enable JmxSink for all instances by class name 93 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 94 | 95 | # Enable ConsoleSink for all instances by class name 96 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink 97 | 98 | # Polling period for ConsoleSink 99 | #*.sink.console.period=10 100 | 101 | #*.sink.console.unit=seconds 102 | 103 | # Master instance overlap polling period 104 | #master.sink.console.period=15 105 | 106 | #master.sink.console.unit=seconds 107 | 108 | # Enable CsvSink for all instances 109 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink 110 | 111 | # Polling period for CsvSink 112 | #*.sink.csv.period=1 113 | 114 | #*.sink.csv.unit=minutes 115 | 116 | # Polling directory for CsvSink 117 | #*.sink.csv.directory=/tmp/ 118 | 119 | # Worker instance overlap polling period 120 | #worker.sink.csv.period=10 121 | 122 | #worker.sink.csv.unit=minutes 123 | 124 | # Enable jvm source for instance master, worker, driver and executor 125 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 126 | 127 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 128 | 129 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 130 | 131 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 132 | 133 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hdfs-site.xml.j2:
-------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | dfs.nameservices 24 | {{ nameservice_id }} 25 | 26 | 27 | 28 | 29 | dfs.replication 30 | {{ dfs_replication }} 31 | 32 | 33 | 34 | 35 | dfs.namenode.name.dir 36 | {{ dfs_namenode_name_dir | join(',') }} 37 | 38 | 39 | 40 | dfs.datanode.data.dir 41 | {{ dfs_datanode_data_dir | join(',') }} 42 | 43 | 44 | 45 | 46 | dfs.permissions.superusergroup 47 | {{ dfs_permissions_superusergroup }} 48 | 49 | 50 | 51 | 52 | dfs.permissions.enabled 53 | {{ dfs_permissions_enabled }} 54 | 55 | 56 | 57 | 58 | dfs.ha.namenodes.{{ nameservice_id }} 59 | {{ groups['cdh5-namenode'] | join(',') }} 60 | 61 | 62 | {% for host in groups['cdh5-namenode'] %} 63 | 64 | dfs.namenode.rpc-address.{{ nameservice_id }}.{{ host }} 65 | {{ host }}.{{ tl_domain }}:8020 66 | 67 | {% endfor %} 68 | 69 | {% for host in groups['cdh5-namenode'] %} 70 | 71 | dfs.namenode.http-address.{{ nameservice_id }}.{{ host }} 72 | {{ host }}.{{ tl_domain }}:50070 73 | 74 | {% endfor %} 75 | 76 | 77 | 78 | dfs.namenode.shared.edits.dir 79 | qjournal://{{ groups['cdh5-journalnode'] | join('.' ~ tl_domain + ':8485' + ';') }}.{{ tl_domain }}:8485/{{ nameservice_id }} 80 | 81 | 82 | 83 | dfs.journalnode.edits.dir 84 | {{ dfs_journalnode_edits_dir }} 85 | 86 | 87 | 88 | 89 | dfs.client.failover.proxy.provider.{{ nameservice_id }} 90 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 91 | 92 | 93 | 94 | 95 | dfs.ha.fencing.methods 96 | shell(/bin/true) 97 | 98 | 99 | 100 | 101 | dfs.ha.automatic-failover.enabled 102 | true 103 | 104 | 105 | ha.zookeeper.quorum 106 | {{ groups['cdh5-zookeeperserver'] | join('.' 
~ tl_domain + ':2181' + ',') }}.{{ tl_domain }}:2181 107 | 108 | 109 | 110 | 111 | dfs.blocksize 112 | {{ dfs_blocksize }} 113 | 114 | 115 | 116 | 117 | dfs.namenode.handler.count 118 | {{ dfs_namenode_handler_count }} 119 | 120 | 121 | 122 | dfs.datanode.handler.count 123 | {{ dfs_datanode_handler_count }} 124 | 125 | 126 | 127 | 128 | dfs.datanode.du.reserved 129 | {{ dfs_datanode_du_reserved }} 130 | 131 | 132 | 133 | 134 | dfs.balance.bandwidthPerSec 135 | {{ dfs_balance_bandwidthPerSec }} 136 | 137 | 138 | 139 | 140 | dfs.hosts.exclude 141 | {{ dfs_hosts_exclude }} 142 | 143 | 144 | 145 | 146 | dfs.datanode.max.transfer.threads 147 | {{ dfs_datanode_max_transfer_threads }} 148 | 149 | 150 | 151 | 152 | dfs.datanode.fsdataset.volume.choosing.policy 153 | org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy 154 | 155 | 156 | dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold 157 | {{ dfs_datanode_balanced_space_threshold }} 158 | 159 | 160 | dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction 161 | {{ dfs_datanode_balanced_space_preference_fraction }} 162 | 163 | 164 | 165 | 166 | dfs.datanode.max.xcievers 167 | {{ dfs_datanode_max_xcievers }} 168 | 169 | 170 | 171 | 172 | dfs.webhdfs.enabled 173 | true 174 | 175 | 176 | 177 | dfs.checksum.type 178 | {{ dfs_checksum_type }} 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hbase/hbase-env.sh.j2: -------------------------------------------------------------------------------- 1 | # 2 | #/** 3 | # * Copyright 2007 The Apache Software Foundation 4 | # * 5 | # * Licensed to the Apache Software Foundation (ASF) under one 6 | # * or more contributor license agreements. See the NOTICE file 7 | # * distributed with this work for additional information 8 | # * regarding copyright ownership. 
The ASF licenses this file 9 | # * to you under the Apache License, Version 2.0 (the 10 | # * "License"); you may not use this file except in compliance 11 | # * with the License. You may obtain a copy of the License at 12 | # * 13 | # * http://www.apache.org/licenses/LICENSE-2.0 14 | # * 15 | # * Unless required by applicable law or agreed to in writing, software 16 | # * distributed under the License is distributed on an "AS IS" BASIS, 17 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # * See the License for the specific language governing permissions and 19 | # * limitations under the License. 20 | # */ 21 | 22 | # Set environment variables here. 23 | 24 | # This script sets variables multiple times over the course of starting an hbase process, 25 | # so try to keep things idempotent unless you want to take an even deeper look 26 | # into the startup scripts (bin/hbase, etc.) 27 | 28 | # The java implementation to use. Java 1.6 required. 29 | # export JAVA_HOME=/usr/java/jdk1.6.0/ 30 | 31 | # Extra Java CLASSPATH elements. Optional. 32 | # export HBASE_CLASSPATH= 33 | 34 | # The maximum amount of heap to use, in MB. Default is 1000. 35 | # export HBASE_HEAPSIZE=1000 36 | 37 | # Extra Java runtime options. 38 | # Below are what we set by default. May only work with SUN JVM. 39 | # For more on why as well as other possible settings, 40 | # see http://wiki.apache.org/hadoop/PerformanceTuning 41 | export HBASE_OPTS="-XX:+UseConcMarkSweepGC" 42 | 43 | # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes. 44 | 45 | # This enables basic gc logging to the .out file. 46 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 47 | 48 | # This enables basic gc logging to its own file. 49 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 
50 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 51 | 52 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 53 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 54 | # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 55 | 56 | # Uncomment one of the below three options to enable java garbage collection logging for the client processes. 57 | 58 | # This enables basic gc logging to the .out file. 59 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" 60 | 61 | # This enables basic gc logging to its own file. 62 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 63 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" 64 | 65 | # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. 66 | # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . 67 | # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" 68 | 69 | # Uncomment below if you intend to use the EXPERIMENTAL off heap cache. 70 | # export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize=" 71 | # Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value. 72 | 73 | 74 | # Uncomment and adjust to enable JMX exporting 75 | # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. 
76 | # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html 77 | # 78 | # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" 79 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101" 80 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" 81 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" 82 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" 83 | # export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105" 84 | 85 | # File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. 86 | # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers 87 | 88 | # Uncomment and adjust to keep all the Region Server pages mapped to be memory resident 89 | #HBASE_REGIONSERVER_MLOCK=true 90 | #HBASE_REGIONSERVER_UID="hbase" 91 | 92 | # File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default. 93 | # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters 94 | 95 | # Extra ssh options. Empty by default. 96 | # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" 97 | 98 | # Where log files are stored. $HBASE_HOME/logs by default. 99 | # export HBASE_LOG_DIR=${HBASE_HOME}/logs 100 | 101 | # Enable remote JDWP debugging of major HBase processes. 
Meant for Core Developers 102 | # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070" 103 | # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071" 104 | # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072" 105 | # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073" 106 | 107 | # A string representing this instance of hbase. $USER by default. 108 | # export HBASE_IDENT_STRING=$USER 109 | 110 | # The scheduling priority for daemon processes. See 'man nice'. 111 | # export HBASE_NICENESS=10 112 | 113 | # The directory where pid files are stored. /tmp by default. 114 | # export HBASE_PID_DIR=/var/hadoop/pids 115 | 116 | # Seconds to sleep between slave commands. Unset by default. This 117 | # can be useful in large clusters, where, e.g., slave rsyncs can 118 | # otherwise arrive faster than the master can service them. 119 | # export HBASE_SLAVE_SLEEP=0.1 120 | 121 | # Tell HBase whether it should manage it's own instance of Zookeeper or not. 122 | # export HBASE_MANAGES_ZK=true 123 | 124 | # The default log rolling policy is RFA, where the log file is rolled as per the size defined for the 125 | # RFA appender. Please refer to the log4j.properties file to see more details on this appender. 126 | # In case one needs to do log rolling on a date change, one should set the environment property 127 | # HBASE_ROOT_LOGGER to ",DRFA". 128 | # For example: 129 | # HBASE_ROOT_LOGGER=INFO,DRFA 130 | # The reason for changing default to RFA is to avoid the boundary case of filling out disk space as 131 | # DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context. 
132 | -------------------------------------------------------------------------------- /roles/cdh5-oozie/templates/oozie-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 25 | 26 | 27 | oozie.service.ActionService.executor.ext.classes 28 | 29 | org.apache.oozie.action.email.EmailActionExecutor, 30 | org.apache.oozie.action.hadoop.HiveActionExecutor, 31 | org.apache.oozie.action.hadoop.ShellActionExecutor, 32 | org.apache.oozie.action.hadoop.SqoopActionExecutor, 33 | org.apache.oozie.action.hadoop.DistcpActionExecutor 34 | 35 | 36 | 37 | 38 | oozie.service.SchemaService.wf.ext.schemas 39 | 40 | shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd, 41 | hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd, 42 | sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd, 43 | oozie-sla-0.1.xsd,oozie-sla-0.2.xsd 44 | 45 | 46 | 47 | 48 | oozie.system.id 49 | oozie-${user.name} 50 | 51 | 52 | 53 | oozie.systemmode 54 | NORMAL 55 | 56 | 57 | oozie.service.AuthorizationService.security.enabled 58 | false 59 | 60 | 61 | 62 | oozie.service.PurgeService.older.than 63 | 30 64 | 65 | 66 | oozie.service.PurgeService.purge.interval 67 | 3600 68 | 69 | 70 | oozie.service.CallableQueueService.queue.size 71 | 10000 72 | 73 | 74 | oozie.service.CallableQueueService.threads 75 | 10 76 | 77 | 78 | oozie.service.CallableQueueService.callable.concurrency 79 | 3 80 | 81 | 82 | oozie.service.coord.normal.default.timeout 83 | 84 | 120 85 | 86 | 87 | 88 | oozie.db.schema.name 89 | oozie 90 | 91 | 92 | oozie.service.JPAService.create.db.schema 93 | true 94 | 95 | 96 | 97 | oozie.service.JPAService.jdbc.driver 98 | com.mysql.jdbc.Driver 99 | 100 | 101 | oozie.service.JPAService.jdbc.url 102 | jdbc:mysql://localhost:3306/oozie 103 | 104 | 105 | oozie.service.JPAService.jdbc.username 106 
| oozie 107 | 108 | 109 | oozie.service.JPAService.jdbc.password 110 | {{ oozie_db_passwd }} 111 | 112 | 113 | 114 | oozie.service.JPAService.pool.max.active.conn 115 | 10 116 | 117 | 118 | oozie.service.HadoopAccessorService.kerberos.enabled 119 | false 120 | 121 | 122 | local.realm 123 | LOCALHOST 124 | 125 | 126 | 127 | oozie.service.HadoopAccessorService.keytab.file 128 | ${user.home}/oozie.keytab 129 | 130 | 131 | oozie.service.HadoopAccessorService.kerberos.principal 132 | ${user.name}/localhost@${local.realm} 133 | 134 | 135 | oozie.service.HadoopAccessorService.jobTracker.whitelist 136 | 137 | 138 | 139 | oozie.service.HadoopAccessorService.nameNode.whitelist 140 | 141 | 142 | 143 | 144 | oozie.service.HadoopAccessorService.hadoop.configurations 145 | *=/etc/hadoop/conf 146 | 147 | 148 | oozie.service.WorkflowAppService.system.libpath 149 | /user/${user.name}/share/lib 150 | 151 | 152 | 153 | use.system.libpath.for.mapreduce.and.pig.jobs 154 | false 155 | 156 | 157 | 158 | oozie.authentication.type 159 | simple 160 | 161 | 162 | oozie.authentication.token.validity 163 | 36000 164 | 165 | 166 | oozie.authentication.signature.secret 167 | oozie 168 | 169 | 170 | 171 | oozie.authentication.cookie.domain 172 | 173 | 174 | 175 | 176 | oozie.authentication.simple.anonymous.allowed 177 | true 178 | 179 | 180 | 181 | oozie.authentication.kerberos.principal 182 | HTTP/localhost@${local.realm} 183 | 184 | 185 | oozie.authentication.kerberos.keytab 186 | ${oozie.service.HadoopAccessorService.keytab.file} 187 | 188 | 189 | oozie.authentication.kerberos.name.rules 190 | DEFAULT 191 | 192 | 193 | 194 | 195 | 207 | 208 | 209 | 210 | 211 | oozie.service.ProxyUserService.proxyuser.hue.hosts 212 | * 213 | 214 | 215 | oozie.service.ProxyUserService.proxyuser.hue.groups 216 | * 217 | 218 | 219 | 220 | oozie.action.mapreduce.uber.jar.enable 221 | true 222 | 223 | 224 | oozie.service.HadoopAccessorService.supported.filesystems 225 | hdfs,viewfs 226 | 227 | 228 | 
-------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/hadoop-policy.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 24 | 25 | 26 | 27 | 28 | 29 | security.client.protocol.acl 30 | * 31 | ACL for ClientProtocol, which is used by user code 32 | via the DistributedFileSystem. 33 | The ACL is a comma-separated list of user and group names. The user and 34 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 35 | A special value of "*" means all users are allowed. 36 | 37 | 38 | 39 | security.client.datanode.protocol.acl 40 | * 41 | ACL for ClientDatanodeProtocol, the client-to-datanode protocol 42 | for block recovery. 43 | The ACL is a comma-separated list of user and group names. The user and 44 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 45 | A special value of "*" means all users are allowed. 46 | 47 | 48 | 49 | security.datanode.protocol.acl 50 | * 51 | ACL for DatanodeProtocol, which is used by datanodes to 52 | communicate with the namenode. 53 | The ACL is a comma-separated list of user and group names. The user and 54 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 55 | A special value of "*" means all users are allowed. 56 | 57 | 58 | 59 | security.inter.datanode.protocol.acl 60 | * 61 | ACL for InterDatanodeProtocol, the inter-datanode protocol 62 | for updating generation timestamp. 63 | The ACL is a comma-separated list of user and group names. The user and 64 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 65 | A special value of "*" means all users are allowed. 66 | 67 | 68 | 69 | security.namenode.protocol.acl 70 | * 71 | ACL for NamenodeProtocol, the protocol used by the secondary 72 | namenode to communicate with the namenode. 73 | The ACL is a comma-separated list of user and group names. The user and 74 | group list is separated by a blank. 
For e.g. "alice,bob users,wheel". 75 | A special value of "*" means all users are allowed. 76 | 77 | 78 | 79 | security.admin.operations.protocol.acl 80 | * 81 | ACL for AdminOperationsProtocol. Used for admin commands. 82 | The ACL is a comma-separated list of user and group names. The user and 83 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 84 | A special value of "*" means all users are allowed. 85 | 86 | 87 | 88 | security.refresh.usertogroups.mappings.protocol.acl 89 | * 90 | ACL for RefreshUserMappingsProtocol. Used to refresh 91 | users mappings. The ACL is a comma-separated list of user and 92 | group names. The user and group list is separated by a blank. For 93 | e.g. "alice,bob users,wheel". A special value of "*" means all 94 | users are allowed. 95 | 96 | 97 | 98 | security.refresh.policy.protocol.acl 99 | * 100 | ACL for RefreshAuthorizationPolicyProtocol, used by the 101 | dfsadmin and mradmin commands to refresh the security policy in-effect. 102 | The ACL is a comma-separated list of user and group names. The user and 103 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 104 | A special value of "*" means all users are allowed. 105 | 106 | 107 | 108 | security.ha.service.protocol.acl 109 | * 110 | ACL for HAService protocol used by HAAdmin to manage the 111 | active and stand-by states of namenode. 112 | 113 | 114 | 115 | security.zkfc.protocol.acl 116 | * 117 | ACL for access to the ZK Failover Controller 118 | 119 | 120 | 121 | 122 | security.qjournal.service.protocol.acl 123 | * 124 | ACL for QJournalProtocol, used by the NN to communicate with 125 | JNs when using the QuorumJournalManager for edit logs. 126 | 127 | 128 | 129 | security.mrhs.client.protocol.acl 130 | * 131 | ACL for HSClientProtocol, used by job clients to 132 | communciate with the MR History Server job status etc. 133 | The ACL is a comma-separated list of user and group names. 
The user and 134 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 135 | A special value of "*" means all users are allowed. 136 | 137 | 138 | 139 | 140 | 141 | security.resourcetracker.protocol.acl 142 | * 143 | ACL for ResourceTrackerProtocol, used by the 144 | ResourceManager and NodeManager to communicate with each other. 145 | The ACL is a comma-separated list of user and group names. The user and 146 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 147 | A special value of "*" means all users are allowed. 148 | 149 | 150 | 151 | security.resourcemanager-administration.protocol.acl 152 | * 153 | ACL for ResourceManagerAdministrationProtocol, for admin commands. 154 | The ACL is a comma-separated list of user and group names. The user and 155 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 156 | A special value of "*" means all users are allowed. 157 | 158 | 159 | 160 | security.applicationclient.protocol.acl 161 | * 162 | ACL for ApplicationClientProtocol, used by the ResourceManager 163 | and applications submission clients to communicate with each other. 164 | The ACL is a comma-separated list of user and group names. The user and 165 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 166 | A special value of "*" means all users are allowed. 167 | 168 | 169 | 170 | security.applicationmaster.protocol.acl 171 | * 172 | ACL for ApplicationMasterProtocol, used by the ResourceManager 173 | and ApplicationMasters to communicate with each other. 174 | The ACL is a comma-separated list of user and group names. The user and 175 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 176 | A special value of "*" means all users are allowed. 177 | 178 | 179 | 180 | security.containermanagement.protocol.acl 181 | * 182 | ACL for ContainerManagementProtocol protocol, used by the NodeManager 183 | and ApplicationMasters to communicate with each other. 
184 | The ACL is a comma-separated list of user and group names. The user and 185 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 186 | A special value of "*" means all users are allowed. 187 | 188 | 189 | 190 | security.resourcelocalizer.protocol.acl 191 | * 192 | ACL for ResourceLocalizer protocol, used by the NodeManager 193 | and ResourceLocalizer to communicate with each other. 194 | The ACL is a comma-separated list of user and group names. The user and 195 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 196 | A special value of "*" means all users are allowed. 197 | 198 | 199 | 200 | security.job.task.protocol.acl 201 | * 202 | ACL for TaskUmbilicalProtocol, used by the map and reduce 203 | tasks to communicate with the parent tasktracker. 204 | The ACL is a comma-separated list of user and group names. The user and 205 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 206 | A special value of "*" means all users are allowed. 207 | 208 | 209 | 210 | security.job.client.protocol.acl 211 | * 212 | ACL for MRClientProtocol, used by job clients to 213 | communciate with the MR ApplicationMaster to query job status etc. 214 | The ACL is a comma-separated list of user and group names. The user and 215 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 216 | A special value of "*" means all users are allowed. 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /roles/cdh5-base/templates/hadoop/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Copyright 2011 The Apache Software Foundation 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Define some default values that can be overridden by system properties 20 | hadoop.root.logger=INFO,console 21 | hadoop.log.dir=. 22 | hadoop.log.file=hadoop.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshold=ALL 29 | 30 | # Null Appender 31 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 32 | 33 | # 34 | # Rolling File Appender - cap space usage at 5gb. 
35 | # 36 | hadoop.log.maxfilesize=256MB 37 | hadoop.log.maxbackupindex=20 38 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 39 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 40 | 41 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} 42 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} 43 | 44 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 45 | 46 | # Pattern format: Date LogLevel LoggerName LogMessage 47 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 48 | # Debugging Pattern format 49 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 50 | 51 | 52 | # 53 | # Daily Rolling File Appender 54 | # 55 | 56 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 57 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 58 | 59 | # Rollver at midnight 60 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 61 | 62 | # 30-day backup 63 | #log4j.appender.DRFA.MaxBackupIndex=30 64 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 65 | 66 | # Pattern format: Date LogLevel LoggerName LogMessage 67 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 68 | # Debugging Pattern format 69 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 70 | 71 | 72 | # 73 | # console 74 | # Add "console" to rootlogger above if you want to use this 75 | # 76 | 77 | log4j.appender.console=org.apache.log4j.ConsoleAppender 78 | log4j.appender.console.target=System.err 79 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 80 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 81 | 82 | # 83 | # TaskLog Appender 84 | # 85 | 86 | #Default values 87 | hadoop.tasklog.taskid=null 88 | hadoop.tasklog.iscleanup=false 89 | hadoop.tasklog.noKeepSplits=4 90 | hadoop.tasklog.totalLogFileSize=100 91 | hadoop.tasklog.purgeLogSplits=true 92 | 
hadoop.tasklog.logsRetainHours=12 93 | 94 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 95 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} 96 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} 97 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} 98 | 99 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout 100 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 101 | 102 | # 103 | # HDFS block state change log from block manager 104 | # 105 | # Uncomment the following to suppress normal block state change 106 | # messages from BlockManager in NameNode. 107 | #log4j.logger.BlockStateChange=WARN 108 | 109 | # 110 | #Security appender 111 | # 112 | hadoop.security.logger=INFO,NullAppender 113 | hadoop.security.log.maxfilesize=256MB 114 | hadoop.security.log.maxbackupindex=20 115 | log4j.category.SecurityLogger=${hadoop.security.logger} 116 | hadoop.security.log.file=SecurityAuth-${user.name}.audit 117 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 118 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 119 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 120 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 121 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} 122 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} 123 | 124 | # 125 | # Daily Rolling Security appender 126 | # 127 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 128 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 129 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 130 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 131 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 132 | 133 | # 134 | # hadoop configuration logging 135 | # 136 | 137 | # Uncomment the following line to turn off configuration deprecation warnings. 
138 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN 139 | 140 | # 141 | # hdfs audit logging 142 | # 143 | hdfs.audit.logger=INFO,NullAppender 144 | hdfs.audit.log.maxfilesize=256MB 145 | hdfs.audit.log.maxbackupindex=20 146 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 147 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 148 | log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender 149 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log 150 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout 151 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 152 | log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} 153 | log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} 154 | 155 | # 156 | # mapred audit logging 157 | # 158 | mapred.audit.logger=INFO,NullAppender 159 | mapred.audit.log.maxfilesize=256MB 160 | mapred.audit.log.maxbackupindex=20 161 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} 162 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false 163 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender 164 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log 165 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout 166 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 167 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} 168 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} 169 | 170 | # Custom Logging levels 171 | 172 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG 173 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG 174 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG 175 | 176 | # Jets3t library 177 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 178 | 179 | # 180 | 
# Event Counter Appender 181 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 182 | # 183 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 184 | 185 | # 186 | # Job Summary Appender 187 | # 188 | # Use following logger to send summary to separate file defined by 189 | # hadoop.mapreduce.jobsummary.log.file : 190 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA 191 | # 192 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} 193 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log 194 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB 195 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20 196 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender 197 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} 198 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} 199 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} 200 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout 201 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 202 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} 203 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false 204 | 205 | # 206 | # Yarn ResourceManager Application Summary Log 207 | # 208 | # Set the ResourceManager summary log filename 209 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log 210 | # Set the ResourceManager summary log level and appender 211 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} 212 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY 213 | 214 | # To enable AppSummaryLogging for the RM, 215 | # set yarn.server.resourcemanager.appsummary.logger to 216 | # ,RMSUMMARY in hadoop-env.sh 217 | 218 | # Appender for ResourceManager Application Summary Log 219 | # Requires the following 
properties to be set 220 | # - hadoop.log.dir (Hadoop Log directory) 221 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) 222 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) 223 | 224 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} 225 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false 226 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender 227 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} 228 | log4j.appender.RMSUMMARY.MaxFileSize=256MB 229 | log4j.appender.RMSUMMARY.MaxBackupIndex=20 230 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout 231 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 232 | 233 | # HS audit log configs 234 | #mapreduce.hs.audit.logger=INFO,HSAUDIT 235 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger} 236 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false 237 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender 238 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log 239 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout 240 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 241 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd 242 | 243 | # Http Server Request Logs 244 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog 245 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 246 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log 247 | #log4j.appender.namenoderequestlog.RetainDays=3 248 | 249 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog 250 | 
#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 251 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log 252 | #log4j.appender.datanoderequestlog.RetainDays=3 253 | 254 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog 255 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 256 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log 257 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3 258 | 259 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog 260 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 261 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log 262 | #log4j.appender.jobhistoryrequestlog.RetainDays=3 263 | 264 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog 265 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 266 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log 267 | #log4j.appender.nodemanagerrequestlog.RetainDays=3 268 | --------------------------------------------------------------------------------