├── .gitignore
├── LICENSE
├── README.md
├── dashboard.demo.png
├── group_vars
    ├── all
    ├── hadoop
    ├── impala-store-catalog
    ├── kafka
    └── oozie
├── hosts
├── meta
    └── main.yml
├── roles
    ├── check_config
    │   └── tasks
    │   │   └── main.yaml
    ├── common
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   ├── bigtop-utils.j2
    │   │   ├── interfaces.j2
    │   │   └── services.xml.j2
    ├── dashboard
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   └── dashboard.j2
    ├── hadoop
    │   ├── files
    │   │   ├── capacity-scheduler.xml
    │   │   ├── configuration.xsl
    │   │   ├── container-executor.cfg
    │   │   ├── default
    │   │   │   ├── hadoop
    │   │   │   ├── hadoop-0.20-mapreduce
    │   │   │   ├── hadoop-httpfs
    │   │   │   ├── hadoop-mapreduce-historyserver
    │   │   │   └── hadoop-yarn-resourcemanager
    │   │   ├── dfs.exclude
    │   │   ├── hadoop-metrics.properties
    │   │   ├── hadoop-metrics2.properties
    │   │   ├── hadoop-policy.xml
    │   │   └── log4j.properties
    │   ├── tasks
    │   │   ├── base.yaml
    │   │   ├── datanode.yaml
    │   │   ├── journalnode.yaml
    │   │   ├── main.yaml
    │   │   ├── namenode.yaml
    │   │   ├── namenodes-fence.yaml
    │   │   ├── resourcemanager.yaml
    │   │   ├── test-hdfs.yaml
    │   │   └── test-mapreduce.yaml
    │   └── templates
    │   │   ├── bin
    │   │       └── hdfs-ready.sh
    │   │   ├── core-site.xml.j2
    │   │   ├── default
    │   │       ├── hadoop-hdfs-datanode.j2
    │   │       ├── hadoop-hdfs-journalnode.j2
    │   │       ├── hadoop-hdfs-namenode.j2
    │   │       ├── hadoop-hdfs-zkfc.j2
    │   │       └── hadoop-yarn-nodemanager.j2
    │   │   ├── fair-scheduler.xml.j2
    │   │   ├── hadoop-env.sh.j2
    │   │   ├── hdfs-site.xml.j2
    │   │   ├── mapred-env.sh.j2
    │   │   ├── mapred-site.xml.j2
    │   │   ├── yarn-env.sh.j2
    │   │   └── yarn-site.xml.j2
    ├── hbase
    │   ├── files
    │   │   ├── default
    │   │   │   └── hbase
    │   │   ├── hadoop-metrics2-hbase.properties
    │   │   ├── hbase-env.cmd
    │   │   ├── hbase-env.sh
    │   │   ├── hbase-policy.xml
    │   │   └── log4j.properties
    │   ├── tasks
    │   │   ├── hbase-master.yaml
    │   │   ├── main.yaml
    │   │   └── regionserver.yaml
    │   └── templates
    │   │   ├── hbase-site.xml.j2
    │   │   └── regionservers.j2
    ├── hivemetastore
    │   ├── files
    │   │   ├── default
    │   │   │   ├── hadoop-0.20-mapreduce
    │   │   │   ├── hive-metastore
    │   │   │   └── hive-server2
    │   │   ├── hive-exec-log4j.properties
    │   │   ├── hive-log4j.properties
    │   │   └── hive.limits.conf
    │   ├── tasks
    │   │   ├── hive-client.yaml
    │   │   ├── hive-server.yaml
    │   │   └── main.yaml
    │   └── templates
    │   │   ├── .pgpass.j2
    │   │   ├── hive-env.sh.j2
    │   │   ├── hive-site.xml.j2
    │   │   └── hive.sql.j2
    ├── hue
    │   ├── files
    │   │   ├── default
    │   │   │   └── hadoop-httpfs
    │   │   ├── log.conf
    │   │   └── log4j.properties
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   ├── .pgpass.j2
    │   │   ├── hue.ini.j2
    │   │   └── hue.sql.j2
    ├── impala
    │   ├── tasks
    │   │   ├── impala-server.yaml
    │   │   ├── impala.yaml
    │   │   └── main.yaml
    │   └── templates
    │   │   ├── core-site.xml.j2
    │   │   ├── hdfs-site.xml.j2
    │   │   ├── hive-site.xml.j2
    │   │   └── impala.j2
    ├── kafka
    │   ├── files
    │   │   ├── connect-console-sink.properties
    │   │   ├── connect-console-source.properties
    │   │   ├── connect-distributed.properties
    │   │   ├── connect-file-sink.properties
    │   │   ├── connect-file-source.properties
    │   │   ├── connect-log4j.properties
    │   │   ├── connect-standalone.properties
    │   │   ├── default
    │   │   │   └── kafka
    │   │   ├── log4j.properties
    │   │   └── tools-log4j.properties
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   └── server.properties.j2
    ├── oozie
    │   ├── files
    │   │   ├── action-conf
    │   │   │   ├── email.xml
    │   │   │   ├── fs.xml
    │   │   │   ├── hive.xml
    │   │   │   ├── shell.xml
    │   │   │   ├── sqoop.xml
    │   │   │   ├── ssh.xml
    │   │   │   └── sub-workflow.xml
    │   │   ├── adminusers.txt
    │   │   ├── hadoop-conf
    │   │   │   └── core-site.xml
    │   │   ├── hadoop-config.xml
    │   │   ├── oozie-default.xml
    │   │   └── oozie-log4j.properties
    │   ├── tasks
    │   │   ├── main.yaml
    │   │   └── oozie-test.yaml
    │   └── templates
    │   │   ├── .pgpass.j2
    │   │   ├── oozie-env.sh.j2
    │   │   ├── oozie-site.xml.j2
    │   │   └── oozie.sql.j2
    ├── postgresql
    │   ├── files
    │   │   ├── pg_hba.conf
    │   │   └── postgresql.conf
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   └── userdb.sql.j2
    ├── snmp
    │   ├── files
    │   │   ├── snmpd.conf
    │   │   └── subagent-shell-hadoop-conf.xml
    │   └── tasks
    │   │   └── main.yaml
    ├── solr
    │   ├── files
    │   │   ├── 0
    │   │   ├── 1
    │   │   ├── solr.xml
    │   │   └── zoo.cfg
    │   ├── tasks
    │   │   └── main.yaml
    │   └── templates
    │   │   └── default
    │   │       └── solr.j2
    ├── spark
    │   ├── files
    │   │   ├── fairscheduler.xml.template
    │   │   ├── log4j.properties.template
    │   │   ├── metrics.properties.template
    │   │   ├── slaves.template
    │   │   ├── spark-defaults.conf.template
    │   │   ├── spark-env.sh
    │   │   └── spark-env.sh.template
    │   ├── tasks
    │   │   └── main.yaml
    │   ├── templates
    │   │   ├── default
    │   │   │   └── spark.j2
    │   │   └── spark-defaults.conf.j2
    │   └── vars
    │   │   └── main.yaml
    ├── syslog-ng
    │   ├── files
    │   │   ├── hadoop.pdb
    │   │   ├── hive.pdb
    │   │   ├── impala.pdb
    │   │   └── oozie.pdb
    │   ├── tasks
    │   │   └── main.yaml
    │   ├── templates
    │   │   └── 30-hadoop.conf
    │   └── vars
    │   │   └── main.yaml
    └── zookeeper
    │   ├── files
    │       ├── configuration.xsl
    │       └── log4j.properties
    │   ├── tasks
    │       └── main.yaml
    │   └── templates
    │       ├── default
    │           └── zookeeper.j2
    │       ├── myid.j2
    │       └── zoo.cfg.j2
└── site.yaml


/.gitignore:
--------------------------------------------------------------------------------
1 | workdir/
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Serge Sergeev
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/dashboard.demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/dashboard.demo.png


--------------------------------------------------------------------------------
/group_vars/all:
--------------------------------------------------------------------------------
 1 | # java package name
 2 | # for oracle distributed packages you should use java-<version> as name
 3 | java_package: java-1.8.0
 4 | java_home: /usr/java/jdk1.8.0_65/
 5 | 
 6 | # cluster name will impact the name of alternatives links and hdfs URI in the case of HA
 7 | cluster_name: cluster
 8 | 
 9 | # common folders
10 | log_folder: /var/log
11 | etc_folder: /etc/cluster
12 | 
13 | # the ssh key to access other hosts without password 
14 | # you have to generate it and put public key to the target hosts <effective user>/.ssh/authorized_keys
15 | #
16 | #clinit_ssh_key: /root/.ssh/hadoop.key
17 | 
18 | # clinit effective user
19 | clinit_effective_user: root
20 | 
21 | # the variable to control data destruction. non destructive plays not tested yet
22 | destroy_data: True
23 | 
24 | # zookeeper data directory
25 | zookeeper_data_dir: '/var/lib/zookeeper'
26 | 
27 | # snmp monitoring, change to True to enable
28 | enable_snmp: False
29 | 
30 | # syslog monitoring, change to True to enable
31 | enable_syslog: False
32 | 
33 | # this is a directory for 5 static files forming a dashboard
34 | dashboard_folder: /var/www/html/dashboard
35 | 
36 | # postgresql version
37 | postgres_version: ""
38 | 
39 | # postgres database accounts passwords
40 | #
41 | # postgres administrative account password
42 | postgres_password: postgres
43 | 
44 | # metastore database password
45 | hiveuser_password: mypassword
46 | 
47 | # oozie database password
48 | oozie_password: theoozie
49 | 
50 | # hue database password
51 | hue_password: thehue
52 | 
53 | # a user database name
54 | # if specified, it will be created in postgres and hue will be configured to use it
55 | # uncomment the next line if a user database is required to setup
56 | #user_database: userdb
57 | 
58 | # a password for user database. username will be userdb_user
59 | #userdb_password: myfavoritepassword
60 | 
61 | # a custom sql 
62 | #postgres_script: |
63 | 


--------------------------------------------------------------------------------
/group_vars/hadoop:
--------------------------------------------------------------------------------
 1 | # service heap configuration, MB
 2 | namenode_heapsize: 2048
 3 | datanode_heapsize: 2048
 4 | journalnode_heapsize: 1000
 5 | zkfc_heapsize: 1000
 6 | nodemanager_heapsize: 2048
 7 | 
 8 | # http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_ig_hive_install.html?scroll=concept_alp_4kl_3q_unique_1
 9 | hiveserver2_heapsize: 1024
10 | hivemetastore_heapsize: 2048
11 | hivecli_heapsize: 1024
12 | 
13 | # hadoop directories
14 | #
15 | # name nodes directory
16 | dfs_namenode_name_dir: 'file:///var/lib/hadoop-hdfs/cache/hdfs/dfs/name'
17 | 
18 | # datanodes directory
19 | dfs_datanode_data_dir: 'file:///var/lib/hadoop-hdfs/cache/hdfs/dfs/data'
20 | 
21 | # journal nodes directory
22 | dfs_journalnode_edits_dir: '/var/lib/hadoop-hdfs/cache/hdfs/dfs/journal'
23 | 
24 | # default replication factor
25 | dfs_replication: 1
26 | 
27 | # umask for hdfs, the valued MUST be quoted
28 | fs_permissions_umask: '022'
29 | 
30 | # impala server additional arguments
31 | impala_server_args: ""
32 | 
33 | # yarn directories. A comma seperated lists local to yarn instances
34 | yarn_nodemanager_local_dirs: 'file:///var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir'
35 | yarn_nodemanager_log_dirs: 'file:///var/log/hadoop-yarn/containers'
36 | 
37 | # yarn memory settings in Mb
38 | yarn_nodemanager_resource_memory: 4096
39 | 
40 | # yarn cpu-vcores
41 | yarn_nodemanager_resource_cpu: 10
42 | 


--------------------------------------------------------------------------------
/group_vars/impala-store-catalog:
--------------------------------------------------------------------------------
1 | # impala additional arguments
2 | impala_catalog_args: ""
3 | impala_state_store_args: ""
4 | 


--------------------------------------------------------------------------------
/group_vars/kafka:
--------------------------------------------------------------------------------
1 | log_dirs: /tmp/kafka-logs
2 | num_io_threads: 8
3 | log_retention_hours: 168
4 | auto_create_topics_enable: true
5 | controlled_shutdown_enable: true
6 | delete_topic_enable: true
7 | 


--------------------------------------------------------------------------------
/group_vars/oozie:
--------------------------------------------------------------------------------
 1 | # oozie server heapsize
 2 | oozie_heapsize: 1024
 3 | 
 4 | # oozie launcher pool ( fair-scheduler configuration )
 5 | oozie_launcher_maxapps: 10
 6 | oozie_launcher_weight: 1.0
 7 | 
 8 | # additional oozie plugins
 9 | oozie_ext_classes: []
10 | 
11 | # additional oozie schemas
12 | oozie_ext_schemas: []
13 | 
14 | # additional oozie properties
15 | oozie_ext_properties: []
16 | 


--------------------------------------------------------------------------------
/hosts:
--------------------------------------------------------------------------------
 1 | # at least one is required, 2 allowed
 2 | # if 2, ha configured
 3 | [namenodes]
 4 | 
 5 | # at least one is required
 6 | [datanodes]
 7 | 
 8 | # at least one is required
 9 | # job history server will be also configured on the 1st host
10 | [yarnresourcemanager]
11 | 
12 | # optional
13 | # can be required for other services
14 | # 3 or 5 hosts is required if 2 namenodes configured
15 | [zookeepernodes]
16 | 
17 | # optional
18 | # required if 2 namenodes configured
19 | [journalnodes]
20 | 
21 | # optional
22 | # required if hivemetastore, oozie or hue configured
23 | [postgresql]
24 | 
25 | # optional
26 | # required if impala-store-catalog configured
27 | [hivemetastore]
28 | 
29 | # optional
30 | [impala-store-catalog]
31 | 
32 | # optional
33 | [hbasemaster]
34 | 
35 | # optional
36 | [solr]
37 | 
38 | #optional
39 | [spark]
40 | 
41 | # optional
42 | [oozie]
43 | 
44 | # optional
45 | [kafka]
46 | 
47 | # optional
48 | [hue]
49 | 
50 | # optional. comment this out completely or fill in a host into [dashboard]
51 | #[dashboard]
52 | [dashboard:children]
53 | namenodes
54 | 
55 | # please do not edit the groups below
56 | [hadoop:children]
57 | namenodes
58 | datanodes
59 | journalnodes
60 | yarnresourcemanager
61 | hivemetastore
62 | impala-store-catalog
63 | hbasemaster
64 | solr
65 | spark
66 | oozie
67 | hue
68 | 
69 | [java:children]
70 | hadoop
71 | kafka
72 | zookeepernodes
73 | 


--------------------------------------------------------------------------------
/meta/main.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | galaxy_info:
 3 |   author: Serge Sergeev
 4 |   description: Quick and easy cloudera hadoop components installer.
 5 |   company: No Company
 6 |   license: MIT
 7 |   min_ansible_version: 1.9
 8 |   platforms:
 9 |   - name: EL
10 |     versions:
11 |     - 6
12 |   galaxy_tags:
13 |     - installer
14 |     - cloudera
15 |     - cloudera-hadoop
16 |     - hadoop:hive:hbase:zookeeper:impala:oozie:hue
17 | dependencies: []
18 | 


--------------------------------------------------------------------------------
/roles/check_config/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: check number of namenodes
 2 |   when: groups['namenodes']|count < 1 or groups['namenodes']|count > 2
 3 |   fail: msg="expected number of namenodes is 1 or 2, you have configured {{ groups['namenodes']|count }}"
 4 |   run_once: true
 5 | 
 6 | - name: check number of datanodes
 7 |   when: groups['datanodes']|count < 1
 8 |   fail: msg="expected number of datanodes is more than 0, you have not configured any"
 9 |   run_once: true
10 | 
11 | - name: check number of yarnresourcemanager
12 |   when: groups['yarnresourcemanager']|count < 1
13 |   fail: msg="at least one node is required for yarnresoucemanager you have not configured any"
14 |   run_once: true
15 | 
16 | - name: check number of journal nodes
17 |   when: groups['namenodes']|count > 1 and groups['journalnodes']|count % 2 == 0
18 |   fail: msg="expected number of journalnodes for HA setup is odd and at least 1, you have configured {{ groups['journalnodes']|count }}"
19 |   run_once: true
20 | 
21 | - name: check number of journal nodes
22 |   when: groups['namenodes']|count < 2 and groups['journalnodes']|count > 0
23 |   fail: msg="{{ groups['journalnodes']|count }} journal nodes configured, however you have only 1 namenode, for HA setup at least 2 namenodes is requied"
24 |   run_once: true
25 | 
26 | - name: check number of zookeeper nodes for namenodes
27 |   when: groups['namenodes']|count > 1 and ( groups['zookeepernodes']|count != 3 and groups['zookeepernodes']|count != 5 )
28 |   fail: msg="expected number of zookeeper nodes for HA setup is 3 or 5, you have configured {{ groups['zookeepernodes']|count }}"
29 |   run_once: true
30 | 
31 | - name: check number of zookeeper nodes for resourcemanager
32 |   when: groups['yarnresourcemanager']|count > 1 and (  groups['zookeepernodes']|count != 1 and groups['zookeepernodes']|count != 3 and groups['zookeepernodes']|count != 5 )
33 |   fail: msg="expected number of zookeeper nodes for HA setup is 1 or 3 or 5, you have configured {{ groups['zookeepernodes']|count }}"
34 |   run_once: true
35 | 
36 | - name: check number of hivemetastore nodes
37 |   when: groups['hivemetastore']|count != 0 and groups['hivemetastore']|count != 1
38 |   fail: msg="expected number of hivemetastore nodes is 0 or 1, you have configured {{ groups['hivemetastore']|count }}"
39 |   run_once: true
40 | 
41 | - name: check hivemetastore prerequisites
42 |   when: groups['hivemetastore']|count >  0 and groups['zookeepernodes']|count < 1
43 |   fail: msg="hivemetastore requires at least one zookeepernode, you have not configured any"
44 |   run_once: true
45 | 
46 | - name: check hivemetastore prerequisites
47 |   when: groups['hivemetastore']|count > 0 and groups['postgresql']|count != 1
48 |   fail: msg="hivemetastore requires postgresql node, you have not configured any"
49 |   run_once: true
50 | 
51 | - name: check oozie prerequisites
52 |   when: groups['oozie']|count > 0  and groups['postgresql']|count != 1
53 |   fail: msg="hue requires postgresql node, you have not configured any"
54 |   run_once: true
55 | 
56 | - name: check hue prerequisites
57 |   when: groups['hue']|count > 0  and groups['postgresql']|count != 1
58 |   fail: msg="hue requires postgresql node, you have not configured any"
59 |   run_once: true
60 | 
61 | - name: check hue prerequisites
62 |   when: groups['hue']|count > 0 and groups['oozie']|count < 1
63 |   fail: msg="hue requires oozie node to submit jobs, you have not configured any"
64 |   run_once: true
65 | 
66 | - name: check number of impala-state-store hosts
67 |   when: groups['impala-store-catalog']|count > 1
68 |   fail: msg="expected number of impala-store-catalog is 0 or 1, you have configured {{ groups['impala-store-catalog']|count }}"
69 |   run_once: true
70 | 
71 | - name: check impala prerequisites
72 |   when: groups['impala-store-catalog']|count > 0 and groups['hivemetastore']|count < 1
73 |   fail: msg="impala requires hivemetastore node, you have not configured any"
74 |   run_once: true
75 | 
76 | - name: check number of hbase master hosts
77 |   when: groups['hbasemaster']|count > 1
78 |   fail: msg="expected number of hbasemaster hosts is 0 or 1, you have configured {{ groups['hbasemaster']|count }}"
79 |   run_once: true
80 | 
81 | - name: check hbase prerequisites
82 |   when: groups['hbasemaster']|count > 0 and groups['zookeepernodes']|count < 1
83 |   fail: msg="hbase requires at least one zookeepernode, you have not configured any"
84 |   run_once: true
85 | 
86 | - name: check solr prerequisites
87 |   when: groups['solr']|count > 0 and groups['zookeepernodes']|count < 1
88 |   fail: msg="solr requires at least one zookeepernode, you have not configured any"
89 |   run_once: true
90 | 
91 | - name: check kafka prerequisites
92 |   when: groups['kafka']|count >  0 and groups['zookeepernodes']|count < 1
93 |   fail: msg="kafka requires at least one zookeepernode, you have not configured any"
94 |   run_once: true
95 | 


--------------------------------------------------------------------------------
/roles/common/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{item}} state=latest
 4 |   with_items:
 5 |     - "{{ java_package }}"
 6 |     - bigtop-utils
 7 |     - redhat-lsb-core
 8 | 
 9 | - name: install template configurations
10 |   tags: config
11 |   template: src={{ item }}.j2 dest=/etc/default/{{ item }}
12 |   with_items:
13 |     - bigtop-utils
14 | 
15 | - name: create directories
16 |   tags: config
17 |   # is this a 'dirname' or just regex_replace? dirname would just strip '/' off the end
18 |   file: path="{{ item | regex_replace('[^/]+/*$','') }}" state=directory owner=root group=root mode=755 follow=yes
19 |   with_items:
20 |     - "{{ zookeeper_data_dir }}"
21 |     - "{{ log_folder }}"
22 |     - "{{ etc_folder }}"
23 | 
24 | - name: generate services.xml
25 |   tags:
26 |     - config
27 |     - clinit
28 |   local_action: template src="services.xml.j2" dest={{ inventory_dir }}/workdir/services.xml
29 |   run_once: true
30 | 
31 | - name: generate interfaces index
32 |   tags:
33 |     - config
34 |     - interfaces
35 |   local_action: template src="interfaces.j2" dest={{ inventory_dir }}/workdir/interfaces
36 |   run_once: true
37 | 
38 | 


--------------------------------------------------------------------------------
/roles/common/templates/bigtop-utils.j2:
--------------------------------------------------------------------------------
 1 | 
 2 | # Override JAVA_HOME detection for all bigtop packages
 3 | export JAVA_HOME={{ java_home }}
 4 | 
 5 | # Provide a colon-delimited list of directories to search for native Java libraries (e.g. libjvm.so)
 6 | # export JAVA_NATIVE_PATH
 7 | 
 8 | # Add common dependencies to the classpath (/var/lib/bigtop will already be included)
 9 | # export BIGTOP_CLASSPATH
10 | 


--------------------------------------------------------------------------------
/roles/common/templates/interfaces.j2:
--------------------------------------------------------------------------------
 1 | 
 2 | hdfs namenodes state
 3 | {% for item in groups['namenodes'] %}
 4 |   http://{{ item }}:50070
 5 | {% endfor %}
 6 | 
 7 | yarn resource manager and history server
 8 | {% for item in groups['yarnresourcemanager'] %}
 9 |   http://{{ item }}:8088
10 |   http://{{ item }}:19888
11 | {% endfor %}
12 | 
13 | impala catalog and statestore
14 | {% for item in groups['impala-store-catalog'] %}
15 |   http://{{ item }}:25020
16 |   http://{{ item }}:25010
17 | {% endfor %}
18 | 
19 | datanodes ( also have /logs ) and impala-server
20 | {% for item in groups['datanodes'] %}
21 |   http://{{ item }}:50075
22 |   http://{{ item }}:25000
23 | {% endfor %}
24 | 
25 | oozie
26 | {% for item in groups['oozie'] %}
27 |   http://{{ item }}:11000
28 | {% endfor %}
29 | 
30 | hbase
31 | {% for item in groups['hbasemaster'] %}
32 |   http://{{ item }}:60010
33 | {% endfor %}
34 | 
35 | solr
36 | {% for item in groups['solr'] %}
37 |   http://{{ item }}:8983
38 | {% endfor %}
39 | 
40 | hue 
41 | {% for item in groups['hue'] %}
42 |   http://{{ item }}:8888
43 | {% endfor %}
44 | 
45 | spark job history server
46 | {% if groups['spark']|count >0 %}
47 |   http://{{ groups['spark'][0] }}:18080  
48 | {% endif %}
49 | 


--------------------------------------------------------------------------------
/roles/dashboard/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: ensure httpd is in place for dashboard
 2 |   yum: name=httpd state=present
 3 | 
 4 | - name: create dashboard directory
 5 |   file: path={{ dashboard_folder }} state=directory
 6 | 
 7 | - name: download required css/js libraries
 8 |   local_action: get_url url={{ item }} dest={{ inventory_dir }}/workdir/{{ item|basename }}
 9 |   with_items:
10 |     - https://github.com/twbs/bootstrap/releases/download/v3.3.6/bootstrap-3.3.6-dist.zip
11 |     - https://github.com/jasny/bootstrap/releases/download/v3.1.3/jasny-bootstrap-3.1.3-dist.zip
12 |     - http://code.jquery.com/jquery-1.12.0.min.js
13 |   run_once: true
14 | 
15 | - name: extract required items from twbs
16 |   local_action: command unzip -o {{ inventory_dir }}/workdir/bootstrap-3.3.6-dist.zip bootstrap-3.3.6-dist/css/bootstrap.min.css bootstrap-3.3.6-dist/js/bootstrap.min.js -d {{ inventory_dir }}/workdir/
17 |   run_once: true
18 | 
19 | - name: extract required items from jasny
20 |   local_action: command unzip -o {{ inventory_dir }}/workdir/jasny-bootstrap-3.1.3-dist.zip jasny-bootstrap/css/jasny-bootstrap.min.css jasny-bootstrap/js/jasny-bootstrap.min.js -d {{ inventory_dir }}/workdir/
21 |   run_once: true
22 | 
23 | - name: copy css/js files 
24 |   copy: src={{ item }} dest={{ dashboard_folder }}/{{ item|basename }}
25 |   with_items:
26 |     - "{{ inventory_dir }}/workdir/bootstrap-3.3.6-dist/css/bootstrap.min.css"
27 |     - "{{ inventory_dir }}/workdir/bootstrap-3.3.6-dist/js/bootstrap.min.js"
28 |     - "{{ inventory_dir }}/workdir/jasny-bootstrap/css/jasny-bootstrap.min.css"
29 |     - "{{ inventory_dir }}/workdir/jasny-bootstrap/js/jasny-bootstrap.min.js"
30 | 
31 | - name: copy jquery file 
32 |   copy: src={{ inventory_dir }}/workdir/jquery-1.12.0.min.js dest={{ dashboard_folder }}/jquery.min.js
33 | 
34 | - name: create dashboard page
35 |   template: src=dashboard.j2 dest={{ dashboard_folder }}/index.html
36 | 
37 | - name: start service
38 |   tags: service
39 |   service: name=httpd state=started enabled=yes 
40 | 


--------------------------------------------------------------------------------
/roles/dashboard/templates/dashboard.j2:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head> <title>Dashboard</title>
  4 |     <link href='bootstrap.min.css' rel='stylesheet' media='screen'>
  5 |     <link href="jasny-bootstrap.min.css" rel="stylesheet" media="screen">
  6 | 
  7 |     <style>
  8 | html, body, .container {
  9 |   height: 100%;
 10 |   width: 100%;
 11 | }
 12 | .dash-button {
 13 |   background-color: rgb(98, 128, 93);
 14 | }
 15 | .navmenu-fixed-left {
 16 |   position: absolute;
 17 |   left: 0px;
 18 | }
 19 | 
 20 | iframe {
 21 |   z-index: 2;
 22 | }
 23 | 
 24 | .navbar-toggle {
 25 |   float: left;
 26 |   margin-left: 15px;
 27 | }
 28 | 
 29 | .navmenu {
 30 |   z-index: 1;
 31 | }
 32 | 
 33 | .canvas {
 34 |   position: relative;
 35 |   left: 0;
 36 |   z-index: 2;
 37 |   min-height: 100%;
 38 |   padding: 50px 0 0 0;
 39 |   background: #fff;
 40 | }
 41 | 
 42 | @media (min-width: 0) {
 43 |   .navbar-toggle {
 44 |     display: block; /* force showing the toggle */
 45 |   }
 46 | @media (min-width: 992px) {
 47 |   body {
 48 |     padding: 0;
 49 |   }
 50 |   .navbar {
 51 |     right: auto;
 52 |     background: none;
 53 |     border: none;
 54 |   }
 55 |   .canvas {
 56 |     padding: 0;
 57 |   }
 58 | }
 59 | 
 60 |     </style>
 61 |   </head>
 62 | <body>
 63 | <div class="navmenu navmenu-default navmenu-fixed-left">
 64 |   <a class="navmenu-brand" href="about:blank" target="container">Hadoop dashboard</a>
 65 |   <ul class="nav navmenu-nav">
 66 | {# list of services. Includes:
 67 |    - service display name
 68 |    - service hosts group from inventory
 69 |    - service port
 70 |    - service additional URL (used for eozie only 
 71 | #}
 72 | {% set services = [
 73 |      ('HDFS', 'http', 'namenodes', '50070', '', 'container')
 74 |      ,('Resource Manager', 'http', 'yarnresourcemanager', '8088', '', 'container')
 75 |      ,('Node Manager', 'http', 'datanodes', '8042', '', 'container')
 76 |      ,('Job history', 'http', 'yarnresourcemanager', '19888', '', 'container')
 77 |      ,('Oozie', 'http', 'oozie', '11000', 'oozie', 'container')
 78 |      ,('Catalog', 'http', 'impala-store-catalog', '25020', '', 'external')
 79 |      ,('StateStore', 'http', 'impala-store-catalog', '25010', '', 'external')
 80 |      ,('Impala', 'http', 'datanodes', '25000', '', 'external')
 81 |      ,('Spark', 'http', 'spark', '18080', '', 'external')
 82 |      ,('HBase', 'http', 'hbasemaster', '60010', 'master-status', 'container')
 83 |      ,('Solr', 'http', 'solr', '8983', '/solr/', 'external')
 84 |      ,('Hue', 'https', 'hue', '8888', '/about/', 'external')
 85 |    ] %}
 86 | {% for s in services %}
 87 |   {% set sname, sproto, sgroup, sport, surl, starget = s %}
 88 |   {% if groups[sgroup] | count > 1 %}
 89 |     <li class="dropdown">
 90 |       <a class="dropdown-toggle" data-toggle="dropdown" href="#">{{sname}}<b class="caret"></b></a>
 91 |       <ul class="dropdown-menu navmenu-nav">
 92 |         {% for shost in groups[sgroup] %}
 93 |         <li><a href="{{sproto}}://{{shost}}:{{sport}}/{{surl}}" target="{{starget}}">{{shost}}</a></li>
 94 |         {% endfor %}
 95 |       </ul>
 96 |     </li>
 97 |   {% endif %}
 98 |   {% if groups[sgroup] | count == 1 %}
 99 |     <li class="">
100 |       <a class="" href="{{sproto}}://{{groups[sgroup][0]}}:{{sport}}/{{surl}}" target="{{starget}}">{{sname}} ({{groups[sgroup][0]}})</a>
101 |     </li>
102 |   {% endif %}
103 | {% endfor %}
104 |   </ul>
105 | </div>
106 | 
107 | <div class='container canvas' >
108 |    <iframe id="ifr" name='container' style="display:block;width:100%;height:100%" frameborder=0 src="about:blank">
109 | 
110 |    </iframe>
111 | 
112 |    <div class="navbar navbar-default navbar-fixed-top">
113 |       <button type="button" class="navbar-toggle dash-button" data-toggle="offcanvas" data-recalc="false" data-target=".navmenu" data-canvas=".canvas">
114 |         <span class="icon-bar"></span>
115 |         <span class="icon-bar"></span>
116 |         <span class="icon-bar"></span>
117 |       </button>
118 |    </div>
119 | </div>
120 |     <script src="jquery.min.js"></script>
121 |     <script src="bootstrap.min.js"></script>
122 |     <script src="jasny-bootstrap.min.js"></script>
123 | <script type="text/javascript">
124 |     $('.dropdown-menu ').click(function(e) {
125 |         e.stopPropagation();
126 |     });
127 |     $('.dash-button').click()
128 |                      .on('click', function (e) {$(this).tooltip('destroy');})
129 |                      .tooltip({title:"Click me to open/close dashboard menu", placement:"bottom", trigger:"manual"})
130 |                      .tooltip('show');
131 | 
132 | </script>
133 | </body></html>
134 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/capacity-scheduler.xml:
--------------------------------------------------------------------------------
  1 | <!--
  2 |   Licensed under the Apache License, Version 2.0 (the "License");
  3 |   you may not use this file except in compliance with the License.
  4 |   You may obtain a copy of the License at
  5 | 
  6 |     http://www.apache.org/licenses/LICENSE-2.0
  7 | 
  8 |   Unless required by applicable law or agreed to in writing, software
  9 |   distributed under the License is distributed on an "AS IS" BASIS,
 10 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 11 |   See the License for the specific language governing permissions and
 12 |   limitations under the License. See accompanying LICENSE file.
 13 | -->
 14 | <configuration>
 15 | 
 16 |   <property>
 17 |     <name>yarn.scheduler.capacity.maximum-applications</name>
 18 |     <value>10000</value>
 19 |     <description>
 20 |       Maximum number of applications that can be pending and running.
 21 |     </description>
 22 |   </property>
 23 | 
 24 |   <property>
 25 |     <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
 26 |     <value>0.1</value>
 27 |     <description>
 28 |       Maximum percent of resources in the cluster which can be used to run 
 29 |       application masters i.e. controls number of concurrent running
 30 |       applications.
 31 |     </description>
 32 |   </property>
 33 | 
 34 |   <property>
 35 |     <name>yarn.scheduler.capacity.resource-calculator</name>
 36 |     <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
 37 |     <description>
 38 |       The ResourceCalculator implementation to be used to compare 
 39 |       Resources in the scheduler.
 40 |       The default i.e. DefaultResourceCalculator only uses Memory while
 41 |       DominantResourceCalculator uses dominant-resource to compare 
 42 |       multi-dimensional resources such as Memory, CPU etc.
 43 |     </description>
 44 |   </property>
 45 | 
 46 |   <property>
 47 |     <name>yarn.scheduler.capacity.root.queues</name>
 48 |     <value>default</value>
 49 |     <description>
 50 |       The queues at the this level (root is the root queue).
 51 |     </description>
 52 |   </property>
 53 | 
 54 |   <property>
 55 |     <name>yarn.scheduler.capacity.root.default.capacity</name>
 56 |     <value>100</value>
 57 |     <description>Default queue target capacity.</description>
 58 |   </property>
 59 | 
 60 |   <property>
 61 |     <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
 62 |     <value>1</value>
 63 |     <description>
 64 |       Default queue user limit a percentage from 0.0 to 1.0.
 65 |     </description>
 66 |   </property>
 67 | 
 68 |   <property>
 69 |     <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
 70 |     <value>100</value>
 71 |     <description>
 72 |       The maximum capacity of the default queue. 
 73 |     </description>
 74 |   </property>
 75 | 
 76 |   <property>
 77 |     <name>yarn.scheduler.capacity.root.default.state</name>
 78 |     <value>RUNNING</value>
 79 |     <description>
 80 |       The state of the default queue. State can be one of RUNNING or STOPPED.
 81 |     </description>
 82 |   </property>
 83 | 
 84 |   <property>
 85 |     <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
 86 |     <value>*</value>
 87 |     <description>
 88 |       The ACL of who can submit jobs to the default queue.
 89 |     </description>
 90 |   </property>
 91 | 
 92 |   <property>
 93 |     <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
 94 |     <value>*</value>
 95 |     <description>
 96 |       The ACL of who can administer jobs on the default queue.
 97 |     </description>
 98 |   </property>
 99 | 
100 |   <property>
101 |     <name>yarn.scheduler.capacity.node-locality-delay</name>
102 |     <value>40</value>
103 |     <description>
104 |       Number of missed scheduling opportunities after which the CapacityScheduler 
105 |       attempts to schedule rack-local containers. 
106 |       Typically this should be set to number of nodes in the cluster, By default is setting 
107 |       approximately number of nodes in one rack which is 40.
108 |     </description>
109 |   </property>
110 | 
111 |   <property>
112 |     <name>yarn.scheduler.capacity.queue-mappings</name>
113 |     <value></value>
114 |     <description>
115 |       A list of mappings that will be used to assign jobs to queues
116 |       The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
117 |       Typically this list will be used to map users to queues,
118 |       for example, u:%user:%user maps all users to queues with the same name
119 |       as the user.
120 |     </description>
121 |   </property>
122 | 
123 |   <property>
124 |     <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
125 |     <value>false</value>
126 |     <description>
127 |       If a queue mapping is present, will it override the value specified
128 |       by the user? This can be used by administrators to place jobs in queues
129 |       that are different than the one specified by the user.
130 |       The default is false.
131 |     </description>
132 |   </property>
133 | 
134 | </configuration>
135 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/configuration.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!--
 3 |    Licensed to the Apache Software Foundation (ASF) under one or more
 4 |    contributor license agreements.  See the NOTICE file distributed with
 5 |    this work for additional information regarding copyright ownership.
 6 |    The ASF licenses this file to You under the Apache License, Version 2.0
 7 |    (the "License"); you may not use this file except in compliance with
 8 |    the License.  You may obtain a copy of the License at
 9 | 
10 |        http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 |    Unless required by applicable law or agreed to in writing, software
13 |    distributed under the License is distributed on an "AS IS" BASIS,
14 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |    See the License for the specific language governing permissions and
16 |    limitations under the License.
17 | -->
18 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
19 | <xsl:output method="html"/>
20 | <xsl:template match="configuration">
21 | <html>
22 | <body>
23 | <table border="1">
24 | <tr>
25 |  <td>name</td>
26 |  <td>value</td>
27 |  <td>description</td>
28 | </tr>
29 | <xsl:for-each select="property">
30 | <tr>
31 |   <td><a name="{name}"><xsl:value-of select="name"/></a></td>
32 |   <td><xsl:value-of select="value"/></td>
33 |   <td><xsl:value-of select="description"/></td>
34 | </tr>
35 | </xsl:for-each>
36 | </table>
37 | </body>
38 | </html>
39 | </xsl:template>
40 | </xsl:stylesheet>
41 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/container-executor.cfg:
--------------------------------------------------------------------------------
1 | yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
2 | banned.users=#comma separated list of users who can not run applications
3 | min.user.id=1000#Prevent other super-users
4 | allowed.system.users=##comma separated list of system users who CAN run applications
5 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/default/hadoop:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HADOOP_HOME_WARN_SUPPRESS=true
16 | export HADOOP_PREFIX=/usr/lib/hadoop
17 | 
18 | export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec
19 | export HADOOP_CONF_DIR=/etc/hadoop/conf
20 | 
21 | export HADOOP_COMMON_HOME=/usr/lib/hadoop
22 | export HADOOP_HDFS_HOME=/usr/lib/hadoop-hdfs
23 | export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn
24 | 
25 | # Set HADOOP_MAPRED_HOME to /usr/lib/hadoop-0.20-mapreduce to use MR1
26 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
27 | 
28 | export JSVC_HOME=/usr/lib/bigtop-utils
29 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/default/hadoop-0.20-mapreduce:
--------------------------------------------------------------------------------
 1 | export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec
 2 | export HADOOP_CONF_DIR=/etc/hadoop/conf
 3 | export HADOOP_HOME=/usr/lib/hadoop-0.20-mapreduce
 4 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-0.20-mapreduce
 5 | export HADOOP_JOBTRACKER_USER=mapred
 6 | export HADOOP_TASKTRACKER_USER=mapred
 7 | export HADOOP_MRZKFC_USER=mapred
 8 | export HADOOP_JOBTRACKERHA_USER=mapred
 9 | export HADOOP_IDENT_STRING=hadoop
10 | export HADOOP_LOG_DIR=/var/log/hadoop-0.20-mapreduce
11 | export HADOOP_PID_DIR=/var/run/hadoop-0.20-mapreduce
12 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/default/hadoop-httpfs:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HTTPFS_USER=httpfs
16 | export HTTPFS_CONFIG=/etc/hadoop-httpfs/conf
17 | export HTTPFS_LOG=/var/log/hadoop-httpfs/
18 | export HTTPFS_TEMP=/var/run/hadoop-httpfs/
19 | export HTTPFS_CATALINA_HOME=/usr/lib/bigtop-tomcat
20 | export CATALINA_PID=/var/run/hadoop-httpfs/hadoop-httpfs-httpfs.pid
21 | export CATALINA_BASE=/var/lib/hadoop-httpfs/tomcat-deployment
22 | export CATALINA_TMPDIR=/var/run/hadoop-httpfs/
23 | # HTTPFS_HTTP_PORT
24 | # HTTPFS_ADMIN_PORT
25 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/default/hadoop-mapreduce-historyserver:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | export HADOOP_MAPRED_IDENT_STRING=mapred
17 | export HADOOP_MAPRED_PID_DIR=/var/run/hadoop-mapreduce
18 | export HADOOP_MAPRED_LOG_DIR=/var/log/hadoop-mapreduce
19 | export HADOOP_LOG_DIR=/var/log/hadoop-mapreduce
20 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
21 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/default/hadoop-yarn-resourcemanager:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export YARN_IDENT_STRING=yarn
16 | export YARN_PID_DIR=/var/run/hadoop-yarn
17 | export YARN_LOG_DIR=/var/log/hadoop-yarn
18 | export YARN_CONF_DIR=/etc/hadoop/conf
19 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/dfs.exclude:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/hadoop/files/dfs.exclude


--------------------------------------------------------------------------------
/roles/hadoop/files/hadoop-metrics.properties:
--------------------------------------------------------------------------------
 1 | # Configuration of the "dfs" context for null
 2 | dfs.class=org.apache.hadoop.metrics.spi.NullContext
 3 | 
 4 | # Configuration of the "dfs" context for file
 5 | #dfs.class=org.apache.hadoop.metrics.file.FileContext
 6 | #dfs.period=10
 7 | #dfs.fileName=/tmp/dfsmetrics.log
 8 | 
 9 | # Configuration of the "dfs" context for ganglia
10 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
11 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
12 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
13 | # dfs.period=10
14 | # dfs.servers=localhost:8649
15 | 
16 | 
17 | # Configuration of the "mapred" context for null
18 | mapred.class=org.apache.hadoop.metrics.spi.NullContext
19 | 
20 | # Configuration of the "mapred" context for file
21 | #mapred.class=org.apache.hadoop.metrics.file.FileContext
22 | #mapred.period=10
23 | #mapred.fileName=/tmp/mrmetrics.log
24 | 
25 | # Configuration of the "mapred" context for ganglia
26 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
27 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
28 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
29 | # mapred.period=10
30 | # mapred.servers=localhost:8649
31 | 
32 | 
33 | # Configuration of the "jvm" context for null
34 | #jvm.class=org.apache.hadoop.metrics.spi.NullContext
35 | 
36 | # Configuration of the "jvm" context for file
37 | #jvm.class=org.apache.hadoop.metrics.file.FileContext
38 | #jvm.period=10
39 | #jvm.fileName=/tmp/jvmmetrics.log
40 | 
41 | # Configuration of the "jvm" context for ganglia
42 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
43 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
44 | # jvm.period=10
45 | # jvm.servers=localhost:8649
46 | 
47 | # Configuration of the "rpc" context for null
48 | rpc.class=org.apache.hadoop.metrics.spi.NullContext
49 | 
50 | # Configuration of the "rpc" context for file
51 | #rpc.class=org.apache.hadoop.metrics.file.FileContext
52 | #rpc.period=10
53 | #rpc.fileName=/tmp/rpcmetrics.log
54 | 
55 | # Configuration of the "rpc" context for ganglia
56 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
57 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
58 | # rpc.period=10
59 | # rpc.servers=localhost:8649
60 | 
61 | 
62 | # Configuration of the "ugi" context for null
63 | ugi.class=org.apache.hadoop.metrics.spi.NullContext
64 | 
65 | # Configuration of the "ugi" context for file
66 | #ugi.class=org.apache.hadoop.metrics.file.FileContext
67 | #ugi.period=10
68 | #ugi.fileName=/tmp/ugimetrics.log
69 | 
70 | # Configuration of the "ugi" context for ganglia
71 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
72 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
73 | # ugi.period=10
74 | # ugi.servers=localhost:8649
75 | 
76 | 


--------------------------------------------------------------------------------
/roles/hadoop/files/hadoop-metrics2.properties:
--------------------------------------------------------------------------------
 1 | # syntax: [prefix].[source|sink].[instance].[options]
 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
 3 | 
 4 | *.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
 5 | # default sampling period, in seconds
 6 | *.period=10
 7 | 
 8 | # The namenode-metrics.out will contain metrics from all context
 9 | #namenode.sink.file.filename=namenode-metrics.out
10 | # Specifying a special sampling period for namenode:
11 | #namenode.sink.*.period=8
12 | 
13 | #datanode.sink.file.filename=datanode-metrics.out
14 | 
15 | #resourcemanager.sink.file.filename=resourcemanager-metrics.out
16 | 
17 | #nodemanager.sink.file.filename=nodemanager-metrics.out
18 | 
19 | #mrappmaster.sink.file.filename=mrappmaster-metrics.out
20 | 
21 | #jobhistoryserver.sink.file.filename=jobhistoryserver-metrics.out
22 | 
23 | # the following example split metrics of different
24 | # context to different sinks (in this case files)
25 | #nodemanager.sink.file_jvm.class=org.apache.hadoop.metrics2.sink.FileSink
26 | #nodemanager.sink.file_jvm.context=jvm
27 | #nodemanager.sink.file_jvm.filename=nodemanager-jvm-metrics.out
28 | #nodemanager.sink.file_mapred.class=org.apache.hadoop.metrics2.sink.FileSink
29 | #nodemanager.sink.file_mapred.context=mapred
30 | #nodemanager.sink.file_mapred.filename=nodemanager-mapred-metrics.out
31 | 
32 | #
33 | # Below are for sending metrics to Ganglia
34 | #
35 | # for Ganglia 3.0 support
36 | # *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30
37 | #
38 | # for Ganglia 3.1 support
39 | # *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31
40 | 
41 | # *.sink.ganglia.period=10
42 | 
43 | # default for supportsparse is false
44 | # *.sink.ganglia.supportsparse=true
45 | 
46 | #*.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both
47 | #*.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40
48 | 
49 | # Tag values to use for the ganglia prefix. If not defined no tags are used.
50 | # If '*' all tags are used. If specifiying multiple tags separate them with 
51 | # commas. Note that the last segment of the property name is the context name.
52 | #
53 | #*.sink.ganglia.tagsForPrefix.jvm=ProcesName
54 | #*.sink.ganglia.tagsForPrefix.dfs=
55 | #*.sink.ganglia.tagsForPrefix.rpc=
56 | #*.sink.ganglia.tagsForPrefix.mapred=
57 | 
58 | #namenode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
59 | 
60 | #datanode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
61 | 
62 | #resourcemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
63 | 
64 | #nodemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
65 | 
66 | #mrappmaster.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
67 | 
68 | #jobhistoryserver.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649
69 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/base.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - hadoop
 6 | 
 7 | - name: create configuration directory
 8 |   tags: config
 9 |   file: path={{ etc_folder }}/hadoop state=directory
10 | 
11 | - name: setup alternatives link
12 |   tags: config
13 |   alternatives: name=hadoop-conf link=/etc/hadoop/conf path={{ etc_folder }}/hadoop
14 | 
15 | - name: install template configurations
16 |   tags: config
17 |   template: src={{ item }}.j2 dest={{ etc_folder }}/hadoop/{{ item }}
18 |   with_items:
19 |     - core-site.xml
20 |     - fair-scheduler.xml
21 |     - hdfs-site.xml
22 |     - mapred-site.xml
23 |     - yarn-site.xml
24 |     - hadoop-env.sh
25 |     - mapred-env.sh
26 |     - yarn-env.sh
27 | 
28 | - name: install files configurations
29 |   tags: config
30 |   copy: src={{ item }} dest={{ etc_folder }}/hadoop/{{ item }}
31 |   with_items:
32 |     - capacity-scheduler.xml
33 |     - configuration.xsl
34 |     - container-executor.cfg
35 |     - dfs.exclude
36 |     - hadoop-metrics.properties
37 |     - hadoop-metrics2.properties
38 |     - hadoop-policy.xml
39 |     - log4j.properties
40 | 
41 | - name: install default configurations
42 |   tags: config
43 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
44 |   with_items:
45 |     - hadoop
46 | 
47 | - name: create log folders
48 |   tags: config
49 |   file: path={{ log_folder }}/{{ item }} state=directory owner={{ item }} mode=0755
50 |   with_items:
51 |     - hdfs
52 |     - yarn
53 |     - mapred
54 | 
55 | - name: install hdfs-ready utility
56 |   tags: config
57 |   template: src=bin/hdfs-ready.sh dest={{ etc_folder }}/hadoop/hdfs-ready.sh mode=0755
58 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/datanode.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: package
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |     - hadoop-hdfs-datanode
 6 |     - hadoop-yarn-nodemanager
 7 | 
 8 | - name: destroy data
 9 |   tags: init
10 |   command: rm -rf {{ item }}
11 |   with_items: "{{ dfs_datanode_data_dir.replace('file://','').split(',') }}"
12 |   when: destroy_data
13 | 
14 | - name: create datanode directories
15 |   file: dest={{ item }} owner=hdfs group=hdfs state=directory
16 |   with_items: "{{ dfs_datanode_data_dir.replace('file://','').split(',') }}"
17 | 
18 | - name: create yarn local directories
19 |   file: dest={{ item }} owner=yarn group=hadoop state=directory
20 |   with_items: "{{ yarn_nodemanager_local_dirs.replace('file://','').split(',') }}"
21 | 
22 | - name: create yarn log directories
23 |   file: dest={{ item }} owner=yarn group=hadoop state=directory
24 |   with_items: "{{ yarn_nodemanager_log_dirs.replace('file://','').split(',') }}"
25 | 
26 | - name: install default configurations
27 |   tags: config
28 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
29 |   with_items:
30 |     - hadoop-hdfs-datanode
31 |     - hadoop-yarn-nodemanager
32 | 
33 | - name: start services
34 |   tags: service
35 |   service: name={{ item }} state=restarted enabled=yes
36 |   with_items:
37 |     - hadoop-hdfs-datanode
38 |     - hadoop-yarn-nodemanager
39 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/journalnode.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: package
 3 |   yum:  name=hadoop-hdfs-journalnode state=latest
 4 | 
 5 | - name: destroy data
 6 |   tags: init
 7 |   command: rm -rf {{ dfs_journalnode_edits_dir }}
 8 |   when: destroy_data
 9 | 
10 | - name: create journal node directories
11 |   tags: config
12 |   file: dest={{ dfs_journalnode_edits_dir }} owner=hdfs group=hdfs state=directory
13 | 
14 | - name: install default configurations
15 |   tags: config
16 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
17 |   with_items:
18 |     - hadoop-hdfs-journalnode
19 | 
20 | - name: start services
21 |   tags: service
22 |   service: name=hadoop-hdfs-journalnode state=restarted enabled=yes
23 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - include: base.yaml
 2 |   when: deploy == "base"
 3 | 
 4 | - include: journalnode.yaml
 5 |   when: deploy == "journalnodes"
 6 | 
 7 | - include: namenodes-fence.yaml
 8 |   when: deploy == "namenodes-fence" and groups['namenodes']|count > 1
 9 | 
10 | - include: namenode.yaml
11 |   when: deploy == "namenodes"
12 | 
13 | - include: datanode.yaml
14 |   when: deploy == "datanodes"
15 | 
16 | - include: test-hdfs.yaml
17 |   when: deploy == "test-hdfs"
18 | 
19 | - include: resourcemanager.yaml
20 |   when: deploy == "resourcemanager"
21 | 
22 | - include: test-mapreduce.yaml
23 |   when: deploy == "test-mapreduce"
24 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/namenode.yaml:
--------------------------------------------------------------------------------
 1 | - name: install namenode package
 2 |   tags: package
 3 |   yum:  name=hadoop-hdfs-namenode state=latest
 4 | 
 5 | - name: install namenode default configurations
 6 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
 7 |   with_items:
 8 |     - hadoop-hdfs-namenode
 9 | 
10 | - name: install zkfc package
11 |   tags: package
12 |   yum:  name=hadoop-hdfs-zkfc state=latest
13 |   when:  groups['namenodes']|count > 1
14 | 
15 | - name: install zkfc default configurations
16 |   tags: config
17 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
18 |   with_items:
19 |     - hadoop-hdfs-zkfc
20 |   when:  groups['namenodes']|count > 1
21 | 
22 | - name: initialize zookeeper
23 |   tags: init
24 |   command: sudo -Hu hdfs hdfs zkfc -formatZK -force
25 |   when:  groups['namenodes']|count > 1
26 |   run_once: true
27 | 
28 | - name: start zkfc
29 |   tags: service
30 |   service: name=hadoop-hdfs-zkfc state=restarted enabled=yes
31 |   when:  groups['namenodes']|count > 1
32 | 
33 | - name: destroy data
34 |   tags: init
35 |   command: rm -rf {{ item }}
36 |   with_items: "{{ dfs_namenode_name_dir.replace('file://','').split(',') }}"
37 |   when: destroy_data
38 | 
39 | - name: create namenode directories
40 |   tags: init
41 |   file: dest={{ item }} owner=hdfs group=hdfs state=directory
42 |   with_items: "{{ dfs_namenode_name_dir.replace('file://','').split(',') }}"
43 | 
44 | - name: format primary namenode
45 |   tags: init
46 |   command: sudo -Hu hdfs hdfs namenode -format -force
47 |   when: ansible_hostname == groups['namenodes'][0] and destroy_data
48 | 
49 | - name: start primary namenode
50 |   tags: service
51 |   service: name=hadoop-hdfs-namenode state=restarted enabled=yes
52 |   when: ansible_hostname == groups['namenodes'][0]
53 | 
54 | - name: init secondary instance
55 |   tags: init
56 |   command: sudo -Hu hdfs hdfs namenode -bootstrapStandby
57 |   when: ansible_hostname != groups['namenodes'][0] and destroy_data
58 | 
59 | - name: start secondary namenode
60 |   tags: service
61 |   service: name=hadoop-hdfs-namenode state=restarted enabled=yes
62 |   when: ansible_hostname != groups['namenodes'][0]
63 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/namenodes-fence.yaml:
--------------------------------------------------------------------------------
 1 | - name: remove old keys
 2 |   tags: config
 3 |   local_action: shell rm -f {{ inventory_dir }}/workdir/{{ item }}
 4 |   with_items:
 5 |     - hdfs_key*
 6 |     - hdfs_known_hosts
 7 |   run_once: true
 8 | 
 9 | - name: generate ssh key
10 |   tags: config
11 |   local_action: command ssh-keygen -q -N "" -t rsa -b 2048  -f {{ inventory_dir }}/workdir/hdfs_key
12 |   run_once: true
13 | 
14 | - name: set key permissions
15 |   tags: config
16 |   local_action: file path={{ inventory_dir }}/workdir/hdfs_key mode=a+r
17 |   run_once: true
18 | 
19 | - name: generate known_hosts
20 |   tags: config
21 |   local_action: shell ssh-keyscan {{ ansible_hostname }} >> {{ inventory_dir }}/workdir/hdfs_known_hosts
22 | 
23 | - name: create .ssh dir
24 |   tags: config
25 |   file: path=/var/lib/hadoop-hdfs/.ssh state=directory owner=hdfs group=hdfs mode=700
26 | 
27 | - name: install ssh auth files
28 |   tags: config
29 |   copy: src={{ inventory_dir }}/workdir/{{ item.src }} dest={{ item.dest }} owner=hdfs group=hdfs mode=700
30 |   with_items:
31 |     - { src: hdfs_key, dest: /var/lib/hadoop-hdfs/.ssh/id_rsa }
32 |     - { src: hdfs_key.pub, dest: /var/lib/hadoop-hdfs/.ssh/authorized_keys }
33 |     - { src: hdfs_known_hosts, dest: /var/lib/hadoop-hdfs/.ssh/known_hosts  }
34 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/resourcemanager.yaml:
--------------------------------------------------------------------------------
 1 | - name: install resource manager package
 2 |   tags: package
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |     - hadoop-yarn-resourcemanager
 6 | 
 7 | - name: install history server package
 8 |   tags: package
 9 |   yum:  name={{ item }} state=latest
10 |   with_items:
11 |     - hadoop-mapreduce-historyserver
12 |   when: ansible_hostname == groups['yarnresourcemanager'][0]
13 | 
14 | - name: install default configurations
15 |   tags: config
16 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
17 |   with_items:
18 |     - hadoop-yarn-resourcemanager
19 |     - hadoop-mapreduce-historyserver
20 | 
21 | - name: configure hdfs directories
22 |   tags: config
23 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
24 |   with_items:
25 |     - -mkdir -p /tmp
26 |     - -chmod 1777 /tmp
27 |     - -mkdir -p /user/history
28 |     - -chmod 1777 /user/history
29 |     - -chown mapred:hadoop /user/history
30 |     - -mkdir -p /var/log/hadoop-yarn/apps
31 |     - -chown yarn:mapred /var/log/hadoop-yarn
32 |     - -chown yarn:hadoop /var/log/hadoop-yarn/apps
33 |     - -chmod 1777 /var/log/hadoop-yarn/apps
34 |   run_once: true
35 | 
36 | - name: start services
37 |   tags: service
38 |   service: name={{ item }} state=restarted enabled=yes
39 |   with_items:
40 |     - hadoop-yarn-resourcemanager
41 | 
42 | - name: start services
43 |   tags: service
44 |   service: name={{ item }} state=restarted enabled=yes
45 |   with_items:
46 |     - hadoop-mapreduce-historyserver
47 |   when: ansible_hostname == groups['yarnresourcemanager'][0]
48 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/test-hdfs.yaml:
--------------------------------------------------------------------------------
 1 | - name: get hdfs ha state
 2 |   tags: test
 3 |   command: sudo -Hu hdfs hdfs haadmin -getServiceState {{ ansible_hostname }}
 4 |   register: ha_state
 5 |   when: groups['namenodes']|count > 1
 6 | 
 7 | - name: shutdown active name service
 8 |   tags: test
 9 |   shell: service hadoop-hdfs-namenode stop; sleep 1
10 |   when: groups['namenodes']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1
11 | 
12 | - name: test hdfs
13 |   tags: test
14 |   command: sudo -Hu hdfs hdfs dfs {{ item }} /ansible_hdfs_test
15 |   with_items:
16 |     - -touchz
17 |     - -rm
18 |   run_once: true
19 | 
20 | - name: start services
21 |   tags: test
22 |   service: name=hadoop-hdfs-namenode state=restarted enabled=yes
23 |   when: groups['namenodes']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1
24 | 


--------------------------------------------------------------------------------
/roles/hadoop/tasks/test-mapreduce.yaml:
--------------------------------------------------------------------------------
 1 | - name: get resource manager ha state
 2 |   tags: test
 3 |   command: sudo -Hu yarn yarn rmadmin -getServiceState {{ ansible_hostname }}
 4 |   register: ha_state
 5 |   when: groups['yarnresourcemanager']|count > 1
 6 | 
 7 | - name: shutdown active resource manager service
 8 |   tags: test
 9 |   shell: service hadoop-yarn-resourcemanager stop; sleep 1
10 |   when: groups['yarnresourcemanager']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1
11 | 
12 | - name: test mapreduce
13 |   tags: test
14 |   command: sudo -Hu hdfs hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 1 1
15 |   run_once: true
16 | 
17 | - name: start services
18 |   tags: test
19 |   service: name=hadoop-yarn-resourcemanager state=restarted enabled=yes
20 |   when: groups['yarnresourcemanager']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1
21 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/bin/hdfs-ready.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ready_filename="/hdfs_ready"
3 | sudo -Hu hdfs timeout 10 hdfs dfs -touchz $ready_filename &>/dev/null && exit 0
4 | exit 1
5 | 
6 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/core-site.xml.j2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | 
 4 | <configuration>
 5 | 
 6 |   <property>
 7 |     <name>fs.defaultFS</name>
 8 | {% if groups['namenodes']|count > 1 %}
 9 |     <value>hdfs://{{ cluster_name }}</value>
10 | {% else %}
11 |     <value>hdfs://{{ groups['namenodes'][0] }}:8020</value>
12 | {% endif %}
13 |   </property>
14 | 
15 | {% if groups['namenodes']|count > 1 %}
16 |   <property>
17 |     <name>ha.zookeeper.quorum</name>
18 |     <value>{% for item in groups['zookeepernodes'] -%}
19 |               {{ item }}:2181{% if not loop.last %},{% endif %}
20 |            {%- endfor %}</value>
21 |   </property>
22 | {% endif %}
23 | 
24 |   <property>
25 |     <name>dfs.permissions.superusergroup</name>
26 |     <value>hadoop</value>
27 |   </property>
28 | 
29 |   <property>
30 |     <name>hadoop.proxyuser.mapred.groups</name>
31 |     <value>*</value>
32 |   </property>
33 | 
34 |   <property>
35 |     <name>hadoop.proxyuser.mapred.hosts</name>
36 |     <value>*</value>
37 |   </property>
38 | 
39 |   <property>
40 |     <name>hadoop.proxyuser.hue.hosts</name>
41 |     <value>*</value>
42 |   </property>
43 | 
44 |   <property>
45 |     <name>hadoop.proxyuser.hue.groups</name>
46 |     <value>*</value>
47 |   </property>
48 | 
49 |   <property>  
50 |     <name>hadoop.proxyuser.httpfs.hosts</name>  
51 |     <value>*</value>  
52 |   </property>  
53 | 
54 |   <property>  
55 |     <name>hadoop.proxyuser.httpfs.groups</name>  
56 |     <value>*</value>  
57 |   </property>  
58 | 
59 |   <property>
60 |     <name>hadoop.proxyuser.oozie.hosts</name>                                  
61 |     <value>*</value>
62 |   </property>
63 | 
64 |   <property>
65 |     <name>hadoop.proxyuser.oozie.groups</name>
66 |     <value>*</value>
67 |   </property>
68 | 
69 |   <property>
70 |     <name>hadoop.proxyuser.impala.hosts</name>     
71 |     <value>*</value>
72 |   </property>
73 | 
74 |   <property>
75 |    <name>hadoop.proxyuser.impala.groups</name>
76 |    <value>*</value>
77 |   </property>
78 | 
79 | </configuration>
80 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/default/hadoop-hdfs-datanode.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs
16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs
17 | export HADOOP_NAMENODE_USER=hdfs
18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs
19 | export HADOOP_DATANODE_USER=hdfs
20 | export HADOOP_IDENT_STRING=hdfs
21 | 
22 | # We always want to start NFS as root and by setting a default value like the one below we
23 | # ensure this happens. Once port registration completes, privileges are dropped back from root
24 | # to HADOOP_PRIVILEGED_NFS_USER.
25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs
26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs
27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs
28 | 
29 | # export HADOOP_SECURE_DN_USER=hdfs
30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs
32 | 
33 | # JVM heapsize tuning
34 | export HADOOP_HEAPSIZE={{datanode_heapsize|default('2048')}}
35 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/default/hadoop-hdfs-journalnode.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs
16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs
17 | export HADOOP_NAMENODE_USER=hdfs
18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs
19 | export HADOOP_DATANODE_USER=hdfs
20 | export HADOOP_IDENT_STRING=hdfs
21 | 
22 | # We always want to start NFS as root and by setting a default value like the one below we
23 | # ensure this happens. Once port registration completes, privileges are dropped back from root
24 | # to HADOOP_PRIVILEGED_NFS_USER.
25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs
26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs
27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs
28 | 
29 | # export HADOOP_SECURE_DN_USER=hdfs
30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs
32 | 
33 | # JVM heapsize tuning
34 | export HADOOP_HEAPSIZE={{journalnode_heapsize|default('1000')}}
35 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/default/hadoop-hdfs-namenode.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs
16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs
17 | export HADOOP_NAMENODE_USER=hdfs
18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs
19 | export HADOOP_DATANODE_USER=hdfs
20 | export HADOOP_IDENT_STRING=hdfs
21 | 
22 | # We always want to start NFS as root and by setting a default value like the one below we
23 | # ensure this happens. Once port registration completes, privileges are dropped back from root
24 | # to HADOOP_PRIVILEGED_NFS_USER.
25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs
26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs
27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs
28 | 
29 | # export HADOOP_SECURE_DN_USER=hdfs
30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs
32 | 
33 | # JVM heapsize tuning
34 | export HADOOP_HEAPSIZE={{namenode_heapsize|default('2048')}}
35 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/default/hadoop-hdfs-zkfc.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs
16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs
17 | export HADOOP_NAMENODE_USER=hdfs
18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs
19 | export HADOOP_DATANODE_USER=hdfs
20 | export HADOOP_IDENT_STRING=hdfs
21 | 
22 | # We always want to start NFS as root and by setting a default value like the one below we
23 | # ensure this happens. Once port registration completes, privileges are dropped back from root
24 | # to HADOOP_PRIVILEGED_NFS_USER.
25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs
26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs
27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs
28 | 
29 | # export HADOOP_SECURE_DN_USER=hdfs
30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs
31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs
32 | 
33 | # JVM heapsize tuning
34 | export HADOOP_HEAPSIZE={{zkfc_heapsize|default('1000')}}
35 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/default/hadoop-yarn-nodemanager.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | export YARN_IDENT_STRING=yarn
16 | export YARN_PID_DIR=/var/run/hadoop-yarn
17 | export YARN_LOG_DIR=/var/log/hadoop-yarn
18 | export YARN_CONF_DIR=/etc/hadoop/conf
19 | 
20 | export YARN_HEAPSIZE={{ nodemanager_heapsize }}
21 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/fair-scheduler.xml.j2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!--
 3 |   This is a sample configuration file for the Fair Scheduler. For details
 4 |   on the options, please refer to the fair scheduler documentation at
 5 |   http://hadoop.apache.org/core/docs/r0.21.0/fair_scheduler.html.
 6 | 
 7 |   To create your own configuration, copy this file to conf/fair-scheduler.xml
 8 |   and add the following property in mapred-site.xml to point Hadoop to the
 9 |   file, replacing [HADOOP_HOME] with the path to your installation directory:
10 |     <property>
11 |       <name>mapred.fairscheduler.allocation.file</name>
12 |       <value>[HADOOP_HOME]/conf/fair-scheduler.xml</value>
13 |     </property>
14 | 
15 |   Note that all the parameters in the configuration file below are optional,
16 |   including the parameters inside <pool> and <user> elements. It is only
17 |   necessary to set the ones you want to differ from the defaults.
18 | -->
19 | 
20 | <allocations>
21 | 
22 |   {% if groups['oozie']|count > 0 and 'oozie' in group_names %}
23 |   <!-- oozie launcher pool configuration -->
24 |   <pool name="launcher">
25 |     <maxRunningApps>{{ oozie_launcher_maxapps }}</maxRunningApps>
26 |     <weight>{{ oozie_launcher_weight }}</weight>
27 |   </pool>
28 |   {% endif %}
29 | 
30 |   <poolMaxJobsDefault>20</poolMaxJobsDefault>
31 | 
32 |   <userMaxJobsDefault>20</userMaxJobsDefault>
33 | 
34 |   <defaultMinSharePreemptionTimeout>600</defaultMinSharePreemptionTimeout>
35 | 
36 |   <fairSharePreemptionTimeout>600</fairSharePreemptionTimeout>
37 | 
38 |   <queueMaxAMShareDefault>0.5</queueMaxAMShareDefault>
39 | 
40 | </allocations>
41 | <!--
42 | vim:syn=xml
43 | -->
44 | 
45 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/hadoop-env.sh.j2:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # NOTE: this files contains environment variables for all hadoop services
3 | # NOTE: this files overwrites varibales from /etc/default/hadoop*
4 | export HADOOP_LOG_DIR={{log_folder}}/hdfs
5 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/hdfs-site.xml.j2:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0"?>
  2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3 | 
  4 | <configuration>
  5 | 
  6 | {% if groups['namenodes']|count > 1 %}
  7 |   <property>
  8 |     <name>dfs.nameservices</name>
  9 |     <value>{{ cluster_name }}</value>
 10 |   </property>
 11 | 
 12 |   <property>
 13 |     <name>dfs.ha.namenodes.{{ cluster_name }}</name>
 14 |     <value>{% for item in groups['namenodes'] -%}
 15 |               {{ item }}{% if not loop.last %},{% endif %}
 16 |            {%- endfor %}</value>
 17 |   </property>
 18 | 
 19 | {% for item in groups['namenodes'] %}
 20 |   <property>
 21 |     <name>dfs.namenode.rpc-address.{{ cluster_name }}.{{ item }}</name>
 22 |     <value>{{ item }}:8020</value>
 23 |   </property>
 24 | 
 25 |   <property>
 26 |     <name>dfs.namenode.http-address.{{ cluster_name }}.{{ item }}</name>
 27 |     <value>{{ item }}:50070</value>
 28 |   </property>
 29 | 
 30 | {% endfor %}
 31 |   <property>
 32 |     <name>dfs.namenode.shared.edits.dir</name>
 33 |     <value>qjournal://{% for item in groups['journalnodes'] -%}
 34 |              {{ item }}:8485{% if not loop.last %};{% endif %}
 35 |                       {%- endfor %}/{{ cluster_name }}</value>
 36 |   </property>
 37 | 
 38 |   <property>
 39 |     <name>dfs.journalnode.edits.dir</name>
 40 |     <value>{{ dfs_journalnode_edits_dir }}</value>
 41 |   </property>
 42 | 
 43 |   <property>
 44 |     <name>dfs.client.failover.proxy.provider.{{ cluster_name }}</name>
 45 |     <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
 46 |   </property>
 47 | 
 48 |   <property>
 49 |     <name>dfs.ha.fencing.methods</name>
 50 |     <value>sshfence</value>
 51 |   </property>
 52 | 
 53 |   <property>
 54 |     <name>dfs.ha.fencing.ssh.private-key-files</name>
 55 |     <value>/var/lib/hadoop-hdfs/.ssh/id_rsa</value>
 56 |   </property>
 57 | 
 58 |   <property>
 59 |     <name>dfs.ha.automatic-failover.enabled</name>
 60 |     <value>true</value>
 61 |   </property>
 62 | {% endif %}
 63 | 
 64 |   <property>
 65 |      <name>dfs.namenode.name.dir</name>
 66 |      <value>{{ dfs_namenode_name_dir }}</value>
 67 |   </property>
 68 | 
 69 |   <property>
 70 |      <name>dfs.datanode.data.dir</name>
 71 |      <value>{{ dfs_datanode_data_dir }}</value>
 72 |   </property>
 73 | 
 74 |   <property>
 75 |     <name>dfs.replication</name>
 76 |     <value>{{ dfs_replication }}</value>
 77 |   </property>
 78 | 
 79 |   <property>
 80 |     <name>dfs.permissions.superusergroup</name>
 81 |     <value>hadoop</value>
 82 |   </property>
 83 | 
 84 |   <property>
 85 |     <name>dfs.hosts.exclude</name>
 86 |     <value>/etc/hadoop/conf/dfs.exclude</value>
 87 |   </property>
 88 | 
 89 |   <property>
 90 |     <name>dfs.client.read.shortcircuit</name>
 91 |     <value>true</value>
 92 |   </property>
 93 | 
 94 |   <property>
 95 |     <name>dfs.domain.socket.path</name>
 96 |     <value>/var/run/hadoop-hdfs/hdfs.socket</value>
 97 |   </property>
 98 | 
 99 |   <property>
100 |     <name>dfs.client.file-block-storage-locations.timeout.millis</name>
101 |     <value>10000</value>
102 |   </property>
103 |   
104 |   <property>
105 |     <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
106 |     <value>true</value>
107 |   </property>
108 | 
109 |   <property>
110 |     <name>dfs.datanode.max.transfer.threads</name>
111 |     <value>4096</value>
112 |   </property>
113 | 
114 |   <property>
115 |     <name>fs.permissions.umask-mode</name>
116 |     <value>{{ fs_permissions_umask }}</value>
117 |   </property>
118 |  <!--
119 |  <property>
120 |    <name>dfs.namenode.rpc-bind-host</name>
121 |    <value>0.0.0.0</value>
122 |  </property>
123 | -->
124 | 
125 | </configuration>
126 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/mapred-env.sh.j2:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export HADOOP_MAPRED_LOG_DIR={{log_folder}}/mapred
3 | 
4 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/mapred-site.xml.j2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | 
 4 | <configuration>
 5 | 
 6 |   <property>
 7 |     <name>mapreduce.framework.name</name>
 8 |     <value>yarn</value>
 9 |   </property>
10 | 
11 |   <property>
12 |     <name>mapreduce.jobhistory.address</name>
13 |     <value>{{ groups['yarnresourcemanager'][0] }}:10020</value>
14 |   </property>
15 | 
16 | <!-- tuning, uncomment if required
17 |   <property>
18 |     <name>mapreduce.reduce.speculative</name>
19 |     <value>false</value>
20 |   </property>
21 | 
22 |   <property>
23 |     <name>mapreduce.map.memory.mb</name>
24 |     <value>1024</value>
25 |     <description>Larger resource limit for maps.</description>
26 |   </property>
27 | 
28 |   <property>
29 |     <name>mapreduce.reduce.memory.mb</name>
30 |     <value>2046</value>
31 |     <description>Larger resource limit for maps.</description>
32 |   </property>
33 | 
34 |   <property>
35 |     <name>mapreduce.map.java.opts</name>
36 |     <value>-Xmx1024m</value>
37 |     <description>Heap-size for child jvms of maps.</description>
38 |   </property>
39 | 
40 |   <property>
41 |     <name>mapreduce.reduce.memory.mb</name>
42 |     <value>4096</value>
43 |     <description>Larger resource limit for reduces.</description>
44 |   </property>
45 | -->
46 |   <property>
47 |     <name>jobtracker.thrift.address</name>
48 |     <value>0.0.0.0:9290</value>
49 |   </property>
50 | 
51 |   <property>
52 |     <name>mapred.jobtracker.plugins</name>
53 |     <value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
54 |     <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
55 |   </property>
56 | 
57 |   <property>
58 |     <name>yarn.app.mapreduce.am.staging-dir</name>
59 |     <value>/user</value>
60 |   </property>
61 | 
62 | </configuration>
63 | 


--------------------------------------------------------------------------------
/roles/hadoop/templates/yarn-env.sh.j2:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | # User for YARN daemons
 18 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
 19 | 
 20 | # resolve links - $0 may be a softlink
 21 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
 22 | 
 23 | # some Java parameters
 24 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/
 25 | if [ "$JAVA_HOME" != "" ]; then
 26 |   #echo "run java in $JAVA_HOME"
 27 |   JAVA_HOME=$JAVA_HOME
 28 | fi
 29 | 
 30 | if [ "$JAVA_HOME" = "" ]; then
 31 |   echo "Error: JAVA_HOME is not set."
 32 |   exit 1
 33 | fi
 34 | 
 35 | JAVA=$JAVA_HOME/bin/java
 36 | JAVA_HEAP_MAX=-Xmx1000m
 37 | 
 38 | # For setting YARN specific HEAP sizes please use this
 39 | # Parameter and set appropriately
 40 | # YARN_HEAPSIZE=1000
 41 | 
 42 | # check envvars which might override default args
 43 | if [ "$YARN_HEAPSIZE" != "" ]; then
 44 |   JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
 45 | fi
 46 | 
 47 | # Resource Manager specific parameters
 48 | 
 49 | # Specify the max Heapsize for the ResourceManager using a numerical value
 50 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
 51 | # the value to 1000.
 52 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS
 53 | # and/or YARN_RESOURCEMANAGER_OPTS.
 54 | # If not specified, the default value will be picked from either YARN_HEAPMAX
 55 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
 56 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000
 57 | 
 58 | # Specify the max Heapsize for the timeline server using a numerical value
 59 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
 60 | # the value to 1000.
 61 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS
 62 | # and/or YARN_TIMELINESERVER_OPTS.
 63 | # If not specified, the default value will be picked from either YARN_HEAPMAX
 64 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
 65 | #export YARN_TIMELINESERVER_HEAPSIZE=1000
 66 | 
 67 | # Specify the JVM options to be used when starting the ResourceManager.
 68 | # These options will be appended to the options specified as YARN_OPTS
 69 | # and therefore may override any similar flags set in YARN_OPTS
 70 | #export YARN_RESOURCEMANAGER_OPTS=
 71 | 
 72 | # Node Manager specific parameters
 73 | 
 74 | # Specify the max Heapsize for the NodeManager using a numerical value
 75 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
 76 | # the value to 1000.
 77 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS
 78 | # and/or YARN_NODEMANAGER_OPTS.
 79 | # If not specified, the default value will be picked from either YARN_HEAPMAX
 80 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
 81 | #export YARN_NODEMANAGER_HEAPSIZE=1000
 82 | 
 83 | # Specify the JVM options to be used when starting the NodeManager.
 84 | # These options will be appended to the options specified as YARN_OPTS
 85 | # and therefore may override any similar flags set in YARN_OPTS
 86 | #export YARN_NODEMANAGER_OPTS=
 87 | 
 88 | # so that filenames w/ spaces are handled correctly in loops below
 89 | IFS=
 90 | 
 91 | 
 92 | YARN_LOG_DIR={{log_folder}}/yarn
 93 | if [ "$YARN_LOGFILE" = "" ]; then
 94 |   YARN_LOGFILE='yarn.log'
 95 | fi
 96 | 
 97 | # default policy file for service-level authorization
 98 | if [ "$YARN_POLICYFILE" = "" ]; then
 99 |   YARN_POLICYFILE="hadoop-policy.xml"
100 | fi
101 | 
102 | # restore ordinary behaviour
103 | unset IFS
104 | 
105 | 
106 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
107 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
108 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
109 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
110 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
111 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
112 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
113 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
114 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
115 |   YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
116 | fi
117 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/roles/hbase/files/default/hbase:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | export HBASE_PID_DIR="/var/run/hbase"
17 | export HBASE_LOG_DIR="/var/log/hbase"
18 | export HBASE_IDENT_STRING=hbase
19 | #export HBASE_REGIONSERVER_MLOCK=true
20 | #export HBASE_REGIONSERVER_UID=hbase
21 | export HBASE_THRIFT_MODE="-nonblocking"
22 | 
23 | # Up to 100 region servers can be run on a single host by specifying offsets
24 | # here or as CLI args when using init scripts. Each offset identifies an
25 | # instance and is used to determine the network ports it uses. Each instance
26 | # will have have its own log and pid files.
27 | #
28 | # REGIONSERVER_OFFSETS="1 2 3"
29 | 
30 | 


--------------------------------------------------------------------------------
/roles/hbase/files/hadoop-metrics2-hbase.properties:
--------------------------------------------------------------------------------
 1 | # syntax: [prefix].[source|sink].[instance].[options]
 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
 3 | 
 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink
 5 | # default sampling period
 6 | *.period=10
 7 | 
 8 | # Below are some examples of sinks that could be used
 9 | # to monitor different hbase daemons.
10 | 
11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink
12 | # hbase.sink.file-all.filename=all.metrics
13 | 
14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink
15 | # hbase.sink.file0.context=hmaster
16 | # hbase.sink.file0.filename=master.metrics
17 | 
18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink
19 | # hbase.sink.file1.context=thrift-one
20 | # hbase.sink.file1.filename=thrift-one.metrics
21 | 
22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink
23 | # hbase.sink.file2.context=thrift-two
24 | # hbase.sink.file2.filename=thrift-one.metrics
25 | 
26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink
27 | # hbase.sink.file3.context=rest
28 | # hbase.sink.file3.filename=rest.metrics
29 | 


--------------------------------------------------------------------------------
/roles/hbase/files/hbase-env.cmd:
--------------------------------------------------------------------------------
 1 | @rem/**
 2 | @rem * Licensed to the Apache Software Foundation (ASF) under one
 3 | @rem * or more contributor license agreements.  See the NOTICE file
 4 | @rem * distributed with this work for additional information
 5 | @rem * regarding copyright ownership.  The ASF licenses this file
 6 | @rem * to you under the Apache License, Version 2.0 (the
 7 | @rem * "License"); you may not use this file except in compliance
 8 | @rem * with the License.  You may obtain a copy of the License at
 9 | @rem *
10 | @rem *     http://www.apache.org/licenses/LICENSE-2.0
11 | @rem *
12 | @rem * Unless required by applicable law or agreed to in writing, software
13 | @rem * distributed under the License is distributed on an "AS IS" BASIS,
14 | @rem * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | @rem * See the License for the specific language governing permissions and
16 | @rem * limitations under the License.
17 | @rem */
18 | 
19 | @rem Set environment variables here.
20 | 
21 | @rem The java implementation to use.  Java 1.7+ required.
22 | @rem set JAVA_HOME=c:\apps\java
23 | 
24 | @rem Extra Java CLASSPATH elements.  Optional.
25 | @rem set HBASE_CLASSPATH=
26 | 
27 | @rem The maximum amount of heap to use, in MB. Default is 1000.
28 | @rem set HBASE_HEAPSIZE=1000
29 | 
30 | @rem Uncomment below if you intend to use off heap cache.
31 | @rem set HBASE_OFFHEAPSIZE=1000
32 | 
33 | @rem For example, to allocate 8G of offheap, to 8G:
34 | @rem etHBASE_OFFHEAPSIZE=8G
35 | 
36 | @rem Extra Java runtime options.
37 | @rem Below are what we set by default.  May only work with SUN JVM.
38 | @rem For more on why as well as other possible settings,
39 | @rem see http://wiki.apache.org/hadoop/PerformanceTuning
40 | @rem JDK6 on Windows has a known bug for IPv6, use preferIPv4Stack unless JDK7.
41 | @rem @rem See TestIPv6NIOServerSocketChannel.
42 | set HBASE_OPTS="-XX:+UseConcMarkSweepGC" "-Djava.net.preferIPv4Stack=true"
43 | 
44 | @rem Uncomment below to enable java garbage collection logging for the server-side processes
45 | @rem this enables basic gc logging for the server processes to the .out file
46 | @rem set SERVER_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" %HBASE_GC_OPTS%
47 | 
48 | @rem this enables gc logging using automatic GC log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. Either use this set of options or the one above
49 | @rem set SERVER_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" "-XX:+UseGCLogFileRotation" "-XX:NumberOfGCLogFiles=1" "-XX:GCLogFileSize=512M" %HBASE_GC_OPTS%
50 | 
51 | @rem Uncomment below to enable java garbage collection logging for the client processes in the .out file.
52 | @rem set CLIENT_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" %HBASE_GC_OPTS%
53 | 
54 | @rem Uncomment below (along with above GC logging) to put GC information in its own logfile (will set HBASE_GC_OPTS)
55 | @rem set HBASE_USE_GC_LOGFILE=true
56 | 
57 | @rem Uncomment and adjust to enable JMX exporting
58 | @rem See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
59 | @rem More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
60 | @rem
61 | @rem set HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false" "-Dcom.sun.management.jmxremote.authenticate=false"
62 | @rem set HBASE_MASTER_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10101"
63 | @rem set HBASE_REGIONSERVER_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10102"
64 | @rem set HBASE_THRIFT_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10103"
65 | @rem set HBASE_ZOOKEEPER_OPTS=%HBASE_JMX_BASE% -Dcom.sun.management.jmxremote.port=10104"
66 | 
67 | @rem File naming hosts on which HRegionServers will run.  $HBASE_HOME/conf/regionservers by default.
68 | @rem set HBASE_REGIONSERVERS=%HBASE_HOME%\conf\regionservers
69 | 
70 | @rem Where log files are stored.  $HBASE_HOME/logs by default.
71 | @rem set HBASE_LOG_DIR=%HBASE_HOME%\logs
72 | 
73 | @rem A string representing this instance of hbase. $USER by default.
74 | @rem set HBASE_IDENT_STRING=%USERNAME%
75 | 
76 | @rem Seconds to sleep between slave commands.  Unset by default.  This
77 | @rem can be useful in large clusters, where, e.g., slave rsyncs can
78 | @rem otherwise arrive faster than the master can service them.
79 | @rem set HBASE_SLAVE_SLEEP=0.1
80 | 
81 | @rem Tell HBase whether it should manage it's own instance of Zookeeper or not.
82 | @rem set HBASE_MANAGES_ZK=true
83 | 


--------------------------------------------------------------------------------
/roles/hbase/files/hbase-policy.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 | /**
 5 |  * Licensed to the Apache Software Foundation (ASF) under one
 6 |  * or more contributor license agreements.  See the NOTICE file
 7 |  * distributed with this work for additional information
 8 |  * regarding copyright ownership.  The ASF licenses this file
 9 |  * to you under the Apache License, Version 2.0 (the
10 |  * "License"); you may not use this file except in compliance
11 |  * with the License.  You may obtain a copy of the License at
12 |  *
13 |  *     http://www.apache.org/licenses/LICENSE-2.0
14 |  *
15 |  * Unless required by applicable law or agreed to in writing, software
16 |  * distributed under the License is distributed on an "AS IS" BASIS,
17 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 |  * See the License for the specific language governing permissions and
19 |  * limitations under the License.
20 |  */
21 | -->
22 | 
23 | <configuration>
24 |   <property>
25 |     <name>security.client.protocol.acl</name>
26 |     <value>*</value>
27 |     <description>ACL for ClientProtocol and AdminProtocol implementations (ie. 
28 |     clients talking to HRegionServers)
29 |     The ACL is a comma-separated list of user and group names. The user and 
30 |     group list is separated by a blank. For e.g. "alice,bob users,wheel". 
31 |     A special value of "*" means all users are allowed.</description>
32 |   </property>
33 | 
34 |   <property>
35 |     <name>security.admin.protocol.acl</name>
36 |     <value>*</value>
37 |     <description>ACL for HMasterInterface protocol implementation (ie. 
38 |     clients talking to HMaster for admin operations).
39 |     The ACL is a comma-separated list of user and group names. The user and 
40 |     group list is separated by a blank. For e.g. "alice,bob users,wheel". 
41 |     A special value of "*" means all users are allowed.</description>
42 |   </property>
43 | 
44 |   <property>
45 |     <name>security.masterregion.protocol.acl</name>
46 |     <value>*</value>
47 |     <description>ACL for HMasterRegionInterface protocol implementations
48 |     (for HRegionServers communicating with HMaster)
49 |     The ACL is a comma-separated list of user and group names. The user and 
50 |     group list is separated by a blank. For e.g. "alice,bob users,wheel". 
51 |     A special value of "*" means all users are allowed.</description>
52 |   </property>
53 | </configuration>
54 | 


--------------------------------------------------------------------------------
/roles/hbase/files/log4j.properties:
--------------------------------------------------------------------------------
 1 | # Define some default values that can be overridden by system properties
 2 | hbase.root.logger=INFO,console
 3 | hbase.security.logger=INFO,console
 4 | hbase.log.dir=.
 5 | hbase.log.file=hbase.log
 6 | 
 7 | # Define the root logger to the system property "hbase.root.logger".
 8 | log4j.rootLogger=${hbase.root.logger}
 9 | 
10 | # Logging Threshold
11 | log4j.threshold=ALL
12 | 
13 | #
14 | # Daily Rolling File Appender
15 | #
16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file}
18 | 
19 | # Rollver at midnight
20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
21 | 
22 | # 30-day backup
23 | #log4j.appender.DRFA.MaxBackupIndex=30
24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
25 | 
26 | # Pattern format: Date LogLevel LoggerName LogMessage
27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
28 | 
29 | # Rolling File Appender properties
30 | hbase.log.maxfilesize=256MB
31 | hbase.log.maxbackupindex=20
32 | 
33 | # Rolling File Appender
34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender
35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file}
36 | 
37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize}
38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex}
39 | 
40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
42 | 
43 | #
44 | # Security audit appender
45 | #
46 | hbase.security.log.file=SecurityAuth.audit
47 | hbase.security.log.maxfilesize=256MB
48 | hbase.security.log.maxbackupindex=20
49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file}
51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize}
52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex}
53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
55 | log4j.category.SecurityLogger=${hbase.security.logger}
56 | log4j.additivity.SecurityLogger=false
57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE
58 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.visibility.VisibilityController=TRACE
59 | 
60 | #
61 | # Null Appender
62 | #
63 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
64 | 
65 | #
66 | # console
67 | # Add "console" to rootlogger above if you want to use this 
68 | #
69 | log4j.appender.console=org.apache.log4j.ConsoleAppender
70 | log4j.appender.console.target=System.err
71 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
72 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
73 | 
74 | # Custom Logging levels
75 | 
76 | log4j.logger.org.apache.zookeeper=INFO
77 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
78 | log4j.logger.org.apache.hadoop.hbase=INFO
79 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug.
80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO
81 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO
82 | #log4j.logger.org.apache.hadoop.dfs=DEBUG
83 | # Set this class to log INFO only otherwise its OTT
84 | # Enable this to get detailed connection error/retry logging.
85 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE
86 | 
87 | 
88 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output)
89 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG
90 | 
91 | # Uncomment the below if you want to remove logging of client region caching'
92 | # and scan of hbase:meta messages
93 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO
94 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO
95 | 


--------------------------------------------------------------------------------
/roles/hbase/tasks/hbase-master.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - hbase-master
 6 |     - hbase-thrift
 7 |     - hbase-rest
 8 | 
 9 | - name: create configuration directory
10 |   file: path=/etc/hbase/conf.{{ cluster_name }} state=directory
11 | 
12 | - name: setup alternatives link
13 |   alternatives: name=hbase-conf link=/etc/hbase/conf path=/etc/hbase/conf.{{ cluster_name }}
14 | 
15 | - name: install default configurations
16 |   tags: config
17 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
18 |   with_items:
19 |     - hbase
20 | 
21 | - name: install template configurations
22 |   tags: config
23 |   template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }}
24 |   with_items:
25 |     - hbase-site.xml
26 |     - regionservers
27 | 
28 | - name: install files configurations
29 |   tags: config
30 |   copy: src={{ item }} dest=/etc/hbase/conf/{{ item }}
31 |   with_items:
32 |     - hadoop-metrics2-hbase.properties
33 |     - hbase-env.cmd
34 |     - hbase-env.sh
35 |     - hbase-policy.xml
36 |     - log4j.properties
37 | 
38 | - name: create hdfs directories
39 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
40 |   with_items:
41 |     - -mkdir -p /hbase
42 |     - -chown hbase /hbase
43 | 
44 | - name: start services
45 |   tags: service
46 |   service: name={{ item }} state=restarted enabled=yes
47 |   with_items:
48 |     - hbase-master
49 |     - hbase-thrift
50 |     - hbase-rest
51 | 


--------------------------------------------------------------------------------
/roles/hbase/tasks/main.yaml:
--------------------------------------------------------------------------------
1 | - include: hbase-master.yaml
2 |   when: deploy == "hbase-master"
3 | 
4 | - include: regionserver.yaml
5 |   when: deploy == "regionserver" and groups['hbasemaster']|count == 1
6 | 


--------------------------------------------------------------------------------
/roles/hbase/tasks/regionserver.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - hbase-regionserver
 6 | 
 7 | - name: create configuration directory
 8 |   file: path=/etc/hbase/conf.{{ cluster_name }} state=directory
 9 | 
10 | - name: setup alternatives link
11 |   alternatives: name=hbase-conf link=/etc/hbase/conf path=/etc/hbase/conf.{{ cluster_name }}
12 | 
13 | - name: install template configurations
14 |   tags: config
15 |   template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }}
16 |   with_items:
17 |     - hbase-site.xml
18 |     - regionservers
19 | 
20 | - name: install files configurations
21 |   tags: config
22 |   copy: src={{ item }} dest=/etc/hbase/conf/{{ item }}
23 |   with_items:
24 |     - hadoop-metrics2-hbase.properties
25 |     - hbase-env.cmd
26 |     - hbase-env.sh
27 |     - hbase-policy.xml
28 |     - log4j.properties
29 | 
30 | - name: start services
31 |   tags: service
32 |   service: name={{ item }} state=restarted enabled=yes
33 |   with_items:
34 |     - hbase-regionserver
35 | 
36 | - name: test
37 |   tags: test
38 |   shell: echo -e "create 'ansible_test_table', 'id'\ndisable 'ansible_test_table'\n drop 'ansible_test_table'" | hbase shell
39 |   run_once: true
40 | 


--------------------------------------------------------------------------------
/roles/hbase/templates/hbase-site.xml.j2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | 
 4 | <configuration>
 5 | 
 6 | <property>
 7 |   <name>hbase.cluster.distributed</name>
 8 |   <value>true</value>
 9 | </property>
10 | 
11 | <property>
12 |   <name>hbase.rootdir</name>
13 | {% if groups['namenodes']|count > 1 %}
14 |   <value>hdfs://{{ cluster_name }}/hbase</value>
15 | {% else %}
16 |   <value>hdfs://{{ groups['namenodes'][0] }}:8020/hbase</value>
17 | {% endif %}
18 | </property>
19 | 
20 | <property>
21 |   <name>hbase.zookeeper.quorum</name>
22 |   <value>{% for item in groups['zookeepernodes'] -%}
23 |          {{ item }}:2181{% if not loop.last %},{% endif %}
24 |          {%- endfor %}</value>
25 | </property>
26 | {% if groups['dashboard']|count > 0 %}
27 | 
28 | <property>
29 |   <name>hbase.http.filter.xframeoptions.mode</name>
30 |   <value>ALLOWALL</value>
31 | </property>
32 | {% endif %}
33 | 
34 | </configuration>
35 | 


--------------------------------------------------------------------------------
/roles/hbase/templates/regionservers.j2:
--------------------------------------------------------------------------------
1 | {% for item in groups['datanodes'] %}
2 | {{ item }}
3 | {% endfor %}
4 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/files/default/hadoop-0.20-mapreduce:
--------------------------------------------------------------------------------
1 | ../../../hadoop/files/default/hadoop-0.20-mapreduce


--------------------------------------------------------------------------------
/roles/hivemetastore/files/default/hive-metastore:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # The port for Hive metastore daemon to listen to.
17 | # Unfortunatelly, there is no way to specify the interfaces 
18 | # to which the daemon binds.
19 | #
20 | #PORT=
21 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/files/default/hive-server2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # The port for Hive server2 daemon to listen to.
17 | # Unfortunatelly, there is no way to specify the interfaces 
18 | # to which the daemon binds.
19 | #
20 | #PORT=
21 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/files/hive-exec-log4j.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Define some default values that can be overridden by system properties
18 | hive.log.threshold=ALL
19 | hive.root.logger=INFO,FA
20 | hive.log.dir=${java.io.tmpdir}/${user.name}
21 | hive.query.id=hadoop
22 | hive.log.file=${hive.query.id}.log
23 | 
24 | # Define the root logger to the system property "hadoop.root.logger".
25 | log4j.rootLogger=${hive.root.logger}, EventCounter
26 | 
27 | # Logging Threshold
28 | log4j.threshhold=${hive.log.threshold}
29 | 
30 | #
31 | # File Appender
32 | #
33 | 
34 | log4j.appender.FA=org.apache.log4j.FileAppender
35 | log4j.appender.FA.File=${hive.log.dir}/${hive.log.file}
36 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout
37 | 
38 | # Pattern format: Date LogLevel LoggerName LogMessage
39 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
40 | # Debugging Pattern format
41 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
42 | 
43 | 
44 | #
45 | # console
46 | # Add "console" to rootlogger above if you want to use this
47 | #
48 | 
49 | log4j.appender.console=org.apache.log4j.ConsoleAppender
50 | log4j.appender.console.target=System.err
51 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
52 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t] %p %c{2}: %m%n
53 | 
54 | #custom logging levels
55 | #log4j.logger.xxx=DEBUG
56 | 
57 | #
58 | # Event Counter Appender
59 | # Sends counts of logging messages at different severity levels to Hadoop Metrics.
60 | #
61 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter
62 | 
63 | 
64 | log4j.category.DataNucleus=ERROR,FA
65 | log4j.category.Datastore=ERROR,FA
66 | log4j.category.Datastore.Schema=ERROR,FA
67 | log4j.category.JPOX.Datastore=ERROR,FA
68 | log4j.category.JPOX.Plugin=ERROR,FA
69 | log4j.category.JPOX.MetaData=ERROR,FA
70 | log4j.category.JPOX.Query=ERROR,FA
71 | log4j.category.JPOX.General=ERROR,FA
72 | log4j.category.JPOX.Enhancer=ERROR,FA
73 | 
74 | 
75 | # Silence useless ZK logs
76 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,FA
77 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,FA
78 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/files/hive-log4j.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Define some default values that can be overridden by system properties
18 | hive.log.threshold=ALL
19 | hive.root.logger=WARN,DRFA
20 | hive.log.dir=${java.io.tmpdir}/${user.name}
21 | hive.log.file=hive.log
22 | 
23 | # Define the root logger to the system property "hadoop.root.logger".
24 | log4j.rootLogger=${hive.root.logger}, EventCounter
25 | 
26 | # Logging Threshold
27 | log4j.threshold=${hive.log.threshold}
28 | 
29 | #
30 | # Daily Rolling File Appender
31 | #
32 | # Use the PidDailyerRollingFileAppend class instead if you want to use separate log files
33 | # for different CLI session.
34 | #
35 | # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender
36 | 
37 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
38 | 
39 | log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file}
40 | 
41 | # Rollver at midnight
42 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
43 | 
44 | # 30-day backup
45 | #log4j.appender.DRFA.MaxBackupIndex=30
46 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
47 | 
48 | # Pattern format: Date LogLevel LoggerName LogMessage
49 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
50 | # Debugging Pattern format
51 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p [%t]: %c{2} (%F:%M(%L)) - %m%n
52 | 
53 | 
54 | #
55 | # console
56 | # Add "console" to rootlogger above if you want to use this
57 | #
58 | 
59 | log4j.appender.console=org.apache.log4j.ConsoleAppender
60 | log4j.appender.console.target=System.err
61 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
62 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
63 | log4j.appender.console.encoding=UTF-8
64 | 
65 | #custom logging levels
66 | #log4j.logger.xxx=DEBUG
67 | 
68 | #
69 | # Event Counter Appender
70 | # Sends counts of logging messages at different severity levels to Hadoop Metrics.
71 | #
72 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter
73 | 
74 | 
75 | log4j.category.DataNucleus=ERROR,DRFA
76 | log4j.category.Datastore=ERROR,DRFA
77 | log4j.category.Datastore.Schema=ERROR,DRFA
78 | log4j.category.JPOX.Datastore=ERROR,DRFA
79 | log4j.category.JPOX.Plugin=ERROR,DRFA
80 | log4j.category.JPOX.MetaData=ERROR,DRFA
81 | log4j.category.JPOX.Query=ERROR,DRFA
82 | log4j.category.JPOX.General=ERROR,DRFA
83 | log4j.category.JPOX.Enhancer=ERROR,DRFA
84 | 
85 | 
86 | # Silence useless ZK logs
87 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA
88 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA
89 | 
90 | #custom logging levels
91 | log4j.logger.org.apache.hadoop.hive.ql.parse.SemanticAnalyzer=INFO
92 | log4j.logger.org.apache.hadoop.hive.ql.Driver=INFO
93 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.ExecDriver=INFO
94 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapRedTask=INFO
95 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask=INFO
96 | log4j.logger.org.apache.hadoop.hive.ql.exec.Task=INFO
97 | log4j.logger.org.apache.hadoop.hive.ql.session.SessionState=INFO
98 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/files/hive.limits.conf:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | hive   - nofile 32768
17 | hive   - nproc  65536
18 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/tasks/hive-client.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: package
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |     - hive
 6 | 
 7 | - name: create configuration directory
 8 |   tags: config
 9 |   file: path={{ etc_folder }}/hive state=directory
10 | 
11 | - name: setup alternatives link
12 |   tags: config
13 |   alternatives: name=hive-conf link=/etc/hive/conf path={{ etc_folder }}/hive
14 | 
15 | - name: set limits for hive user
16 |   tags: config
17 |   copy: src=hive.limits.conf dest=/etc/security/limits.d/hive.conf
18 | 
19 | - name: install template configurations
20 |   tags: config
21 |   template: src={{ item }}.j2 dest={{ etc_folder }}/hive/{{ item }}
22 |   with_items:
23 |     - hive-site.xml
24 |     - hive-env.sh
25 | 
26 | - name: install files configurations
27 |   tags: config
28 |   copy: src={{ item }} dest={{ etc_folder }}/hive/{{ item }}
29 |   with_items:
30 |     - hive-exec-log4j.properties
31 |     - hive-log4j.properties
32 | 
33 | - name: install default configurations
34 |   tags: config
35 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
36 |   with_items:
37 |     - hadoop-0.20-mapreduce
38 | 
39 | - name: add group for hive user
40 |   user: name=hive groups=hive,hadoop
41 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/tasks/hive-server.yaml:
--------------------------------------------------------------------------------
  1 | - name: install packages
  2 |   tags: package
  3 |   yum:  name={{ item }} state=latest
  4 |   with_items:
  5 |     - hive-metastore
  6 |     - hive-server2
  7 |     - postgresql{{ postgres_version|default('')|replace('.', '') }}
  8 |     - postgresql{{ postgres_version|default('')|replace('.', '') }}-jdbc
  9 | 
 10 | - name: create configuration directory
 11 |   tags: config
 12 |   file: path={{ etc_folder }}/hive state=directory
 13 | 
 14 | - name: setup alternatives link
 15 |   tags: config
 16 |   alternatives: name=hive-conf link=/etc/hive/conf path={{ etc_folder }}/hive
 17 | 
 18 | - name: set limits for hive user
 19 |   tags: config
 20 |   copy: src=hive.limits.conf dest=/etc/security/limits.d/hive.conf
 21 | 
 22 | - name: install template configurations
 23 |   tags: config
 24 |   template: src={{ item }}.j2 dest={{ etc_folder }}/hive/{{ item }}
 25 |   with_items:
 26 |     - hive-site.xml
 27 |     - hive-env.sh
 28 | 
 29 | - name: install files configurations
 30 |   tags: config
 31 |   copy: src={{ item }} dest={{ etc_folder }}/hive/{{ item }}
 32 |   with_items:
 33 |     - hive-exec-log4j.properties
 34 |     - hive-log4j.properties
 35 | 
 36 | - name: install default configurations
 37 |   tags: config
 38 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
 39 |   with_items:
 40 |     - hadoop-0.20-mapreduce
 41 |     - hive-server2
 42 |     - hive-metastore
 43 | 
 44 | - name: copy hive-site.xml to hdfs
 45 |   tags: config
 46 |   command: sudo -u hdfs hdfs dfs {{ item }}
 47 |   with_items:
 48 |     - -mkdir -p /etc/hive/conf
 49 |     - -copyFromLocal -f {{ etc_folder }}/hive/hive-site.xml /etc/hive/conf
 50 | 
 51 | - name: create warehouse dir
 52 |   tags: config
 53 |   command: sudo -u hdfs hdfs dfs {{ item }}
 54 |   with_items:
 55 |     - -mkdir -p /user/hive/warehouse
 56 |     - -chmod 1777 /user/hive
 57 |     - -chmod 1777 /user/hive/warehouse
 58 | 
 59 | - name:  setup hive log folders
 60 |   tags: config
 61 |   command: sed -i -e 's|^\(\s\+\)LOG_FILE=.*$|\1LOG_FILE={{ log_folder }}/hive/${DAEMON}.out|g' /etc/init.d/{{ item }}
 62 |   with_items:
 63 |     - hive-server2
 64 |     - hive-metastore
 65 | 
 66 | - name: create log folders
 67 |   tags: config
 68 |   file: path={{ log_folder }}/hive state=directory owner=hive mode=0755
 69 | 
 70 | - name: create link to jdbc driver
 71 |   tags: config
 72 |   file: src=/usr/share/java/postgresql{{ postgres_version|default('')|replace('.', '') }}-jdbc.jar dest=/usr/lib/hive/lib/postgresql-jdbc.jar state=link force=yes
 73 | 
 74 | - name: generate sql
 75 |   tags: init
 76 |   template: src="hive.sql.j2" dest=/tmp/ahive.sql
 77 | 
 78 | - name: install .pgpass
 79 |   template: src=".pgpass.j2" dest=/root/.pgpass mode=0600
 80 | 
 81 | - name: create metastore database
 82 |   tags: init
 83 |   command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/ahive.sql chdir=/usr/lib/hive/scripts/metastore/upgrade/postgres
 84 |   when: destroy_data
 85 | 
 86 | - name: remove sql and .pgpass files
 87 |   command: rm -f /tmp/ahive.sql /root/.pgpass
 88 | 
 89 | - name: add hive user to hadoop group
 90 |   user: name=hive groups=hive,hadoop
 91 | 
 92 | - name: start services
 93 |   tags: service
 94 |   service: name={{ item }} state=restarted enabled=yes
 95 |   with_items:
 96 |     - hive-metastore
 97 |     - hive-server2
 98 | 
 99 | - name: wait
100 |   command: sleep 7
101 | 
102 | - name: test hive
103 |   tags: test
104 |   command: sudo -Hu hdfs beeline -u jdbc:hive2://{{ ansible_hostname }}:10000 -nhdfs -p password -d org.apache.hive.jdbc.HiveDriver -e 'create table ansible_test_table ( id int )  location "/tmp/ansible_test_table"; drop table ansible_test_table;'
105 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/tasks/main.yaml:
--------------------------------------------------------------------------------
1 | - include: hive-server.yaml
2 |   when: deploy == "hive-server"
3 | 
4 | - include: hive-client.yaml
5 |   when: deploy == "hive-client" and groups['hivemetastore']|count == 1
6 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/templates/.pgpass.j2:
--------------------------------------------------------------------------------
1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }}
2 | {{ groups['postgresql'][0] }}:5432:metastore:hiveuser:{{ hiveuser_password }}
3 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/templates/hive-env.sh.j2:
--------------------------------------------------------------------------------
 1 | # Hive Client memory usage can be an issue if a large number of clients
 2 | # are running at the same time. The flags below have been useful in 
 3 | # reducing memory usage:
 4 | #
 5 | # if [ "$SERVICE" = "cli" ]; then
 6 | #   if [ -z "$DEBUG" ]; then
 7 | #     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
 8 | #   else
 9 | #     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
10 | #   fi
11 | # fi
12 | 
13 | case $SERVICE in
14 |   cli)
15 |     export HADOOP_HEAPSIZE={{ hivecli_heapsize }}
16 |     ;;
17 |   hiveserver2)
18 |     export HADOOP_HEAPSIZE={{ hiveserver2_heapsize }}
19 |     ;;
20 |   metastore)
21 |     export HADOOP_HEAPSIZE={{ hivemetastore_heapsize }}
22 |     ;;
23 | esac
24 | 
25 | # The heap size of the jvm stared by hive shell script can be controlled via:
26 | #
27 | # export HADOOP_HEAPSIZE=1024
28 | #
29 | # Larger heap size may be required when running queries over large number of files or partitions. 
30 | # By default hive shell scripts use a heap size of 256 (MB).  Larger heap size would also be 
31 | # appropriate for hive server (hwi etc).
32 | 
33 | 
34 | # Set HADOOP_HOME to point to a specific hadoop install directory
35 | # HADOOP_HOME=${bin}/../../hadoop
36 | 
37 | # Hive Configuration Directory can be controlled by:
38 | # export HIVE_CONF_DIR=
39 | 
40 | # Folder containing extra ibraries required for hive compilation/execution can be controlled by:
41 | # export HIVE_AUX_JARS_PATH=
42 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/templates/hive-site.xml.j2:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0"?>
  2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3 | 
  4 | <configuration>
  5 | 
  6 |   <property>
  7 |     <name>javax.jdo.option.ConnectionURL</name>
  8 |     <value>jdbc:postgresql://{{ groups['postgresql'][0] }}/metastore</value>
  9 |     <description>JDBC connect string for a JDBC metastore</description>
 10 |   </property>
 11 | 
 12 |   <property>
 13 |     <name>javax.jdo.option.ConnectionDriverName</name>
 14 |     <value>org.postgresql.Driver</value>
 15 |     <description>Driver class name for a JDBC metastore</description>
 16 |   </property>
 17 | 
 18 |   <property>
 19 |     <name>javax.jdo.option.ConnectionUserName</name>
 20 |     <value>hiveuser</value>
 21 |   </property>
 22 | 
 23 |   <property>
 24 |     <name>javax.jdo.option.ConnectionPassword</name>
 25 |     <value>{{ hiveuser_password }}</value>
 26 |   </property>
 27 | 
 28 |   <property>
 29 |     <name>datanucleus.autoCreateSchema</name>
 30 |     <value>false</value>
 31 |   </property>
 32 | 
 33 |   <property>
 34 |     <name>datanucleus.fixedDatastore</name>
 35 |     <value>true</value>
 36 |   </property>
 37 | 
 38 |   <property>
 39 |     <name>datanucleus.autoStartMechanism</name>
 40 |     <value>SchemaTable</value>
 41 |   </property>
 42 | 
 43 |   <property>
 44 |     <name>hive.metastore.uris</name>
 45 |     <value>thrift://{{ groups['hivemetastore'][0] }}:9083</value>
 46 |     <description>IP address (or fully-qualified domain name) and port of the metastore host</description>
 47 |   </property>
 48 | 
 49 |   <property>
 50 |     <name>hive.metastore.schema.verification</name>
 51 |     <value>true</value>
 52 |   </property>
 53 | 
 54 | {% if groups['zookeepernodes']|count >0 %}
 55 |   <property>
 56 |     <name>hive.support.concurrency</name>
 57 |     <value>true</value>
 58 |     <description>Enable Hive's Table Lock Manager Service</description>
 59 |   </property>
 60 | 
 61 |   <property>
 62 |     <name>hive.zookeeper.quorum</name>
 63 |     <description>Zookeeper quorum used by Hive's Table Lock Manager</description>
 64 |     <value>{% for item in groups['zookeepernodes'] -%}
 65 |            {{ item }}{% if not loop.last %},{% endif %}
 66 |            {%- endfor %}</value>
 67 |   </property>
 68 | {% else %}
 69 |   <property>
 70 |     <name>hive.support.concurrency</name>
 71 |     <value>false</value>
 72 |     <description>Enable Hive's Table Lock Manager Service</description>
 73 |   </property>
 74 | {% endif %}
 75 | 
 76 |   <property>
 77 |     <name>hive.server2.thrift.port</name>
 78 |     <value>10000</value>
 79 |     <description>TCP port number to listen on, default 10000</description>
 80 |   </property>
 81 | 
 82 |   <property>
 83 |     <name>hive.metastore.client.socket.timeout</name>
 84 |     <value>3600</value>
 85 |     <description>MetaStore Client socket timeout in seconds</description>
 86 |   </property>
 87 | 
 88 |   <property>
 89 |     <name>hive.exec.compress.intermediate</name>
 90 |     <value>true</value>
 91 |   </property>
 92 | 
 93 |   <property>
 94 |     <name>hive.exec.compress.output</name>
 95 |     <value>true</value>
 96 |   </property>
 97 | 
 98 |   <property>
 99 |     <name>mapred.output.compression.codec</name>
100 |     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
101 |   </property>
102 | 
103 |   <property>
104 |     <name>mapred.map.output.compression.codec</name>
105 |     <value>org.apache.hadoop.io.compress.SnappyCodec</value>
106 |   </property>
107 | 
108 |   <property>
109 |     <name>hive.exec.reducers.bytes.per.reducer</name>
110 |     <value>200000000</value>
111 |   </property>
112 | 
113 |   <property>
114 |     <name>hive.exec.dynamic.partition</name>
115 |     <value>true</value>
116 |   </property>
117 | 
118 |   <property>
119 |     <name>hive.exec.dynamic.partition.mode</name>
120 |     <value>nonstrict</value>
121 |   </property>
122 | 
123 |   <property>
124 |     <name>parquet.compression</name>
125 |     <value>SNAPPY</value>
126 |   </property>
127 | 
128 |   <property>
129 |     <name>mapred.output.compression.type</name>
130 |     <value>BLOCK</value>
131 |   </property>
132 | 
133 |   <property>
134 |     <name>mapred.reduce.tasks</name>
135 |     <value>-1</value>
136 |   </property>
137 | 
138 |   <property>
139 |     <name>hive.mapred.reduce.tasks.speculative.execution</name>
140 |     <value>false</value>
141 |   </property>
142 | 
143 |   <property>
144 |     <name>hive.optimize.sort.dynamic.partition</name>
145 |     <value>false</value>
146 |   </property>
147 | 
148 | </configuration>
149 | 


--------------------------------------------------------------------------------
/roles/hivemetastore/templates/hive.sql.j2:
--------------------------------------------------------------------------------
 1 | CREATE USER hiveuser WITH PASSWORD '{{ hiveuser_password }}';
 2 | CREATE DATABASE metastore;
 3 | \c metastore;
 4 | \i /usr/lib/hive/scripts/metastore/upgrade/postgres/hive-schema-1.1.0.postgres.sql
 5 | \pset tuples_only on
 6 | \o /tmp/grant-privs
 7 |   SELECT 'GRANT SELECT,INSERT,UPDATE,DELETE ON "'  || schemaname || '". "' ||tablename ||'" TO hiveuser ;'
 8 |   FROM pg_tables
 9 |   WHERE tableowner = CURRENT_USER and schemaname = 'public';
10 | \o
11 | \pset tuples_only off
12 | \i /tmp/grant-privs
13 | 


--------------------------------------------------------------------------------
/roles/hue/files/default/hadoop-httpfs:
--------------------------------------------------------------------------------
1 | ../../../hadoop/files/default/hadoop-httpfs


--------------------------------------------------------------------------------
/roles/hue/files/log.conf:
--------------------------------------------------------------------------------
 1 | ##########################################
 2 | # To change the log leve, edit the `level' field.
 3 | # Choices are: DEBUG, INFO, WARNING, ERROR, CRITICAL
 4 | #
 5 | # The logrotation limit is set at 5MB per file for a total of 5 copies.
 6 | # I.e. 25MB for each set of logs.
 7 | ##########################################
 8 | [handler_logfile]
 9 | level=INFO
10 | class=handlers.RotatingFileHandler
11 | formatter=default
12 | args=('%LOG_DIR%/%PROC_NAME%.log', 'a', 5000000, 5)
13 | 
14 | ##########################################
15 | # Please do not change the settings below
16 | ##########################################
17 | 
18 | [logger_root]
19 | handlers=logfile,errorlog
20 | 
21 | [logger_access]
22 | handlers=accesslog
23 | qualname=access
24 | 
25 | [logger_django_auth_ldap]
26 | handlers=accesslog
27 | qualname=django_auth_ldap
28 | 
29 | # The logrotation limit is set at 5MB per file for a total of 5 copies.
30 | # I.e. 25MB for each set of logs.
31 | [handler_accesslog]
32 | class=handlers.RotatingFileHandler
33 | level=DEBUG
34 | propagate=True
35 | formatter=access
36 | args=('%LOG_DIR%/access.log', 'a', 5000000, 5)
37 | 
38 | # All errors go into error.log
39 | [handler_errorlog]
40 | class=handlers.RotatingFileHandler
41 | level=ERROR
42 | formatter=default
43 | args=('%LOG_DIR%/error.log', 'a', 5000000, 5)
44 | 
45 | [formatter_default]
46 | class=desktop.log.formatter.Formatter
47 | format=[%(asctime)s] %(module)-12s %(levelname)-8s %(message)s
48 | datefmt=%d/%b/%Y %H:%M:%S %z
49 | 
50 | [formatter_access]
51 | class=desktop.log.formatter.Formatter
52 | format=[%(asctime)s] %(levelname)-8s %(message)s
53 | datefmt=%d/%b/%Y %H:%M:%S %z
54 | 
55 | [loggers]
56 | keys=root,access,django_auth_ldap
57 | 
58 | [handlers]
59 | keys=logfile,accesslog,errorlog
60 | 
61 | [formatters]
62 | keys=default,access
63 | 


--------------------------------------------------------------------------------
/roles/hue/files/log4j.properties:
--------------------------------------------------------------------------------
 1 | # Define some default values that can be overridden by system properties
 2 | hadoop.log.dir=.
 3 | hadoop.log.file=hadoop.log
 4 | 
 5 | # Define the root logger to the system property "hadoop.root.logger".
 6 | log4j.rootLogger=INFO,console, EventCounter
 7 | 
 8 | # Logging Threshold
 9 | log4j.threshhold=ALL
10 | 
11 | #
12 | # Daily Rolling File Appender
13 | #
14 | 
15 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
16 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
17 | 
18 | # Rollver at midnight
19 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
20 | 
21 | # 30-day backup
22 | #log4j.appender.DRFA.MaxBackupIndex=30
23 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
24 | 
25 | # Pattern format: Date LogLevel LoggerName LogMessage
26 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
27 | # Debugging Pattern format
28 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
29 | 
30 | 
31 | #
32 | # console
33 | # Add "console" to rootlogger above if you want to use this
34 | #
35 | 
36 | log4j.appender.console=org.apache.log4j.ConsoleAppender
37 | log4j.appender.console.target=System.err
38 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
39 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
40 | 
41 | #
42 | # Rolling File Appender
43 | #
44 | 
45 | #log4j.appender.RFA=org.apache.log4j.RollingFileAppender
46 | #log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
47 | 
48 | # Logfile size and and 30-day backups
49 | #log4j.appender.RFA.MaxFileSize=1MB
50 | #log4j.appender.RFA.MaxBackupIndex=30
51 | 
52 | #log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
53 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
54 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
55 | 
56 | #
57 | # Event Counter Appender
58 | # Sends counts of logging messages at different severity levels to Hadoop
59 | # Metrics.
60 | #
61 | log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
62 | 


--------------------------------------------------------------------------------
/roles/hue/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |   - hue
 6 |   - hadoop-httpfs
 7 |   - postgresql
 8 |   - python-psycopg2
 9 | 
10 | - name: create configuration directory
11 |   tags: config
12 |   file: path=/etc/hue/conf.{{cluster_name}} state=directory
13 | 
14 | - name: setup alternatives link
15 |   tags: config
16 |   alternatives: name=hue-conf link=/etc/hue/conf path=/etc/hue/conf.{{cluster_name}}
17 | 
18 | - name: install template configurations
19 |   tags: config
20 |   template: src=hue.ini.j2 dest=/etc/hue/conf/hue.ini
21 | 
22 | - name: check certificate file exists
23 |   local_action: stat path={{ inventory_dir }}/workdir/hue.pem
24 |   register: hue_cert
25 |   run_once: true
26 | 
27 | - name: generate certificate
28 |   tags: init
29 |   local_action: shell {{ item }} chdir={{ inventory_dir }}/workdir/
30 |   with_items:
31 |     - openssl req -nodes -newkey rsa:2048 -keyout hue.key -out hue.csr -subj "/O=Hadoop/OU=hue/CN=hue/emailAddress=adm@it.xx"
32 |     - openssl x509 -req -days 3650 -in hue.csr -signkey hue.key -out hue.crt
33 |     - cat hue.crt hue.key > hue.pem
34 |   when: hue_cert.stat.isreg is not defined
35 |   run_once: true
36 | 
37 | - name: install certificate
38 |   tags: config
39 |   copy: src={{ inventory_dir }}/workdir/hue.pem dest=/etc/hue/conf/hue.pem
40 | 
41 | - name: install files configurations
42 |   tags: config
43 |   copy: src={{ item }} dest=/etc/hue/conf/{{ item }}
44 |   with_items:
45 |     - log4j.properties
46 |     - log.conf
47 | 
48 | - name: install default configurations
49 |   tags: config
50 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
51 |   with_items:
52 |     - hadoop-httpfs
53 | 
54 | - name: create warehouse dir and home for admin user
55 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
56 |   with_items:
57 |     - -mkdir -p /user/admin
58 |     - -chown admin /user/admin
59 |   run_once: true
60 | 
61 | - name: generate sql
62 |   template: src="hue.sql.j2" dest=/tmp/hue.sql
63 | 
64 | - name: install .pgpass
65 |   template: src=".pgpass.j2" dest=/root/.pgpass mode=0600
66 | 
67 | - name: create database
68 |   command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/hue.sql
69 |   when: destroy_data
70 |   run_once: true
71 | 
72 | - name: remove sql and .pgpass files
73 |   command: rm -f /tmp/hue.sql /root/.pgpass
74 | 
75 | - name: initialize database
76 |   command: "{{ item }} chdir=/tmp"
77 |   with_items:
78 |     - mkdir -p logs
79 |     - /usr/lib/hue/build/env/bin/hue syncdb --noinput
80 |     - /usr/lib/hue/build/env/bin/hue migrate
81 |     - rm -rf logs
82 |   when: destroy_data
83 |   run_once: true
84 | 
85 | - name: fix /etc/init.d/hue for systemd
86 |   tags: patch
87 |   lineinfile: 'dest=/etc/init.d/hue regexp="^# pidfile:" line="# pidfile: /var/run/hue/supervisor.pid"'
88 | 
89 | - name: reload systemd
90 |   command: systemctl daemon-reload
91 |   when: ansible_os_family == "RedHat" and {{ ansible_distribution_major_version }} >= 7
92 | 
93 | - name: start services
94 |   tags: service
95 |   service: name={{ item }} state=restarted enabled=yes
96 |   with_items:
97 |     - hue
98 |     - hadoop-httpfs
99 | 


--------------------------------------------------------------------------------
/roles/hue/templates/.pgpass.j2:
--------------------------------------------------------------------------------
1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }}
2 | 


--------------------------------------------------------------------------------
/roles/hue/templates/hue.sql.j2:
--------------------------------------------------------------------------------
1 | DROP DATABASE IF EXISTS "hue";
2 | create database hue;
3 | \c hue;
4 | create user hue with password '{{ hue_password }}';
5 | grant all privileges on database hue to hue;
6 | 


--------------------------------------------------------------------------------
/roles/impala/tasks/impala-server.yaml:
--------------------------------------------------------------------------------
 1 | - name: install impala server package
 2 |   tags: packages
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |     - impala-server
 6 |     - impala-shell
 7 | 
 8 | - name: fix impala-shell
 9 |   tags: patch
10 |   lineinfile: dest=/usr/bin/impala-shell regexp="^PYTHON_EGG_CACHE=" line="export PYTHON_EGG_CACHE=/tmp/impala-shell-python-egg-cache-`whoami`"
11 | 
12 | - name: create configuration directory
13 |   tags: config
14 |   file: path={{ etc_folder }}/impala state=directory
15 | 
16 | - name: setup alternatives link
17 |   tags: config
18 |   alternatives: name=impala-conf link=/etc/impala/conf path={{ etc_folder }}/impala
19 | 
20 | - name: install template configurations
21 |   tags: config
22 |   template: src={{ item }}.j2 dest={{ etc_folder }}/impala/{{ item }}
23 |   with_items:
24 |     - hive-site.xml
25 |     - core-site.xml
26 |     - hdfs-site.xml
27 | 
28 | - name: install default configurations
29 |   tags: config
30 |   template: src={{ item }}.j2 dest=/etc/default/{{ item }}
31 |   with_items:
32 |     - impala
33 | 
34 | - name: create log folders
35 |   tags: config
36 |   file: path={{ log_folder }}/impala state=directory owner=impala mode=0755
37 | 
38 | - name: start services
39 |   tags: service
40 |   service: name={{ item }} state=restarted enabled=yes
41 |   with_items:
42 |     - impala-server
43 | 
44 | - name: wait
45 |   command: sleep 30
46 | 
47 | - name: test impala
48 |   tags: test
49 |   command: impala-shell -u hdfs -d default -q 'create external table ansible_test_table ( id int ) location "/tmp/ansible_test_table"; drop table ansible_test_table;'
50 |   when: ansible_hostname == groups['datanodes'][0]
51 | 
52 | - name: create hdfs dir
53 |   tags: config
54 |   command: sudo -u hdfs hdfs dfs {{ item }}
55 |   with_items:
56 |     - -mkdir -p /user/impala
57 |     - '-chown impala:hadoop /user/impala'
58 |     - -chmod 1777 /user/impala
59 | 


--------------------------------------------------------------------------------
/roles/impala/tasks/impala.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum:  name={{ item }} state=latest
 4 |   with_items:
 5 |     - impala-state-store
 6 |     - impala-catalog
 7 | 
 8 | - name: create configuration directory
 9 |   tags: config
10 |   file: path={{ etc_folder }}/impala state=directory
11 | 
12 | - name: setup alternatives link
13 |   tags: config
14 |   alternatives: name=impala-conf link=/etc/impala/conf path={{ etc_folder }}/impala
15 | 
16 | - name: install template configurations
17 |   tags: config
18 |   template: src={{ item }}.j2 dest=/etc/impala/conf/{{ item }}
19 |   with_items:
20 |     - hive-site.xml
21 |     - core-site.xml
22 |     - hdfs-site.xml
23 | 
24 | - name: install template configuration for hive
25 |   tags: config
26 |   template: src=hive-site.xml.j2 dest=/etc/hive/conf/hive-site.xml
27 | 
28 | - name: install default configurations
29 |   tags: config
30 |   template: src={{ item }}.j2 dest=/etc/default/{{ item }}
31 |   with_items:
32 |     - impala
33 | 
34 | - name: create log folders
35 |   tags: config
36 |   file: path={{ log_folder }}/impala state=directory owner=impala mode=0755
37 | 
38 | - name: start services
39 |   tags: service
40 |   service: name={{ item }} state=restarted enabled=yes
41 |   with_items:
42 |     - impala-state-store
43 |     - impala-catalog
44 | 


--------------------------------------------------------------------------------
/roles/impala/tasks/main.yaml:
--------------------------------------------------------------------------------
1 | - include: impala.yaml
2 |   when: deploy == "impala"
3 | 
4 | - include: impala-server.yaml
5 |   when: deploy == "impala-server" and groups['impala-store-catalog']|count == 1
6 | 


--------------------------------------------------------------------------------
/roles/impala/templates/core-site.xml.j2:
--------------------------------------------------------------------------------
1 | ../../hadoop/templates/core-site.xml.j2


--------------------------------------------------------------------------------
/roles/impala/templates/hdfs-site.xml.j2:
--------------------------------------------------------------------------------
1 | ../../hadoop/templates/hdfs-site.xml.j2


--------------------------------------------------------------------------------
/roles/impala/templates/hive-site.xml.j2:
--------------------------------------------------------------------------------
1 | ../../hivemetastore/templates/hive-site.xml.j2


--------------------------------------------------------------------------------
/roles/impala/templates/impala.j2:
--------------------------------------------------------------------------------
 1 | IMPALA_CATALOG_SERVICE_HOST={{ groups['impala-store-catalog'][0] }}
 2 | IMPALA_STATE_STORE_HOST={{ groups['impala-store-catalog'][0] }}
 3 | IMPALA_STATE_STORE_PORT=24000
 4 | IMPALA_BACKEND_PORT=22000
 5 | IMPALA_LOG_DIR={{ log_folder }}/impala
 6 | 
 7 | IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} {{impala_catalog_args|default('')}}"
 8 | IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT} {{impala_state_store_args|default('')}}"
 9 | IMPALA_SERVER_ARGS=" \
10 |     -log_dir=${IMPALA_LOG_DIR} \
11 |     -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
12 |     -state_store_port=${IMPALA_STATE_STORE_PORT} \
13 |     -use_statestore {{impala_server_args|default('')}} \
14 |     -state_store_host=${IMPALA_STATE_STORE_HOST} \
15 |     -be_port=${IMPALA_BACKEND_PORT}"
16 | 
17 | ENABLE_CORE_DUMPS=false
18 | 
19 | # LIBHDFS_OPTS=-Djava.library.path=/usr/lib/impala/lib
20 | # MYSQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar
21 | # IMPALA_BIN=/usr/lib/impala/sbin
22 | # IMPALA_HOME=/usr/lib/impala
23 | # HIVE_HOME=/usr/lib/hive
24 | # HBASE_HOME=/usr/lib/hbase
25 | # IMPALA_CONF_DIR=/etc/impala/conf
26 | # HADOOP_CONF_DIR=/etc/impala/conf
27 | # HIVE_CONF_DIR=/etc/impala/conf
28 | # HBASE_CONF_DIR=/etc/impala/conf
29 | 
30 | 


--------------------------------------------------------------------------------
/roles/kafka/files/connect-console-sink.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name=local-console-sink
17 | connector.class=org.apache.kafka.connect.file.FileStreamSinkConnector
18 | tasks.max=1
19 | topics=connect-test


--------------------------------------------------------------------------------
/roles/kafka/files/connect-console-source.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name=local-console-source
17 | connector.class=org.apache.kafka.connect.file.FileStreamSourceConnector
18 | tasks.max=1
19 | topic=connect-test


--------------------------------------------------------------------------------
/roles/kafka/files/connect-distributed.properties:
--------------------------------------------------------------------------------
 1 | ##
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | ##
17 | 
18 | # These are defaults. This file just demonstrates how to override some settings.
19 | bootstrap.servers=localhost:9092
20 | 
21 | group.id=connect-cluster
22 | 
23 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
24 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
25 | key.converter=org.apache.kafka.connect.json.JsonConverter
26 | value.converter=org.apache.kafka.connect.json.JsonConverter
27 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
28 | # it to
29 | key.converter.schemas.enable=true
30 | value.converter.schemas.enable=true
31 | 
32 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will
33 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format.
34 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter
35 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter
36 | internal.key.converter.schemas.enable=false
37 | internal.value.converter.schemas.enable=false
38 | 
39 | offset.storage.topic=connect-offsets
40 | # Flush much faster than normal, which is useful for testing/debugging
41 | offset.flush.interval.ms=10000
42 | config.storage.topic=connect-configs


--------------------------------------------------------------------------------
/roles/kafka/files/connect-file-sink.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name=local-file-sink
17 | connector.class=FileStreamSink
18 | tasks.max=1
19 | file=test.sink.txt
20 | topics=connect-test


--------------------------------------------------------------------------------
/roles/kafka/files/connect-file-source.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | name=local-file-source
17 | connector.class=FileStreamSource
18 | tasks.max=1
19 | file=test.txt
20 | topic=connect-test


--------------------------------------------------------------------------------
/roles/kafka/files/connect-log4j.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | log4j.rootLogger=INFO, stdout
17 | 
18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n
21 | 
22 | log4j.logger.org.apache.zookeeper=ERROR
23 | log4j.logger.org.I0Itec.zkclient=ERROR


--------------------------------------------------------------------------------
/roles/kafka/files/connect-standalone.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # These are defaults. This file just demonstrates how to override some settings.
17 | bootstrap.servers=localhost:9092
18 | 
19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
21 | key.converter=org.apache.kafka.connect.json.JsonConverter
22 | value.converter=org.apache.kafka.connect.json.JsonConverter
23 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
24 | # it to
25 | key.converter.schemas.enable=true
26 | value.converter.schemas.enable=true
27 | 
28 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will
29 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format.
30 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter
31 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter
32 | internal.key.converter.schemas.enable=false
33 | internal.value.converter.schemas.enable=false
34 | 
35 | offset.storage.file.filename=/tmp/connect.offsets
36 | # Flush much faster than normal, which is useful for testing/debugging
37 | offset.flush.interval.ms=10000
38 | 


--------------------------------------------------------------------------------
/roles/kafka/files/default/kafka:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/kafka/files/default/kafka


--------------------------------------------------------------------------------
/roles/kafka/files/log4j.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | log4j.rootLogger=INFO, stdout 
17 | 
18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
21 | 
22 | log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender
23 | log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH
24 | log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log
25 | log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout
26 | log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
27 | 
28 | log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender
29 | log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH
30 | log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log
31 | log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout
32 | log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
33 | 
34 | log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender
35 | log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH
36 | log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log
37 | log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout
38 | log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
39 | 
40 | log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender
41 | log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH
42 | log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log
43 | log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout
44 | log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
45 | 
46 | log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender
47 | log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH
48 | log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log
49 | log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout
50 | log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
51 | 
52 | log4j.appender.authorizerAppender=org.apache.log4j.DailyRollingFileAppender
53 | log4j.appender.authorizerAppender.DatePattern='.'yyyy-MM-dd-HH
54 | log4j.appender.authorizerAppender.File=${kafka.logs.dir}/kafka-authorizer.log
55 | log4j.appender.authorizerAppender.layout=org.apache.log4j.PatternLayout
56 | log4j.appender.authorizerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
57 | 
58 | # Turn on all our debugging info
59 | #log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender
60 | #log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender
61 | #log4j.logger.kafka.perf=DEBUG, kafkaAppender
62 | #log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender
63 | #log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG
64 | log4j.logger.kafka=INFO, kafkaAppender
65 | 
66 | log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender
67 | log4j.additivity.kafka.network.RequestChannel$=false
68 | 
69 | #log4j.logger.kafka.network.Processor=TRACE, requestAppender
70 | #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender
71 | #log4j.additivity.kafka.server.KafkaApis=false
72 | log4j.logger.kafka.request.logger=WARN, requestAppender
73 | log4j.additivity.kafka.request.logger=false
74 | 
75 | log4j.logger.kafka.controller=TRACE, controllerAppender
76 | log4j.additivity.kafka.controller=false
77 | 
78 | log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender
79 | log4j.additivity.kafka.log.LogCleaner=false
80 | 
81 | log4j.logger.state.change.logger=TRACE, stateChangeAppender
82 | log4j.additivity.state.change.logger=false
83 | 
84 | #Change this to debug to get the actual audit log for authorizer.
85 | log4j.logger.kafka.authorizer.logger=WARN, authorizerAppender
86 | log4j.additivity.kafka.authorizer.logger=false
87 | 
88 | 


--------------------------------------------------------------------------------
/roles/kafka/files/tools-log4j.properties:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #    http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | log4j.rootLogger=WARN, stderr
17 | 
18 | log4j.appender.stderr=org.apache.log4j.ConsoleAppender
19 | log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.stderr.layout.ConversionPattern=[%d] %p %m (%c)%n
21 | log4j.appender.stderr.Target=System.err
22 | 


--------------------------------------------------------------------------------
/roles/kafka/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - kafka-server
 6 | 
 7 | - name: create configuration directory
 8 |   file: path=/etc/kafka/conf.{{ cluster_name }} state=directory
 9 | 
10 | - name: setup alternatives link
11 |   alternatives: name=kafka-conf link=/etc/kafka/conf path=/etc/kafka/conf.{{ cluster_name }}
12 | 
13 | - name: install default configurations
14 |   tags: config
15 |   copy: src=default/{{ item }} dest=/etc/default/{{ item }}
16 |   with_items:
17 |     - kafka
18 | 
19 | - name: install template configurations
20 |   tags: config
21 |   template: src={{ item }}.j2 dest=/etc/kafka/conf/{{ item }}
22 |   with_items:
23 |     - server.properties
24 | 
25 | - name: install files configurations
26 |   tags: config
27 |   copy: src={{ item }} dest=/etc/kafka/conf/{{ item }}
28 |   with_items:
29 |     - connect-console-sink.properties
30 |     - connect-console-source.properties
31 |     - connect-distributed.properties
32 |     - connect-file-sink.properties
33 |     - connect-file-source.properties
34 |     - connect-log4j.properties
35 |     - connect-standalone.properties
36 |     - log4j.properties
37 |     - tools-log4j.properties
38 | 
39 | - name: destroy data
40 |   tags: init
41 |   command: rm -rf {{ item }}
42 |   with_items: "{{ log_dirs.split(',') }}"
43 |   when: destroy_data
44 | 
45 | - name: create kafka directories
46 |   tags: init
47 |   file: dest={{ item }} owner=kafka group=kafka state=directory
48 |   with_items: "{{ log_dirs.split(',') }}"
49 | 
50 | - name: start services
51 |   tags: service
52 |   service: name={{ item }} state=restarted enabled=yes
53 |   with_items:
54 |     - kafka-server
55 | 
56 | - name: test, create topic
57 |   tags: test
58 |   command: kafka-topics --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --replication-factor {{ groups['kafka']|count }} --partitions 1 --create --topic test
59 |   run_once: true
60 | 
61 | - name: test, create test message
62 |   tags: test
63 |   shell: echo 'test message' | kafka-console-producer --broker-list {{ ansible_hostname }}:9092 --topic test
64 |   run_once: true
65 | 
66 | - name: test, read the message
67 |   tags: test
68 |   command: kafka-console-consumer --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --max-messages 1 --topic test --from-beginning
69 |   run_once: true
70 |   register: msg
71 | 
72 | - name: test, result
73 |   tags: test
74 |   debug: var=msg.stdout  
75 |   failed_when: msg.stdout.find('test message') < 0
76 |   run_once: true
77 | 
78 | - name: test, delete topic
79 |   tags: test
80 |   command: kafka-topics --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --delete --topic test
81 |   run_once: true
82 | 


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/email.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>
26 | 


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/fs.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/hive.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |    <property>
22 |       <name>hadoop.bin.path</name>
23 |       <value>/usr/bin/hadoop</value>
24 |    </property>
25 | 
26 |    <property>
27 |       <name>hadoop.config.dir</name>
28 |       <value>/etc/hadoop/conf</value>
29 |     </property>
30 | 
31 |   <property>
32 |     <name>oozie.launcher.mapred.job.queue.name</name>
33 |     <value>launcher</value>
34 |   </property>
35 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/shell.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/sqoop.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/ssh.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/action-conf/sub-workflow.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 |   <property>
22 |     <name>oozie.launcher.mapred.job.queue.name</name>
23 |     <value>launcher</value>
24 |   </property>
25 | </configuration>


--------------------------------------------------------------------------------
/roles/oozie/files/adminusers.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one
 3 | # or more contributor license agreements.  See the NOTICE file
 4 | # distributed with this work for additional information
 5 | # regarding copyright ownership.  The ASF licenses this file
 6 | # to you under the Apache License, Version 2.0 (the
 7 | # "License"); you may not use this file except in compliance
 8 | # with the License.  You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | # Admin Users, one user by line
20 | 


--------------------------------------------------------------------------------
/roles/oozie/files/hadoop-conf/core-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 | 
22 |     <property>
23 |         <name>mapreduce.jobtracker.kerberos.principal</name>
24 |         <value>mapred/_HOST@LOCALREALM</value>
25 |     </property>
26 | 
27 |     <property>
28 |       <name>yarn.resourcemanager.principal</name>
29 |       <value>yarn/_HOST@LOCALREALM</value>
30 |     </property>
31 | 
32 |     <property>
33 |         <name>dfs.namenode.kerberos.principal</name>
34 |         <value>hdfs/_HOST@LOCALREALM</value>
35 |     </property>
36 | 
37 |     <property>
38 |         <name>mapreduce.framework.name</name>
39 |         <value>yarn</value>
40 |     </property>
41 | 
42 | </configuration>
43 | 


--------------------------------------------------------------------------------
/roles/oozie/files/hadoop-config.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed to the Apache Software Foundation (ASF) under one
 5 |   or more contributor license agreements.  See the NOTICE file
 6 |   distributed with this work for additional information
 7 |   regarding copyright ownership.  The ASF licenses this file
 8 |   to you under the Apache License, Version 2.0 (the
 9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 | 
12 |        http://www.apache.org/licenses/LICENSE-2.0
13 | 
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 | 
22 |     <property>
23 |         <name>mapreduce.jobtracker.kerberos.principal</name>
24 |         <value>mapred/_HOST@LOCALREALM</value>
25 |     </property>
26 | 
27 |     <property>
28 |       <name>yarn.resourcemanager.principal</name>
29 |       <value>yarn/_HOST@LOCALREALM</value>
30 |     </property>
31 | 
32 |     <property>
33 |         <name>dfs.namenode.kerberos.principal</name>
34 |         <value>hdfs/_HOST@LOCALREALM</value>
35 |     </property>
36 | 
37 |     <property>
38 |         <name>mapreduce.framework.name</name>
39 |         <value>yarn</value>
40 |     </property>
41 | 
42 | </configuration>
43 | 


--------------------------------------------------------------------------------
/roles/oozie/files/oozie-log4j.properties:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one
 3 | # or more contributor license agreements.  See the NOTICE file
 4 | # distributed with this work for additional information
 5 | # regarding copyright ownership.  The ASF licenses this file
 6 | # to you under the Apache License, Version 2.0 (the
 7 | # "License"); you may not use this file except in compliance
 8 | # with the License.  You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | #    http://www.apache.org/licenses/LICENSE-2.0
20 | #
21 | # Unless required by applicable law or agreed to in writing, software
22 | # distributed under the License is distributed on an "AS IS" BASIS,
23 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 | # See the License for the specific language governing permissions and
25 | # limitations under the License. See accompanying LICENSE file.
26 | #
27 | 
28 | # If the Java System property 'oozie.log.dir' is not defined at Oozie start up time
29 | # XLogService sets its value to '${oozie.home}/logs'
30 | 
31 | # The appender that Oozie uses must be named 'oozie' (i.e. log4j.appender.oozie)
32 | 
33 | # Using the RollingFileAppender with the OozieRollingPolicy will roll the log file every hour and retain up to MaxHistory number of
34 | # log files. If FileNamePattern ends with ".gz" it will create gzip files.
35 | log4j.appender.oozie=org.apache.log4j.rolling.RollingFileAppender
36 | log4j.appender.oozie.RollingPolicy=org.apache.oozie.util.OozieRollingPolicy
37 | log4j.appender.oozie.File=${oozie.log.dir}/oozie.log
38 | log4j.appender.oozie.Append=true
39 | log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
40 | log4j.appender.oozie.layout.ConversionPattern=%d{ISO8601} %p %c{1}:%L - SERVER[${oozie.instance.id}] %m%n
41 | # The FileNamePattern must end with "-%d{yyyy-MM-dd-HH}.gz" or "-%d{yyyy-MM-dd-HH}" and also start with the
42 | # value of log4j.appender.oozie.File
43 | log4j.appender.oozie.RollingPolicy.FileNamePattern=${log4j.appender.oozie.File}-%d{yyyy-MM-dd-HH}
44 | # The MaxHistory controls how many log files will be retained (720 hours / 24 hours per day = 30 days); -1 to disable
45 | log4j.appender.oozie.RollingPolicy.MaxHistory=720
46 | 
47 | # Uncomment the below two lines to use the DailyRollingFileAppender instead
48 | # The DatePattern must end with either "dd" or "HH"
49 | #log4j.appender.oozie=org.apache.log4j.DailyRollingFileAppender
50 | #log4j.appender.oozie.DatePattern='.'yyyy-MM-dd-HH
51 | 
52 | log4j.appender.oozieops=org.apache.log4j.DailyRollingFileAppender
53 | log4j.appender.oozieops.DatePattern='.'yyyy-MM-dd
54 | log4j.appender.oozieops.File=${oozie.log.dir}/oozie-ops.log
55 | log4j.appender.oozieops.Append=true
56 | log4j.appender.oozieops.layout=org.apache.log4j.PatternLayout
57 | log4j.appender.oozieops.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n
58 | 
59 | log4j.appender.oozieinstrumentation=org.apache.log4j.DailyRollingFileAppender
60 | log4j.appender.oozieinstrumentation.DatePattern='.'yyyy-MM-dd
61 | log4j.appender.oozieinstrumentation.File=${oozie.log.dir}/oozie-instrumentation.log
62 | log4j.appender.oozieinstrumentation.Append=true
63 | log4j.appender.oozieinstrumentation.layout=org.apache.log4j.PatternLayout
64 | log4j.appender.oozieinstrumentation.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n
65 | 
66 | log4j.appender.oozieaudit=org.apache.log4j.DailyRollingFileAppender
67 | log4j.appender.oozieaudit.DatePattern='.'yyyy-MM-dd
68 | log4j.appender.oozieaudit.File=${oozie.log.dir}/oozie-audit.log
69 | log4j.appender.oozieaudit.Append=true
70 | log4j.appender.oozieaudit.layout=org.apache.log4j.PatternLayout
71 | log4j.appender.oozieaudit.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n
72 | 
73 | log4j.appender.openjpa=org.apache.log4j.DailyRollingFileAppender
74 | log4j.appender.openjpa.DatePattern='.'yyyy-MM-dd
75 | log4j.appender.openjpa.File=${oozie.log.dir}/oozie-jpa.log
76 | log4j.appender.openjpa.Append=true
77 | log4j.appender.openjpa.layout=org.apache.log4j.PatternLayout
78 | log4j.appender.openjpa.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n
79 | 
80 | log4j.logger.openjpa=INFO, openjpa
81 | log4j.logger.oozieops=INFO, oozieops
82 | log4j.logger.oozieinstrumentation=ALL, oozieinstrumentation
83 | log4j.logger.oozieaudit=ALL, oozieaudit
84 | log4j.logger.org.apache.oozie=INFO, oozie
85 | log4j.logger.org.apache.hadoop=WARN, oozie
86 | log4j.logger.org.mortbay=WARN, oozie
87 | log4j.logger.org.hsqldb=WARN, oozie
88 | log4j.logger.org.apache.hadoop.security.authentication.server=WARN, oozie
89 | 


--------------------------------------------------------------------------------
/roles/oozie/tasks/main.yaml:
--------------------------------------------------------------------------------
  1 | - name: install oozie packages
  2 |   tags: packages
  3 |   yum:  name={{ item }} state=latest
  4 |   with_items:
  5 |     - oozie
  6 |     - unzip
  7 |     - postgresql
  8 | 
  9 | - name: create configuration directory
 10 |   tags: config
 11 |   file: path={{ etc_folder }}/oozie state=directory
 12 | 
 13 | - name: create log directory
 14 |   tags: config
 15 |   file: path={{ log_folder }}/oozie state=directory owner=oozie group=oozie mode=755
 16 | 
 17 | - name: setup alternatives link
 18 |   tags: config
 19 |   alternatives: name=oozie-conf link=/etc/oozie/conf path={{ etc_folder }}/oozie
 20 | 
 21 | - name: setup alternatives link
 22 |   tags: config
 23 |   alternatives: name=oozie-tomcat-conf link=/etc/oozie/oozie-tomcat-conf path=/etc/oozie/tomcat-conf.http
 24 | 
 25 | - name: install template configurations
 26 |   tags: config
 27 |   template: src="{{ item }}.j2" dest={{ etc_folder }}/oozie/{{ item }}
 28 |   with_items:
 29 |     - oozie-site.xml
 30 |     - oozie-env.sh
 31 | 
 32 | - name: install files configurations
 33 |   tags: config
 34 |   copy: src={{ item }} dest={{ etc_folder }}/oozie/{{ item }}
 35 |   with_items:
 36 |     - hadoop-config.xml
 37 |     - oozie-default.xml
 38 |     - oozie-log4j.properties
 39 | 
 40 | - name: create config directories
 41 |   tags: config
 42 |   file: path=/etc/oozie/conf/{{ item }} state=directory
 43 |   with_items:
 44 |     - action-conf
 45 |     - hadoop-conf
 46 | 
 47 | - name: install files configurations
 48 |   tags: config
 49 |   copy: src={{ item }} dest={{ etc_folder }}/oozie/action-conf/{{ item|basename }}
 50 |   with_fileglob:
 51 |     - action-conf/*
 52 | 
 53 | - name: install files configurations
 54 |   tags: config
 55 |   copy: src={{ item }} dest={{ etc_folder }}/oozie/hadoop-conf/{{ item|basename }}
 56 |   with_fileglob:
 57 |     - hadoop-conf/*
 58 | 
 59 | - name: create hdfs directories
 60 |   tags: init
 61 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
 62 |   with_items:
 63 |     - -mkdir -p /user/oozie
 64 |     - -chown oozie:oozie /user/oozie
 65 |   run_once: true
 66 | 
 67 | - name: install shared libraries to hdfs
 68 |   tags: 
 69 |     - config
 70 |     - oozie-setup 
 71 |   command: oozie-setup sharelib create -fs {% if groups['namenodes']|count > 1 %} hdfs://{{ cluster_name }} {% else %} hdfs://{{ groups['namenodes'][0] }}:8020 {% endif %} -locallib /usr/lib/oozie/oozie-sharelib-yarn
 72 |   run_once: true
 73 | 
 74 | - name: generate sql
 75 |   tags: init
 76 |   template: src="oozie.sql.j2" dest=/tmp/oozie.sql
 77 | 
 78 | - name: install .pgpass
 79 |   template: src=".pgpass.j2" dest=/root/.pgpass mode=0600
 80 | 
 81 | - name: create database
 82 |   tags: init
 83 |   command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/oozie.sql
 84 |   when: destroy_data
 85 |   run_once: true
 86 | 
 87 | - name: remove sql and .pgpass files
 88 |   command: rm -f /tmp/oozie.sql /root/.pgpass
 89 | 
 90 | - name: initialize database
 91 |   tags: init
 92 |   command: service oozie init
 93 |   when: destroy_data
 94 |   run_once: true
 95 | 
 96 | - name: get ext-2.2.zip if not exists
 97 |   tags: download
 98 |   local_action: get_url url=http://archive.cloudera.com/gplextras/misc/ext-2.2.zip dest={{ inventory_dir }}/workdir/ext-2.2.zip
 99 |   run_once: true
100 | 
101 | - name: extract ext-2.2
102 |   tags: config
103 |   unarchive: src={{ inventory_dir }}/workdir/ext-2.2.zip dest=/var/lib/oozie/
104 | 
105 | - name: start services
106 |   tags: service
107 |   service: name=oozie state=restarted enabled=yes
108 | 
109 | - include: oozie-test.yaml tags=test
110 | 


--------------------------------------------------------------------------------
/roles/oozie/tasks/oozie-test.yaml:
--------------------------------------------------------------------------------
 1 | - name: get examples name
 2 |   shell: rpm -ql oozie-client | grep oozie-examples
 3 |   register: o
 4 | 
 5 | - name: extract examples
 6 |   unarchive: src={{ o.stdout_lines[0] }} dest=/tmp copy=no
 7 | 
 8 | - name: put examples to hdfs
 9 |   command: sudo -Hi -u hdfs hdfs dfs -put -f /tmp/examples examples
10 | 
11 | - name: amend example configuration
12 |   lineinfile: dest=/tmp/examples/apps/map-reduce/job.properties regexp='^nameNode=' line="nameNode={% if groups['namenodes']|count > 1 %}hdfs://{{ cluster_name }}{% else %}hdfs://{{ groups['namenodes'][0] }}:8020{% endif %}"
13 | 
14 | - name: amend example configuration
15 |   lineinfile: dest=/tmp/examples/apps/map-reduce/job.properties regexp='^jobTracker=' line="jobTracker={% if groups['yarnresourcemanager']|count > 1 %}maprfs:///{% else %}hdfs://{{ groups['yarnresourcemanager'][0] }}:8032{% endif %}"
16 | 
17 | - name: run map-reduce job
18 |   command: sudo -Hi -u hdfs oozie job -oozie http://localhost:11000/oozie -config /tmp/examples/apps/map-reduce/job.properties -run
19 |   register: j
20 | 
21 | - name: check status
22 |   shell: sudo -Hi -u hdfs oozie job -oozie http://localhost:11000/oozie -info {{ j.stdout_lines[0].split()[1] }} | awk '/^Status/{ print $3 }'
23 |   register: s
24 |   until: s.stdout == 'SUCCEEDED'
25 |   delay: 5
26 |   retries: 12
27 | 
28 | - debug: msg="Job status is {{ s.stdout }}"
29 | 
30 | - name: clean up examples
31 |   file: path=/tmp/examples state=absent
32 | 
33 | - name: remove examples from hdfs
34 |   command: sudo -Hi -u hdfs hdfs dfs -rm -r examples
35 | 


--------------------------------------------------------------------------------
/roles/oozie/templates/.pgpass.j2:
--------------------------------------------------------------------------------
1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }}
2 | {{ groups['postgresql'][0] }}:5432:oozie:oozie:{{ oozie_password }}
3 | 


--------------------------------------------------------------------------------
/roles/oozie/templates/oozie-env.sh.j2:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Licensed to the Apache Software Foundation (ASF) under one
 4 | # or more contributor license agreements.  See the NOTICE file
 5 | # distributed with this work for additional information
 6 | # regarding copyright ownership.  The ASF licenses this file
 7 | # to you under the Apache License, Version 2.0 (the
 8 | # "License"); you may not use this file except in compliance
 9 | # with the License.  You may obtain a copy of the License at
10 | #
11 | #      http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | export JAVA_LIBRARY_PATH="$JAVA_LIBRARY_PATH:/usr/lib/hadoop/lib/native"
21 | 
22 | # Set Oozie specific environment variables here.
23 | 
24 | export OOZIE_DATA=/var/lib/oozie
25 | export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat
26 | export CATALINA_TMPDIR=/var/lib/oozie
27 | export CATALINA_PID=/var/run/oozie/oozie.pid
28 | export CATALINA_BASE=/var/lib/oozie/tomcat-deployment
29 | 
30 | # Settings for the Embedded Tomcat that runs Oozie
31 | # Java System properties for Oozie should be specified in this variable
32 | #
33 | export OOZIE_HTTPS_PORT=11443
34 | export OOZIE_HTTPS_KEYSTORE_PASS=password
35 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.port=${OOZIE_HTTPS_PORT}"
36 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.keystore.pass=${OOZIE_HTTPS_KEYSTORE_PASS}"
37 | export CATALINA_OPTS="$CATALINA_OPTS -Xmx{{ oozie_heapsize }}m"
38 | 
39 | # Oozie configuration file to load from Oozie configuration directory
40 | #
41 | # export OOZIE_CONFIG_FILE=oozie-site.xml
42 | export OOZIE_CONFIG=/etc/oozie/conf
43 | 
44 | # Oozie logs directory
45 | #
46 | # export OOZIE_LOG=${OOZIE_HOME}/logs
47 | export OOZIE_LOG={{ log_folder }}/oozie
48 | 
49 | # Oozie Log4J configuration file to load from Oozie configuration directory
50 | #
51 | # export OOZIE_LOG4J_FILE=oozie-log4j.properties
52 | 
53 | # Reload interval of the Log4J configuration file, in seconds
54 | #
55 | # export OOZIE_LOG4J_RELOAD=10
56 | 
57 | # The port Oozie server runs
58 | #
59 | # export OOZIE_HTTP_PORT=11000
60 | 
61 | # The port Oozie server runs if using SSL (HTTPS)
62 | #
63 | # export OOZIE_HTTPS_PORT=11443
64 | 
65 | # The host name Oozie server runs on
66 | #
67 | # export OOZIE_HTTP_HOSTNAME=`hostname -f`
68 | 
69 | # The base URL for callback URLs to Oozie
70 | #
71 | # export OOZIE_BASE_URL="http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie"
72 | 
73 | # The location of the keystore for the Oozie server if using SSL (HTTPS)
74 | #
75 | # export OOZIE_HTTPS_KEYSTORE_FILE=${HOME}/.keystore
76 | 
77 | # The password of the keystore for the Oozie server if using SSL (HTTPS)
78 | #
79 | # export OOZIE_HTTPS_KEYSTORE_PASS=password
80 | 
81 | # The Oozie Instance ID
82 | #
83 | # export OOZIE_INSTANCE_ID="${OOZIE_HTTP_HOSTNAME}"
84 | 


--------------------------------------------------------------------------------
/roles/oozie/templates/oozie-site.xml.j2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <configuration>
 3 | 
 4 |   <property>
 5 |     <name>oozie.service.JPAService.jdbc.driver</name>
 6 |     <value>org.postgresql.Driver</value>
 7 |   </property>
 8 | 
 9 |   <property>
10 |     <name>oozie.service.JPAService.jdbc.url</name>
11 |     <value>jdbc:postgresql://{{ groups['postgresql'][0] }}:5432/oozie</value>
12 |   </property>
13 | 
14 |   <property>
15 |     <name>oozie.service.JPAService.jdbc.username</name>
16 |     <value>oozie</value>
17 |   </property>
18 | 
19 |   <property>
20 |     <name>oozie.service.JPAService.jdbc.password</name>
21 |     <value>{{ oozie_password }}</value>
22 |   </property>
23 | 
24 | <!-- IMPORTANT to access hadoop -->
25 |   <property>
26 |     <name>oozie.service.HadoopAccessorService.hadoop.configurations</name>
27 |     <value>*=/etc/hadoop/conf</value>
28 |     <description> Comma separated AUTHORITY=HADOOP_CONF_DIR, where AUTHORITY is the HOST:PORT of the Hadoop service (JobTracker, HDFS). The wildcard '*' configuration is used when there is no exact match for an authority. The HADOOP_CONF_DIR contains the relevant Hadoop *-site.xml files. If the path is relative is looked within the Oozie configuration directory; though the path can be absolute (i.e. to point to Hadoop client conf/ directories in the local filesystem.
29 |     </description>
30 |   </property>
31 | 
32 | <!-- log complaints but the config is required -->
33 |   <property>
34 |     <name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
35 |     <value>*</value>
36 |   </property>
37 | 
38 |   <property>
39 |     <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
40 |     <value>*</value>
41 |   </property>
42 | 
43 | {% if oozie_ext_properties %}
44 |   {% for property in oozie_ext_properties %}
45 |   <property>
46 |     <name>{{property.name}}</name>
47 |     <value>{{property.value}}</value>
48 |   </property>
49 |   {% endfor %}
50 | {% endif %}
51 | 
52 |   <property>
53 |     <name>oozie.service.ActionService.executor.ext.classes</name>
54 |     <value>
55 |       {{oozie_ext_classes|join(',')}}
56 |     </value>
57 |   </property>
58 | 
59 |   <property>
60 |     <name>oozie.service.SchemaService.wf.ext.schemas</name>
61 |     <value>
62 |       {{oozie_ext_schemas|join(',')}}
63 |     </value>
64 |   </property>
65 | 
66 |   <property>
67 |     <name>oozie.action.fs.glob.max</name>
68 |     <value>500000</value>
69 |   </property>
70 | 
71 | </configuration>
72 | 


--------------------------------------------------------------------------------
/roles/oozie/templates/oozie.sql.j2:
--------------------------------------------------------------------------------
 1 | DROP DATABASE IF EXISTS "oozie";
 2 | DROP ROLE IF EXISTS oozie;
 3 | CREATE ROLE oozie LOGIN ENCRYPTED PASSWORD '{{ oozie_password }}' NOSUPERUSER INHERIT CREATEDB NOCREATEROLE;
 4 | CREATE DATABASE "oozie" WITH OWNER = oozie
 5 |  ENCODING = 'UTF8'
 6 |  TABLESPACE = pg_default
 7 |  LC_COLLATE = 'en_US.UTF-8'
 8 |  LC_CTYPE = 'en_US.UTF-8'
 9 |  CONNECTION LIMIT = -1;
10 | \q
11 | 


--------------------------------------------------------------------------------
/roles/postgresql/files/pg_hba.conf:
--------------------------------------------------------------------------------
 1 | # PostgreSQL Client Authentication Configuration File
 2 | # ===================================================
 3 | #
 4 | # Refer to the "Client Authentication" section in the
 5 | # PostgreSQL documentation for a complete description
 6 | # of this file.  A short synopsis follows.
 7 | #
 8 | # This file controls: which hosts are allowed to connect, how clients
 9 | # are authenticated, which PostgreSQL user names they can use, which
10 | # databases they can access.  Records take one of these forms:
11 | #
12 | # local      DATABASE  USER  METHOD  [OPTIONS]
13 | # host       DATABASE  USER  CIDR-ADDRESS  METHOD  [OPTIONS]
14 | # hostssl    DATABASE  USER  CIDR-ADDRESS  METHOD  [OPTIONS]
15 | # hostnossl  DATABASE  USER  CIDR-ADDRESS  METHOD  [OPTIONS]
16 | #
17 | # (The uppercase items must be replaced by actual values.)
18 | #
19 | # The first field is the connection type: "local" is a Unix-domain socket,
20 | # "host" is either a plain or SSL-encrypted TCP/IP socket, "hostssl" is an
21 | # SSL-encrypted TCP/IP socket, and "hostnossl" is a plain TCP/IP socket.
22 | #
23 | # DATABASE can be "all", "sameuser", "samerole", a database name, or
24 | # a comma-separated list thereof.
25 | #
26 | # USER can be "all", a user name, a group name prefixed with "+", or
27 | # a comma-separated list thereof.  In both the DATABASE and USER fields
28 | # you can also write a file name prefixed with "@" to include names from
29 | # a separate file.
30 | #
31 | # CIDR-ADDRESS specifies the set of hosts the record matches.
32 | # It is made up of an IP address and a CIDR mask that is an integer
33 | # (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that specifies
34 | # the number of significant bits in the mask.  Alternatively, you can write
35 | # an IP address and netmask in separate columns to specify the set of hosts.
36 | #
37 | # METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", "krb5",
38 | # "ident", "pam", "ldap" or "cert".  Note that "password" sends passwords
39 | # in clear text; "md5" is preferred since it sends encrypted passwords.
40 | #
41 | # OPTIONS are a set of options for the authentication in the format
42 | # NAME=VALUE. The available options depend on the different authentication
43 | # methods - refer to the "Client Authentication" section in the documentation
44 | # for a list of which options are available for which authentication methods.
45 | #
46 | # Database and user names containing spaces, commas, quotes and other special
47 | # characters must be quoted. Quoting one of the keywords "all", "sameuser" or
48 | # "samerole" makes the name lose its special character, and just match a
49 | # database or username with that name.
50 | #
51 | # This file is read on server startup and when the postmaster receives
52 | # a SIGHUP signal.  If you edit the file on a running system, you have
53 | # to SIGHUP the postmaster for the changes to take effect.  You can use
54 | # "pg_ctl reload" to do that.
55 | 
56 | # Put your actual configuration here
57 | # ----------------------------------
58 | #
59 | # If you want to allow non-local connections, you need to add more
60 | # "host" records. In that case you will also need to make PostgreSQL listen
61 | # on a non-local interface via the listen_addresses configuration parameter,
62 | # or via the -i or -h command line switches.
63 | #
64 | 
65 | 
66 | 
67 | # TYPE  DATABASE    USER        CIDR-ADDRESS          METHOD
68 | 
69 | # "local" is for Unix domain socket connections only
70 | local   all         all                               ident
71 | # IPv4 local connections:
72 | host    all         all         127.0.0.1/32          ident
73 | # IPv6 local connections:
74 | host    all         all         ::1/128               ident
75 | # allow all connections
76 | host    all         all         0.0.0.0 0.0.0.0       md5
77 | 


--------------------------------------------------------------------------------
/roles/postgresql/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install postgresql server and jdbc
 2 |   tags: package
 3 |   yum:  name=postgresql{{ postgres_version|default('')|replace('.', '') }}-server state=latest
 4 | 
 5 | - name: destroy data and init data and init database
 6 |   tags: init
 7 |   shell: "{{ item }}"
 8 |   with_items:
 9 |   - service postgresql{% if postgres_version %}-{{ postgres_version }} {% endif %} stop
10 |   - rm -rf /var/lib/pgsql/{{ postgres_version|default('') }}/data/*
11 |   - service postgresql{% if postgres_version %}-{{ postgres_version }}{% endif %} initdb
12 |   when: destroy_data
13 | 
14 | - name: install configuration files
15 |   tags: config
16 |   copy: src={{ item }} dest=/var/lib/pgsql/{{ postgres_version|default('') }}/data/{{ item }} owner=postgres group=postgres
17 |   with_items:
18 |   - postgresql.conf
19 |   - pg_hba.conf
20 | 
21 | - name: start services
22 |   tags: service
23 |   service: name=postgresql{% if postgres_version %}-{{ postgres_version }}{% endif %} state=restarted enabled=yes
24 | 
25 | - name: set postgres password
26 |   tags: init
27 |   command: sudo -Hu postgres psql -c "alter user postgres with password '{{ postgres_password }}';"
28 |   when: destroy_data
29 | 
30 | - name: install userdb sql
31 |   tags: init
32 |   template: src=userdb.sql.j2 dest=/tmp/userdb.sql
33 |   when: user_database is defined
34 | 
35 | - name: create userdb
36 |   tags: init
37 |   command: sudo -Hu postgres psql -f /tmp/userdb.sql
38 |   when: user_database is defined and destroy_data
39 | 
40 | - name: remove userdb sql
41 |   tags: init
42 |   command: rm -f /tmp/userdb.sql
43 |   when: user_database is defined
44 | 
45 | - name: execute user sql script
46 |   shell: echo "{{ postgres_script }}" | sudo -Hu postgres psql
47 |   when: postgres_script is defined
48 | 


--------------------------------------------------------------------------------
/roles/postgresql/templates/userdb.sql.j2:
--------------------------------------------------------------------------------
1 | drop database if exists "{{ user_database }}";
2 | create database {{ user_database }};
3 | \c {{ user_database }};
4 | create user userdb_user with password '{{ userdb_password }}';
5 | grant all privileges on database {{ user_database }} to userdb_user;
6 | 


--------------------------------------------------------------------------------
/roles/snmp/files/snmpd.conf:
--------------------------------------------------------------------------------
 1 | com2sec         notConfigUser  default       public
 2 | group           notConfigGroup v1            notConfigUser
 3 | group           notConfigGroup v2c           notConfigUser
 4 | access          notConfigGroup ""            any  noauth exact systemview none none
 5 | 
 6 | view            systemview     included      .1
 7 | 
 8 | #for hight load system comment the line above and uncomment view configuration below
 9 | #this will exclude large tcp connection tables from default system view
10 | 
11 | #view    systemview    included   .1.3.6.1.2.1.1
12 | #view    systemview    included   .1.3.6.1.2.1.2
13 | #view    systemview    included   .1.3.6.1.2.1.4
14 | #view    systemview    included   .1.3.6.1.2.1.25
15 | #view    systemview    included   .1.3.6.1.2.1.31
16 | #view    systemview    included   .1.3.6.1.4.1.777
17 | #view    systemview    included   .1.3.6.1.4.1.2021
18 | #view    systemview    included   .1.3.6.1.4.1.28675
19 | #view    systemview    included   .1.3.6.1.4.1.57052
20 | 
21 | master agentx
22 | agentxperms 770 770 daemon users
23 | 
24 | dontLogTCPWrappersConnects 1
25 | interface lo 24 1000000000
26 | 


--------------------------------------------------------------------------------
/roles/snmp/files/subagent-shell-hadoop-conf.xml:
--------------------------------------------------------------------------------
1 | <?xml version='1.0'?>
2 | <config>
3 | <function id="getHadoopStats"></function>
4 | </config>
5 | 


--------------------------------------------------------------------------------
/roles/snmp/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - net-snmp
 6 |     - net-snmp-subagent-shell
 7 |     - hadoop-monitoring-utility
 8 | 
 9 | - name: install snmpd configurations
10 |   tags: config
11 |   copy: src={{ item }} dest=/etc/snmp/{{ item }}
12 |   with_items:
13 |     - snmpd.conf
14 | 
15 | - name: install subagent-shell configurations
16 |   tags: config
17 |   copy: src={{ item }} dest=/etc/snmp/subagent-shell/{{ item }}
18 |   with_items:
19 |     - subagent-shell-hadoop-conf.xml
20 | 
21 | - name: start snmp services
22 |   tags: service
23 |   service: name={{ item }} enabled=yes state=restarted
24 |   with_items:
25 |     - snmpd
26 |     - subagent-shell
27 | 


--------------------------------------------------------------------------------
/roles/solr/files/0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/solr/files/0


--------------------------------------------------------------------------------
/roles/solr/files/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/solr/files/1


--------------------------------------------------------------------------------
/roles/solr/files/solr.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" ?>
 2 | <!--
 3 |  Licensed to the Apache Software Foundation (ASF) under one or more
 4 |  contributor license agreements.  See the NOTICE file distributed with
 5 |  this work for additional information regarding copyright ownership.
 6 |  The ASF licenses this file to You under the Apache License, Version 2.0
 7 |  (the "License"); you may not use this file except in compliance with
 8 |  the License.  You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 |  Unless required by applicable law or agreed to in writing, software
13 |  distributed under the License is distributed on an "AS IS" BASIS,
14 |  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  See the License for the specific language governing permissions and
16 |  limitations under the License.
17 | -->
18 | 
19 | <!--
20 |    This is an example of a simple "solr.xml" file for configuring one or 
21 |    more Solr Cores, as well as allowing Cores to be added, removed, and 
22 |    reloaded via HTTP requests.
23 | 
24 |    More information about options available in this configuration file, 
25 |    and Solr Core administration can be found online:
26 |    http://wiki.apache.org/solr/CoreAdmin
27 | -->
28 | 
29 | <solr>
30 | 
31 |   <solrcloud>
32 |     <str name="host">${host:}</str>
33 |     <int name="hostPort">${jetty.port:8983}</int>
34 |     <str name="hostContext">${hostContext:solr}</str>
35 |     <int name="zkClientTimeout">${zkClientTimeout:30000}</int>
36 |     <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
37 |     
38 |     <!-- ZooKeeper Security -->
39 |     <str name="zkACLProvider">${zkACLProvider:}</str>
40 |     <str name="zkCredentialsProvider">${zkCredentialsProvider:}</str>
41 |     
42 |   </solrcloud>
43 | 
44 |   <shardHandlerFactory name="shardHandlerFactory"
45 |     class="HttpShardHandlerFactory">
46 |     <int name="socketTimeout">${socketTimeout:0}</int>
47 |     <int name="connTimeout">${connTimeout:0}</int>
48 |   </shardHandlerFactory>
49 | 
50 | </solr>
51 | 


--------------------------------------------------------------------------------
/roles/solr/files/zoo.cfg:
--------------------------------------------------------------------------------
 1 | # The number of milliseconds of each tick
 2 | tickTime=2000
 3 | # The number of ticks that the initial
 4 | # synchronization phase can take
 5 | initLimit=10
 6 | # The number of ticks that can pass between
 7 | # sending a request and getting an acknowledgement
 8 | syncLimit=5
 9 | 
10 | # the directory where the snapshot is stored.
11 | # dataDir=/opt/zookeeper/data
12 | # NOTE: Solr defaults the dataDir to <solrHome>/zoo_data
13 | 
14 | # the port at which the clients will connect
15 | # clientPort=2181
16 | # NOTE: Solr sets this based on zkRun / zkHost params
17 | 
18 | 


--------------------------------------------------------------------------------
/roles/solr/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - solr-server
 6 | 
 7 | - name: create configuration directory
 8 |   file: path=/etc/solr/conf.{{ cluster_name }} state=directory
 9 | 
10 | - name: setup alternatives link
11 |   alternatives: name=solr-conf link=/etc/solr/conf path=/etc/solr/conf.{{ cluster_name }}
12 | 
13 | - name: install default configurations
14 |   tags: config
15 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
16 |   with_items:
17 |     - solr
18 | 
19 | #- name: install template configurations
20 | #  tags: config
21 | #  template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }}
22 | #  with_items:
23 | #    - hbase-site.xml
24 | #    - regionservers
25 | 
26 | - name: install files configurations
27 |   tags: config
28 |   copy: src={{ item }} dest=/etc/solr/conf/{{ item }}
29 |   with_items:
30 |     - solr.xml
31 |     - zoo.cfg
32 | 
33 | - name: create hdfs directories
34 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
35 |   with_items:
36 |     - -mkdir -p /solr
37 |     - -chown solr /solr
38 |   run_once: true
39 | 
40 | - name: init zookeeper
41 |   tags: init
42 |   command: solrctl init
43 |   run_once: true
44 | 
45 | - name: start services
46 |   tags: service
47 |   service: name={{ item }} state=restarted enabled=yes
48 |   with_items:
49 |     - solr-server
50 | 


--------------------------------------------------------------------------------
/roles/solr/templates/default/solr.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | SOLR_PORT=8983
16 | SOLR_ADMIN_PORT=8984
17 | SOLR_LOG=/var/log/solr
18 | SOLR_ZK_ENSEMBLE={% for item in groups['zookeepernodes'] -%}
19 |                     {{ item }}:2181{% if not loop.last %},{% endif %}
20 |                  {%- endfor %}/solr
21 | 
22 | {% if groups['namenodes']|count > 1 %}
23 | SOLR_HDFS_HOME=hdfs://{{ cluster_name }}/solr
24 | {% else %}
25 | SOLR_HDFS_HOME=hdfs://{{ groups['namenodes'][0] }}:8020/solr
26 | {% endif %}
27 | 
28 | SOLR_HDFS_CONFIG=/etc/hadoop/conf
29 | # SOLR_KERBEROS_ENABLED=true
30 | # SOLR_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab
31 | # SOLR_KERBEROS_PRINCIPAL=solr/localhost@LOCALHOST
32 | SOLR_AUTHENTICATION_TYPE=simple
33 | SOLR_AUTHENTICATION_SIMPLE_ALLOW_ANON=true
34 | # SOLR_AUTHENTICATION_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab
35 | # SOLR_AUTHENTICATION_KERBEROS_PRINCIPAL=HTTP/localhost@LOCALHOST
36 | # SOLR_AUTHENTICATION_KERBEROS_NAME_RULES=DEFAULT
37 | # SOLR_AUTHENTICATION_JAAS_CONF=/etc/solr/conf/jaas.conf
38 | SOLR_SECURITY_ALLOWED_PROXYUSERS=hue
39 | SOLR_SECURITY_PROXYUSER_hue_HOSTS=*
40 | SOLR_SECURITY_PROXYUSER_hue_GROUPS=*
41 | # SOLR_AUTHORIZATION_SENTRY_SITE=/etc/solr/conf/sentry-site.xml
42 | # SOLR_AUTHORIZATION_SUPERUSER=solr
43 | SOLRD_WATCHDOG_TIMEOUT=30
44 | 
45 | #SOLR_SSL_ENABLED=true
46 | #SOLR_KEYSTORE_PATH=/var/lib/solr/.keystore
47 | #SOLR_KEYSTORE_PASSWORD=
48 | #SOLR_TRUSTSTORE_PATH=/var/lib/solr/.truststore
49 | #SOLR_TRUSTSTORE_PASSWORD=
50 | 
51 | 


--------------------------------------------------------------------------------
/roles/spark/files/fairscheduler.xml.template:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <allocations>
 3 |   <pool name="production">
 4 |     <schedulingMode>FAIR</schedulingMode>
 5 |     <weight>1</weight>
 6 |     <minShare>2</minShare>
 7 |   </pool>
 8 |   <pool name="test">
 9 |     <schedulingMode>FIFO</schedulingMode>
10 |     <weight>2</weight>
11 |     <minShare>3</minShare>
12 |   </pool>
13 | </allocations>
14 | 


--------------------------------------------------------------------------------
/roles/spark/files/log4j.properties.template:
--------------------------------------------------------------------------------
 1 | # Set everything to be logged to the console
 2 | log4j.rootCategory=INFO, console
 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
 4 | log4j.appender.console.target=System.err
 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
 7 | 
 8 | # Settings to quiet third party logs that are too verbose
 9 | log4j.logger.org.eclipse.jetty=WARN
10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
13 | 


--------------------------------------------------------------------------------
/roles/spark/files/slaves.template:
--------------------------------------------------------------------------------
1 | # A Spark Worker will be started on each of the machines listed below.
2 | localhost


--------------------------------------------------------------------------------
/roles/spark/files/spark-defaults.conf.template:
--------------------------------------------------------------------------------
 1 | # Default system properties included when running spark-submit.
 2 | # This is useful for setting default environmental settings.
 3 | 
 4 | # Example:
 5 | # spark.master                     spark://master:7077
 6 | # spark.eventLog.enabled           true
 7 | # spark.eventLog.dir               hdfs://namenode:8021/directory
 8 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
 9 | # spark.driver.memory              5g
10 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
11 | 


--------------------------------------------------------------------------------
/roles/spark/files/spark-env.sh.template:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This file is sourced when running various Spark programs.
 4 | # Copy it as spark-env.sh and edit that to configure Spark for your site.
 5 | 
 6 | # Options read when launching programs locally with 
 7 | # ./bin/run-example or ./bin/spark-submit
 8 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
 9 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
10 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
11 | # - SPARK_CLASSPATH, default classpath entries to append
12 | 
13 | # Options read by executors and drivers running inside the cluster
14 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
15 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
16 | # - SPARK_CLASSPATH, default classpath entries to append
17 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
18 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
19 | 
20 | # Options read in YARN client mode
21 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
22 | # - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
23 | # - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
24 | # - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
25 | # - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
26 | # - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
27 | # - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’)
28 | # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
29 | # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.
30 | 
31 | # Options for the daemons used in the standalone deploy mode
32 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
33 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
34 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
35 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine
36 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
37 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
38 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
39 | # - SPARK_WORKER_DIR, to set the working directory of worker processes
40 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
41 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
42 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
43 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
44 | 
45 | # Generic options for the daemons used in the standalone deploy mode
46 | # - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
47 | # - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
48 | # - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
49 | # - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
50 | # - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
51 | 


--------------------------------------------------------------------------------
/roles/spark/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - spark-core
 6 | 
 7 | - name: install packages
 8 |   tags: packages
 9 |   yum: name={{ item }} state=latest
10 |   with_items:
11 |     - spark-history-server
12 |   when: ansible_hostname == groups['spark'][0]
13 | 
14 | - name: create configuration directory
15 |   file: path=/etc/spark/conf.{{ cluster_name }} state=directory
16 | 
17 | - name: setup alternatives link
18 |   alternatives: name=spark-conf link=/etc/spark/conf path=/etc/spark/conf.{{ cluster_name }}
19 | 
20 | - name: install default configurations
21 |   tags: config
22 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
23 |   with_items:
24 |     - spark
25 | 
26 | - name: install template configurations
27 |   tags: config
28 |   template: src={{ item }}.j2 dest=/etc/spark/conf/{{ item }}
29 |   with_items:
30 |     - spark-defaults.conf
31 | 
32 | - name: install files configurations
33 |   tags: config
34 |   copy: src={{ item }} dest=/etc/spark/conf/{{ item|basename }}
35 |   with_fileglob:
36 |     - ./*
37 | 
38 | - name: create hdfs directories
39 |   command: sudo -Hu hdfs hdfs dfs {{ item }}
40 |   with_items:
41 |     - -mkdir -p {{ spark_history_server_dir }}
42 |     - -chown spark:spark {{ spark_history_server_dir }}
43 |     - -chmod 1777 {{ spark_history_server_dir }}
44 |   run_once: true
45 | 
46 | - name: start services
47 |   tags: service
48 |   service: name={{ item }} state=restarted enabled=yes
49 |   with_items:
50 |     - spark-history-server
51 |   when: ansible_hostname == groups['spark'][0]
52 | 
53 | - name: test
54 |   tags: test
55 |   command: sudo -Hu hdfs spark-submit --master yarn-cluster --class org.apache.spark.examples.SparkPi --num-executors 2 --driver-cores 1 --driver-memory 512m --executor-memory 512m --executor-cores 2 --queue default /usr/lib/spark/lib/spark-examples.jar 10
56 |   run_once: true
57 | 


--------------------------------------------------------------------------------
/roles/spark/templates/default/spark.j2:
--------------------------------------------------------------------------------
1 | {% if groups['namenodes']|count > 1 %}
2 | export SPARK_HISTORY_SERVER_LOG_DIR=hdfs://{{ cluster_name }}{{ spark_history_server_dir }}
3 | {% else %}
4 | export SPARK_HISTORY_SERVER_LOG_DIR=hdfs://{{ groups['namenodes'][0] }}:8020{{ spark_history_server_dir }}
5 | {% endif %}
6 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=${SPARK_HISTORY_SERVER_LOG_DIR}"
7 | export SPARK_CONF_DIR=/etc/spark/conf
8 | 


--------------------------------------------------------------------------------
/roles/spark/templates/spark-defaults.conf.j2:
--------------------------------------------------------------------------------
 1 | # Default system properties included when running spark-submit.
 2 | # This is useful for setting default environmental settings.
 3 | 
 4 | # Example:
 5 | # spark.master                     spark://master:7077
 6 | spark.eventLog.enabled           true
 7 | {% if groups['namenodes']|count > 1 %}
 8 | spark.eventLog.dir               hdfs://{{ cluster_name }}{{ spark_history_server_dir }}
 9 | {% else %}
10 | spark.eventLog.dir               hdfs://{{ groups['namenodes'][0] }}:8020{{ spark_history_server_dir }}
11 | {% endif %}
12 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
13 | # spark.driver.memory              5g
14 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
15 | 


--------------------------------------------------------------------------------
/roles/spark/vars/main.yaml:
--------------------------------------------------------------------------------
1 | spark_history_server_dir: /var/log/spark/apps
2 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/files/hadoop.pdb:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <patterndb version="4" pub_date="2015-11-23">
 3 |   <ruleset name='hadoop' id="89FA0E51-A1D4-4C67-B3E7-CA5B814FA5C9">
 4 |     <description>
 5 |       covers hadoop java logs.
 6 |     </description>
 7 |     <pattern>hadoop</pattern>
 8 |     <rules>
 9 |       <rule provider="patterndb" id="A2AD5659-9A85-4A7C-9001-E010948D3D34" class="hadoop">
10 |         <patterns>
11 |           <pattern>@ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname::@ @ANYSTRING:hadoop.message@</pattern>
12 |         </patterns>
13 |         <examples>
14 |           <example>
15 |            <test_message program="hadoop">2015-11-17 16:54:27,586 ERROR org.apache.hadoop.hdfs.server.namenode.NameNode: RECEIVED SIGNAL 15: SIGTERM</test_message>
16 |            <test_values>
17 |             <test_value name="hadoop.date">2015-11-17</test_value>
18 |             <test_value name="hadoop.time">16:54:27,586</test_value>
19 |             <test_value name="hadoop.severity">ERROR</test_value>
20 |             <test_value name="hadoop.classname">org.apache.hadoop.hdfs.server.namenode.NameNode</test_value>
21 |             <test_value name="hadoop.message">RECEIVED SIGNAL 15: SIGTERM</test_value>
22 |            </test_values>
23 |           </example>
24 |         </examples>
25 |         <actions>
26 |           <action>
27 |             <message inherit-properties='TRUE'>
28 |               <values>
29 |                 <value name="MESSAGE">${hadoop.classname}: ${hadoop.message}</value>
30 | 		<value name="hadoop.generated">true</value> <!-- it is needed to skip original message -->
31 |               </values>
32 |             </message>
33 |           </action>
34 |         </actions>
35 |       </rule>
36 |     </rules>
37 |   </ruleset>
38 | </patterndb>
39 | <!--
40 | vim: syn=xml
41 | -->
42 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/files/hive.pdb:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <patterndb version="4" pub_date="2015-11-24">
 3 |   <ruleset name='hive' id="F213763A-B560-418F-83DD-8D197C3F8F16">
 4 |     <description>
 5 |       covers hive java logs.
 6 |     </description>
 7 |     <pattern>hive</pattern>
 8 |     <rules>
 9 |       <rule provider="patterndb" id="CF9D3941-9521-432D-86F5-5DFD59CC02CA" class="hadoop">
10 |         <patterns>
11 | 	<pattern>@ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname:(@@ESTRING:hadoop.filename::@@ESTRING:hadoop.funcname:(@@ESTRING:hadoop.lineno:)@) - @ANYSTRING:hadoop.message@</pattern>
12 |         </patterns>
13 |         <examples>
14 |           <example>
15 | 	    <test_message program="hive">2015-10-30 14:30:39,802 ERROR Datastore.Schema (Log4JLogger.java:error(125)) - Failed initialising database.</test_message>
16 |             <test_values>
17 |               <test_value name="hadoop.date">2015-10-30</test_value>
18 |               <test_value name="hadoop.time">14:30:39,802</test_value>
19 |               <test_value name="hadoop.severity">ERROR</test_value>
20 |               <test_value name="hadoop.classname">Datastore.Schema </test_value>
21 |               <test_value name="hadoop.filename">Log4JLogger.java</test_value>
22 |               <test_value name="hadoop.funcname">error</test_value>
23 |               <test_value name="hadoop.lineno">125</test_value>
24 |               <test_value name="hadoop.message">Failed initialising database.</test_value>
25 |             </test_values>
26 |           </example>
27 |         </examples>
28 |         <actions>
29 |           <action>
30 |             <message inherit-properties='TRUE'>
31 |               <values>
32 |                 <value name="MESSAGE">${hadoop.classname}: ${hadoop.message}</value>
33 | 		<value name="hadoop.generated">true</value> <!-- it is needed to skip original message -->
34 |               </values>
35 |             </message>
36 |           </action>
37 |         </actions>
38 |       </rule>
39 |     </rules>
40 |   </ruleset>
41 | </patterndb>
42 | <!--
43 | vim: syn=xml
44 | -->
45 | 
46 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/files/impala.pdb:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <patterndb version="4" pub_date="2015-11-25">
 3 |   <ruleset name='impala' id="E159C3D6-A452-4CCE-9876-EC38A7ECC69D">
 4 |     <description>
 5 |       covers impala java logs.
 6 |     </description>
 7 |     <pattern>impala</pattern>
 8 |     <rules>
 9 |       <rule provider="patterndb" id="4DE94FF9-B040-4A50-8BE4-FD5934124DD5" class="hadoop">
10 |         <patterns>
11 | 	  <pattern>@ESTRING:hadoop.code: @@STRING:hadoop.time:.:@@QSTRING:hadoop.pid: @@ESTRING:hadoop.filename::@@ESTRING:hadoop.lineno:]@ @ANYSTRING:hadoop.message@</pattern>
12 |         </patterns>
13 |         <examples>
14 |           <example>
15 |            <test_message program="impala">I1124 17:23:12.848029  6746 authentication.cc:1014] External communication is not authenticated</test_message>
16 |            <test_values>
17 |             <test_value name="hadoop.code">I1124</test_value>
18 |             <test_value name="hadoop.time">17:23:12,848029</test_value>
19 |             <test_value name="hadoop.pid">6746</test_value>
20 |             <test_value name="hadoop.filename">authentication.cc</test_value>
21 |             <test_value name="hadoop.lineno">1014</test_value>
22 |             <test_value name="hadoop.message">External communication is not authenticated</test_value>
23 |            </test_values>
24 |           </example>
25 |         </examples>
26 |         <actions>
27 |           <action>
28 |             <message inherit-properties='TRUE'>
29 |               <values>
30 |                 <value name="MESSAGE">[${hadoop.code}] ${hadoop.filename}: ${hadoop.message}</value>
31 | 		<value name="hadoop.generated">true</value> <!-- it is needed to skip original message -->
32 |               </values>
33 |             </message>
34 |           </action>
35 |         </actions>
36 |       </rule>
37 |     </rules>
38 |   </ruleset>
39 | </patterndb>
40 | <!--
41 | vim: syn=xml
42 | -->
43 | 
44 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/files/oozie.pdb:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <patterndb version="4" pub_date="2015-11-24">
 3 |   <ruleset name='oozie' id="6381025C-0352-46A4-8000-D6DD03F1E20F">
 4 |     <description>
 5 |       covers oozie java logs.
 6 |     </description>
 7 |     <pattern>oozie</pattern>
 8 |     <rules>
 9 |       <rule provider="patterndb" id="EC20D008-60BF-4FAD-8E43-C5C371D3EFA5" class="hadoop">
10 |         <patterns>
11 | 	  <pattern>@ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname::@@ESTRING:hadoop.lineno: @- SERVER[@ESTRING:hadoop.hostname:]@ @ANYSTRING:hadoop.message@</pattern>
12 |         </patterns>
13 |         <examples>
14 |           <example>
15 | 	  <test_message program="oozie">2015-11-15 16:07:34,036  INFO PauseTransitService:520 - SERVER[gp-test0.ocslab.com] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[-] Acquired lock for [org.apache.oozie.service.PauseTransitService]</test_message>
16 |            <test_values>
17 |             <test_value name="hadoop.date">2015-11-15</test_value>
18 |             <test_value name="hadoop.time">16:07:34,036</test_value>
19 |             <test_value name="hadoop.severity">INFO</test_value>
20 |             <test_value name="hadoop.classname">PauseTransitService</test_value>
21 |             <test_value name="hadoop.lineno">520</test_value>
22 | 	    <test_value name="hadoop.message">2015-11-15 16:07:34,036  INFO PauseTransitService:520 - SERVER[gp-test0.ocslab.com] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[-] Acquired lock for [org.apache.oozie.service.PauseTransitService]</test_value>
23 |            </test_values>
24 |           </example>
25 |         </examples>
26 |         <actions>
27 |           <action>
28 |             <message inherit-properties='TRUE'>
29 |               <values>
30 |                 <value name="MESSAGE">${hadoop.classname}: ${hadoop.message}</value>
31 | 		<value name="hadoop.generated">true</value> <!-- it is needed to skip original message -->
32 |               </values>
33 |             </message>
34 |           </action>
35 |         </actions>
36 |       </rule>
37 |     </rules>
38 |   </ruleset>
39 | </patterndb>
40 | <!--
41 | vim: syn=xml
42 | -->
43 | 
44 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install packages
 2 |   tags: packages
 3 |   yum: name={{ item }} state=latest
 4 |   with_items:
 5 |     - syslog-ng
 6 | 
 7 | - name: create directories
 8 |   tags: config
 9 |   file: path=/etc/syslog-ng/{{ item }} state=directory mode=0755
10 |   with_items:
11 |     - conf.d
12 |     - patterndb.d/hadoop
13 | 
14 | - name: update patterndb.d
15 |   tags: config
16 |   copy: src={{ item }} dest=/etc/syslog-ng/patterndb.d/hadoop/{{ item }}
17 |   with_items:
18 |     - hadoop.pdb
19 |     - hive.pdb
20 |     - impala.pdb
21 |     - oozie.pdb
22 | 
23 | - name: update configuration
24 |   tags: config
25 |   template: src={{ item }} dest=/etc/syslog-ng/conf.d/{{ item }}
26 |   with_items:
27 |     - 30-hadoop.conf
28 | 
29 | - name: reload syslog-ng service
30 |   tags: service
31 |   service: name=syslog-ng enabled=yes state=restarted
32 | 


--------------------------------------------------------------------------------
/roles/syslog-ng/vars/main.yaml:
--------------------------------------------------------------------------------
1 | syslog_ng_destination: d_logcollector_throttled
2 | 


--------------------------------------------------------------------------------
/roles/zookeeper/files/configuration.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 3 | <xsl:output method="html"/>
 4 | <xsl:template match="configuration">
 5 | <html>
 6 | <body>
 7 | <table border="1">
 8 | <tr>
 9 |  <td>name</td>
10 |  <td>value</td>
11 |  <td>description</td>
12 | </tr>
13 | <xsl:for-each select="property">
14 | <tr>
15 |   <td><a name="{name}"><xsl:value-of select="name"/></a></td>
16 |   <td><xsl:value-of select="value"/></td>
17 |   <td><xsl:value-of select="description"/></td>
18 | </tr>
19 | </xsl:for-each>
20 | </table>
21 | </body>
22 | </html>
23 | </xsl:template>
24 | </xsl:stylesheet>
25 | 


--------------------------------------------------------------------------------
/roles/zookeeper/files/log4j.properties:
--------------------------------------------------------------------------------
 1 | # Copyright 2012 The Apache Software Foundation
 2 | # 
 3 | # Licensed to the Apache Software Foundation (ASF) under one
 4 | # or more contributor license agreements.  See the NOTICE file
 5 | # distributed with this work for additional information
 6 | # regarding copyright ownership.  The ASF licenses this file
 7 | # to you under the Apache License, Version 2.0 (the
 8 | # "License"); you may not use this file except in compliance
 9 | # with the License.  You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | # Define some default values that can be overridden by system properties
20 | zookeeper.root.logger=INFO, CONSOLE
21 | 
22 | zookeeper.console.threshold=INFO
23 | 
24 | zookeeper.log.dir=.
25 | zookeeper.log.file=zookeeper.log
26 | zookeeper.log.threshold=INFO
27 | zookeeper.log.maxfilesize=256MB
28 | zookeeper.log.maxbackupindex=20
29 | 
30 | zookeeper.tracelog.dir=.
31 | zookeeper.tracelog.file=zookeeper_trace.log
32 | 
33 | log4j.rootLogger=${zookeeper.root.logger}
34 | 
35 | #
36 | # console
37 | # Add "console" to rootlogger above if you want to use this 
38 | #
39 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
40 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
41 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
42 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
43 | 
44 | #
45 | # Add ROLLINGFILE to rootLogger to get log file output
46 | #
47 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
48 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold}
49 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file}
50 | log4j.appender.ROLLINGFILE.MaxFileSize=${zookeeper.log.maxfilesize}
51 | log4j.appender.ROLLINGFILE.MaxBackupIndex=${zookeeper.log.maxbackupindex}
52 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
53 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
54 | 
55 | #
56 | # Add TRACEFILE to rootLogger to get log file output
57 | #    Log TRACE level and above messages to a log file
58 | #
59 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender
60 | log4j.appender.TRACEFILE.Threshold=TRACE
61 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file}
62 | 
63 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout
64 | ### Notice we are including log4j's NDC here (%x)
65 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n
66 | 


--------------------------------------------------------------------------------
/roles/zookeeper/tasks/main.yaml:
--------------------------------------------------------------------------------
 1 | - name: install zookeeper-server
 2 |   tags: package
 3 |   yum: name=zookeeper-server state=latest
 4 | 
 5 | - name: create configuration directory
 6 |   tags: config
 7 |   file: path={{ etc_folder }}/zookeeper state=directory
 8 | 
 9 | - name: setup alternatives link
10 |   tags: config
11 |   alternatives: name=zookeeper-conf link=/etc/zookeeper/conf path={{ etc_folder }}/zookeeper
12 | 
13 | - name: install template configurations
14 |   tags: config
15 |   template: src=zoo.cfg.j2 dest={{ etc_folder }}/zookeeper/zoo.cfg
16 | 
17 | - name: install files configurations
18 |   tags: config
19 |   copy: src={{ item }} dest=/etc/zookeeper/conf/{{ item }}
20 |   with_items:
21 |     - configuration.xsl
22 |     - log4j.properties
23 | 
24 | - name: install default configurations
25 |   tags: config
26 |   template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }}
27 |   with_items:
28 |     - zookeeper
29 | 
30 | - name: ensure data dir
31 |   tags: config
32 |   file: path={{ zookeeper_data_dir }} state=directory owner=zookeeper group=zookeeper
33 | 
34 | - name: clean zookeeper data directory
35 |   tags: init
36 |   shell: rm -rf {{ zookeeper_data_dir }}/*
37 |   when: destroy_data
38 | 
39 | - name: init zookeeper directory
40 |   tags: init
41 |   command: service zookeeper-server init
42 |   when: destroy_data
43 | 
44 | - name: install myid
45 |   tags: init
46 |   template: src=myid.j2 dest={{ zookeeper_data_dir }}/myid
47 |   when: groups['zookeepernodes']|count > 1 and destroy_data
48 | 
49 | - name: start zookeeper
50 |   tags: service
51 |   service: name=zookeeper-server state=restarted enabled=yes
52 | 


--------------------------------------------------------------------------------
/roles/zookeeper/templates/default/zookeeper.j2:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one or more
 2 | # contributor license agreements.  See the NOTICE file distributed with
 3 | # this work for additional information regarding copyright ownership.
 4 | # The ASF licenses this file to You under the Apache License, Version 2.0
 5 | # (the "License"); you may not use this file except in compliance with
 6 | # the License.  You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Command-line parameters to pass to the JVM
17 | # export SERVER_JVMFLAGS=""
18 | 


--------------------------------------------------------------------------------
/roles/zookeeper/templates/myid.j2:
--------------------------------------------------------------------------------
1 | {% if groups['zookeepernodes']|count > 1 %}
2 | {% for item in groups['zookeepernodes'] %}
3 | {% if ansible_hostname == item %}
4 | {{ loop.index }}
5 | {% endif %}
6 | {% endfor %}
7 | {% endif %}
8 | 


--------------------------------------------------------------------------------
/roles/zookeeper/templates/zoo.cfg.j2:
--------------------------------------------------------------------------------
 1 | maxClientCnxns=50
 2 | # The number of milliseconds of each tick
 3 | tickTime=2000
 4 | # The number of ticks that the initial
 5 | # synchronization phase can take
 6 | initLimit=10
 7 | # The number of ticks that can pass between
 8 | # sending a request and getting an acknowledgement
 9 | syncLimit=5
10 | # the directory where the snapshot is stored.
11 | dataDir={{zookeeper_data_dir}}
12 | # the port at which the clients will connect
13 | clientPort=2181
14 | # the directory where the transaction logs are stored.
15 | dataLogDir={{zookeeper_data_dir}}
16 | 
17 | {% if groups['zookeepernodes']|count > 1 %}
18 | {% for item in groups['zookeepernodes'] %}
19 | server.{{ loop.index }}={{ item }}:2888:3888
20 | {% endfor %}
21 | {% endif %}
22 | 


--------------------------------------------------------------------------------
/site.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | - name: Check hosts file
  3 |   tags: check
  4 |   hosts: all
  5 |   roles:
  6 |     - check_config
  7 | 
  8 | - name: Setup local environment
  9 |   tags:
 10 |     - common
 11 |     - clinit
 12 |     - interfaces
 13 |     - config
 14 |   hosts: all
 15 |   tasks:
 16 |     - name: create working directory
 17 |       local_action: file dest="{{ inventory_dir }}/workdir" state=directory
 18 |       run_once: true
 19 | 
 20 | - name: Prepare hosts for cloudera hadoop cluster
 21 |   tags: common
 22 |   hosts: java
 23 |   roles:
 24 |     - common
 25 | 
 26 | - name: Deploy zookeeper
 27 |   hosts: zookeepernodes
 28 |   tags: zookeepernodes
 29 |   roles:
 30 |     - zookeeper
 31 | 
 32 | - name: Deploy hadoop configuration
 33 |   tags:
 34 |     - config
 35 |     - hadoop
 36 |   hosts: hadoop
 37 |   roles:
 38 |     - { role: hadoop, deploy: 'base' }
 39 | 
 40 | - name: Deploy journal nodes
 41 |   tags: journalnodes
 42 |   hosts: journalnodes
 43 |   roles:
 44 |     - { role: hadoop, deploy: 'journalnodes' }
 45 | 
 46 | - name: Deploy namenodes
 47 |   tags: namenodes
 48 |   hosts: namenodes
 49 |   roles:
 50 |     - { role: hadoop, deploy: 'namenodes' }
 51 |     - { role: hadoop, deploy: 'namenodes-fence' }
 52 | 
 53 | - name: Deploy datanodes
 54 |   tags: datanodes
 55 |   hosts: datanodes
 56 |   roles:
 57 |     - { role: hadoop, deploy: 'datanodes' }
 58 | 
 59 | - name: Test hdfs
 60 |   tags: test
 61 |   hosts: namenodes
 62 |   roles:
 63 |     - { role: hadoop, deploy: 'test-hdfs' }
 64 | 
 65 | - name: Deploy yarn resource manager and job history server
 66 |   tags: yarnresourcemanager
 67 |   hosts: yarnresourcemanager
 68 |   roles:
 69 |     - { role: hadoop, deploy: 'resourcemanager' }
 70 | 
 71 | - name: Test mapreduce
 72 |   tags: test
 73 |   hosts: yarnresourcemanager
 74 |   roles:
 75 |     - { role: hadoop, deploy: 'test-mapreduce' }
 76 | 
 77 | - name: Deploy postgresql
 78 |   tags: postgresql
 79 |   hosts: postgresql
 80 |   roles:
 81 |     - postgresql
 82 | 
 83 | - name: Deploy hive metastore
 84 |   tags: hivemetastore
 85 |   hosts: hivemetastore
 86 |   roles:
 87 |     - { role: hivemetastore, deploy: 'hive-server' }
 88 | 
 89 | - name: Deploy hive client on datanodes
 90 |   tags: hive
 91 |   hosts: datanodes
 92 |   roles:
 93 |     - { role: hivemetastore, deploy: 'hive-client' }
 94 | 
 95 | - name: Deploy impala state-store and catalog
 96 |   tags: impala
 97 |   hosts: impala-store-catalog
 98 |   roles:
 99 |     - { role: impala, deploy: 'impala' }
100 | 
101 | - name: Deploy impala daemon on datanodes
102 |   tags: impala
103 |   hosts: datanodes
104 |   roles:
105 |     - { role: impala, deploy: 'impala-server' }
106 | 
107 | - name: Deploy HBase
108 |   tags: hbase
109 |   hosts: hbasemaster
110 |   roles:
111 |     - { role: hbase, deploy: 'hbase-master' }
112 | 
113 | - name: Deploy HBase regionservers on datanodes
114 |   tags: hbase
115 |   hosts: datanodes
116 |   roles:
117 |     - { role: hbase, deploy: 'regionserver' }
118 | 
119 | - name: Deploy spark
120 |   tags: spark
121 |   hosts: spark
122 |   roles:
123 |     - spark
124 | 
125 | - name: Deploy solr search
126 |   tags: solr
127 |   hosts: solr
128 |   roles:
129 |     - solr
130 | 
131 | - name: Deploy oozie
132 |   tags: oozie
133 |   hosts: oozie
134 |   roles:
135 |     - oozie
136 | 
137 | - name: Deploy kafka
138 |   tags: kafka
139 |   hosts: kafka
140 |   roles:
141 |     - kafka
142 | 
143 | - name: Deploy Hue
144 |   tags: hue
145 |   hosts: hue
146 |   roles:
147 |     - hue
148 | 
149 | - name: Deploy snmp monitoring
150 |   tags: snmp
151 |   hosts: java
152 |   roles:
153 |     - { role: snmp, when: enable_snmp }
154 | 
155 | - name: Deploy syslog-ng monitoring
156 |   tags: syslog
157 |   hosts: java
158 |   roles:
159 |     - { role: syslog-ng, when: enable_syslog }
160 | 
161 | - name: cluster
162 |   tags: cluster
163 |   hosts: yarnresourcemanager
164 |   tasks:
165 |     - debug: msg="{{ lookup('pipe', 'echo; clinit -S workdir/services.xml --nocolors tree;echo =')}}"
166 |       run_once: true
167 | 
168 | - name: dashboard
169 |   tags: dashboard
170 |   hosts: dashboard
171 |   roles:
172 |     - dashboard
173 | 


--------------------------------------------------------------------------------