├── .gitignore ├── LICENSE ├── README.md ├── dashboard.demo.png ├── group_vars ├── all ├── hadoop ├── impala-store-catalog ├── kafka └── oozie ├── hosts ├── meta └── main.yml ├── roles ├── check_config │ └── tasks │ │ └── main.yaml ├── common │ ├── tasks │ │ └── main.yaml │ └── templates │ │ ├── bigtop-utils.j2 │ │ ├── interfaces.j2 │ │ └── services.xml.j2 ├── dashboard │ ├── tasks │ │ └── main.yaml │ └── templates │ │ └── dashboard.j2 ├── hadoop │ ├── files │ │ ├── capacity-scheduler.xml │ │ ├── configuration.xsl │ │ ├── container-executor.cfg │ │ ├── default │ │ │ ├── hadoop │ │ │ ├── hadoop-0.20-mapreduce │ │ │ ├── hadoop-httpfs │ │ │ ├── hadoop-mapreduce-historyserver │ │ │ └── hadoop-yarn-resourcemanager │ │ ├── dfs.exclude │ │ ├── hadoop-metrics.properties │ │ ├── hadoop-metrics2.properties │ │ ├── hadoop-policy.xml │ │ └── log4j.properties │ ├── tasks │ │ ├── base.yaml │ │ ├── datanode.yaml │ │ ├── journalnode.yaml │ │ ├── main.yaml │ │ ├── namenode.yaml │ │ ├── namenodes-fence.yaml │ │ ├── resourcemanager.yaml │ │ ├── test-hdfs.yaml │ │ └── test-mapreduce.yaml │ └── templates │ │ ├── bin │ │ └── hdfs-ready.sh │ │ ├── core-site.xml.j2 │ │ ├── default │ │ ├── hadoop-hdfs-datanode.j2 │ │ ├── hadoop-hdfs-journalnode.j2 │ │ ├── hadoop-hdfs-namenode.j2 │ │ ├── hadoop-hdfs-zkfc.j2 │ │ └── hadoop-yarn-nodemanager.j2 │ │ ├── fair-scheduler.xml.j2 │ │ ├── hadoop-env.sh.j2 │ │ ├── hdfs-site.xml.j2 │ │ ├── mapred-env.sh.j2 │ │ ├── mapred-site.xml.j2 │ │ ├── yarn-env.sh.j2 │ │ └── yarn-site.xml.j2 ├── hbase │ ├── files │ │ ├── default │ │ │ └── hbase │ │ ├── hadoop-metrics2-hbase.properties │ │ ├── hbase-env.cmd │ │ ├── hbase-env.sh │ │ ├── hbase-policy.xml │ │ └── log4j.properties │ ├── tasks │ │ ├── hbase-master.yaml │ │ ├── main.yaml │ │ └── regionserver.yaml │ └── templates │ │ ├── hbase-site.xml.j2 │ │ └── regionservers.j2 ├── hivemetastore │ ├── files │ │ ├── default │ │ │ ├── hadoop-0.20-mapreduce │ │ │ ├── hive-metastore │ │ │ └── hive-server2 │ │ ├── hive-exec-log4j.properties │ │ ├── hive-log4j.properties │ │ └── hive.limits.conf │ ├── tasks │ │ ├── hive-client.yaml │ │ ├── hive-server.yaml │ │ └── main.yaml │ └── templates │ │ ├── .pgpass.j2 │ │ ├── hive-env.sh.j2 │ │ ├── hive-site.xml.j2 │ │ └── hive.sql.j2 ├── hue │ ├── files │ │ ├── default │ │ │ └── hadoop-httpfs │ │ ├── log.conf │ │ └── log4j.properties │ ├── tasks │ │ └── main.yaml │ └── templates │ │ ├── .pgpass.j2 │ │ ├── hue.ini.j2 │ │ └── hue.sql.j2 ├── impala │ ├── tasks │ │ ├── impala-server.yaml │ │ ├── impala.yaml │ │ └── main.yaml │ └── templates │ │ ├── core-site.xml.j2 │ │ ├── hdfs-site.xml.j2 │ │ ├── hive-site.xml.j2 │ │ └── impala.j2 ├── kafka │ ├── files │ │ ├── connect-console-sink.properties │ │ ├── connect-console-source.properties │ │ ├── connect-distributed.properties │ │ ├── connect-file-sink.properties │ │ ├── connect-file-source.properties │ │ ├── connect-log4j.properties │ │ ├── connect-standalone.properties │ │ ├── default │ │ │ └── kafka │ │ ├── log4j.properties │ │ └── tools-log4j.properties │ ├── tasks │ │ └── main.yaml │ └── templates │ │ └── server.properties.j2 ├── oozie │ ├── files │ │ ├── action-conf │ │ │ ├── email.xml │ │ │ ├── fs.xml │ │ │ ├── hive.xml │ │ │ ├── shell.xml │ │ │ ├── sqoop.xml │ │ │ ├── ssh.xml │ │ │ └── sub-workflow.xml │ │ ├── adminusers.txt │ │ ├── hadoop-conf │ │ │ └── core-site.xml │ │ ├── hadoop-config.xml │ │ ├── oozie-default.xml │ │ └── oozie-log4j.properties │ ├── tasks │ │ ├── main.yaml │ │ └── oozie-test.yaml │ └── templates │ │ ├── .pgpass.j2 │ │ ├── oozie-env.sh.j2 │ │ ├── oozie-site.xml.j2 │ │ └── oozie.sql.j2 ├── postgresql │ ├── files │ │ ├── pg_hba.conf │ │ └── postgresql.conf │ ├── tasks │ │ └── main.yaml │ └── templates │ │ └── userdb.sql.j2 ├── snmp │ ├── files │ │ ├── snmpd.conf │ │ └── subagent-shell-hadoop-conf.xml │ └── tasks │ │ └── main.yaml ├── solr │ ├── files │ │ ├── 0 │ │ ├── 1 │ │ ├── solr.xml │ │ └── zoo.cfg │ ├── tasks │ │ └── main.yaml │ └── templates │ │ └── default │ │ └── solr.j2 ├── spark │ ├── files │ │ ├── fairscheduler.xml.template │ │ ├── log4j.properties.template │ │ ├── metrics.properties.template │ │ ├── slaves.template │ │ ├── spark-defaults.conf.template │ │ ├── spark-env.sh │ │ └── spark-env.sh.template │ ├── tasks │ │ └── main.yaml │ ├── templates │ │ ├── default │ │ │ └── spark.j2 │ │ └── spark-defaults.conf.j2 │ └── vars │ │ └── main.yaml ├── syslog-ng │ ├── files │ │ ├── hadoop.pdb │ │ ├── hive.pdb │ │ ├── impala.pdb │ │ └── oozie.pdb │ ├── tasks │ │ └── main.yaml │ ├── templates │ │ └── 30-hadoop.conf │ └── vars │ │ └── main.yaml └── zookeeper │ ├── files │ ├── configuration.xsl │ └── log4j.properties │ ├── tasks │ └── main.yaml │ └── templates │ ├── default │ └── zookeeper.j2 │ ├── myid.j2 │ └── zoo.cfg.j2 └── site.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | workdir/ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Serge Sergeev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dashboard.demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/dashboard.demo.png -------------------------------------------------------------------------------- /group_vars/all: -------------------------------------------------------------------------------- 1 | # java package name 2 | # for oracle distributed packages you should use java- as name 3 | java_package: java-1.8.0 4 | java_home: /usr/java/jdk1.8.0_65/ 5 | 6 | # cluster name will impact the name of alternatives links and hdfs URI in the case of HA 7 | cluster_name: cluster 8 | 9 | # common folders 10 | log_folder: /var/log 11 | etc_folder: /etc/cluster 12 | 13 | # the ssh key to access other hosts without password 14 | # you have to generate it and put public key to the target hosts /.ssh/authorized_keys 15 | # 16 | #clinit_ssh_key: /root/.ssh/hadoop.key 17 | 18 | # clinit effective user 19 | clinit_effective_user: root 20 | 21 | # the variable to control data destruction. non destructive plays not tested yet 22 | destroy_data: True 23 | 24 | # zookeeper data directory 25 | zookeeper_data_dir: '/var/lib/zookeeper' 26 | 27 | # snmp monitoring, change to True to enable 28 | enable_snmp: False 29 | 30 | # syslog monitoring, change to True to enable 31 | enable_syslog: False 32 | 33 | # this is a directory for 5 static files forming a dashboard 34 | dashboard_folder: /var/www/html/dashboard 35 | 36 | # postgresql version 37 | postgres_version: "" 38 | 39 | # postgres database accounts passwords 40 | # 41 | # postgres administrative account password 42 | postgres_password: postgres 43 | 44 | # metastore database password 45 | hiveuser_password: mypassword 46 | 47 | # oozie database password 48 | oozie_password: theoozie 49 | 50 | # hue database password 51 | hue_password: thehue 52 | 53 | # a user database name 54 | # if specified, it will be created in postgres and hue will be configured to use it 55 | # uncomment the next line if a user database is required to setup 56 | #user_database: userdb 57 | 58 | # a password for user database. username will be userdb_user 59 | #userdb_password: myfavoritepassword 60 | 61 | # a custom sql 62 | #postgres_script: | 63 | -------------------------------------------------------------------------------- /group_vars/hadoop: -------------------------------------------------------------------------------- 1 | # service heap configuration, MB 2 | namenode_heapsize: 2048 3 | datanode_heapsize: 2048 4 | journalnode_heapsize: 1000 5 | zkfc_heapsize: 1000 6 | nodemanager_heapsize: 2048 7 | 8 | # http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_ig_hive_install.html?scroll=concept_alp_4kl_3q_unique_1 9 | hiveserver2_heapsize: 1024 10 | hivemetastore_heapsize: 2048 11 | hivecli_heapsize: 1024 12 | 13 | # hadoop directories 14 | # 15 | # name nodes directory 16 | dfs_namenode_name_dir: 'file:///var/lib/hadoop-hdfs/cache/hdfs/dfs/name' 17 | 18 | # datanodes directory 19 | dfs_datanode_data_dir: 'file:///var/lib/hadoop-hdfs/cache/hdfs/dfs/data' 20 | 21 | # journal nodes directory 22 | dfs_journalnode_edits_dir: '/var/lib/hadoop-hdfs/cache/hdfs/dfs/journal' 23 | 24 | # default replication factor 25 | dfs_replication: 1 26 | 27 | # umask for hdfs, the valued MUST be quoted 28 | fs_permissions_umask: '022' 29 | 30 | # impala server additional arguments 31 | impala_server_args: "" 32 | 33 | # yarn directories. A comma seperated lists local to yarn instances 34 | yarn_nodemanager_local_dirs: 'file:///var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir' 35 | yarn_nodemanager_log_dirs: 'file:///var/log/hadoop-yarn/containers' 36 | 37 | # yarn memory settings in Mb 38 | yarn_nodemanager_resource_memory: 4096 39 | 40 | # yarn cpu-vcores 41 | yarn_nodemanager_resource_cpu: 10 42 | -------------------------------------------------------------------------------- /group_vars/impala-store-catalog: -------------------------------------------------------------------------------- 1 | # impala additional arguments 2 | impala_catalog_args: "" 3 | impala_state_store_args: "" 4 | -------------------------------------------------------------------------------- /group_vars/kafka: -------------------------------------------------------------------------------- 1 | log_dirs: /tmp/kafka-logs 2 | num_io_threads: 8 3 | log_retention_hours: 168 4 | auto_create_topics_enable: true 5 | controlled_shutdown_enable: true 6 | delete_topic_enable: true 7 | -------------------------------------------------------------------------------- /group_vars/oozie: -------------------------------------------------------------------------------- 1 | # oozie server heapsize 2 | oozie_heapsize: 1024 3 | 4 | # oozie launcher pool ( fair-scheduler configuration ) 5 | oozie_launcher_maxapps: 10 6 | oozie_launcher_weight: 1.0 7 | 8 | # additional oozie plugins 9 | oozie_ext_classes: [] 10 | 11 | # additional oozie schemas 12 | oozie_ext_schemas: [] 13 | 14 | # additional oozie properties 15 | oozie_ext_properties: [] 16 | -------------------------------------------------------------------------------- /hosts: -------------------------------------------------------------------------------- 1 | # at least one is required, 2 allowed 2 | # if 2, ha configured 3 | [namenodes] 4 | 5 | # at least one is required 6 | [datanodes] 7 | 8 | # at least one is required 9 | # job history server will be also configured on the 1st host 10 | [yarnresourcemanager] 11 | 12 | # optional 13 | # can be required for other services 14 | # 3 or 5 hosts is required if 2 namenodes configured 15 | [zookeepernodes] 16 | 17 | # optional 18 | # required if 2 namenodes configured 19 | [journalnodes] 20 | 21 | # optional 22 | # required if hivemetastore, oozie or hue configured 23 | [postgresql] 24 | 25 | # optional 26 | # required if impala-store-catalog configured 27 | [hivemetastore] 28 | 29 | # optional 30 | [impala-store-catalog] 31 | 32 | # optional 33 | [hbasemaster] 34 | 35 | # optional 36 | [solr] 37 | 38 | #optional 39 | [spark] 40 | 41 | # optional 42 | [oozie] 43 | 44 | # optional 45 | [kafka] 46 | 47 | # optional 48 | [hue] 49 | 50 | # optional. comment this out completely or fill in a host into [dashboard] 51 | #[dashboard] 52 | [dashboard:children] 53 | namenodes 54 | 55 | # please do not edit the groups below 56 | [hadoop:children] 57 | namenodes 58 | datanodes 59 | journalnodes 60 | yarnresourcemanager 61 | hivemetastore 62 | impala-store-catalog 63 | hbasemaster 64 | solr 65 | spark 66 | oozie 67 | hue 68 | 69 | [java:children] 70 | hadoop 71 | kafka 72 | zookeepernodes 73 | -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: Serge Sergeev 4 | description: Quick and easy cloudera hadoop components installer. 5 | company: No Company 6 | license: MIT 7 | min_ansible_version: 1.9 8 | platforms: 9 | - name: EL 10 | versions: 11 | - 6 12 | galaxy_tags: 13 | - installer 14 | - cloudera 15 | - cloudera-hadoop 16 | - hadoop:hive:hbase:zookeeper:impala:oozie:hue 17 | dependencies: [] 18 | -------------------------------------------------------------------------------- /roles/check_config/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: check number of namenodes 2 | when: groups['namenodes']|count < 1 or groups['namenodes']|count > 2 3 | fail: msg="expected number of namenodes is 1 or 2, you have configured {{ groups['namenodes']|count }}" 4 | run_once: true 5 | 6 | - name: check number of datanodes 7 | when: groups['datanodes']|count < 1 8 | fail: msg="expected number of datanodes is more than 0, you have not configured any" 9 | run_once: true 10 | 11 | - name: check number of yarnresourcemanager 12 | when: groups['yarnresourcemanager']|count < 1 13 | fail: msg="at least one node is required for yarnresoucemanager you have not configured any" 14 | run_once: true 15 | 16 | - name: check number of journal nodes 17 | when: groups['namenodes']|count > 1 and groups['journalnodes']|count % 2 == 0 18 | fail: msg="expected number of journalnodes for HA setup is odd and at least 1, you have configured {{ groups['journalnodes']|count }}" 19 | run_once: true 20 | 21 | - name: check number of journal nodes 22 | when: groups['namenodes']|count < 2 and groups['journalnodes']|count > 0 23 | fail: msg="{{ groups['journalnodes']|count }} journal nodes configured, however you have only 1 namenode, for HA setup at least 2 namenodes is requied" 24 | run_once: true 25 | 26 | - name: check number of zookeeper nodes for namenodes 27 | when: groups['namenodes']|count > 1 and ( groups['zookeepernodes']|count != 3 and groups['zookeepernodes']|count != 5 ) 28 | fail: msg="expected number of zookeeper nodes for HA setup is 3 or 5, you have configured {{ groups['zookeepernodes']|count }}" 29 | run_once: true 30 | 31 | - name: check number of zookeeper nodes for resourcemanager 32 | when: groups['yarnresourcemanager']|count > 1 and ( groups['zookeepernodes']|count != 1 and groups['zookeepernodes']|count != 3 and groups['zookeepernodes']|count != 5 ) 33 | fail: msg="expected number of zookeeper nodes for HA setup is 1 or 3 or 5, you have configured {{ groups['zookeepernodes']|count }}" 34 | run_once: true 35 | 36 | - name: check number of hivemetastore nodes 37 | when: groups['hivemetastore']|count != 0 and groups['hivemetastore']|count != 1 38 | fail: msg="expected number of hivemetastore nodes is 0 or 1, you have configured {{ groups['hivemetastore']|count }}" 39 | run_once: true 40 | 41 | - name: check hivemetastore prerequisites 42 | when: groups['hivemetastore']|count > 0 and groups['zookeepernodes']|count < 1 43 | fail: msg="hivemetastore requires at least one zookeepernode, you have not configured any" 44 | run_once: true 45 | 46 | - name: check hivemetastore prerequisites 47 | when: groups['hivemetastore']|count > 0 and groups['postgresql']|count != 1 48 | fail: msg="hivemetastore requires postgresql node, you have not configured any" 49 | run_once: true 50 | 51 | - name: check oozie prerequisites 52 | when: groups['oozie']|count > 0 and groups['postgresql']|count != 1 53 | fail: msg="hue requires postgresql node, you have not configured any" 54 | run_once: true 55 | 56 | - name: check hue prerequisites 57 | when: groups['hue']|count > 0 and groups['postgresql']|count != 1 58 | fail: msg="hue requires postgresql node, you have not configured any" 59 | run_once: true 60 | 61 | - name: check hue prerequisites 62 | when: groups['hue']|count > 0 and groups['oozie']|count < 1 63 | fail: msg="hue requires oozie node to submit jobs, you have not configured any" 64 | run_once: true 65 | 66 | - name: check number of impala-state-store hosts 67 | when: groups['impala-store-catalog']|count > 1 68 | fail: msg="expected number of impala-store-catalog is 0 or 1, you have configured {{ groups['impala-store-catalog']|count }}" 69 | run_once: true 70 | 71 | - name: check impala prerequisites 72 | when: groups['impala-store-catalog']|count > 0 and groups['hivemetastore']|count < 1 73 | fail: msg="impala requires hivemetastore node, you have not configured any" 74 | run_once: true 75 | 76 | - name: check number of hbase master hosts 77 | when: groups['hbasemaster']|count > 1 78 | fail: msg="expected number of hbasemaster hosts is 0 or 1, you have configured {{ groups['hbasemaster']|count }}" 79 | run_once: true 80 | 81 | - name: check hbase prerequisites 82 | when: groups['hbasemaster']|count > 0 and groups['zookeepernodes']|count < 1 83 | fail: msg="hbase requires at least one zookeepernode, you have not configured any" 84 | run_once: true 85 | 86 | - name: check solr prerequisites 87 | when: groups['solr']|count > 0 and groups['zookeepernodes']|count < 1 88 | fail: msg="solr requires at least one zookeepernode, you have not configured any" 89 | run_once: true 90 | 91 | - name: check kafka prerequisites 92 | when: groups['kafka']|count > 0 and groups['zookeepernodes']|count < 1 93 | fail: msg="kafka requires at least one zookeepernode, you have not configured any" 94 | run_once: true 95 | -------------------------------------------------------------------------------- /roles/common/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{item}} state=latest 4 | with_items: 5 | - "{{ java_package }}" 6 | - bigtop-utils 7 | - redhat-lsb-core 8 | 9 | - name: install template configurations 10 | tags: config 11 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} 12 | with_items: 13 | - bigtop-utils 14 | 15 | - name: create directories 16 | tags: config 17 | # is this a 'dirname' or just regex_replace? dirname would just strip '/' off the end 18 | file: path="{{ item | regex_replace('[^/]+/*$','') }}" state=directory owner=root group=root mode=755 follow=yes 19 | with_items: 20 | - "{{ zookeeper_data_dir }}" 21 | - "{{ log_folder }}" 22 | - "{{ etc_folder }}" 23 | 24 | - name: generate services.xml 25 | tags: 26 | - config 27 | - clinit 28 | local_action: template src="services.xml.j2" dest={{ inventory_dir }}/workdir/services.xml 29 | run_once: true 30 | 31 | - name: generate interfaces index 32 | tags: 33 | - config 34 | - interfaces 35 | local_action: template src="interfaces.j2" dest={{ inventory_dir }}/workdir/interfaces 36 | run_once: true 37 | 38 | -------------------------------------------------------------------------------- /roles/common/templates/bigtop-utils.j2: -------------------------------------------------------------------------------- 1 | 2 | # Override JAVA_HOME detection for all bigtop packages 3 | export JAVA_HOME={{ java_home }} 4 | 5 | # Provide a colon-delimited list of directories to search for native Java libraries (e.g. libjvm.so) 6 | # export JAVA_NATIVE_PATH 7 | 8 | # Add common dependencies to the classpath (/var/lib/bigtop will already be included) 9 | # export BIGTOP_CLASSPATH 10 | -------------------------------------------------------------------------------- /roles/common/templates/interfaces.j2: -------------------------------------------------------------------------------- 1 | 2 | hdfs namenodes state 3 | {% for item in groups['namenodes'] %} 4 | http://{{ item }}:50070 5 | {% endfor %} 6 | 7 | yarn resource manager and history server 8 | {% for item in groups['yarnresourcemanager'] %} 9 | http://{{ item }}:8088 10 | http://{{ item }}:19888 11 | {% endfor %} 12 | 13 | impala catalog and statestore 14 | {% for item in groups['impala-store-catalog'] %} 15 | http://{{ item }}:25020 16 | http://{{ item }}:25010 17 | {% endfor %} 18 | 19 | datanodes ( also have /logs ) and impala-server 20 | {% for item in groups['datanodes'] %} 21 | http://{{ item }}:50075 22 | http://{{ item }}:25000 23 | {% endfor %} 24 | 25 | oozie 26 | {% for item in groups['oozie'] %} 27 | http://{{ item }}:11000 28 | {% endfor %} 29 | 30 | hbase 31 | {% for item in groups['hbasemaster'] %} 32 | http://{{ item }}:60010 33 | {% endfor %} 34 | 35 | solr 36 | {% for item in groups['solr'] %} 37 | http://{{ item }}:8983 38 | {% endfor %} 39 | 40 | hue 41 | {% for item in groups['hue'] %} 42 | http://{{ item }}:8888 43 | {% endfor %} 44 | 45 | spark job history server 46 | {% if groups['spark']|count >0 %} 47 | http://{{ groups['spark'][0] }}:18080 48 | {% endif %} 49 | -------------------------------------------------------------------------------- /roles/dashboard/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: ensure httpd is in place for dashboard 2 | yum: name=httpd state=present 3 | 4 | - name: create dashboard directory 5 | file: path={{ dashboard_folder }} state=directory 6 | 7 | - name: download required css/js libraries 8 | local_action: get_url url={{ item }} dest={{ inventory_dir }}/workdir/{{ item|basename }} 9 | with_items: 10 | - https://github.com/twbs/bootstrap/releases/download/v3.3.6/bootstrap-3.3.6-dist.zip 11 | - https://github.com/jasny/bootstrap/releases/download/v3.1.3/jasny-bootstrap-3.1.3-dist.zip 12 | - http://code.jquery.com/jquery-1.12.0.min.js 13 | run_once: true 14 | 15 | - name: extract required items from twbs 16 | local_action: command unzip -o {{ inventory_dir }}/workdir/bootstrap-3.3.6-dist.zip bootstrap-3.3.6-dist/css/bootstrap.min.css bootstrap-3.3.6-dist/js/bootstrap.min.js -d {{ inventory_dir }}/workdir/ 17 | run_once: true 18 | 19 | - name: extract required items from jasny 20 | local_action: command unzip -o {{ inventory_dir }}/workdir/jasny-bootstrap-3.1.3-dist.zip jasny-bootstrap/css/jasny-bootstrap.min.css jasny-bootstrap/js/jasny-bootstrap.min.js -d {{ inventory_dir }}/workdir/ 21 | run_once: true 22 | 23 | - name: copy css/js files 24 | copy: src={{ item }} dest={{ dashboard_folder }}/{{ item|basename }} 25 | with_items: 26 | - "{{ inventory_dir }}/workdir/bootstrap-3.3.6-dist/css/bootstrap.min.css" 27 | - "{{ inventory_dir }}/workdir/bootstrap-3.3.6-dist/js/bootstrap.min.js" 28 | - "{{ inventory_dir }}/workdir/jasny-bootstrap/css/jasny-bootstrap.min.css" 29 | - "{{ inventory_dir }}/workdir/jasny-bootstrap/js/jasny-bootstrap.min.js" 30 | 31 | - name: copy jquery file 32 | copy: src={{ inventory_dir }}/workdir/jquery-1.12.0.min.js dest={{ dashboard_folder }}/jquery.min.js 33 | 34 | - name: create dashboard page 35 | template: src=dashboard.j2 dest={{ dashboard_folder }}/index.html 36 | 37 | - name: start service 38 | tags: service 39 | service: name=httpd state=started enabled=yes 40 | -------------------------------------------------------------------------------- /roles/dashboard/templates/dashboard.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dashboard 4 | 5 | 6 | 7 | 61 | 62 | 63 | 106 | 107 |
108 | 111 | 112 | 119 |
120 | 121 | 122 | 123 | 133 | 134 | -------------------------------------------------------------------------------- /roles/hadoop/files/capacity-scheduler.xml: -------------------------------------------------------------------------------- 1 | 14 | 15 | 16 | 17 | yarn.scheduler.capacity.maximum-applications 18 | 10000 19 | 20 | Maximum number of applications that can be pending and running. 21 | 22 | 23 | 24 | 25 | yarn.scheduler.capacity.maximum-am-resource-percent 26 | 0.1 27 | 28 | Maximum percent of resources in the cluster which can be used to run 29 | application masters i.e. controls number of concurrent running 30 | applications. 31 | 32 | 33 | 34 | 35 | yarn.scheduler.capacity.resource-calculator 36 | org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator 37 | 38 | The ResourceCalculator implementation to be used to compare 39 | Resources in the scheduler. 40 | The default i.e. DefaultResourceCalculator only uses Memory while 41 | DominantResourceCalculator uses dominant-resource to compare 42 | multi-dimensional resources such as Memory, CPU etc. 43 | 44 | 45 | 46 | 47 | yarn.scheduler.capacity.root.queues 48 | default 49 | 50 | The queues at the this level (root is the root queue). 51 | 52 | 53 | 54 | 55 | yarn.scheduler.capacity.root.default.capacity 56 | 100 57 | Default queue target capacity. 58 | 59 | 60 | 61 | yarn.scheduler.capacity.root.default.user-limit-factor 62 | 1 63 | 64 | Default queue user limit a percentage from 0.0 to 1.0. 65 | 66 | 67 | 68 | 69 | yarn.scheduler.capacity.root.default.maximum-capacity 70 | 100 71 | 72 | The maximum capacity of the default queue. 73 | 74 | 75 | 76 | 77 | yarn.scheduler.capacity.root.default.state 78 | RUNNING 79 | 80 | The state of the default queue. State can be one of RUNNING or STOPPED. 81 | 82 | 83 | 84 | 85 | yarn.scheduler.capacity.root.default.acl_submit_applications 86 | * 87 | 88 | The ACL of who can submit jobs to the default queue. 89 | 90 | 91 | 92 | 93 | yarn.scheduler.capacity.root.default.acl_administer_queue 94 | * 95 | 96 | The ACL of who can administer jobs on the default queue. 97 | 98 | 99 | 100 | 101 | yarn.scheduler.capacity.node-locality-delay 102 | 40 103 | 104 | Number of missed scheduling opportunities after which the CapacityScheduler 105 | attempts to schedule rack-local containers. 106 | Typically this should be set to number of nodes in the cluster, By default is setting 107 | approximately number of nodes in one rack which is 40. 108 | 109 | 110 | 111 | 112 | yarn.scheduler.capacity.queue-mappings 113 | 114 | 115 | A list of mappings that will be used to assign jobs to queues 116 | The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* 117 | Typically this list will be used to map users to queues, 118 | for example, u:%user:%user maps all users to queues with the same name 119 | as the user. 120 | 121 | 122 | 123 | 124 | yarn.scheduler.capacity.queue-mappings-override.enable 125 | false 126 | 127 | If a queue mapping is present, will it override the value specified 128 | by the user? This can be used by administrators to place jobs in queues 129 | that are different than the one specified by the user. 130 | The default is false. 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /roles/hadoop/files/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
namevaluedescription
37 | 38 | 39 |
40 |
41 | -------------------------------------------------------------------------------- /roles/hadoop/files/container-executor.cfg: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group 2 | banned.users=#comma separated list of users who can not run applications 3 | min.user.id=1000#Prevent other super-users 4 | allowed.system.users=##comma separated list of system users who CAN run applications 5 | -------------------------------------------------------------------------------- /roles/hadoop/files/default/hadoop: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HADOOP_HOME_WARN_SUPPRESS=true 16 | export HADOOP_PREFIX=/usr/lib/hadoop 17 | 18 | export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec 19 | export HADOOP_CONF_DIR=/etc/hadoop/conf 20 | 21 | export HADOOP_COMMON_HOME=/usr/lib/hadoop 22 | export HADOOP_HDFS_HOME=/usr/lib/hadoop-hdfs 23 | export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn 24 | 25 | # Set HADOOP_MAPRED_HOME to /usr/lib/hadoop-0.20-mapreduce to use MR1 26 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 27 | 28 | export JSVC_HOME=/usr/lib/bigtop-utils 29 | -------------------------------------------------------------------------------- /roles/hadoop/files/default/hadoop-0.20-mapreduce: -------------------------------------------------------------------------------- 1 | export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec 2 | export HADOOP_CONF_DIR=/etc/hadoop/conf 3 | export HADOOP_HOME=/usr/lib/hadoop-0.20-mapreduce 4 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-0.20-mapreduce 5 | export HADOOP_JOBTRACKER_USER=mapred 6 | export HADOOP_TASKTRACKER_USER=mapred 7 | export HADOOP_MRZKFC_USER=mapred 8 | export HADOOP_JOBTRACKERHA_USER=mapred 9 | export HADOOP_IDENT_STRING=hadoop 10 | export HADOOP_LOG_DIR=/var/log/hadoop-0.20-mapreduce 11 | export HADOOP_PID_DIR=/var/run/hadoop-0.20-mapreduce 12 | -------------------------------------------------------------------------------- /roles/hadoop/files/default/hadoop-httpfs: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HTTPFS_USER=httpfs 16 | export HTTPFS_CONFIG=/etc/hadoop-httpfs/conf 17 | export HTTPFS_LOG=/var/log/hadoop-httpfs/ 18 | export HTTPFS_TEMP=/var/run/hadoop-httpfs/ 19 | export HTTPFS_CATALINA_HOME=/usr/lib/bigtop-tomcat 20 | export CATALINA_PID=/var/run/hadoop-httpfs/hadoop-httpfs-httpfs.pid 21 | export CATALINA_BASE=/var/lib/hadoop-httpfs/tomcat-deployment 22 | export CATALINA_TMPDIR=/var/run/hadoop-httpfs/ 23 | # HTTPFS_HTTP_PORT 24 | # HTTPFS_ADMIN_PORT 25 | -------------------------------------------------------------------------------- /roles/hadoop/files/default/hadoop-mapreduce-historyserver: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | export HADOOP_MAPRED_IDENT_STRING=mapred 17 | export HADOOP_MAPRED_PID_DIR=/var/run/hadoop-mapreduce 18 | export HADOOP_MAPRED_LOG_DIR=/var/log/hadoop-mapreduce 19 | export HADOOP_LOG_DIR=/var/log/hadoop-mapreduce 20 | export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 21 | -------------------------------------------------------------------------------- /roles/hadoop/files/default/hadoop-yarn-resourcemanager: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export YARN_IDENT_STRING=yarn 16 | export YARN_PID_DIR=/var/run/hadoop-yarn 17 | export YARN_LOG_DIR=/var/log/hadoop-yarn 18 | export YARN_CONF_DIR=/etc/hadoop/conf 19 | -------------------------------------------------------------------------------- /roles/hadoop/files/dfs.exclude: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/hadoop/files/dfs.exclude -------------------------------------------------------------------------------- /roles/hadoop/files/hadoop-metrics.properties: -------------------------------------------------------------------------------- 1 | # Configuration of the "dfs" context for null 2 | dfs.class=org.apache.hadoop.metrics.spi.NullContext 3 | 4 | # Configuration of the "dfs" context for file 5 | #dfs.class=org.apache.hadoop.metrics.file.FileContext 6 | #dfs.period=10 7 | #dfs.fileName=/tmp/dfsmetrics.log 8 | 9 | # Configuration of the "dfs" context for ganglia 10 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 11 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext 12 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 13 | # dfs.period=10 14 | # dfs.servers=localhost:8649 15 | 16 | 17 | # Configuration of the "mapred" context for null 18 | mapred.class=org.apache.hadoop.metrics.spi.NullContext 19 | 20 | # Configuration of the "mapred" context for file 21 | #mapred.class=org.apache.hadoop.metrics.file.FileContext 22 | #mapred.period=10 23 | #mapred.fileName=/tmp/mrmetrics.log 24 | 25 | # Configuration of the "mapred" context for ganglia 26 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 27 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext 28 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 29 | # mapred.period=10 30 | # mapred.servers=localhost:8649 31 | 32 | 33 | # Configuration of the "jvm" context for null 34 | #jvm.class=org.apache.hadoop.metrics.spi.NullContext 35 | 36 | # Configuration of the "jvm" context for file 37 | #jvm.class=org.apache.hadoop.metrics.file.FileContext 38 | #jvm.period=10 39 | #jvm.fileName=/tmp/jvmmetrics.log 40 | 41 | # Configuration of the "jvm" context for ganglia 42 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext 43 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 44 | # jvm.period=10 45 | # jvm.servers=localhost:8649 46 | 47 | # Configuration of the "rpc" context for null 48 | rpc.class=org.apache.hadoop.metrics.spi.NullContext 49 | 50 | # Configuration of the "rpc" context for file 51 | #rpc.class=org.apache.hadoop.metrics.file.FileContext 52 | #rpc.period=10 53 | #rpc.fileName=/tmp/rpcmetrics.log 54 | 55 | # Configuration of the "rpc" context for ganglia 56 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext 57 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 58 | # rpc.period=10 59 | # rpc.servers=localhost:8649 60 | 61 | 62 | # Configuration of the "ugi" context for null 63 | ugi.class=org.apache.hadoop.metrics.spi.NullContext 64 | 65 | # Configuration of the "ugi" context for file 66 | #ugi.class=org.apache.hadoop.metrics.file.FileContext 67 | #ugi.period=10 68 | #ugi.fileName=/tmp/ugimetrics.log 69 | 70 | # Configuration of the "ugi" context for ganglia 71 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext 72 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 73 | # ugi.period=10 74 | # ugi.servers=localhost:8649 75 | 76 | -------------------------------------------------------------------------------- /roles/hadoop/files/hadoop-metrics2.properties: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period, in seconds 6 | *.period=10 7 | 8 | # The namenode-metrics.out will contain metrics from all context 9 | #namenode.sink.file.filename=namenode-metrics.out 10 | # Specifying a special sampling period for namenode: 11 | #namenode.sink.*.period=8 12 | 13 | #datanode.sink.file.filename=datanode-metrics.out 14 | 15 | #resourcemanager.sink.file.filename=resourcemanager-metrics.out 16 | 17 | #nodemanager.sink.file.filename=nodemanager-metrics.out 18 | 19 | #mrappmaster.sink.file.filename=mrappmaster-metrics.out 20 | 21 | #jobhistoryserver.sink.file.filename=jobhistoryserver-metrics.out 22 | 23 | # the following example split metrics of different 24 | # context to different sinks (in this case files) 25 | #nodemanager.sink.file_jvm.class=org.apache.hadoop.metrics2.sink.FileSink 26 | #nodemanager.sink.file_jvm.context=jvm 27 | #nodemanager.sink.file_jvm.filename=nodemanager-jvm-metrics.out 28 | #nodemanager.sink.file_mapred.class=org.apache.hadoop.metrics2.sink.FileSink 29 | #nodemanager.sink.file_mapred.context=mapred 30 | #nodemanager.sink.file_mapred.filename=nodemanager-mapred-metrics.out 31 | 32 | # 33 | # Below are for sending metrics to Ganglia 34 | # 35 | # for Ganglia 3.0 support 36 | # *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30 37 | # 38 | # for Ganglia 3.1 support 39 | # *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31 40 | 41 | # *.sink.ganglia.period=10 42 | 43 | # default for supportsparse is false 44 | # *.sink.ganglia.supportsparse=true 45 | 46 | #*.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both 47 | #*.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40 48 | 49 | # Tag values to use for the ganglia prefix. If not defined no tags are used. 50 | # If '*' all tags are used. If specifiying multiple tags separate them with 51 | # commas. Note that the last segment of the property name is the context name. 52 | # 53 | #*.sink.ganglia.tagsForPrefix.jvm=ProcesName 54 | #*.sink.ganglia.tagsForPrefix.dfs= 55 | #*.sink.ganglia.tagsForPrefix.rpc= 56 | #*.sink.ganglia.tagsForPrefix.mapred= 57 | 58 | #namenode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 59 | 60 | #datanode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 61 | 62 | #resourcemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 63 | 64 | #nodemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 65 | 66 | #mrappmaster.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 67 | 68 | #jobhistoryserver.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 69 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/base.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hadoop 6 | 7 | - name: create configuration directory 8 | tags: config 9 | file: path={{ etc_folder }}/hadoop state=directory 10 | 11 | - name: setup alternatives link 12 | tags: config 13 | alternatives: name=hadoop-conf link=/etc/hadoop/conf path={{ etc_folder }}/hadoop 14 | 15 | - name: install template configurations 16 | tags: config 17 | template: src={{ item }}.j2 dest={{ etc_folder }}/hadoop/{{ item }} 18 | with_items: 19 | - core-site.xml 20 | - fair-scheduler.xml 21 | - hdfs-site.xml 22 | - mapred-site.xml 23 | - yarn-site.xml 24 | - hadoop-env.sh 25 | - mapred-env.sh 26 | - yarn-env.sh 27 | 28 | - name: install files configurations 29 | tags: config 30 | copy: src={{ item }} dest={{ etc_folder }}/hadoop/{{ item }} 31 | with_items: 32 | - capacity-scheduler.xml 33 | - configuration.xsl 34 | - container-executor.cfg 35 | - dfs.exclude 36 | - hadoop-metrics.properties 37 | - hadoop-metrics2.properties 38 | - hadoop-policy.xml 39 | - log4j.properties 40 | 41 | - name: install default configurations 42 | tags: config 43 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 44 | with_items: 45 | - hadoop 46 | 47 | - name: create log folders 48 | tags: config 49 | file: path={{ log_folder }}/{{ item }} state=directory owner={{ item }} mode=0755 50 | with_items: 51 | - hdfs 52 | - yarn 53 | - mapred 54 | 55 | - name: install hdfs-ready utility 56 | tags: config 57 | template: src=bin/hdfs-ready.sh dest={{ etc_folder }}/hadoop/hdfs-ready.sh mode=0755 58 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/datanode.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: package 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hadoop-hdfs-datanode 6 | - hadoop-yarn-nodemanager 7 | 8 | - name: destroy data 9 | tags: init 10 | command: rm -rf {{ item }} 11 | with_items: "{{ dfs_datanode_data_dir.replace('file://','').split(',') }}" 12 | when: destroy_data 13 | 14 | - name: create datanode directories 15 | file: dest={{ item }} owner=hdfs group=hdfs state=directory 16 | with_items: "{{ dfs_datanode_data_dir.replace('file://','').split(',') }}" 17 | 18 | - name: create yarn local directories 19 | file: dest={{ item }} owner=yarn group=hadoop state=directory 20 | with_items: "{{ yarn_nodemanager_local_dirs.replace('file://','').split(',') }}" 21 | 22 | - name: create yarn log directories 23 | file: dest={{ item }} owner=yarn group=hadoop state=directory 24 | with_items: "{{ yarn_nodemanager_log_dirs.replace('file://','').split(',') }}" 25 | 26 | - name: install default configurations 27 | tags: config 28 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 29 | with_items: 30 | - hadoop-hdfs-datanode 31 | - hadoop-yarn-nodemanager 32 | 33 | - name: start services 34 | tags: service 35 | service: name={{ item }} state=restarted enabled=yes 36 | with_items: 37 | - hadoop-hdfs-datanode 38 | - hadoop-yarn-nodemanager 39 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/journalnode.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: package 3 | yum: name=hadoop-hdfs-journalnode state=latest 4 | 5 | - name: destroy data 6 | tags: init 7 | command: rm -rf {{ dfs_journalnode_edits_dir }} 8 | when: destroy_data 9 | 10 | - name: create journal node directories 11 | tags: config 12 | file: dest={{ dfs_journalnode_edits_dir }} owner=hdfs group=hdfs state=directory 13 | 14 | - name: install default configurations 15 | tags: config 16 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 17 | with_items: 18 | - hadoop-hdfs-journalnode 19 | 20 | - name: start services 21 | tags: service 22 | service: name=hadoop-hdfs-journalnode state=restarted enabled=yes 23 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - include: base.yaml 2 | when: deploy == "base" 3 | 4 | - include: journalnode.yaml 5 | when: deploy == "journalnodes" 6 | 7 | - include: namenodes-fence.yaml 8 | when: deploy == "namenodes-fence" and groups['namenodes']|count > 1 9 | 10 | - include: namenode.yaml 11 | when: deploy == "namenodes" 12 | 13 | - include: datanode.yaml 14 | when: deploy == "datanodes" 15 | 16 | - include: test-hdfs.yaml 17 | when: deploy == "test-hdfs" 18 | 19 | - include: resourcemanager.yaml 20 | when: deploy == "resourcemanager" 21 | 22 | - include: test-mapreduce.yaml 23 | when: deploy == "test-mapreduce" 24 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/namenode.yaml: -------------------------------------------------------------------------------- 1 | - name: install namenode package 2 | tags: package 3 | yum: name=hadoop-hdfs-namenode state=latest 4 | 5 | - name: install namenode default configurations 6 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 7 | with_items: 8 | - hadoop-hdfs-namenode 9 | 10 | - name: install zkfc package 11 | tags: package 12 | yum: name=hadoop-hdfs-zkfc state=latest 13 | when: groups['namenodes']|count > 1 14 | 15 | - name: install zkfc default configurations 16 | tags: config 17 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 18 | with_items: 19 | - hadoop-hdfs-zkfc 20 | when: groups['namenodes']|count > 1 21 | 22 | - name: initialize zookeeper 23 | tags: init 24 | command: sudo -Hu hdfs hdfs zkfc -formatZK -force 25 | when: groups['namenodes']|count > 1 26 | run_once: true 27 | 28 | - name: start zkfc 29 | tags: service 30 | service: name=hadoop-hdfs-zkfc state=restarted enabled=yes 31 | when: groups['namenodes']|count > 1 32 | 33 | - name: destroy data 34 | tags: init 35 | command: rm -rf {{ item }} 36 | with_items: "{{ dfs_namenode_name_dir.replace('file://','').split(',') }}" 37 | when: destroy_data 38 | 39 | - name: create namenode directories 40 | tags: init 41 | file: dest={{ item }} owner=hdfs group=hdfs state=directory 42 | with_items: "{{ dfs_namenode_name_dir.replace('file://','').split(',') }}" 43 | 44 | - name: format primary namenode 45 | tags: init 46 | command: sudo -Hu hdfs hdfs namenode -format -force 47 | when: ansible_hostname == groups['namenodes'][0] and destroy_data 48 | 49 | - name: start primary namenode 50 | tags: service 51 | service: name=hadoop-hdfs-namenode state=restarted enabled=yes 52 | when: ansible_hostname == groups['namenodes'][0] 53 | 54 | - name: init secondary instance 55 | tags: init 56 | command: sudo -Hu hdfs hdfs namenode -bootstrapStandby 57 | when: ansible_hostname != groups['namenodes'][0] and destroy_data 58 | 59 | - name: start secondary namenode 60 | tags: service 61 | service: name=hadoop-hdfs-namenode state=restarted enabled=yes 62 | when: ansible_hostname != groups['namenodes'][0] 63 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/namenodes-fence.yaml: -------------------------------------------------------------------------------- 1 | - name: remove old keys 2 | tags: config 3 | local_action: shell rm -f {{ inventory_dir }}/workdir/{{ item }} 4 | with_items: 5 | - hdfs_key* 6 | - hdfs_known_hosts 7 | run_once: true 8 | 9 | - name: generate ssh key 10 | tags: config 11 | local_action: command ssh-keygen -q -N "" -t rsa -b 2048 -f {{ inventory_dir }}/workdir/hdfs_key 12 | run_once: true 13 | 14 | - name: set key permissions 15 | tags: config 16 | local_action: file path={{ inventory_dir }}/workdir/hdfs_key mode=a+r 17 | run_once: true 18 | 19 | - name: generate known_hosts 20 | tags: config 21 | local_action: shell ssh-keyscan {{ ansible_hostname }} >> {{ inventory_dir }}/workdir/hdfs_known_hosts 22 | 23 | - name: create .ssh dir 24 | tags: config 25 | file: path=/var/lib/hadoop-hdfs/.ssh state=directory owner=hdfs group=hdfs mode=700 26 | 27 | - name: install ssh auth files 28 | tags: config 29 | copy: src={{ inventory_dir }}/workdir/{{ item.src }} dest={{ item.dest }} owner=hdfs group=hdfs mode=700 30 | with_items: 31 | - { src: hdfs_key, dest: /var/lib/hadoop-hdfs/.ssh/id_rsa } 32 | - { src: hdfs_key.pub, dest: /var/lib/hadoop-hdfs/.ssh/authorized_keys } 33 | - { src: hdfs_known_hosts, dest: /var/lib/hadoop-hdfs/.ssh/known_hosts } 34 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/resourcemanager.yaml: -------------------------------------------------------------------------------- 1 | - name: install resource manager package 2 | tags: package 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hadoop-yarn-resourcemanager 6 | 7 | - name: install history server package 8 | tags: package 9 | yum: name={{ item }} state=latest 10 | with_items: 11 | - hadoop-mapreduce-historyserver 12 | when: ansible_hostname == groups['yarnresourcemanager'][0] 13 | 14 | - name: install default configurations 15 | tags: config 16 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 17 | with_items: 18 | - hadoop-yarn-resourcemanager 19 | - hadoop-mapreduce-historyserver 20 | 21 | - name: configure hdfs directories 22 | tags: config 23 | command: sudo -Hu hdfs hdfs dfs {{ item }} 24 | with_items: 25 | - -mkdir -p /tmp 26 | - -chmod 1777 /tmp 27 | - -mkdir -p /user/history 28 | - -chmod 1777 /user/history 29 | - -chown mapred:hadoop /user/history 30 | - -mkdir -p /var/log/hadoop-yarn/apps 31 | - -chown yarn:mapred /var/log/hadoop-yarn 32 | - -chown yarn:hadoop /var/log/hadoop-yarn/apps 33 | - -chmod 1777 /var/log/hadoop-yarn/apps 34 | run_once: true 35 | 36 | - name: start services 37 | tags: service 38 | service: name={{ item }} state=restarted enabled=yes 39 | with_items: 40 | - hadoop-yarn-resourcemanager 41 | 42 | - name: start services 43 | tags: service 44 | service: name={{ item }} state=restarted enabled=yes 45 | with_items: 46 | - hadoop-mapreduce-historyserver 47 | when: ansible_hostname == groups['yarnresourcemanager'][0] 48 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/test-hdfs.yaml: -------------------------------------------------------------------------------- 1 | - name: get hdfs ha state 2 | tags: test 3 | command: sudo -Hu hdfs hdfs haadmin -getServiceState {{ ansible_hostname }} 4 | register: ha_state 5 | when: groups['namenodes']|count > 1 6 | 7 | - name: shutdown active name service 8 | tags: test 9 | shell: service hadoop-hdfs-namenode stop; sleep 1 10 | when: groups['namenodes']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1 11 | 12 | - name: test hdfs 13 | tags: test 14 | command: sudo -Hu hdfs hdfs dfs {{ item }} /ansible_hdfs_test 15 | with_items: 16 | - -touchz 17 | - -rm 18 | run_once: true 19 | 20 | - name: start services 21 | tags: test 22 | service: name=hadoop-hdfs-namenode state=restarted enabled=yes 23 | when: groups['namenodes']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1 24 | -------------------------------------------------------------------------------- /roles/hadoop/tasks/test-mapreduce.yaml: -------------------------------------------------------------------------------- 1 | - name: get resource manager ha state 2 | tags: test 3 | command: sudo -Hu yarn yarn rmadmin -getServiceState {{ ansible_hostname }} 4 | register: ha_state 5 | when: groups['yarnresourcemanager']|count > 1 6 | 7 | - name: shutdown active resource manager service 8 | tags: test 9 | shell: service hadoop-yarn-resourcemanager stop; sleep 1 10 | when: groups['yarnresourcemanager']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1 11 | 12 | - name: test mapreduce 13 | tags: test 14 | command: sudo -Hu hdfs hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 1 1 15 | run_once: true 16 | 17 | - name: start services 18 | tags: test 19 | service: name=hadoop-yarn-resourcemanager state=restarted enabled=yes 20 | when: groups['yarnresourcemanager']|count > 1 and ha_state is defined and ha_state.stdout.find('active') != -1 21 | -------------------------------------------------------------------------------- /roles/hadoop/templates/bin/hdfs-ready.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ready_filename="/hdfs_ready" 3 | sudo -Hu hdfs timeout 10 hdfs dfs -touchz $ready_filename &>/dev/null && exit 0 4 | exit 1 5 | 6 | -------------------------------------------------------------------------------- /roles/hadoop/templates/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | fs.defaultFS 8 | {% if groups['namenodes']|count > 1 %} 9 | hdfs://{{ cluster_name }} 10 | {% else %} 11 | hdfs://{{ groups['namenodes'][0] }}:8020 12 | {% endif %} 13 | 14 | 15 | {% if groups['namenodes']|count > 1 %} 16 | 17 | ha.zookeeper.quorum 18 | {% for item in groups['zookeepernodes'] -%} 19 | {{ item }}:2181{% if not loop.last %},{% endif %} 20 | {%- endfor %} 21 | 22 | {% endif %} 23 | 24 | 25 | dfs.permissions.superusergroup 26 | hadoop 27 | 28 | 29 | 30 | hadoop.proxyuser.mapred.groups 31 | * 32 | 33 | 34 | 35 | hadoop.proxyuser.mapred.hosts 36 | * 37 | 38 | 39 | 40 | hadoop.proxyuser.hue.hosts 41 | * 42 | 43 | 44 | 45 | hadoop.proxyuser.hue.groups 46 | * 47 | 48 | 49 | 50 | hadoop.proxyuser.httpfs.hosts 51 | * 52 | 53 | 54 | 55 | hadoop.proxyuser.httpfs.groups 56 | * 57 | 58 | 59 | 60 | hadoop.proxyuser.oozie.hosts 61 | * 62 | 63 | 64 | 65 | hadoop.proxyuser.oozie.groups 66 | * 67 | 68 | 69 | 70 | hadoop.proxyuser.impala.hosts 71 | * 72 | 73 | 74 | 75 | hadoop.proxyuser.impala.groups 76 | * 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /roles/hadoop/templates/default/hadoop-hdfs-datanode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs 16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 17 | export HADOOP_NAMENODE_USER=hdfs 18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs 19 | export HADOOP_DATANODE_USER=hdfs 20 | export HADOOP_IDENT_STRING=hdfs 21 | 22 | # We always want to start NFS as root and by setting a default value like the one below we 23 | # ensure this happens. Once port registration completes, privileges are dropped back from root 24 | # to HADOOP_PRIVILEGED_NFS_USER. 25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs 26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs 27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs 28 | 29 | # export HADOOP_SECURE_DN_USER=hdfs 30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 32 | 33 | # JVM heapsize tuning 34 | export HADOOP_HEAPSIZE={{datanode_heapsize|default('2048')}} 35 | -------------------------------------------------------------------------------- /roles/hadoop/templates/default/hadoop-hdfs-journalnode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs 16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 17 | export HADOOP_NAMENODE_USER=hdfs 18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs 19 | export HADOOP_DATANODE_USER=hdfs 20 | export HADOOP_IDENT_STRING=hdfs 21 | 22 | # We always want to start NFS as root and by setting a default value like the one below we 23 | # ensure this happens. Once port registration completes, privileges are dropped back from root 24 | # to HADOOP_PRIVILEGED_NFS_USER. 25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs 26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs 27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs 28 | 29 | # export HADOOP_SECURE_DN_USER=hdfs 30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 32 | 33 | # JVM heapsize tuning 34 | export HADOOP_HEAPSIZE={{journalnode_heapsize|default('1000')}} 35 | -------------------------------------------------------------------------------- /roles/hadoop/templates/default/hadoop-hdfs-namenode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs 16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 17 | export HADOOP_NAMENODE_USER=hdfs 18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs 19 | export HADOOP_DATANODE_USER=hdfs 20 | export HADOOP_IDENT_STRING=hdfs 21 | 22 | # We always want to start NFS as root and by setting a default value like the one below we 23 | # ensure this happens. Once port registration completes, privileges are dropped back from root 24 | # to HADOOP_PRIVILEGED_NFS_USER. 25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs 26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs 27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs 28 | 29 | # export HADOOP_SECURE_DN_USER=hdfs 30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 32 | 33 | # JVM heapsize tuning 34 | export HADOOP_HEAPSIZE={{namenode_heapsize|default('2048')}} 35 | -------------------------------------------------------------------------------- /roles/hadoop/templates/default/hadoop-hdfs-zkfc.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export HADOOP_PID_DIR=/var/run/hadoop-hdfs 16 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 17 | export HADOOP_NAMENODE_USER=hdfs 18 | export HADOOP_SECONDARYNAMENODE_USER=hdfs 19 | export HADOOP_DATANODE_USER=hdfs 20 | export HADOOP_IDENT_STRING=hdfs 21 | 22 | # We always want to start NFS as root and by setting a default value like the one below we 23 | # ensure this happens. Once port registration completes, privileges are dropped back from root 24 | # to HADOOP_PRIVILEGED_NFS_USER. 25 | export HADOOP_PRIVILEGED_NFS_USER=hdfs 26 | export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-hdfs 27 | export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-hdfs 28 | 29 | # export HADOOP_SECURE_DN_USER=hdfs 30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 32 | 33 | # JVM heapsize tuning 34 | export HADOOP_HEAPSIZE={{zkfc_heapsize|default('1000')}} 35 | -------------------------------------------------------------------------------- /roles/hadoop/templates/default/hadoop-yarn-nodemanager.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | export YARN_IDENT_STRING=yarn 16 | export YARN_PID_DIR=/var/run/hadoop-yarn 17 | export YARN_LOG_DIR=/var/log/hadoop-yarn 18 | export YARN_CONF_DIR=/etc/hadoop/conf 19 | 20 | export YARN_HEAPSIZE={{ nodemanager_heapsize }} 21 | -------------------------------------------------------------------------------- /roles/hadoop/templates/fair-scheduler.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | {% if groups['oozie']|count > 0 and 'oozie' in group_names %} 23 | 24 | 25 | {{ oozie_launcher_maxapps }} 26 | {{ oozie_launcher_weight }} 27 | 28 | {% endif %} 29 | 30 | 20 31 | 32 | 20 33 | 34 | 600 35 | 36 | 600 37 | 38 | 0.5 39 | 40 | 41 | 44 | 45 | -------------------------------------------------------------------------------- /roles/hadoop/templates/hadoop-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # NOTE: this files contains environment variables for all hadoop services 3 | # NOTE: this files overwrites varibales from /etc/default/hadoop* 4 | export HADOOP_LOG_DIR={{log_folder}}/hdfs 5 | -------------------------------------------------------------------------------- /roles/hadoop/templates/hdfs-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {% if groups['namenodes']|count > 1 %} 7 | 8 | dfs.nameservices 9 | {{ cluster_name }} 10 | 11 | 12 | 13 | dfs.ha.namenodes.{{ cluster_name }} 14 | {% for item in groups['namenodes'] -%} 15 | {{ item }}{% if not loop.last %},{% endif %} 16 | {%- endfor %} 17 | 18 | 19 | {% for item in groups['namenodes'] %} 20 | 21 | dfs.namenode.rpc-address.{{ cluster_name }}.{{ item }} 22 | {{ item }}:8020 23 | 24 | 25 | 26 | dfs.namenode.http-address.{{ cluster_name }}.{{ item }} 27 | {{ item }}:50070 28 | 29 | 30 | {% endfor %} 31 | 32 | dfs.namenode.shared.edits.dir 33 | qjournal://{% for item in groups['journalnodes'] -%} 34 | {{ item }}:8485{% if not loop.last %};{% endif %} 35 | {%- endfor %}/{{ cluster_name }} 36 | 37 | 38 | 39 | dfs.journalnode.edits.dir 40 | {{ dfs_journalnode_edits_dir }} 41 | 42 | 43 | 44 | dfs.client.failover.proxy.provider.{{ cluster_name }} 45 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 46 | 47 | 48 | 49 | dfs.ha.fencing.methods 50 | sshfence 51 | 52 | 53 | 54 | dfs.ha.fencing.ssh.private-key-files 55 | /var/lib/hadoop-hdfs/.ssh/id_rsa 56 | 57 | 58 | 59 | dfs.ha.automatic-failover.enabled 60 | true 61 | 62 | {% endif %} 63 | 64 | 65 | dfs.namenode.name.dir 66 | {{ dfs_namenode_name_dir }} 67 | 68 | 69 | 70 | dfs.datanode.data.dir 71 | {{ dfs_datanode_data_dir }} 72 | 73 | 74 | 75 | dfs.replication 76 | {{ dfs_replication }} 77 | 78 | 79 | 80 | dfs.permissions.superusergroup 81 | hadoop 82 | 83 | 84 | 85 | dfs.hosts.exclude 86 | /etc/hadoop/conf/dfs.exclude 87 | 88 | 89 | 90 | dfs.client.read.shortcircuit 91 | true 92 | 93 | 94 | 95 | dfs.domain.socket.path 96 | /var/run/hadoop-hdfs/hdfs.socket 97 | 98 | 99 | 100 | dfs.client.file-block-storage-locations.timeout.millis 101 | 10000 102 | 103 | 104 | 105 | dfs.datanode.hdfs-blocks-metadata.enabled 106 | true 107 | 108 | 109 | 110 | dfs.datanode.max.transfer.threads 111 | 4096 112 | 113 | 114 | 115 | fs.permissions.umask-mode 116 | {{ fs_permissions_umask }} 117 | 118 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /roles/hadoop/templates/mapred-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export HADOOP_MAPRED_LOG_DIR={{log_folder}}/mapred 3 | 4 | -------------------------------------------------------------------------------- /roles/hadoop/templates/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | mapreduce.framework.name 8 | yarn 9 | 10 | 11 | 12 | mapreduce.jobhistory.address 13 | {{ groups['yarnresourcemanager'][0] }}:10020 14 | 15 | 16 | 46 | 47 | jobtracker.thrift.address 48 | 0.0.0.0:9290 49 | 50 | 51 | 52 | mapred.jobtracker.plugins 53 | org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin 54 | Comma-separated list of jobtracker plug-ins to be activated. 55 | 56 | 57 | 58 | yarn.app.mapreduce.am.staging-dir 59 | /user 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /roles/hadoop/templates/yarn-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # User for YARN daemons 18 | export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} 19 | 20 | # resolve links - $0 may be a softlink 21 | export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" 22 | 23 | # some Java parameters 24 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 25 | if [ "$JAVA_HOME" != "" ]; then 26 | #echo "run java in $JAVA_HOME" 27 | JAVA_HOME=$JAVA_HOME 28 | fi 29 | 30 | if [ "$JAVA_HOME" = "" ]; then 31 | echo "Error: JAVA_HOME is not set." 32 | exit 1 33 | fi 34 | 35 | JAVA=$JAVA_HOME/bin/java 36 | JAVA_HEAP_MAX=-Xmx1000m 37 | 38 | # For setting YARN specific HEAP sizes please use this 39 | # Parameter and set appropriately 40 | # YARN_HEAPSIZE=1000 41 | 42 | # check envvars which might override default args 43 | if [ "$YARN_HEAPSIZE" != "" ]; then 44 | JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" 45 | fi 46 | 47 | # Resource Manager specific parameters 48 | 49 | # Specify the max Heapsize for the ResourceManager using a numerical value 50 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 51 | # the value to 1000. 52 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 53 | # and/or YARN_RESOURCEMANAGER_OPTS. 54 | # If not specified, the default value will be picked from either YARN_HEAPMAX 55 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 56 | #export YARN_RESOURCEMANAGER_HEAPSIZE=1000 57 | 58 | # Specify the max Heapsize for the timeline server using a numerical value 59 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 60 | # the value to 1000. 61 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 62 | # and/or YARN_TIMELINESERVER_OPTS. 63 | # If not specified, the default value will be picked from either YARN_HEAPMAX 64 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 65 | #export YARN_TIMELINESERVER_HEAPSIZE=1000 66 | 67 | # Specify the JVM options to be used when starting the ResourceManager. 68 | # These options will be appended to the options specified as YARN_OPTS 69 | # and therefore may override any similar flags set in YARN_OPTS 70 | #export YARN_RESOURCEMANAGER_OPTS= 71 | 72 | # Node Manager specific parameters 73 | 74 | # Specify the max Heapsize for the NodeManager using a numerical value 75 | # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set 76 | # the value to 1000. 77 | # This value will be overridden by an Xmx setting specified in either YARN_OPTS 78 | # and/or YARN_NODEMANAGER_OPTS. 79 | # If not specified, the default value will be picked from either YARN_HEAPMAX 80 | # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. 81 | #export YARN_NODEMANAGER_HEAPSIZE=1000 82 | 83 | # Specify the JVM options to be used when starting the NodeManager. 84 | # These options will be appended to the options specified as YARN_OPTS 85 | # and therefore may override any similar flags set in YARN_OPTS 86 | #export YARN_NODEMANAGER_OPTS= 87 | 88 | # so that filenames w/ spaces are handled correctly in loops below 89 | IFS= 90 | 91 | 92 | YARN_LOG_DIR={{log_folder}}/yarn 93 | if [ "$YARN_LOGFILE" = "" ]; then 94 | YARN_LOGFILE='yarn.log' 95 | fi 96 | 97 | # default policy file for service-level authorization 98 | if [ "$YARN_POLICYFILE" = "" ]; then 99 | YARN_POLICYFILE="hadoop-policy.xml" 100 | fi 101 | 102 | # restore ordinary behaviour 103 | unset IFS 104 | 105 | 106 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" 107 | YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" 108 | YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" 109 | YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" 110 | YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" 111 | YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" 112 | YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 113 | YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" 114 | if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then 115 | YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" 116 | fi 117 | YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" 118 | 119 | 120 | -------------------------------------------------------------------------------- /roles/hbase/files/default/hbase: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | export HBASE_PID_DIR="/var/run/hbase" 17 | export HBASE_LOG_DIR="/var/log/hbase" 18 | export HBASE_IDENT_STRING=hbase 19 | #export HBASE_REGIONSERVER_MLOCK=true 20 | #export HBASE_REGIONSERVER_UID=hbase 21 | export HBASE_THRIFT_MODE="-nonblocking" 22 | 23 | # Up to 100 region servers can be run on a single host by specifying offsets 24 | # here or as CLI args when using init scripts. Each offset identifies an 25 | # instance and is used to determine the network ports it uses. Each instance 26 | # will have have its own log and pid files. 27 | # 28 | # REGIONSERVER_OFFSETS="1 2 3" 29 | 30 | -------------------------------------------------------------------------------- /roles/hbase/files/hadoop-metrics2-hbase.properties: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period 6 | *.period=10 7 | 8 | # Below are some examples of sinks that could be used 9 | # to monitor different hbase daemons. 10 | 11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink 12 | # hbase.sink.file-all.filename=all.metrics 13 | 14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink 15 | # hbase.sink.file0.context=hmaster 16 | # hbase.sink.file0.filename=master.metrics 17 | 18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink 19 | # hbase.sink.file1.context=thrift-one 20 | # hbase.sink.file1.filename=thrift-one.metrics 21 | 22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink 23 | # hbase.sink.file2.context=thrift-two 24 | # hbase.sink.file2.filename=thrift-one.metrics 25 | 26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink 27 | # hbase.sink.file3.context=rest 28 | # hbase.sink.file3.filename=rest.metrics 29 | -------------------------------------------------------------------------------- /roles/hbase/files/hbase-env.cmd: -------------------------------------------------------------------------------- 1 | @rem/** 2 | @rem * Licensed to the Apache Software Foundation (ASF) under one 3 | @rem * or more contributor license agreements. See the NOTICE file 4 | @rem * distributed with this work for additional information 5 | @rem * regarding copyright ownership. The ASF licenses this file 6 | @rem * to you under the Apache License, Version 2.0 (the 7 | @rem * "License"); you may not use this file except in compliance 8 | @rem * with the License. You may obtain a copy of the License at 9 | @rem * 10 | @rem * http://www.apache.org/licenses/LICENSE-2.0 11 | @rem * 12 | @rem * Unless required by applicable law or agreed to in writing, software 13 | @rem * distributed under the License is distributed on an "AS IS" BASIS, 14 | @rem * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | @rem * See the License for the specific language governing permissions and 16 | @rem * limitations under the License. 17 | @rem */ 18 | 19 | @rem Set environment variables here. 20 | 21 | @rem The java implementation to use. Java 1.7+ required. 22 | @rem set JAVA_HOME=c:\apps\java 23 | 24 | @rem Extra Java CLASSPATH elements. Optional. 25 | @rem set HBASE_CLASSPATH= 26 | 27 | @rem The maximum amount of heap to use, in MB. Default is 1000. 28 | @rem set HBASE_HEAPSIZE=1000 29 | 30 | @rem Uncomment below if you intend to use off heap cache. 31 | @rem set HBASE_OFFHEAPSIZE=1000 32 | 33 | @rem For example, to allocate 8G of offheap, to 8G: 34 | @rem etHBASE_OFFHEAPSIZE=8G 35 | 36 | @rem Extra Java runtime options. 37 | @rem Below are what we set by default. May only work with SUN JVM. 38 | @rem For more on why as well as other possible settings, 39 | @rem see http://wiki.apache.org/hadoop/PerformanceTuning 40 | @rem JDK6 on Windows has a known bug for IPv6, use preferIPv4Stack unless JDK7. 41 | @rem @rem See TestIPv6NIOServerSocketChannel. 42 | set HBASE_OPTS="-XX:+UseConcMarkSweepGC" "-Djava.net.preferIPv4Stack=true" 43 | 44 | @rem Uncomment below to enable java garbage collection logging for the server-side processes 45 | @rem this enables basic gc logging for the server processes to the .out file 46 | @rem set SERVER_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" %HBASE_GC_OPTS% 47 | 48 | @rem this enables gc logging using automatic GC log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. Either use this set of options or the one above 49 | @rem set SERVER_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" "-XX:+UseGCLogFileRotation" "-XX:NumberOfGCLogFiles=1" "-XX:GCLogFileSize=512M" %HBASE_GC_OPTS% 50 | 51 | @rem Uncomment below to enable java garbage collection logging for the client processes in the .out file. 52 | @rem set CLIENT_GC_OPTS="-verbose:gc" "-XX:+PrintGCDetails" "-XX:+PrintGCDateStamps" %HBASE_GC_OPTS% 53 | 54 | @rem Uncomment below (along with above GC logging) to put GC information in its own logfile (will set HBASE_GC_OPTS) 55 | @rem set HBASE_USE_GC_LOGFILE=true 56 | 57 | @rem Uncomment and adjust to enable JMX exporting 58 | @rem See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. 59 | @rem More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html 60 | @rem 61 | @rem set HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false" "-Dcom.sun.management.jmxremote.authenticate=false" 62 | @rem set HBASE_MASTER_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10101" 63 | @rem set HBASE_REGIONSERVER_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10102" 64 | @rem set HBASE_THRIFT_OPTS=%HBASE_JMX_BASE% "-Dcom.sun.management.jmxremote.port=10103" 65 | @rem set HBASE_ZOOKEEPER_OPTS=%HBASE_JMX_BASE% -Dcom.sun.management.jmxremote.port=10104" 66 | 67 | @rem File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. 68 | @rem set HBASE_REGIONSERVERS=%HBASE_HOME%\conf\regionservers 69 | 70 | @rem Where log files are stored. $HBASE_HOME/logs by default. 71 | @rem set HBASE_LOG_DIR=%HBASE_HOME%\logs 72 | 73 | @rem A string representing this instance of hbase. $USER by default. 74 | @rem set HBASE_IDENT_STRING=%USERNAME% 75 | 76 | @rem Seconds to sleep between slave commands. Unset by default. This 77 | @rem can be useful in large clusters, where, e.g., slave rsyncs can 78 | @rem otherwise arrive faster than the master can service them. 79 | @rem set HBASE_SLAVE_SLEEP=0.1 80 | 81 | @rem Tell HBase whether it should manage it's own instance of Zookeeper or not. 82 | @rem set HBASE_MANAGES_ZK=true 83 | -------------------------------------------------------------------------------- /roles/hbase/files/hbase-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | 24 | 25 | security.client.protocol.acl 26 | * 27 | ACL for ClientProtocol and AdminProtocol implementations (ie. 28 | clients talking to HRegionServers) 29 | The ACL is a comma-separated list of user and group names. The user and 30 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 31 | A special value of "*" means all users are allowed. 32 | 33 | 34 | 35 | security.admin.protocol.acl 36 | * 37 | ACL for HMasterInterface protocol implementation (ie. 38 | clients talking to HMaster for admin operations). 39 | The ACL is a comma-separated list of user and group names. The user and 40 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 41 | A special value of "*" means all users are allowed. 42 | 43 | 44 | 45 | security.masterregion.protocol.acl 46 | * 47 | ACL for HMasterRegionInterface protocol implementations 48 | (for HRegionServers communicating with HMaster) 49 | The ACL is a comma-separated list of user and group names. The user and 50 | group list is separated by a blank. For e.g. "alice,bob users,wheel". 51 | A special value of "*" means all users are allowed. 52 | 53 | 54 | -------------------------------------------------------------------------------- /roles/hbase/files/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | hbase.root.logger=INFO,console 3 | hbase.security.logger=INFO,console 4 | hbase.log.dir=. 5 | hbase.log.file=hbase.log 6 | 7 | # Define the root logger to the system property "hbase.root.logger". 8 | log4j.rootLogger=${hbase.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 28 | 29 | # Rolling File Appender properties 30 | hbase.log.maxfilesize=256MB 31 | hbase.log.maxbackupindex=20 32 | 33 | # Rolling File Appender 34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file} 36 | 37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize} 38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex} 39 | 40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 42 | 43 | # 44 | # Security audit appender 45 | # 46 | hbase.security.log.file=SecurityAuth.audit 47 | hbase.security.log.maxfilesize=256MB 48 | hbase.security.log.maxbackupindex=20 49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file} 51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize} 52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex} 53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 55 | log4j.category.SecurityLogger=${hbase.security.logger} 56 | log4j.additivity.SecurityLogger=false 57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE 58 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.visibility.VisibilityController=TRACE 59 | 60 | # 61 | # Null Appender 62 | # 63 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 64 | 65 | # 66 | # console 67 | # Add "console" to rootlogger above if you want to use this 68 | # 69 | log4j.appender.console=org.apache.log4j.ConsoleAppender 70 | log4j.appender.console.target=System.err 71 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 72 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 73 | 74 | # Custom Logging levels 75 | 76 | log4j.logger.org.apache.zookeeper=INFO 77 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 78 | log4j.logger.org.apache.hadoop.hbase=INFO 79 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug. 80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO 81 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO 82 | #log4j.logger.org.apache.hadoop.dfs=DEBUG 83 | # Set this class to log INFO only otherwise its OTT 84 | # Enable this to get detailed connection error/retry logging. 85 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE 86 | 87 | 88 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output) 89 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG 90 | 91 | # Uncomment the below if you want to remove logging of client region caching' 92 | # and scan of hbase:meta messages 93 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO 94 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO 95 | -------------------------------------------------------------------------------- /roles/hbase/tasks/hbase-master.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hbase-master 6 | - hbase-thrift 7 | - hbase-rest 8 | 9 | - name: create configuration directory 10 | file: path=/etc/hbase/conf.{{ cluster_name }} state=directory 11 | 12 | - name: setup alternatives link 13 | alternatives: name=hbase-conf link=/etc/hbase/conf path=/etc/hbase/conf.{{ cluster_name }} 14 | 15 | - name: install default configurations 16 | tags: config 17 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 18 | with_items: 19 | - hbase 20 | 21 | - name: install template configurations 22 | tags: config 23 | template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }} 24 | with_items: 25 | - hbase-site.xml 26 | - regionservers 27 | 28 | - name: install files configurations 29 | tags: config 30 | copy: src={{ item }} dest=/etc/hbase/conf/{{ item }} 31 | with_items: 32 | - hadoop-metrics2-hbase.properties 33 | - hbase-env.cmd 34 | - hbase-env.sh 35 | - hbase-policy.xml 36 | - log4j.properties 37 | 38 | - name: create hdfs directories 39 | command: sudo -Hu hdfs hdfs dfs {{ item }} 40 | with_items: 41 | - -mkdir -p /hbase 42 | - -chown hbase /hbase 43 | 44 | - name: start services 45 | tags: service 46 | service: name={{ item }} state=restarted enabled=yes 47 | with_items: 48 | - hbase-master 49 | - hbase-thrift 50 | - hbase-rest 51 | -------------------------------------------------------------------------------- /roles/hbase/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - include: hbase-master.yaml 2 | when: deploy == "hbase-master" 3 | 4 | - include: regionserver.yaml 5 | when: deploy == "regionserver" and groups['hbasemaster']|count == 1 6 | -------------------------------------------------------------------------------- /roles/hbase/tasks/regionserver.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hbase-regionserver 6 | 7 | - name: create configuration directory 8 | file: path=/etc/hbase/conf.{{ cluster_name }} state=directory 9 | 10 | - name: setup alternatives link 11 | alternatives: name=hbase-conf link=/etc/hbase/conf path=/etc/hbase/conf.{{ cluster_name }} 12 | 13 | - name: install template configurations 14 | tags: config 15 | template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }} 16 | with_items: 17 | - hbase-site.xml 18 | - regionservers 19 | 20 | - name: install files configurations 21 | tags: config 22 | copy: src={{ item }} dest=/etc/hbase/conf/{{ item }} 23 | with_items: 24 | - hadoop-metrics2-hbase.properties 25 | - hbase-env.cmd 26 | - hbase-env.sh 27 | - hbase-policy.xml 28 | - log4j.properties 29 | 30 | - name: start services 31 | tags: service 32 | service: name={{ item }} state=restarted enabled=yes 33 | with_items: 34 | - hbase-regionserver 35 | 36 | - name: test 37 | tags: test 38 | shell: echo -e "create 'ansible_test_table', 'id'\ndisable 'ansible_test_table'\n drop 'ansible_test_table'" | hbase shell 39 | run_once: true 40 | -------------------------------------------------------------------------------- /roles/hbase/templates/hbase-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hbase.cluster.distributed 8 | true 9 | 10 | 11 | 12 | hbase.rootdir 13 | {% if groups['namenodes']|count > 1 %} 14 | hdfs://{{ cluster_name }}/hbase 15 | {% else %} 16 | hdfs://{{ groups['namenodes'][0] }}:8020/hbase 17 | {% endif %} 18 | 19 | 20 | 21 | hbase.zookeeper.quorum 22 | {% for item in groups['zookeepernodes'] -%} 23 | {{ item }}:2181{% if not loop.last %},{% endif %} 24 | {%- endfor %} 25 | 26 | {% if groups['dashboard']|count > 0 %} 27 | 28 | 29 | hbase.http.filter.xframeoptions.mode 30 | ALLOWALL 31 | 32 | {% endif %} 33 | 34 | 35 | -------------------------------------------------------------------------------- /roles/hbase/templates/regionservers.j2: -------------------------------------------------------------------------------- 1 | {% for item in groups['datanodes'] %} 2 | {{ item }} 3 | {% endfor %} 4 | -------------------------------------------------------------------------------- /roles/hivemetastore/files/default/hadoop-0.20-mapreduce: -------------------------------------------------------------------------------- 1 | ../../../hadoop/files/default/hadoop-0.20-mapreduce -------------------------------------------------------------------------------- /roles/hivemetastore/files/default/hive-metastore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # The port for Hive metastore daemon to listen to. 17 | # Unfortunatelly, there is no way to specify the interfaces 18 | # to which the daemon binds. 19 | # 20 | #PORT= 21 | -------------------------------------------------------------------------------- /roles/hivemetastore/files/default/hive-server2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # The port for Hive server2 daemon to listen to. 17 | # Unfortunatelly, there is no way to specify the interfaces 18 | # to which the daemon binds. 19 | # 20 | #PORT= 21 | -------------------------------------------------------------------------------- /roles/hivemetastore/files/hive-exec-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=INFO,FA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.query.id=hadoop 22 | hive.log.file=${hive.query.id}.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hive.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshhold=${hive.log.threshold} 29 | 30 | # 31 | # File Appender 32 | # 33 | 34 | log4j.appender.FA=org.apache.log4j.FileAppender 35 | log4j.appender.FA.File=${hive.log.dir}/${hive.log.file} 36 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout 37 | 38 | # Pattern format: Date LogLevel LoggerName LogMessage 39 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 40 | # Debugging Pattern format 41 | log4j.appender.FA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n 42 | 43 | 44 | # 45 | # console 46 | # Add "console" to rootlogger above if you want to use this 47 | # 48 | 49 | log4j.appender.console=org.apache.log4j.ConsoleAppender 50 | log4j.appender.console.target=System.err 51 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 52 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t] %p %c{2}: %m%n 53 | 54 | #custom logging levels 55 | #log4j.logger.xxx=DEBUG 56 | 57 | # 58 | # Event Counter Appender 59 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 60 | # 61 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 62 | 63 | 64 | log4j.category.DataNucleus=ERROR,FA 65 | log4j.category.Datastore=ERROR,FA 66 | log4j.category.Datastore.Schema=ERROR,FA 67 | log4j.category.JPOX.Datastore=ERROR,FA 68 | log4j.category.JPOX.Plugin=ERROR,FA 69 | log4j.category.JPOX.MetaData=ERROR,FA 70 | log4j.category.JPOX.Query=ERROR,FA 71 | log4j.category.JPOX.General=ERROR,FA 72 | log4j.category.JPOX.Enhancer=ERROR,FA 73 | 74 | 75 | # Silence useless ZK logs 76 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,FA 77 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,FA 78 | -------------------------------------------------------------------------------- /roles/hivemetastore/files/hive-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hive.log.threshold=ALL 19 | hive.root.logger=WARN,DRFA 20 | hive.log.dir=${java.io.tmpdir}/${user.name} 21 | hive.log.file=hive.log 22 | 23 | # Define the root logger to the system property "hadoop.root.logger". 24 | log4j.rootLogger=${hive.root.logger}, EventCounter 25 | 26 | # Logging Threshold 27 | log4j.threshold=${hive.log.threshold} 28 | 29 | # 30 | # Daily Rolling File Appender 31 | # 32 | # Use the PidDailyerRollingFileAppend class instead if you want to use separate log files 33 | # for different CLI session. 34 | # 35 | # log4j.appender.DRFA=org.apache.hadoop.hive.ql.log.PidDailyRollingFileAppender 36 | 37 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 38 | 39 | log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} 40 | 41 | # Rollver at midnight 42 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 43 | 44 | # 30-day backup 45 | #log4j.appender.DRFA.MaxBackupIndex=30 46 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 47 | 48 | # Pattern format: Date LogLevel LoggerName LogMessage 49 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 50 | # Debugging Pattern format 51 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p [%t]: %c{2} (%F:%M(%L)) - %m%n 52 | 53 | 54 | # 55 | # console 56 | # Add "console" to rootlogger above if you want to use this 57 | # 58 | 59 | log4j.appender.console=org.apache.log4j.ConsoleAppender 60 | log4j.appender.console.target=System.err 61 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 62 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n 63 | log4j.appender.console.encoding=UTF-8 64 | 65 | #custom logging levels 66 | #log4j.logger.xxx=DEBUG 67 | 68 | # 69 | # Event Counter Appender 70 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 71 | # 72 | log4j.appender.EventCounter=org.apache.hadoop.hive.shims.HiveEventCounter 73 | 74 | 75 | log4j.category.DataNucleus=ERROR,DRFA 76 | log4j.category.Datastore=ERROR,DRFA 77 | log4j.category.Datastore.Schema=ERROR,DRFA 78 | log4j.category.JPOX.Datastore=ERROR,DRFA 79 | log4j.category.JPOX.Plugin=ERROR,DRFA 80 | log4j.category.JPOX.MetaData=ERROR,DRFA 81 | log4j.category.JPOX.Query=ERROR,DRFA 82 | log4j.category.JPOX.General=ERROR,DRFA 83 | log4j.category.JPOX.Enhancer=ERROR,DRFA 84 | 85 | 86 | # Silence useless ZK logs 87 | log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN,DRFA 88 | log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA 89 | 90 | #custom logging levels 91 | log4j.logger.org.apache.hadoop.hive.ql.parse.SemanticAnalyzer=INFO 92 | log4j.logger.org.apache.hadoop.hive.ql.Driver=INFO 93 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.ExecDriver=INFO 94 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapRedTask=INFO 95 | log4j.logger.org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask=INFO 96 | log4j.logger.org.apache.hadoop.hive.ql.exec.Task=INFO 97 | log4j.logger.org.apache.hadoop.hive.ql.session.SessionState=INFO 98 | -------------------------------------------------------------------------------- /roles/hivemetastore/files/hive.limits.conf: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | hive - nofile 32768 17 | hive - nproc 65536 18 | -------------------------------------------------------------------------------- /roles/hivemetastore/tasks/hive-client.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: package 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hive 6 | 7 | - name: create configuration directory 8 | tags: config 9 | file: path={{ etc_folder }}/hive state=directory 10 | 11 | - name: setup alternatives link 12 | tags: config 13 | alternatives: name=hive-conf link=/etc/hive/conf path={{ etc_folder }}/hive 14 | 15 | - name: set limits for hive user 16 | tags: config 17 | copy: src=hive.limits.conf dest=/etc/security/limits.d/hive.conf 18 | 19 | - name: install template configurations 20 | tags: config 21 | template: src={{ item }}.j2 dest={{ etc_folder }}/hive/{{ item }} 22 | with_items: 23 | - hive-site.xml 24 | - hive-env.sh 25 | 26 | - name: install files configurations 27 | tags: config 28 | copy: src={{ item }} dest={{ etc_folder }}/hive/{{ item }} 29 | with_items: 30 | - hive-exec-log4j.properties 31 | - hive-log4j.properties 32 | 33 | - name: install default configurations 34 | tags: config 35 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 36 | with_items: 37 | - hadoop-0.20-mapreduce 38 | 39 | - name: add group for hive user 40 | user: name=hive groups=hive,hadoop 41 | -------------------------------------------------------------------------------- /roles/hivemetastore/tasks/hive-server.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: package 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hive-metastore 6 | - hive-server2 7 | - postgresql{{ postgres_version|default('')|replace('.', '') }} 8 | - postgresql{{ postgres_version|default('')|replace('.', '') }}-jdbc 9 | 10 | - name: create configuration directory 11 | tags: config 12 | file: path={{ etc_folder }}/hive state=directory 13 | 14 | - name: setup alternatives link 15 | tags: config 16 | alternatives: name=hive-conf link=/etc/hive/conf path={{ etc_folder }}/hive 17 | 18 | - name: set limits for hive user 19 | tags: config 20 | copy: src=hive.limits.conf dest=/etc/security/limits.d/hive.conf 21 | 22 | - name: install template configurations 23 | tags: config 24 | template: src={{ item }}.j2 dest={{ etc_folder }}/hive/{{ item }} 25 | with_items: 26 | - hive-site.xml 27 | - hive-env.sh 28 | 29 | - name: install files configurations 30 | tags: config 31 | copy: src={{ item }} dest={{ etc_folder }}/hive/{{ item }} 32 | with_items: 33 | - hive-exec-log4j.properties 34 | - hive-log4j.properties 35 | 36 | - name: install default configurations 37 | tags: config 38 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 39 | with_items: 40 | - hadoop-0.20-mapreduce 41 | - hive-server2 42 | - hive-metastore 43 | 44 | - name: copy hive-site.xml to hdfs 45 | tags: config 46 | command: sudo -u hdfs hdfs dfs {{ item }} 47 | with_items: 48 | - -mkdir -p /etc/hive/conf 49 | - -copyFromLocal -f {{ etc_folder }}/hive/hive-site.xml /etc/hive/conf 50 | 51 | - name: create warehouse dir 52 | tags: config 53 | command: sudo -u hdfs hdfs dfs {{ item }} 54 | with_items: 55 | - -mkdir -p /user/hive/warehouse 56 | - -chmod 1777 /user/hive 57 | - -chmod 1777 /user/hive/warehouse 58 | 59 | - name: setup hive log folders 60 | tags: config 61 | command: sed -i -e 's|^\(\s\+\)LOG_FILE=.*$|\1LOG_FILE={{ log_folder }}/hive/${DAEMON}.out|g' /etc/init.d/{{ item }} 62 | with_items: 63 | - hive-server2 64 | - hive-metastore 65 | 66 | - name: create log folders 67 | tags: config 68 | file: path={{ log_folder }}/hive state=directory owner=hive mode=0755 69 | 70 | - name: create link to jdbc driver 71 | tags: config 72 | file: src=/usr/share/java/postgresql{{ postgres_version|default('')|replace('.', '') }}-jdbc.jar dest=/usr/lib/hive/lib/postgresql-jdbc.jar state=link force=yes 73 | 74 | - name: generate sql 75 | tags: init 76 | template: src="hive.sql.j2" dest=/tmp/ahive.sql 77 | 78 | - name: install .pgpass 79 | template: src=".pgpass.j2" dest=/root/.pgpass mode=0600 80 | 81 | - name: create metastore database 82 | tags: init 83 | command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/ahive.sql chdir=/usr/lib/hive/scripts/metastore/upgrade/postgres 84 | when: destroy_data 85 | 86 | - name: remove sql and .pgpass files 87 | command: rm -f /tmp/ahive.sql /root/.pgpass 88 | 89 | - name: add hive user to hadoop group 90 | user: name=hive groups=hive,hadoop 91 | 92 | - name: start services 93 | tags: service 94 | service: name={{ item }} state=restarted enabled=yes 95 | with_items: 96 | - hive-metastore 97 | - hive-server2 98 | 99 | - name: wait 100 | command: sleep 7 101 | 102 | - name: test hive 103 | tags: test 104 | command: sudo -Hu hdfs beeline -u jdbc:hive2://{{ ansible_hostname }}:10000 -nhdfs -p password -d org.apache.hive.jdbc.HiveDriver -e 'create table ansible_test_table ( id int ) location "/tmp/ansible_test_table"; drop table ansible_test_table;' 105 | -------------------------------------------------------------------------------- /roles/hivemetastore/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - include: hive-server.yaml 2 | when: deploy == "hive-server" 3 | 4 | - include: hive-client.yaml 5 | when: deploy == "hive-client" and groups['hivemetastore']|count == 1 6 | -------------------------------------------------------------------------------- /roles/hivemetastore/templates/.pgpass.j2: -------------------------------------------------------------------------------- 1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }} 2 | {{ groups['postgresql'][0] }}:5432:metastore:hiveuser:{{ hiveuser_password }} 3 | -------------------------------------------------------------------------------- /roles/hivemetastore/templates/hive-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Hive Client memory usage can be an issue if a large number of clients 2 | # are running at the same time. The flags below have been useful in 3 | # reducing memory usage: 4 | # 5 | # if [ "$SERVICE" = "cli" ]; then 6 | # if [ -z "$DEBUG" ]; then 7 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit" 8 | # else 9 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit" 10 | # fi 11 | # fi 12 | 13 | case $SERVICE in 14 | cli) 15 | export HADOOP_HEAPSIZE={{ hivecli_heapsize }} 16 | ;; 17 | hiveserver2) 18 | export HADOOP_HEAPSIZE={{ hiveserver2_heapsize }} 19 | ;; 20 | metastore) 21 | export HADOOP_HEAPSIZE={{ hivemetastore_heapsize }} 22 | ;; 23 | esac 24 | 25 | # The heap size of the jvm stared by hive shell script can be controlled via: 26 | # 27 | # export HADOOP_HEAPSIZE=1024 28 | # 29 | # Larger heap size may be required when running queries over large number of files or partitions. 30 | # By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be 31 | # appropriate for hive server (hwi etc). 32 | 33 | 34 | # Set HADOOP_HOME to point to a specific hadoop install directory 35 | # HADOOP_HOME=${bin}/../../hadoop 36 | 37 | # Hive Configuration Directory can be controlled by: 38 | # export HIVE_CONF_DIR= 39 | 40 | # Folder containing extra ibraries required for hive compilation/execution can be controlled by: 41 | # export HIVE_AUX_JARS_PATH= 42 | -------------------------------------------------------------------------------- /roles/hivemetastore/templates/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | javax.jdo.option.ConnectionURL 8 | jdbc:postgresql://{{ groups['postgresql'][0] }}/metastore 9 | JDBC connect string for a JDBC metastore 10 | 11 | 12 | 13 | javax.jdo.option.ConnectionDriverName 14 | org.postgresql.Driver 15 | Driver class name for a JDBC metastore 16 | 17 | 18 | 19 | javax.jdo.option.ConnectionUserName 20 | hiveuser 21 | 22 | 23 | 24 | javax.jdo.option.ConnectionPassword 25 | {{ hiveuser_password }} 26 | 27 | 28 | 29 | datanucleus.autoCreateSchema 30 | false 31 | 32 | 33 | 34 | datanucleus.fixedDatastore 35 | true 36 | 37 | 38 | 39 | datanucleus.autoStartMechanism 40 | SchemaTable 41 | 42 | 43 | 44 | hive.metastore.uris 45 | thrift://{{ groups['hivemetastore'][0] }}:9083 46 | IP address (or fully-qualified domain name) and port of the metastore host 47 | 48 | 49 | 50 | hive.metastore.schema.verification 51 | true 52 | 53 | 54 | {% if groups['zookeepernodes']|count >0 %} 55 | 56 | hive.support.concurrency 57 | true 58 | Enable Hive's Table Lock Manager Service 59 | 60 | 61 | 62 | hive.zookeeper.quorum 63 | Zookeeper quorum used by Hive's Table Lock Manager 64 | {% for item in groups['zookeepernodes'] -%} 65 | {{ item }}{% if not loop.last %},{% endif %} 66 | {%- endfor %} 67 | 68 | {% else %} 69 | 70 | hive.support.concurrency 71 | false 72 | Enable Hive's Table Lock Manager Service 73 | 74 | {% endif %} 75 | 76 | 77 | hive.server2.thrift.port 78 | 10000 79 | TCP port number to listen on, default 10000 80 | 81 | 82 | 83 | hive.metastore.client.socket.timeout 84 | 3600 85 | MetaStore Client socket timeout in seconds 86 | 87 | 88 | 89 | hive.exec.compress.intermediate 90 | true 91 | 92 | 93 | 94 | hive.exec.compress.output 95 | true 96 | 97 | 98 | 99 | mapred.output.compression.codec 100 | org.apache.hadoop.io.compress.SnappyCodec 101 | 102 | 103 | 104 | mapred.map.output.compression.codec 105 | org.apache.hadoop.io.compress.SnappyCodec 106 | 107 | 108 | 109 | hive.exec.reducers.bytes.per.reducer 110 | 200000000 111 | 112 | 113 | 114 | hive.exec.dynamic.partition 115 | true 116 | 117 | 118 | 119 | hive.exec.dynamic.partition.mode 120 | nonstrict 121 | 122 | 123 | 124 | parquet.compression 125 | SNAPPY 126 | 127 | 128 | 129 | mapred.output.compression.type 130 | BLOCK 131 | 132 | 133 | 134 | mapred.reduce.tasks 135 | -1 136 | 137 | 138 | 139 | hive.mapred.reduce.tasks.speculative.execution 140 | false 141 | 142 | 143 | 144 | hive.optimize.sort.dynamic.partition 145 | false 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /roles/hivemetastore/templates/hive.sql.j2: -------------------------------------------------------------------------------- 1 | CREATE USER hiveuser WITH PASSWORD '{{ hiveuser_password }}'; 2 | CREATE DATABASE metastore; 3 | \c metastore; 4 | \i /usr/lib/hive/scripts/metastore/upgrade/postgres/hive-schema-1.1.0.postgres.sql 5 | \pset tuples_only on 6 | \o /tmp/grant-privs 7 | SELECT 'GRANT SELECT,INSERT,UPDATE,DELETE ON "' || schemaname || '". "' ||tablename ||'" TO hiveuser ;' 8 | FROM pg_tables 9 | WHERE tableowner = CURRENT_USER and schemaname = 'public'; 10 | \o 11 | \pset tuples_only off 12 | \i /tmp/grant-privs 13 | -------------------------------------------------------------------------------- /roles/hue/files/default/hadoop-httpfs: -------------------------------------------------------------------------------- 1 | ../../../hadoop/files/default/hadoop-httpfs -------------------------------------------------------------------------------- /roles/hue/files/log.conf: -------------------------------------------------------------------------------- 1 | ########################################## 2 | # To change the log leve, edit the `level' field. 3 | # Choices are: DEBUG, INFO, WARNING, ERROR, CRITICAL 4 | # 5 | # The logrotation limit is set at 5MB per file for a total of 5 copies. 6 | # I.e. 25MB for each set of logs. 7 | ########################################## 8 | [handler_logfile] 9 | level=INFO 10 | class=handlers.RotatingFileHandler 11 | formatter=default 12 | args=('%LOG_DIR%/%PROC_NAME%.log', 'a', 5000000, 5) 13 | 14 | ########################################## 15 | # Please do not change the settings below 16 | ########################################## 17 | 18 | [logger_root] 19 | handlers=logfile,errorlog 20 | 21 | [logger_access] 22 | handlers=accesslog 23 | qualname=access 24 | 25 | [logger_django_auth_ldap] 26 | handlers=accesslog 27 | qualname=django_auth_ldap 28 | 29 | # The logrotation limit is set at 5MB per file for a total of 5 copies. 30 | # I.e. 25MB for each set of logs. 31 | [handler_accesslog] 32 | class=handlers.RotatingFileHandler 33 | level=DEBUG 34 | propagate=True 35 | formatter=access 36 | args=('%LOG_DIR%/access.log', 'a', 5000000, 5) 37 | 38 | # All errors go into error.log 39 | [handler_errorlog] 40 | class=handlers.RotatingFileHandler 41 | level=ERROR 42 | formatter=default 43 | args=('%LOG_DIR%/error.log', 'a', 5000000, 5) 44 | 45 | [formatter_default] 46 | class=desktop.log.formatter.Formatter 47 | format=[%(asctime)s] %(module)-12s %(levelname)-8s %(message)s 48 | datefmt=%d/%b/%Y %H:%M:%S %z 49 | 50 | [formatter_access] 51 | class=desktop.log.formatter.Formatter 52 | format=[%(asctime)s] %(levelname)-8s %(message)s 53 | datefmt=%d/%b/%Y %H:%M:%S %z 54 | 55 | [loggers] 56 | keys=root,access,django_auth_ldap 57 | 58 | [handlers] 59 | keys=logfile,accesslog,errorlog 60 | 61 | [formatters] 62 | keys=default,access 63 | -------------------------------------------------------------------------------- /roles/hue/files/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | hadoop.log.dir=. 3 | hadoop.log.file=hadoop.log 4 | 5 | # Define the root logger to the system property "hadoop.root.logger". 6 | log4j.rootLogger=INFO,console, EventCounter 7 | 8 | # Logging Threshold 9 | log4j.threshhold=ALL 10 | 11 | # 12 | # Daily Rolling File Appender 13 | # 14 | 15 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 16 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 17 | 18 | # Rollver at midnight 19 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 20 | 21 | # 30-day backup 22 | #log4j.appender.DRFA.MaxBackupIndex=30 23 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 24 | 25 | # Pattern format: Date LogLevel LoggerName LogMessage 26 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 27 | # Debugging Pattern format 28 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 29 | 30 | 31 | # 32 | # console 33 | # Add "console" to rootlogger above if you want to use this 34 | # 35 | 36 | log4j.appender.console=org.apache.log4j.ConsoleAppender 37 | log4j.appender.console.target=System.err 38 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 39 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 40 | 41 | # 42 | # Rolling File Appender 43 | # 44 | 45 | #log4j.appender.RFA=org.apache.log4j.RollingFileAppender 46 | #log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 47 | 48 | # Logfile size and and 30-day backups 49 | #log4j.appender.RFA.MaxFileSize=1MB 50 | #log4j.appender.RFA.MaxBackupIndex=30 51 | 52 | #log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 53 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n 54 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 55 | 56 | # 57 | # Event Counter Appender 58 | # Sends counts of logging messages at different severity levels to Hadoop 59 | # Metrics. 60 | # 61 | log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter 62 | -------------------------------------------------------------------------------- /roles/hue/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - hue 6 | - hadoop-httpfs 7 | - postgresql 8 | - python-psycopg2 9 | 10 | - name: create configuration directory 11 | tags: config 12 | file: path=/etc/hue/conf.{{cluster_name}} state=directory 13 | 14 | - name: setup alternatives link 15 | tags: config 16 | alternatives: name=hue-conf link=/etc/hue/conf path=/etc/hue/conf.{{cluster_name}} 17 | 18 | - name: install template configurations 19 | tags: config 20 | template: src=hue.ini.j2 dest=/etc/hue/conf/hue.ini 21 | 22 | - name: check certificate file exists 23 | local_action: stat path={{ inventory_dir }}/workdir/hue.pem 24 | register: hue_cert 25 | run_once: true 26 | 27 | - name: generate certificate 28 | tags: init 29 | local_action: shell {{ item }} chdir={{ inventory_dir }}/workdir/ 30 | with_items: 31 | - openssl req -nodes -newkey rsa:2048 -keyout hue.key -out hue.csr -subj "/O=Hadoop/OU=hue/CN=hue/emailAddress=adm@it.xx" 32 | - openssl x509 -req -days 3650 -in hue.csr -signkey hue.key -out hue.crt 33 | - cat hue.crt hue.key > hue.pem 34 | when: hue_cert.stat.isreg is not defined 35 | run_once: true 36 | 37 | - name: install certificate 38 | tags: config 39 | copy: src={{ inventory_dir }}/workdir/hue.pem dest=/etc/hue/conf/hue.pem 40 | 41 | - name: install files configurations 42 | tags: config 43 | copy: src={{ item }} dest=/etc/hue/conf/{{ item }} 44 | with_items: 45 | - log4j.properties 46 | - log.conf 47 | 48 | - name: install default configurations 49 | tags: config 50 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 51 | with_items: 52 | - hadoop-httpfs 53 | 54 | - name: create warehouse dir and home for admin user 55 | command: sudo -Hu hdfs hdfs dfs {{ item }} 56 | with_items: 57 | - -mkdir -p /user/admin 58 | - -chown admin /user/admin 59 | run_once: true 60 | 61 | - name: generate sql 62 | template: src="hue.sql.j2" dest=/tmp/hue.sql 63 | 64 | - name: install .pgpass 65 | template: src=".pgpass.j2" dest=/root/.pgpass mode=0600 66 | 67 | - name: create database 68 | command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/hue.sql 69 | when: destroy_data 70 | run_once: true 71 | 72 | - name: remove sql and .pgpass files 73 | command: rm -f /tmp/hue.sql /root/.pgpass 74 | 75 | - name: initialize database 76 | command: "{{ item }} chdir=/tmp" 77 | with_items: 78 | - mkdir -p logs 79 | - /usr/lib/hue/build/env/bin/hue syncdb --noinput 80 | - /usr/lib/hue/build/env/bin/hue migrate 81 | - rm -rf logs 82 | when: destroy_data 83 | run_once: true 84 | 85 | - name: fix /etc/init.d/hue for systemd 86 | tags: patch 87 | lineinfile: 'dest=/etc/init.d/hue regexp="^# pidfile:" line="# pidfile: /var/run/hue/supervisor.pid"' 88 | 89 | - name: reload systemd 90 | command: systemctl daemon-reload 91 | when: ansible_os_family == "RedHat" and {{ ansible_distribution_major_version }} >= 7 92 | 93 | - name: start services 94 | tags: service 95 | service: name={{ item }} state=restarted enabled=yes 96 | with_items: 97 | - hue 98 | - hadoop-httpfs 99 | -------------------------------------------------------------------------------- /roles/hue/templates/.pgpass.j2: -------------------------------------------------------------------------------- 1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }} 2 | -------------------------------------------------------------------------------- /roles/hue/templates/hue.sql.j2: -------------------------------------------------------------------------------- 1 | DROP DATABASE IF EXISTS "hue"; 2 | create database hue; 3 | \c hue; 4 | create user hue with password '{{ hue_password }}'; 5 | grant all privileges on database hue to hue; 6 | -------------------------------------------------------------------------------- /roles/impala/tasks/impala-server.yaml: -------------------------------------------------------------------------------- 1 | - name: install impala server package 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - impala-server 6 | - impala-shell 7 | 8 | - name: fix impala-shell 9 | tags: patch 10 | lineinfile: dest=/usr/bin/impala-shell regexp="^PYTHON_EGG_CACHE=" line="export PYTHON_EGG_CACHE=/tmp/impala-shell-python-egg-cache-`whoami`" 11 | 12 | - name: create configuration directory 13 | tags: config 14 | file: path={{ etc_folder }}/impala state=directory 15 | 16 | - name: setup alternatives link 17 | tags: config 18 | alternatives: name=impala-conf link=/etc/impala/conf path={{ etc_folder }}/impala 19 | 20 | - name: install template configurations 21 | tags: config 22 | template: src={{ item }}.j2 dest={{ etc_folder }}/impala/{{ item }} 23 | with_items: 24 | - hive-site.xml 25 | - core-site.xml 26 | - hdfs-site.xml 27 | 28 | - name: install default configurations 29 | tags: config 30 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} 31 | with_items: 32 | - impala 33 | 34 | - name: create log folders 35 | tags: config 36 | file: path={{ log_folder }}/impala state=directory owner=impala mode=0755 37 | 38 | - name: start services 39 | tags: service 40 | service: name={{ item }} state=restarted enabled=yes 41 | with_items: 42 | - impala-server 43 | 44 | - name: wait 45 | command: sleep 30 46 | 47 | - name: test impala 48 | tags: test 49 | command: impala-shell -u hdfs -d default -q 'create external table ansible_test_table ( id int ) location "/tmp/ansible_test_table"; drop table ansible_test_table;' 50 | when: ansible_hostname == groups['datanodes'][0] 51 | 52 | - name: create hdfs dir 53 | tags: config 54 | command: sudo -u hdfs hdfs dfs {{ item }} 55 | with_items: 56 | - -mkdir -p /user/impala 57 | - '-chown impala:hadoop /user/impala' 58 | - -chmod 1777 /user/impala 59 | -------------------------------------------------------------------------------- /roles/impala/tasks/impala.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - impala-state-store 6 | - impala-catalog 7 | 8 | - name: create configuration directory 9 | tags: config 10 | file: path={{ etc_folder }}/impala state=directory 11 | 12 | - name: setup alternatives link 13 | tags: config 14 | alternatives: name=impala-conf link=/etc/impala/conf path={{ etc_folder }}/impala 15 | 16 | - name: install template configurations 17 | tags: config 18 | template: src={{ item }}.j2 dest=/etc/impala/conf/{{ item }} 19 | with_items: 20 | - hive-site.xml 21 | - core-site.xml 22 | - hdfs-site.xml 23 | 24 | - name: install template configuration for hive 25 | tags: config 26 | template: src=hive-site.xml.j2 dest=/etc/hive/conf/hive-site.xml 27 | 28 | - name: install default configurations 29 | tags: config 30 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} 31 | with_items: 32 | - impala 33 | 34 | - name: create log folders 35 | tags: config 36 | file: path={{ log_folder }}/impala state=directory owner=impala mode=0755 37 | 38 | - name: start services 39 | tags: service 40 | service: name={{ item }} state=restarted enabled=yes 41 | with_items: 42 | - impala-state-store 43 | - impala-catalog 44 | -------------------------------------------------------------------------------- /roles/impala/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - include: impala.yaml 2 | when: deploy == "impala" 3 | 4 | - include: impala-server.yaml 5 | when: deploy == "impala-server" and groups['impala-store-catalog']|count == 1 6 | -------------------------------------------------------------------------------- /roles/impala/templates/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | ../../hadoop/templates/core-site.xml.j2 -------------------------------------------------------------------------------- /roles/impala/templates/hdfs-site.xml.j2: -------------------------------------------------------------------------------- 1 | ../../hadoop/templates/hdfs-site.xml.j2 -------------------------------------------------------------------------------- /roles/impala/templates/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | ../../hivemetastore/templates/hive-site.xml.j2 -------------------------------------------------------------------------------- /roles/impala/templates/impala.j2: -------------------------------------------------------------------------------- 1 | IMPALA_CATALOG_SERVICE_HOST={{ groups['impala-store-catalog'][0] }} 2 | IMPALA_STATE_STORE_HOST={{ groups['impala-store-catalog'][0] }} 3 | IMPALA_STATE_STORE_PORT=24000 4 | IMPALA_BACKEND_PORT=22000 5 | IMPALA_LOG_DIR={{ log_folder }}/impala 6 | 7 | IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} {{impala_catalog_args|default('')}}" 8 | IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT} {{impala_state_store_args|default('')}}" 9 | IMPALA_SERVER_ARGS=" \ 10 | -log_dir=${IMPALA_LOG_DIR} \ 11 | -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \ 12 | -state_store_port=${IMPALA_STATE_STORE_PORT} \ 13 | -use_statestore {{impala_server_args|default('')}} \ 14 | -state_store_host=${IMPALA_STATE_STORE_HOST} \ 15 | -be_port=${IMPALA_BACKEND_PORT}" 16 | 17 | ENABLE_CORE_DUMPS=false 18 | 19 | # LIBHDFS_OPTS=-Djava.library.path=/usr/lib/impala/lib 20 | # MYSQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar 21 | # IMPALA_BIN=/usr/lib/impala/sbin 22 | # IMPALA_HOME=/usr/lib/impala 23 | # HIVE_HOME=/usr/lib/hive 24 | # HBASE_HOME=/usr/lib/hbase 25 | # IMPALA_CONF_DIR=/etc/impala/conf 26 | # HADOOP_CONF_DIR=/etc/impala/conf 27 | # HIVE_CONF_DIR=/etc/impala/conf 28 | # HBASE_CONF_DIR=/etc/impala/conf 29 | 30 | -------------------------------------------------------------------------------- /roles/kafka/files/connect-console-sink.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name=local-console-sink 17 | connector.class=org.apache.kafka.connect.file.FileStreamSinkConnector 18 | tasks.max=1 19 | topics=connect-test -------------------------------------------------------------------------------- /roles/kafka/files/connect-console-source.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name=local-console-source 17 | connector.class=org.apache.kafka.connect.file.FileStreamSourceConnector 18 | tasks.max=1 19 | topic=connect-test -------------------------------------------------------------------------------- /roles/kafka/files/connect-distributed.properties: -------------------------------------------------------------------------------- 1 | ## 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | ## 17 | 18 | # These are defaults. This file just demonstrates how to override some settings. 19 | bootstrap.servers=localhost:9092 20 | 21 | group.id=connect-cluster 22 | 23 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will 24 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 25 | key.converter=org.apache.kafka.connect.json.JsonConverter 26 | value.converter=org.apache.kafka.connect.json.JsonConverter 27 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 28 | # it to 29 | key.converter.schemas.enable=true 30 | value.converter.schemas.enable=true 31 | 32 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will 33 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format. 34 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 35 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 36 | internal.key.converter.schemas.enable=false 37 | internal.value.converter.schemas.enable=false 38 | 39 | offset.storage.topic=connect-offsets 40 | # Flush much faster than normal, which is useful for testing/debugging 41 | offset.flush.interval.ms=10000 42 | config.storage.topic=connect-configs -------------------------------------------------------------------------------- /roles/kafka/files/connect-file-sink.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name=local-file-sink 17 | connector.class=FileStreamSink 18 | tasks.max=1 19 | file=test.sink.txt 20 | topics=connect-test -------------------------------------------------------------------------------- /roles/kafka/files/connect-file-source.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name=local-file-source 17 | connector.class=FileStreamSource 18 | tasks.max=1 19 | file=test.txt 20 | topic=connect-test -------------------------------------------------------------------------------- /roles/kafka/files/connect-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 21 | 22 | log4j.logger.org.apache.zookeeper=ERROR 23 | log4j.logger.org.I0Itec.zkclient=ERROR -------------------------------------------------------------------------------- /roles/kafka/files/connect-standalone.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # These are defaults. This file just demonstrates how to override some settings. 17 | bootstrap.servers=localhost:9092 18 | 19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will 20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 21 | key.converter=org.apache.kafka.connect.json.JsonConverter 22 | value.converter=org.apache.kafka.connect.json.JsonConverter 23 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 24 | # it to 25 | key.converter.schemas.enable=true 26 | value.converter.schemas.enable=true 27 | 28 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will 29 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format. 30 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 31 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 32 | internal.key.converter.schemas.enable=false 33 | internal.value.converter.schemas.enable=false 34 | 35 | offset.storage.file.filename=/tmp/connect.offsets 36 | # Flush much faster than normal, which is useful for testing/debugging 37 | offset.flush.interval.ms=10000 38 | -------------------------------------------------------------------------------- /roles/kafka/files/default/kafka: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/kafka/files/default/kafka -------------------------------------------------------------------------------- /roles/kafka/files/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender 23 | log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH 24 | log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log 25 | log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout 26 | log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 27 | 28 | log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender 29 | log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH 30 | log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log 31 | log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout 32 | log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 33 | 34 | log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender 35 | log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH 36 | log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log 37 | log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout 38 | log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 39 | 40 | log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender 41 | log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH 42 | log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log 43 | log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout 44 | log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 45 | 46 | log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender 47 | log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH 48 | log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log 49 | log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout 50 | log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 51 | 52 | log4j.appender.authorizerAppender=org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.authorizerAppender.DatePattern='.'yyyy-MM-dd-HH 54 | log4j.appender.authorizerAppender.File=${kafka.logs.dir}/kafka-authorizer.log 55 | log4j.appender.authorizerAppender.layout=org.apache.log4j.PatternLayout 56 | log4j.appender.authorizerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 57 | 58 | # Turn on all our debugging info 59 | #log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender 60 | #log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender 61 | #log4j.logger.kafka.perf=DEBUG, kafkaAppender 62 | #log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender 63 | #log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG 64 | log4j.logger.kafka=INFO, kafkaAppender 65 | 66 | log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender 67 | log4j.additivity.kafka.network.RequestChannel$=false 68 | 69 | #log4j.logger.kafka.network.Processor=TRACE, requestAppender 70 | #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender 71 | #log4j.additivity.kafka.server.KafkaApis=false 72 | log4j.logger.kafka.request.logger=WARN, requestAppender 73 | log4j.additivity.kafka.request.logger=false 74 | 75 | log4j.logger.kafka.controller=TRACE, controllerAppender 76 | log4j.additivity.kafka.controller=false 77 | 78 | log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender 79 | log4j.additivity.kafka.log.LogCleaner=false 80 | 81 | log4j.logger.state.change.logger=TRACE, stateChangeAppender 82 | log4j.additivity.state.change.logger=false 83 | 84 | #Change this to debug to get the actual audit log for authorizer. 85 | log4j.logger.kafka.authorizer.logger=WARN, authorizerAppender 86 | log4j.additivity.kafka.authorizer.logger=false 87 | 88 | -------------------------------------------------------------------------------- /roles/kafka/files/tools-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=WARN, stderr 17 | 18 | log4j.appender.stderr=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stderr.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stderr.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | log4j.appender.stderr.Target=System.err 22 | -------------------------------------------------------------------------------- /roles/kafka/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - kafka-server 6 | 7 | - name: create configuration directory 8 | file: path=/etc/kafka/conf.{{ cluster_name }} state=directory 9 | 10 | - name: setup alternatives link 11 | alternatives: name=kafka-conf link=/etc/kafka/conf path=/etc/kafka/conf.{{ cluster_name }} 12 | 13 | - name: install default configurations 14 | tags: config 15 | copy: src=default/{{ item }} dest=/etc/default/{{ item }} 16 | with_items: 17 | - kafka 18 | 19 | - name: install template configurations 20 | tags: config 21 | template: src={{ item }}.j2 dest=/etc/kafka/conf/{{ item }} 22 | with_items: 23 | - server.properties 24 | 25 | - name: install files configurations 26 | tags: config 27 | copy: src={{ item }} dest=/etc/kafka/conf/{{ item }} 28 | with_items: 29 | - connect-console-sink.properties 30 | - connect-console-source.properties 31 | - connect-distributed.properties 32 | - connect-file-sink.properties 33 | - connect-file-source.properties 34 | - connect-log4j.properties 35 | - connect-standalone.properties 36 | - log4j.properties 37 | - tools-log4j.properties 38 | 39 | - name: destroy data 40 | tags: init 41 | command: rm -rf {{ item }} 42 | with_items: "{{ log_dirs.split(',') }}" 43 | when: destroy_data 44 | 45 | - name: create kafka directories 46 | tags: init 47 | file: dest={{ item }} owner=kafka group=kafka state=directory 48 | with_items: "{{ log_dirs.split(',') }}" 49 | 50 | - name: start services 51 | tags: service 52 | service: name={{ item }} state=restarted enabled=yes 53 | with_items: 54 | - kafka-server 55 | 56 | - name: test, create topic 57 | tags: test 58 | command: kafka-topics --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --replication-factor {{ groups['kafka']|count }} --partitions 1 --create --topic test 59 | run_once: true 60 | 61 | - name: test, create test message 62 | tags: test 63 | shell: echo 'test message' | kafka-console-producer --broker-list {{ ansible_hostname }}:9092 --topic test 64 | run_once: true 65 | 66 | - name: test, read the message 67 | tags: test 68 | command: kafka-console-consumer --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --max-messages 1 --topic test --from-beginning 69 | run_once: true 70 | register: msg 71 | 72 | - name: test, result 73 | tags: test 74 | debug: var=msg.stdout 75 | failed_when: msg.stdout.find('test message') < 0 76 | run_once: true 77 | 78 | - name: test, delete topic 79 | tags: test 80 | command: kafka-topics --zookeeper {{ groups['zookeepernodes'][0] }}:2181 --delete --topic test 81 | run_once: true 82 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/email.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | 26 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/fs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/hive.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | hadoop.bin.path 23 | /usr/bin/hadoop 24 | 25 | 26 | 27 | hadoop.config.dir 28 | /etc/hadoop/conf 29 | 30 | 31 | 32 | oozie.launcher.mapred.job.queue.name 33 | launcher 34 | 35 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/shell.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/sqoop.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/ssh.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | -------------------------------------------------------------------------------- /roles/oozie/files/action-conf/sub-workflow.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | oozie.launcher.mapred.job.queue.name 23 | launcher 24 | 25 | -------------------------------------------------------------------------------- /roles/oozie/files/adminusers.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # Admin Users, one user by line 20 | -------------------------------------------------------------------------------- /roles/oozie/files/hadoop-conf/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/oozie/files/hadoop-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 21 | 22 | 23 | mapreduce.jobtracker.kerberos.principal 24 | mapred/_HOST@LOCALREALM 25 | 26 | 27 | 28 | yarn.resourcemanager.principal 29 | yarn/_HOST@LOCALREALM 30 | 31 | 32 | 33 | dfs.namenode.kerberos.principal 34 | hdfs/_HOST@LOCALREALM 35 | 36 | 37 | 38 | mapreduce.framework.name 39 | yarn 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /roles/oozie/files/oozie-log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # http://www.apache.org/licenses/LICENSE-2.0 20 | # 21 | # Unless required by applicable law or agreed to in writing, software 22 | # distributed under the License is distributed on an "AS IS" BASIS, 23 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | # See the License for the specific language governing permissions and 25 | # limitations under the License. See accompanying LICENSE file. 26 | # 27 | 28 | # If the Java System property 'oozie.log.dir' is not defined at Oozie start up time 29 | # XLogService sets its value to '${oozie.home}/logs' 30 | 31 | # The appender that Oozie uses must be named 'oozie' (i.e. log4j.appender.oozie) 32 | 33 | # Using the RollingFileAppender with the OozieRollingPolicy will roll the log file every hour and retain up to MaxHistory number of 34 | # log files. If FileNamePattern ends with ".gz" it will create gzip files. 35 | log4j.appender.oozie=org.apache.log4j.rolling.RollingFileAppender 36 | log4j.appender.oozie.RollingPolicy=org.apache.oozie.util.OozieRollingPolicy 37 | log4j.appender.oozie.File=${oozie.log.dir}/oozie.log 38 | log4j.appender.oozie.Append=true 39 | log4j.appender.oozie.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.oozie.layout.ConversionPattern=%d{ISO8601} %p %c{1}:%L - SERVER[${oozie.instance.id}] %m%n 41 | # The FileNamePattern must end with "-%d{yyyy-MM-dd-HH}.gz" or "-%d{yyyy-MM-dd-HH}" and also start with the 42 | # value of log4j.appender.oozie.File 43 | log4j.appender.oozie.RollingPolicy.FileNamePattern=${log4j.appender.oozie.File}-%d{yyyy-MM-dd-HH} 44 | # The MaxHistory controls how many log files will be retained (720 hours / 24 hours per day = 30 days); -1 to disable 45 | log4j.appender.oozie.RollingPolicy.MaxHistory=720 46 | 47 | # Uncomment the below two lines to use the DailyRollingFileAppender instead 48 | # The DatePattern must end with either "dd" or "HH" 49 | #log4j.appender.oozie=org.apache.log4j.DailyRollingFileAppender 50 | #log4j.appender.oozie.DatePattern='.'yyyy-MM-dd-HH 51 | 52 | log4j.appender.oozieops=org.apache.log4j.DailyRollingFileAppender 53 | log4j.appender.oozieops.DatePattern='.'yyyy-MM-dd 54 | log4j.appender.oozieops.File=${oozie.log.dir}/oozie-ops.log 55 | log4j.appender.oozieops.Append=true 56 | log4j.appender.oozieops.layout=org.apache.log4j.PatternLayout 57 | log4j.appender.oozieops.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 58 | 59 | log4j.appender.oozieinstrumentation=org.apache.log4j.DailyRollingFileAppender 60 | log4j.appender.oozieinstrumentation.DatePattern='.'yyyy-MM-dd 61 | log4j.appender.oozieinstrumentation.File=${oozie.log.dir}/oozie-instrumentation.log 62 | log4j.appender.oozieinstrumentation.Append=true 63 | log4j.appender.oozieinstrumentation.layout=org.apache.log4j.PatternLayout 64 | log4j.appender.oozieinstrumentation.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 65 | 66 | log4j.appender.oozieaudit=org.apache.log4j.DailyRollingFileAppender 67 | log4j.appender.oozieaudit.DatePattern='.'yyyy-MM-dd 68 | log4j.appender.oozieaudit.File=${oozie.log.dir}/oozie-audit.log 69 | log4j.appender.oozieaudit.Append=true 70 | log4j.appender.oozieaudit.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.oozieaudit.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 72 | 73 | log4j.appender.openjpa=org.apache.log4j.DailyRollingFileAppender 74 | log4j.appender.openjpa.DatePattern='.'yyyy-MM-dd 75 | log4j.appender.openjpa.File=${oozie.log.dir}/oozie-jpa.log 76 | log4j.appender.openjpa.Append=true 77 | log4j.appender.openjpa.layout=org.apache.log4j.PatternLayout 78 | log4j.appender.openjpa.layout.ConversionPattern=%d{ISO8601} %5p %c{1}:%L - %m%n 79 | 80 | log4j.logger.openjpa=INFO, openjpa 81 | log4j.logger.oozieops=INFO, oozieops 82 | log4j.logger.oozieinstrumentation=ALL, oozieinstrumentation 83 | log4j.logger.oozieaudit=ALL, oozieaudit 84 | log4j.logger.org.apache.oozie=INFO, oozie 85 | log4j.logger.org.apache.hadoop=WARN, oozie 86 | log4j.logger.org.mortbay=WARN, oozie 87 | log4j.logger.org.hsqldb=WARN, oozie 88 | log4j.logger.org.apache.hadoop.security.authentication.server=WARN, oozie 89 | -------------------------------------------------------------------------------- /roles/oozie/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install oozie packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - oozie 6 | - unzip 7 | - postgresql 8 | 9 | - name: create configuration directory 10 | tags: config 11 | file: path={{ etc_folder }}/oozie state=directory 12 | 13 | - name: create log directory 14 | tags: config 15 | file: path={{ log_folder }}/oozie state=directory owner=oozie group=oozie mode=755 16 | 17 | - name: setup alternatives link 18 | tags: config 19 | alternatives: name=oozie-conf link=/etc/oozie/conf path={{ etc_folder }}/oozie 20 | 21 | - name: setup alternatives link 22 | tags: config 23 | alternatives: name=oozie-tomcat-conf link=/etc/oozie/oozie-tomcat-conf path=/etc/oozie/tomcat-conf.http 24 | 25 | - name: install template configurations 26 | tags: config 27 | template: src="{{ item }}.j2" dest={{ etc_folder }}/oozie/{{ item }} 28 | with_items: 29 | - oozie-site.xml 30 | - oozie-env.sh 31 | 32 | - name: install files configurations 33 | tags: config 34 | copy: src={{ item }} dest={{ etc_folder }}/oozie/{{ item }} 35 | with_items: 36 | - hadoop-config.xml 37 | - oozie-default.xml 38 | - oozie-log4j.properties 39 | 40 | - name: create config directories 41 | tags: config 42 | file: path=/etc/oozie/conf/{{ item }} state=directory 43 | with_items: 44 | - action-conf 45 | - hadoop-conf 46 | 47 | - name: install files configurations 48 | tags: config 49 | copy: src={{ item }} dest={{ etc_folder }}/oozie/action-conf/{{ item|basename }} 50 | with_fileglob: 51 | - action-conf/* 52 | 53 | - name: install files configurations 54 | tags: config 55 | copy: src={{ item }} dest={{ etc_folder }}/oozie/hadoop-conf/{{ item|basename }} 56 | with_fileglob: 57 | - hadoop-conf/* 58 | 59 | - name: create hdfs directories 60 | tags: init 61 | command: sudo -Hu hdfs hdfs dfs {{ item }} 62 | with_items: 63 | - -mkdir -p /user/oozie 64 | - -chown oozie:oozie /user/oozie 65 | run_once: true 66 | 67 | - name: install shared libraries to hdfs 68 | tags: 69 | - config 70 | - oozie-setup 71 | command: oozie-setup sharelib create -fs {% if groups['namenodes']|count > 1 %} hdfs://{{ cluster_name }} {% else %} hdfs://{{ groups['namenodes'][0] }}:8020 {% endif %} -locallib /usr/lib/oozie/oozie-sharelib-yarn 72 | run_once: true 73 | 74 | - name: generate sql 75 | tags: init 76 | template: src="oozie.sql.j2" dest=/tmp/oozie.sql 77 | 78 | - name: install .pgpass 79 | template: src=".pgpass.j2" dest=/root/.pgpass mode=0600 80 | 81 | - name: create database 82 | tags: init 83 | command: psql -h {{ groups['postgresql'][0] }} --username postgres -f /tmp/oozie.sql 84 | when: destroy_data 85 | run_once: true 86 | 87 | - name: remove sql and .pgpass files 88 | command: rm -f /tmp/oozie.sql /root/.pgpass 89 | 90 | - name: initialize database 91 | tags: init 92 | command: service oozie init 93 | when: destroy_data 94 | run_once: true 95 | 96 | - name: get ext-2.2.zip if not exists 97 | tags: download 98 | local_action: get_url url=http://archive.cloudera.com/gplextras/misc/ext-2.2.zip dest={{ inventory_dir }}/workdir/ext-2.2.zip 99 | run_once: true 100 | 101 | - name: extract ext-2.2 102 | tags: config 103 | unarchive: src={{ inventory_dir }}/workdir/ext-2.2.zip dest=/var/lib/oozie/ 104 | 105 | - name: start services 106 | tags: service 107 | service: name=oozie state=restarted enabled=yes 108 | 109 | - include: oozie-test.yaml tags=test 110 | -------------------------------------------------------------------------------- /roles/oozie/tasks/oozie-test.yaml: -------------------------------------------------------------------------------- 1 | - name: get examples name 2 | shell: rpm -ql oozie-client | grep oozie-examples 3 | register: o 4 | 5 | - name: extract examples 6 | unarchive: src={{ o.stdout_lines[0] }} dest=/tmp copy=no 7 | 8 | - name: put examples to hdfs 9 | command: sudo -Hi -u hdfs hdfs dfs -put -f /tmp/examples examples 10 | 11 | - name: amend example configuration 12 | lineinfile: dest=/tmp/examples/apps/map-reduce/job.properties regexp='^nameNode=' line="nameNode={% if groups['namenodes']|count > 1 %}hdfs://{{ cluster_name }}{% else %}hdfs://{{ groups['namenodes'][0] }}:8020{% endif %}" 13 | 14 | - name: amend example configuration 15 | lineinfile: dest=/tmp/examples/apps/map-reduce/job.properties regexp='^jobTracker=' line="jobTracker={% if groups['yarnresourcemanager']|count > 1 %}maprfs:///{% else %}hdfs://{{ groups['yarnresourcemanager'][0] }}:8032{% endif %}" 16 | 17 | - name: run map-reduce job 18 | command: sudo -Hi -u hdfs oozie job -oozie http://localhost:11000/oozie -config /tmp/examples/apps/map-reduce/job.properties -run 19 | register: j 20 | 21 | - name: check status 22 | shell: sudo -Hi -u hdfs oozie job -oozie http://localhost:11000/oozie -info {{ j.stdout_lines[0].split()[1] }} | awk '/^Status/{ print $3 }' 23 | register: s 24 | until: s.stdout == 'SUCCEEDED' 25 | delay: 5 26 | retries: 12 27 | 28 | - debug: msg="Job status is {{ s.stdout }}" 29 | 30 | - name: clean up examples 31 | file: path=/tmp/examples state=absent 32 | 33 | - name: remove examples from hdfs 34 | command: sudo -Hi -u hdfs hdfs dfs -rm -r examples 35 | -------------------------------------------------------------------------------- /roles/oozie/templates/.pgpass.j2: -------------------------------------------------------------------------------- 1 | {{ groups['postgresql'][0] }}:5432:postgres:postgres:{{ postgres_password }} 2 | {{ groups['postgresql'][0] }}:5432:oozie:oozie:{{ oozie_password }} 3 | -------------------------------------------------------------------------------- /roles/oozie/templates/oozie-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | export JAVA_LIBRARY_PATH="$JAVA_LIBRARY_PATH:/usr/lib/hadoop/lib/native" 21 | 22 | # Set Oozie specific environment variables here. 23 | 24 | export OOZIE_DATA=/var/lib/oozie 25 | export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat 26 | export CATALINA_TMPDIR=/var/lib/oozie 27 | export CATALINA_PID=/var/run/oozie/oozie.pid 28 | export CATALINA_BASE=/var/lib/oozie/tomcat-deployment 29 | 30 | # Settings for the Embedded Tomcat that runs Oozie 31 | # Java System properties for Oozie should be specified in this variable 32 | # 33 | export OOZIE_HTTPS_PORT=11443 34 | export OOZIE_HTTPS_KEYSTORE_PASS=password 35 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.port=${OOZIE_HTTPS_PORT}" 36 | export CATALINA_OPTS="$CATALINA_OPTS -Doozie.https.keystore.pass=${OOZIE_HTTPS_KEYSTORE_PASS}" 37 | export CATALINA_OPTS="$CATALINA_OPTS -Xmx{{ oozie_heapsize }}m" 38 | 39 | # Oozie configuration file to load from Oozie configuration directory 40 | # 41 | # export OOZIE_CONFIG_FILE=oozie-site.xml 42 | export OOZIE_CONFIG=/etc/oozie/conf 43 | 44 | # Oozie logs directory 45 | # 46 | # export OOZIE_LOG=${OOZIE_HOME}/logs 47 | export OOZIE_LOG={{ log_folder }}/oozie 48 | 49 | # Oozie Log4J configuration file to load from Oozie configuration directory 50 | # 51 | # export OOZIE_LOG4J_FILE=oozie-log4j.properties 52 | 53 | # Reload interval of the Log4J configuration file, in seconds 54 | # 55 | # export OOZIE_LOG4J_RELOAD=10 56 | 57 | # The port Oozie server runs 58 | # 59 | # export OOZIE_HTTP_PORT=11000 60 | 61 | # The port Oozie server runs if using SSL (HTTPS) 62 | # 63 | # export OOZIE_HTTPS_PORT=11443 64 | 65 | # The host name Oozie server runs on 66 | # 67 | # export OOZIE_HTTP_HOSTNAME=`hostname -f` 68 | 69 | # The base URL for callback URLs to Oozie 70 | # 71 | # export OOZIE_BASE_URL="http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie" 72 | 73 | # The location of the keystore for the Oozie server if using SSL (HTTPS) 74 | # 75 | # export OOZIE_HTTPS_KEYSTORE_FILE=${HOME}/.keystore 76 | 77 | # The password of the keystore for the Oozie server if using SSL (HTTPS) 78 | # 79 | # export OOZIE_HTTPS_KEYSTORE_PASS=password 80 | 81 | # The Oozie Instance ID 82 | # 83 | # export OOZIE_INSTANCE_ID="${OOZIE_HTTP_HOSTNAME}" 84 | -------------------------------------------------------------------------------- /roles/oozie/templates/oozie-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | oozie.service.JPAService.jdbc.driver 6 | org.postgresql.Driver 7 | 8 | 9 | 10 | oozie.service.JPAService.jdbc.url 11 | jdbc:postgresql://{{ groups['postgresql'][0] }}:5432/oozie 12 | 13 | 14 | 15 | oozie.service.JPAService.jdbc.username 16 | oozie 17 | 18 | 19 | 20 | oozie.service.JPAService.jdbc.password 21 | {{ oozie_password }} 22 | 23 | 24 | 25 | 26 | oozie.service.HadoopAccessorService.hadoop.configurations 27 | *=/etc/hadoop/conf 28 | Comma separated AUTHORITY=HADOOP_CONF_DIR, where AUTHORITY is the HOST:PORT of the Hadoop service (JobTracker, HDFS). The wildcard '*' configuration is used when there is no exact match for an authority. The HADOOP_CONF_DIR contains the relevant Hadoop *-site.xml files. If the path is relative is looked within the Oozie configuration directory; though the path can be absolute (i.e. to point to Hadoop client conf/ directories in the local filesystem. 29 | 30 | 31 | 32 | 33 | 34 | oozie.service.ProxyUserService.proxyuser.hue.hosts 35 | * 36 | 37 | 38 | 39 | oozie.service.ProxyUserService.proxyuser.hue.groups 40 | * 41 | 42 | 43 | {% if oozie_ext_properties %} 44 | {% for property in oozie_ext_properties %} 45 | 46 | {{property.name}} 47 | {{property.value}} 48 | 49 | {% endfor %} 50 | {% endif %} 51 | 52 | 53 | oozie.service.ActionService.executor.ext.classes 54 | 55 | {{oozie_ext_classes|join(',')}} 56 | 57 | 58 | 59 | 60 | oozie.service.SchemaService.wf.ext.schemas 61 | 62 | {{oozie_ext_schemas|join(',')}} 63 | 64 | 65 | 66 | 67 | oozie.action.fs.glob.max 68 | 500000 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /roles/oozie/templates/oozie.sql.j2: -------------------------------------------------------------------------------- 1 | DROP DATABASE IF EXISTS "oozie"; 2 | DROP ROLE IF EXISTS oozie; 3 | CREATE ROLE oozie LOGIN ENCRYPTED PASSWORD '{{ oozie_password }}' NOSUPERUSER INHERIT CREATEDB NOCREATEROLE; 4 | CREATE DATABASE "oozie" WITH OWNER = oozie 5 | ENCODING = 'UTF8' 6 | TABLESPACE = pg_default 7 | LC_COLLATE = 'en_US.UTF-8' 8 | LC_CTYPE = 'en_US.UTF-8' 9 | CONNECTION LIMIT = -1; 10 | \q 11 | -------------------------------------------------------------------------------- /roles/postgresql/files/pg_hba.conf: -------------------------------------------------------------------------------- 1 | # PostgreSQL Client Authentication Configuration File 2 | # =================================================== 3 | # 4 | # Refer to the "Client Authentication" section in the 5 | # PostgreSQL documentation for a complete description 6 | # of this file. A short synopsis follows. 7 | # 8 | # This file controls: which hosts are allowed to connect, how clients 9 | # are authenticated, which PostgreSQL user names they can use, which 10 | # databases they can access. Records take one of these forms: 11 | # 12 | # local DATABASE USER METHOD [OPTIONS] 13 | # host DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] 14 | # hostssl DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] 15 | # hostnossl DATABASE USER CIDR-ADDRESS METHOD [OPTIONS] 16 | # 17 | # (The uppercase items must be replaced by actual values.) 18 | # 19 | # The first field is the connection type: "local" is a Unix-domain socket, 20 | # "host" is either a plain or SSL-encrypted TCP/IP socket, "hostssl" is an 21 | # SSL-encrypted TCP/IP socket, and "hostnossl" is a plain TCP/IP socket. 22 | # 23 | # DATABASE can be "all", "sameuser", "samerole", a database name, or 24 | # a comma-separated list thereof. 25 | # 26 | # USER can be "all", a user name, a group name prefixed with "+", or 27 | # a comma-separated list thereof. In both the DATABASE and USER fields 28 | # you can also write a file name prefixed with "@" to include names from 29 | # a separate file. 30 | # 31 | # CIDR-ADDRESS specifies the set of hosts the record matches. 32 | # It is made up of an IP address and a CIDR mask that is an integer 33 | # (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that specifies 34 | # the number of significant bits in the mask. Alternatively, you can write 35 | # an IP address and netmask in separate columns to specify the set of hosts. 36 | # 37 | # METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", "krb5", 38 | # "ident", "pam", "ldap" or "cert". Note that "password" sends passwords 39 | # in clear text; "md5" is preferred since it sends encrypted passwords. 40 | # 41 | # OPTIONS are a set of options for the authentication in the format 42 | # NAME=VALUE. The available options depend on the different authentication 43 | # methods - refer to the "Client Authentication" section in the documentation 44 | # for a list of which options are available for which authentication methods. 45 | # 46 | # Database and user names containing spaces, commas, quotes and other special 47 | # characters must be quoted. Quoting one of the keywords "all", "sameuser" or 48 | # "samerole" makes the name lose its special character, and just match a 49 | # database or username with that name. 50 | # 51 | # This file is read on server startup and when the postmaster receives 52 | # a SIGHUP signal. If you edit the file on a running system, you have 53 | # to SIGHUP the postmaster for the changes to take effect. You can use 54 | # "pg_ctl reload" to do that. 55 | 56 | # Put your actual configuration here 57 | # ---------------------------------- 58 | # 59 | # If you want to allow non-local connections, you need to add more 60 | # "host" records. In that case you will also need to make PostgreSQL listen 61 | # on a non-local interface via the listen_addresses configuration parameter, 62 | # or via the -i or -h command line switches. 63 | # 64 | 65 | 66 | 67 | # TYPE DATABASE USER CIDR-ADDRESS METHOD 68 | 69 | # "local" is for Unix domain socket connections only 70 | local all all ident 71 | # IPv4 local connections: 72 | host all all 127.0.0.1/32 ident 73 | # IPv6 local connections: 74 | host all all ::1/128 ident 75 | # allow all connections 76 | host all all 0.0.0.0 0.0.0.0 md5 77 | -------------------------------------------------------------------------------- /roles/postgresql/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install postgresql server and jdbc 2 | tags: package 3 | yum: name=postgresql{{ postgres_version|default('')|replace('.', '') }}-server state=latest 4 | 5 | - name: destroy data and init data and init database 6 | tags: init 7 | shell: "{{ item }}" 8 | with_items: 9 | - service postgresql{% if postgres_version %}-{{ postgres_version }} {% endif %} stop 10 | - rm -rf /var/lib/pgsql/{{ postgres_version|default('') }}/data/* 11 | - service postgresql{% if postgres_version %}-{{ postgres_version }}{% endif %} initdb 12 | when: destroy_data 13 | 14 | - name: install configuration files 15 | tags: config 16 | copy: src={{ item }} dest=/var/lib/pgsql/{{ postgres_version|default('') }}/data/{{ item }} owner=postgres group=postgres 17 | with_items: 18 | - postgresql.conf 19 | - pg_hba.conf 20 | 21 | - name: start services 22 | tags: service 23 | service: name=postgresql{% if postgres_version %}-{{ postgres_version }}{% endif %} state=restarted enabled=yes 24 | 25 | - name: set postgres password 26 | tags: init 27 | command: sudo -Hu postgres psql -c "alter user postgres with password '{{ postgres_password }}';" 28 | when: destroy_data 29 | 30 | - name: install userdb sql 31 | tags: init 32 | template: src=userdb.sql.j2 dest=/tmp/userdb.sql 33 | when: user_database is defined 34 | 35 | - name: create userdb 36 | tags: init 37 | command: sudo -Hu postgres psql -f /tmp/userdb.sql 38 | when: user_database is defined and destroy_data 39 | 40 | - name: remove userdb sql 41 | tags: init 42 | command: rm -f /tmp/userdb.sql 43 | when: user_database is defined 44 | 45 | - name: execute user sql script 46 | shell: echo "{{ postgres_script }}" | sudo -Hu postgres psql 47 | when: postgres_script is defined 48 | -------------------------------------------------------------------------------- /roles/postgresql/templates/userdb.sql.j2: -------------------------------------------------------------------------------- 1 | drop database if exists "{{ user_database }}"; 2 | create database {{ user_database }}; 3 | \c {{ user_database }}; 4 | create user userdb_user with password '{{ userdb_password }}'; 5 | grant all privileges on database {{ user_database }} to userdb_user; 6 | -------------------------------------------------------------------------------- /roles/snmp/files/snmpd.conf: -------------------------------------------------------------------------------- 1 | com2sec notConfigUser default public 2 | group notConfigGroup v1 notConfigUser 3 | group notConfigGroup v2c notConfigUser 4 | access notConfigGroup "" any noauth exact systemview none none 5 | 6 | view systemview included .1 7 | 8 | #for hight load system comment the line above and uncomment view configuration below 9 | #this will exclude large tcp connection tables from default system view 10 | 11 | #view systemview included .1.3.6.1.2.1.1 12 | #view systemview included .1.3.6.1.2.1.2 13 | #view systemview included .1.3.6.1.2.1.4 14 | #view systemview included .1.3.6.1.2.1.25 15 | #view systemview included .1.3.6.1.2.1.31 16 | #view systemview included .1.3.6.1.4.1.777 17 | #view systemview included .1.3.6.1.4.1.2021 18 | #view systemview included .1.3.6.1.4.1.28675 19 | #view systemview included .1.3.6.1.4.1.57052 20 | 21 | master agentx 22 | agentxperms 770 770 daemon users 23 | 24 | dontLogTCPWrappersConnects 1 25 | interface lo 24 1000000000 26 | -------------------------------------------------------------------------------- /roles/snmp/files/subagent-shell-hadoop-conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /roles/snmp/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - net-snmp 6 | - net-snmp-subagent-shell 7 | - hadoop-monitoring-utility 8 | 9 | - name: install snmpd configurations 10 | tags: config 11 | copy: src={{ item }} dest=/etc/snmp/{{ item }} 12 | with_items: 13 | - snmpd.conf 14 | 15 | - name: install subagent-shell configurations 16 | tags: config 17 | copy: src={{ item }} dest=/etc/snmp/subagent-shell/{{ item }} 18 | with_items: 19 | - subagent-shell-hadoop-conf.xml 20 | 21 | - name: start snmp services 22 | tags: service 23 | service: name={{ item }} enabled=yes state=restarted 24 | with_items: 25 | - snmpd 26 | - subagent-shell 27 | -------------------------------------------------------------------------------- /roles/solr/files/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/solr/files/0 -------------------------------------------------------------------------------- /roles/solr/files/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergevs/ansible-cloudera-hadoop/6192791f9b11906f81a8babb3bc4b6a9f550825f/roles/solr/files/1 -------------------------------------------------------------------------------- /roles/solr/files/solr.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 28 | 29 | 30 | 31 | 32 | ${host:} 33 | ${jetty.port:8983} 34 | ${hostContext:solr} 35 | ${zkClientTimeout:30000} 36 | ${genericCoreNodeNames:true} 37 | 38 | 39 | ${zkACLProvider:} 40 | ${zkCredentialsProvider:} 41 | 42 | 43 | 44 | 46 | ${socketTimeout:0} 47 | ${connTimeout:0} 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /roles/solr/files/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | 10 | # the directory where the snapshot is stored. 11 | # dataDir=/opt/zookeeper/data 12 | # NOTE: Solr defaults the dataDir to /zoo_data 13 | 14 | # the port at which the clients will connect 15 | # clientPort=2181 16 | # NOTE: Solr sets this based on zkRun / zkHost params 17 | 18 | -------------------------------------------------------------------------------- /roles/solr/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - solr-server 6 | 7 | - name: create configuration directory 8 | file: path=/etc/solr/conf.{{ cluster_name }} state=directory 9 | 10 | - name: setup alternatives link 11 | alternatives: name=solr-conf link=/etc/solr/conf path=/etc/solr/conf.{{ cluster_name }} 12 | 13 | - name: install default configurations 14 | tags: config 15 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 16 | with_items: 17 | - solr 18 | 19 | #- name: install template configurations 20 | # tags: config 21 | # template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }} 22 | # with_items: 23 | # - hbase-site.xml 24 | # - regionservers 25 | 26 | - name: install files configurations 27 | tags: config 28 | copy: src={{ item }} dest=/etc/solr/conf/{{ item }} 29 | with_items: 30 | - solr.xml 31 | - zoo.cfg 32 | 33 | - name: create hdfs directories 34 | command: sudo -Hu hdfs hdfs dfs {{ item }} 35 | with_items: 36 | - -mkdir -p /solr 37 | - -chown solr /solr 38 | run_once: true 39 | 40 | - name: init zookeeper 41 | tags: init 42 | command: solrctl init 43 | run_once: true 44 | 45 | - name: start services 46 | tags: service 47 | service: name={{ item }} state=restarted enabled=yes 48 | with_items: 49 | - solr-server 50 | -------------------------------------------------------------------------------- /roles/solr/templates/default/solr.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | SOLR_PORT=8983 16 | SOLR_ADMIN_PORT=8984 17 | SOLR_LOG=/var/log/solr 18 | SOLR_ZK_ENSEMBLE={% for item in groups['zookeepernodes'] -%} 19 | {{ item }}:2181{% if not loop.last %},{% endif %} 20 | {%- endfor %}/solr 21 | 22 | {% if groups['namenodes']|count > 1 %} 23 | SOLR_HDFS_HOME=hdfs://{{ cluster_name }}/solr 24 | {% else %} 25 | SOLR_HDFS_HOME=hdfs://{{ groups['namenodes'][0] }}:8020/solr 26 | {% endif %} 27 | 28 | SOLR_HDFS_CONFIG=/etc/hadoop/conf 29 | # SOLR_KERBEROS_ENABLED=true 30 | # SOLR_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab 31 | # SOLR_KERBEROS_PRINCIPAL=solr/localhost@LOCALHOST 32 | SOLR_AUTHENTICATION_TYPE=simple 33 | SOLR_AUTHENTICATION_SIMPLE_ALLOW_ANON=true 34 | # SOLR_AUTHENTICATION_KERBEROS_KEYTAB=/etc/solr/conf/solr.keytab 35 | # SOLR_AUTHENTICATION_KERBEROS_PRINCIPAL=HTTP/localhost@LOCALHOST 36 | # SOLR_AUTHENTICATION_KERBEROS_NAME_RULES=DEFAULT 37 | # SOLR_AUTHENTICATION_JAAS_CONF=/etc/solr/conf/jaas.conf 38 | SOLR_SECURITY_ALLOWED_PROXYUSERS=hue 39 | SOLR_SECURITY_PROXYUSER_hue_HOSTS=* 40 | SOLR_SECURITY_PROXYUSER_hue_GROUPS=* 41 | # SOLR_AUTHORIZATION_SENTRY_SITE=/etc/solr/conf/sentry-site.xml 42 | # SOLR_AUTHORIZATION_SUPERUSER=solr 43 | SOLRD_WATCHDOG_TIMEOUT=30 44 | 45 | #SOLR_SSL_ENABLED=true 46 | #SOLR_KEYSTORE_PATH=/var/lib/solr/.keystore 47 | #SOLR_KEYSTORE_PASSWORD= 48 | #SOLR_TRUSTSTORE_PATH=/var/lib/solr/.truststore 49 | #SOLR_TRUSTSTORE_PASSWORD= 50 | 51 | -------------------------------------------------------------------------------- /roles/spark/files/fairscheduler.xml.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | FAIR 5 | 1 6 | 2 7 | 8 | 9 | FIFO 10 | 2 11 | 3 12 | 13 | 14 | -------------------------------------------------------------------------------- /roles/spark/files/log4j.properties.template: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=WARN 10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 13 | -------------------------------------------------------------------------------- /roles/spark/files/slaves.template: -------------------------------------------------------------------------------- 1 | # A Spark Worker will be started on each of the machines listed below. 2 | localhost -------------------------------------------------------------------------------- /roles/spark/files/spark-defaults.conf.template: -------------------------------------------------------------------------------- 1 | # Default system properties included when running spark-submit. 2 | # This is useful for setting default environmental settings. 3 | 4 | # Example: 5 | # spark.master spark://master:7077 6 | # spark.eventLog.enabled true 7 | # spark.eventLog.dir hdfs://namenode:8021/directory 8 | # spark.serializer org.apache.spark.serializer.KryoSerializer 9 | # spark.driver.memory 5g 10 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 11 | -------------------------------------------------------------------------------- /roles/spark/files/spark-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file is sourced when running various Spark programs. 4 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 5 | 6 | # Options read when launching programs locally with 7 | # ./bin/run-example or ./bin/spark-submit 8 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 9 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 10 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 11 | # - SPARK_CLASSPATH, default classpath entries to append 12 | 13 | # Options read by executors and drivers running inside the cluster 14 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 15 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 16 | # - SPARK_CLASSPATH, default classpath entries to append 17 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 18 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 19 | 20 | # Options read in YARN client mode 21 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 22 | # - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2) 23 | # - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1). 24 | # - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G) 25 | # - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb) 26 | # - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) 27 | # - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’) 28 | # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. 29 | # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. 30 | 31 | # Options for the daemons used in the standalone deploy mode 32 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 33 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 34 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 35 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 36 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) 37 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 38 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 39 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 40 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 41 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 42 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 43 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 44 | 45 | # Generic options for the daemons used in the standalone deploy mode 46 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 47 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 48 | # - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) 49 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 50 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 51 | -------------------------------------------------------------------------------- /roles/spark/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - spark-core 6 | 7 | - name: install packages 8 | tags: packages 9 | yum: name={{ item }} state=latest 10 | with_items: 11 | - spark-history-server 12 | when: ansible_hostname == groups['spark'][0] 13 | 14 | - name: create configuration directory 15 | file: path=/etc/spark/conf.{{ cluster_name }} state=directory 16 | 17 | - name: setup alternatives link 18 | alternatives: name=spark-conf link=/etc/spark/conf path=/etc/spark/conf.{{ cluster_name }} 19 | 20 | - name: install default configurations 21 | tags: config 22 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 23 | with_items: 24 | - spark 25 | 26 | - name: install template configurations 27 | tags: config 28 | template: src={{ item }}.j2 dest=/etc/spark/conf/{{ item }} 29 | with_items: 30 | - spark-defaults.conf 31 | 32 | - name: install files configurations 33 | tags: config 34 | copy: src={{ item }} dest=/etc/spark/conf/{{ item|basename }} 35 | with_fileglob: 36 | - ./* 37 | 38 | - name: create hdfs directories 39 | command: sudo -Hu hdfs hdfs dfs {{ item }} 40 | with_items: 41 | - -mkdir -p {{ spark_history_server_dir }} 42 | - -chown spark:spark {{ spark_history_server_dir }} 43 | - -chmod 1777 {{ spark_history_server_dir }} 44 | run_once: true 45 | 46 | - name: start services 47 | tags: service 48 | service: name={{ item }} state=restarted enabled=yes 49 | with_items: 50 | - spark-history-server 51 | when: ansible_hostname == groups['spark'][0] 52 | 53 | - name: test 54 | tags: test 55 | command: sudo -Hu hdfs spark-submit --master yarn-cluster --class org.apache.spark.examples.SparkPi --num-executors 2 --driver-cores 1 --driver-memory 512m --executor-memory 512m --executor-cores 2 --queue default /usr/lib/spark/lib/spark-examples.jar 10 56 | run_once: true 57 | -------------------------------------------------------------------------------- /roles/spark/templates/default/spark.j2: -------------------------------------------------------------------------------- 1 | {% if groups['namenodes']|count > 1 %} 2 | export SPARK_HISTORY_SERVER_LOG_DIR=hdfs://{{ cluster_name }}{{ spark_history_server_dir }} 3 | {% else %} 4 | export SPARK_HISTORY_SERVER_LOG_DIR=hdfs://{{ groups['namenodes'][0] }}:8020{{ spark_history_server_dir }} 5 | {% endif %} 6 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=${SPARK_HISTORY_SERVER_LOG_DIR}" 7 | export SPARK_CONF_DIR=/etc/spark/conf 8 | -------------------------------------------------------------------------------- /roles/spark/templates/spark-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # Default system properties included when running spark-submit. 2 | # This is useful for setting default environmental settings. 3 | 4 | # Example: 5 | # spark.master spark://master:7077 6 | spark.eventLog.enabled true 7 | {% if groups['namenodes']|count > 1 %} 8 | spark.eventLog.dir hdfs://{{ cluster_name }}{{ spark_history_server_dir }} 9 | {% else %} 10 | spark.eventLog.dir hdfs://{{ groups['namenodes'][0] }}:8020{{ spark_history_server_dir }} 11 | {% endif %} 12 | # spark.serializer org.apache.spark.serializer.KryoSerializer 13 | # spark.driver.memory 5g 14 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 15 | -------------------------------------------------------------------------------- /roles/spark/vars/main.yaml: -------------------------------------------------------------------------------- 1 | spark_history_server_dir: /var/log/spark/apps 2 | -------------------------------------------------------------------------------- /roles/syslog-ng/files/hadoop.pdb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | covers hadoop java logs. 6 | 7 | hadoop 8 | 9 | 10 | 11 | @ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname::@ @ANYSTRING:hadoop.message@ 12 | 13 | 14 | 15 | 2015-11-17 16:54:27,586 ERROR org.apache.hadoop.hdfs.server.namenode.NameNode: RECEIVED SIGNAL 15: SIGTERM 16 | 17 | 2015-11-17 18 | 16:54:27,586 19 | ERROR 20 | org.apache.hadoop.hdfs.server.namenode.NameNode 21 | RECEIVED SIGNAL 15: SIGTERM 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ${hadoop.classname}: ${hadoop.message} 30 | true 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 42 | -------------------------------------------------------------------------------- /roles/syslog-ng/files/hive.pdb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | covers hive java logs. 6 | 7 | hive 8 | 9 | 10 | 11 | @ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname:(@@ESTRING:hadoop.filename::@@ESTRING:hadoop.funcname:(@@ESTRING:hadoop.lineno:)@) - @ANYSTRING:hadoop.message@ 12 | 13 | 14 | 15 | 2015-10-30 14:30:39,802 ERROR Datastore.Schema (Log4JLogger.java:error(125)) - Failed initialising database. 16 | 17 | 2015-10-30 18 | 14:30:39,802 19 | ERROR 20 | Datastore.Schema 21 | Log4JLogger.java 22 | error 23 | 125 24 | Failed initialising database. 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | ${hadoop.classname}: ${hadoop.message} 33 | true 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 45 | 46 | -------------------------------------------------------------------------------- /roles/syslog-ng/files/impala.pdb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | covers impala java logs. 6 | 7 | impala 8 | 9 | 10 | 11 | @ESTRING:hadoop.code: @@STRING:hadoop.time:.:@@QSTRING:hadoop.pid: @@ESTRING:hadoop.filename::@@ESTRING:hadoop.lineno:]@ @ANYSTRING:hadoop.message@ 12 | 13 | 14 | 15 | I1124 17:23:12.848029 6746 authentication.cc:1014] External communication is not authenticated 16 | 17 | I1124 18 | 17:23:12,848029 19 | 6746 20 | authentication.cc 21 | 1014 22 | External communication is not authenticated 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | [${hadoop.code}] ${hadoop.filename}: ${hadoop.message} 31 | true 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 44 | -------------------------------------------------------------------------------- /roles/syslog-ng/files/oozie.pdb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | covers oozie java logs. 6 | 7 | oozie 8 | 9 | 10 | 11 | @ESTRING:hadoop.date: @@ESTRING:hadoop.time: @@ESTRING:hadoop.severity: @@ESTRING:hadoop.classname::@@ESTRING:hadoop.lineno: @- SERVER[@ESTRING:hadoop.hostname:]@ @ANYSTRING:hadoop.message@ 12 | 13 | 14 | 15 | 2015-11-15 16:07:34,036 INFO PauseTransitService:520 - SERVER[gp-test0.ocslab.com] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[-] Acquired lock for [org.apache.oozie.service.PauseTransitService] 16 | 17 | 2015-11-15 18 | 16:07:34,036 19 | INFO 20 | PauseTransitService 21 | 520 22 | 2015-11-15 16:07:34,036 INFO PauseTransitService:520 - SERVER[gp-test0.ocslab.com] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[-] ACTION[-] Acquired lock for [org.apache.oozie.service.PauseTransitService] 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | ${hadoop.classname}: ${hadoop.message} 31 | true 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 44 | -------------------------------------------------------------------------------- /roles/syslog-ng/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install packages 2 | tags: packages 3 | yum: name={{ item }} state=latest 4 | with_items: 5 | - syslog-ng 6 | 7 | - name: create directories 8 | tags: config 9 | file: path=/etc/syslog-ng/{{ item }} state=directory mode=0755 10 | with_items: 11 | - conf.d 12 | - patterndb.d/hadoop 13 | 14 | - name: update patterndb.d 15 | tags: config 16 | copy: src={{ item }} dest=/etc/syslog-ng/patterndb.d/hadoop/{{ item }} 17 | with_items: 18 | - hadoop.pdb 19 | - hive.pdb 20 | - impala.pdb 21 | - oozie.pdb 22 | 23 | - name: update configuration 24 | tags: config 25 | template: src={{ item }} dest=/etc/syslog-ng/conf.d/{{ item }} 26 | with_items: 27 | - 30-hadoop.conf 28 | 29 | - name: reload syslog-ng service 30 | tags: service 31 | service: name=syslog-ng enabled=yes state=restarted 32 | -------------------------------------------------------------------------------- /roles/syslog-ng/vars/main.yaml: -------------------------------------------------------------------------------- 1 | syslog_ng_destination: d_logcollector_throttled 2 | -------------------------------------------------------------------------------- /roles/zookeeper/files/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
namevaluedescription
21 | 22 | 23 |
24 |
25 | -------------------------------------------------------------------------------- /roles/zookeeper/files/log4j.properties: -------------------------------------------------------------------------------- 1 | # Copyright 2012 The Apache Software Foundation 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Define some default values that can be overridden by system properties 20 | zookeeper.root.logger=INFO, CONSOLE 21 | 22 | zookeeper.console.threshold=INFO 23 | 24 | zookeeper.log.dir=. 25 | zookeeper.log.file=zookeeper.log 26 | zookeeper.log.threshold=INFO 27 | zookeeper.log.maxfilesize=256MB 28 | zookeeper.log.maxbackupindex=20 29 | 30 | zookeeper.tracelog.dir=. 31 | zookeeper.tracelog.file=zookeeper_trace.log 32 | 33 | log4j.rootLogger=${zookeeper.root.logger} 34 | 35 | # 36 | # console 37 | # Add "console" to rootlogger above if you want to use this 38 | # 39 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 40 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 41 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 42 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 43 | 44 | # 45 | # Add ROLLINGFILE to rootLogger to get log file output 46 | # 47 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 48 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 49 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 50 | log4j.appender.ROLLINGFILE.MaxFileSize=${zookeeper.log.maxfilesize} 51 | log4j.appender.ROLLINGFILE.MaxBackupIndex=${zookeeper.log.maxbackupindex} 52 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 53 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 54 | 55 | # 56 | # Add TRACEFILE to rootLogger to get log file output 57 | # Log TRACE level and above messages to a log file 58 | # 59 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 60 | log4j.appender.TRACEFILE.Threshold=TRACE 61 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 62 | 63 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 64 | ### Notice we are including log4j's NDC here (%x) 65 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 66 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | - name: install zookeeper-server 2 | tags: package 3 | yum: name=zookeeper-server state=latest 4 | 5 | - name: create configuration directory 6 | tags: config 7 | file: path={{ etc_folder }}/zookeeper state=directory 8 | 9 | - name: setup alternatives link 10 | tags: config 11 | alternatives: name=zookeeper-conf link=/etc/zookeeper/conf path={{ etc_folder }}/zookeeper 12 | 13 | - name: install template configurations 14 | tags: config 15 | template: src=zoo.cfg.j2 dest={{ etc_folder }}/zookeeper/zoo.cfg 16 | 17 | - name: install files configurations 18 | tags: config 19 | copy: src={{ item }} dest=/etc/zookeeper/conf/{{ item }} 20 | with_items: 21 | - configuration.xsl 22 | - log4j.properties 23 | 24 | - name: install default configurations 25 | tags: config 26 | template: src=default/{{ item }}.j2 dest=/etc/default/{{ item }} 27 | with_items: 28 | - zookeeper 29 | 30 | - name: ensure data dir 31 | tags: config 32 | file: path={{ zookeeper_data_dir }} state=directory owner=zookeeper group=zookeeper 33 | 34 | - name: clean zookeeper data directory 35 | tags: init 36 | shell: rm -rf {{ zookeeper_data_dir }}/* 37 | when: destroy_data 38 | 39 | - name: init zookeeper directory 40 | tags: init 41 | command: service zookeeper-server init 42 | when: destroy_data 43 | 44 | - name: install myid 45 | tags: init 46 | template: src=myid.j2 dest={{ zookeeper_data_dir }}/myid 47 | when: groups['zookeepernodes']|count > 1 and destroy_data 48 | 49 | - name: start zookeeper 50 | tags: service 51 | service: name=zookeeper-server state=restarted enabled=yes 52 | -------------------------------------------------------------------------------- /roles/zookeeper/templates/default/zookeeper.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Command-line parameters to pass to the JVM 17 | # export SERVER_JVMFLAGS="" 18 | -------------------------------------------------------------------------------- /roles/zookeeper/templates/myid.j2: -------------------------------------------------------------------------------- 1 | {% if groups['zookeepernodes']|count > 1 %} 2 | {% for item in groups['zookeepernodes'] %} 3 | {% if ansible_hostname == item %} 4 | {{ loop.index }} 5 | {% endif %} 6 | {% endfor %} 7 | {% endif %} 8 | -------------------------------------------------------------------------------- /roles/zookeeper/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | maxClientCnxns=50 2 | # The number of milliseconds of each tick 3 | tickTime=2000 4 | # The number of ticks that the initial 5 | # synchronization phase can take 6 | initLimit=10 7 | # The number of ticks that can pass between 8 | # sending a request and getting an acknowledgement 9 | syncLimit=5 10 | # the directory where the snapshot is stored. 11 | dataDir={{zookeeper_data_dir}} 12 | # the port at which the clients will connect 13 | clientPort=2181 14 | # the directory where the transaction logs are stored. 15 | dataLogDir={{zookeeper_data_dir}} 16 | 17 | {% if groups['zookeepernodes']|count > 1 %} 18 | {% for item in groups['zookeepernodes'] %} 19 | server.{{ loop.index }}={{ item }}:2888:3888 20 | {% endfor %} 21 | {% endif %} 22 | -------------------------------------------------------------------------------- /site.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Check hosts file 3 | tags: check 4 | hosts: all 5 | roles: 6 | - check_config 7 | 8 | - name: Setup local environment 9 | tags: 10 | - common 11 | - clinit 12 | - interfaces 13 | - config 14 | hosts: all 15 | tasks: 16 | - name: create working directory 17 | local_action: file dest="{{ inventory_dir }}/workdir" state=directory 18 | run_once: true 19 | 20 | - name: Prepare hosts for cloudera hadoop cluster 21 | tags: common 22 | hosts: java 23 | roles: 24 | - common 25 | 26 | - name: Deploy zookeeper 27 | hosts: zookeepernodes 28 | tags: zookeepernodes 29 | roles: 30 | - zookeeper 31 | 32 | - name: Deploy hadoop configuration 33 | tags: 34 | - config 35 | - hadoop 36 | hosts: hadoop 37 | roles: 38 | - { role: hadoop, deploy: 'base' } 39 | 40 | - name: Deploy journal nodes 41 | tags: journalnodes 42 | hosts: journalnodes 43 | roles: 44 | - { role: hadoop, deploy: 'journalnodes' } 45 | 46 | - name: Deploy namenodes 47 | tags: namenodes 48 | hosts: namenodes 49 | roles: 50 | - { role: hadoop, deploy: 'namenodes' } 51 | - { role: hadoop, deploy: 'namenodes-fence' } 52 | 53 | - name: Deploy datanodes 54 | tags: datanodes 55 | hosts: datanodes 56 | roles: 57 | - { role: hadoop, deploy: 'datanodes' } 58 | 59 | - name: Test hdfs 60 | tags: test 61 | hosts: namenodes 62 | roles: 63 | - { role: hadoop, deploy: 'test-hdfs' } 64 | 65 | - name: Deploy yarn resource manager and job history server 66 | tags: yarnresourcemanager 67 | hosts: yarnresourcemanager 68 | roles: 69 | - { role: hadoop, deploy: 'resourcemanager' } 70 | 71 | - name: Test mapreduce 72 | tags: test 73 | hosts: yarnresourcemanager 74 | roles: 75 | - { role: hadoop, deploy: 'test-mapreduce' } 76 | 77 | - name: Deploy postgresql 78 | tags: postgresql 79 | hosts: postgresql 80 | roles: 81 | - postgresql 82 | 83 | - name: Deploy hive metastore 84 | tags: hivemetastore 85 | hosts: hivemetastore 86 | roles: 87 | - { role: hivemetastore, deploy: 'hive-server' } 88 | 89 | - name: Deploy hive client on datanodes 90 | tags: hive 91 | hosts: datanodes 92 | roles: 93 | - { role: hivemetastore, deploy: 'hive-client' } 94 | 95 | - name: Deploy impala state-store and catalog 96 | tags: impala 97 | hosts: impala-store-catalog 98 | roles: 99 | - { role: impala, deploy: 'impala' } 100 | 101 | - name: Deploy impala daemon on datanodes 102 | tags: impala 103 | hosts: datanodes 104 | roles: 105 | - { role: impala, deploy: 'impala-server' } 106 | 107 | - name: Deploy HBase 108 | tags: hbase 109 | hosts: hbasemaster 110 | roles: 111 | - { role: hbase, deploy: 'hbase-master' } 112 | 113 | - name: Deploy HBase regionservers on datanodes 114 | tags: hbase 115 | hosts: datanodes 116 | roles: 117 | - { role: hbase, deploy: 'regionserver' } 118 | 119 | - name: Deploy spark 120 | tags: spark 121 | hosts: spark 122 | roles: 123 | - spark 124 | 125 | - name: Deploy solr search 126 | tags: solr 127 | hosts: solr 128 | roles: 129 | - solr 130 | 131 | - name: Deploy oozie 132 | tags: oozie 133 | hosts: oozie 134 | roles: 135 | - oozie 136 | 137 | - name: Deploy kafka 138 | tags: kafka 139 | hosts: kafka 140 | roles: 141 | - kafka 142 | 143 | - name: Deploy Hue 144 | tags: hue 145 | hosts: hue 146 | roles: 147 | - hue 148 | 149 | - name: Deploy snmp monitoring 150 | tags: snmp 151 | hosts: java 152 | roles: 153 | - { role: snmp, when: enable_snmp } 154 | 155 | - name: Deploy syslog-ng monitoring 156 | tags: syslog 157 | hosts: java 158 | roles: 159 | - { role: syslog-ng, when: enable_syslog } 160 | 161 | - name: cluster 162 | tags: cluster 163 | hosts: yarnresourcemanager 164 | tasks: 165 | - debug: msg="{{ lookup('pipe', 'echo; clinit -S workdir/services.xml --nocolors tree;echo =')}}" 166 | run_once: true 167 | 168 | - name: dashboard 169 | tags: dashboard 170 | hosts: dashboard 171 | roles: 172 | - dashboard 173 | --------------------------------------------------------------------------------