├── group_vars
│   └── .gitkeep
├── .gitignore
├── playbooks
│   ├── group_vars
│   │   └── all
│   │       ├── hdfs_base
│   │       ├── hdfs_spark
│   │       ├── hdfs_yarn
│   │       ├── mapreduce_history
│   │       ├── namenode_bootstrapstandby
│   │       ├── namenode_format
│   │       ├── spark_user
│   │       ├── hbase_master
│   │       ├── spark_history
│   │       ├── hbase_regionserver
│   │       ├── hcatalog
│   │       ├── java11
│   │       ├── journalnode
│   │       ├── resourcemanager
│   │       ├── namenode
│   │       ├── slavenode
│   │       ├── f500.dumpall
│   │       ├── collect
│   │       ├── os
│   │       ├── hbase
│   │       ├── hive
│   │       ├── spark
│   │       ├── hadoop
│   │       ├── zookeeper_server
│   │       ├── site-defaults
│   │       ├── cgroups
│   │       └── base
│   ├── roles
│   │   ├── os
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       ├── thp.yml
│   │   │       ├── kernel.yml
│   │   │       └── limits.yml
│   │   ├── java11
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── install.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── client
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── install.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── hdfs_base
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── hdfs_yarn
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── namenode
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── jaas-hdfs.conf.j2
│   │   │       ├── hadoop-hdfs-zkfc.service.j2
│   │   │       ├── hadoop-hdfs-namenode.service.j2
│   │   │       ├── default_hadoop-hdfs-namenode.j2
│   │   │       ├── default_hadoop-hdfs-zkfc.j2
│   │   │       └── hdfs-balancer.sh.j2
│   │   ├── slavenode
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── hadoop-hdfs-datanode.service.j2
│   │   │       ├── hadoop-yarn-nodemanager.service.j2
│   │   │       ├── default_hadoop-yarn-nodemanager.j2
│   │   │       └── default_hadoop-hdfs-datanode.j2
│   │   ├── hbase_master
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── principal.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── regionservers.j2
│   │   │       ├── zk-jaas.conf.j2
│   │   │       ├── hbase-master.service.j2
│   │   │       ├── hadoop-metrics2-hbase.properties.j2
│   │   │       ├── hbase-master.j2
│   │   │       ├── hbase-service-test.rb.j2
│   │   │       ├── hbase-policy.xml.j2
│   │   │       ├── hbase-site.xml.j2
│   │   │       └── log4j.properties.j2
│   │   ├── hdfs_spark
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── journalnode
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── hadoop-hdfs-journalnode.service.j2
│   │   │       └── default_hadoop-hdfs-journalnode.j2
│   │   ├── hbase_regionserver
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── principal.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── regionservers.j2
│   │   │       ├── zk-jaas.conf.j2
│   │   │       ├── hbase-regionserver.service.j2
│   │   │       ├── hadoop-metrics2-hbase.properties.j2
│   │   │       ├── hbase-regionserver.j2
│   │   │       ├── hbase-policy.xml.j2
│   │   │       ├── hbase-site.xml.j2
│   │   │       └── log4j.properties.j2
│   │   ├── mapreduce_history
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── hadoop-mapreduce-historyserver.service.j2
│   │   │       └── default_hadoop-mapreduce-historyserver.j2
│   │   ├── namenode_format
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── resourcemanager
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── hadoop-yarn-resourcemanager.service.j2
│   │   │       └── default_hadoop-yarn-resourcemanager.j2
│   │   ├── timelineservice
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── hadoop-yarn-timelineserver.service.j2
│   │   │       └── default_hadoop-yarn-timelineserver.j2
│   │   ├── base
│   │   │   ├── templates
│   │   │   │   ├── zk-acl.txt.j2
│   │   │   │   ├── hosts.list.j2
│   │   │   │   ├── merge-keytabs.ktutil.j2
│   │   │   │   ├── hosts.exclude.j2
│   │   │   │   ├── container-executor.cfg.j2
│   │   │   │   ├── ssl-client.xml.j2
│   │   │   │   ├── ssl-server.xml.j2
│   │   │   │   ├── mapred-env.sh.j2
│   │   │   │   ├── hadoop-metrics2.properties.j2
│   │   │   │   ├── hadoop-metrics.properties.j2
│   │   │   │   ├── core-site.xml.j2
│   │   │   │   ├── mapred-site.xml.j2
│   │   │   │   └── capacity-scheduler.xml.j2
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── keytab.yml
│   │   │   │   ├── conf.yml
│   │   │   │   ├── principal.yml
│   │   │   │   └── kerberos.yml
│   │   │   ├── handlers
│   │   │   │   └── main.yml
│   │   │   └── defaults
│   │   │       └── main.yml
│   │   ├── hbase
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── install.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       └── command-wrapper.sh.j2
│   │   ├── hadoop
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── install.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       └── command-wrapper.sh.j2
│   │   ├── namenode_bootstrapstandby
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── spark_user
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── user.yml
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── spark_history
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   └── config.yml
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       ├── spark-history-server.service.j2
│   │   │       └── default_spark-history-server.j2
│   │   ├── collect
│   │   │   ├── tasks
│   │   │   │   └── main.yml
│   │   │   ├── vars
│   │   │   │   └── main.yml
│   │   │   ├── handlers
│   │   │   │   └── main.yml
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── README.md
│   │   │   └── meta
│   │   │       └── main.yml
│   │   ├── journalnode_server_createdir
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       └── conf.yml
│   │   ├── zookeeper_server
│   │   │   ├── templates
│   │   │   │   ├── myid.j2
│   │   │   │   ├── command-wrapper.sh.j2
│   │   │   │   ├── jaas.conf.j2
│   │   │   │   ├── zookeeper-server.service.j2
│   │   │   │   ├── zookeeper-env.sh.j2
│   │   │   │   ├── zoo.cfg.j2
│   │   │   │   └── log4j.properties.j2
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── install.yml
│   │   │   │   ├── principal.yml
│   │   │   │   └── config.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── defaults
│   │   │       └── main.yml
│   │   ├── cgroups
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── install.yml
│   │   │   │   ├── resource.yml
│   │   │   │   └── conf.yml
│   │   │   ├── templates
│   │   │   │   ├── cgroups.sh.j2
│   │   │   │   └── cgconfig.conf.j2
│   │   │   └── defaults
│   │   │       └── main.yml
│   │   ├── datanode_server_deletedata
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       └── delete.yml
│   │   ├── journalnode_server_deletedata
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       └── delete.yml
│   │   ├── kerberos-server
│   │   │   ├── templates
│   │   │   │   ├── kadm5.acl.j2
│   │   │   │   ├── kpropd.acl.j2
│   │   │   │   ├── kdc.conf.j2
│   │   │   │   └── krb5.conf.j2
│   │   │   ├── files
│   │   │   │   └── kprop_all
│   │   │   ├── handlers
│   │   │   │   └── main.yml
│   │   │   └── tasks
│   │   │       ├── host_principal.yml
│   │   │       ├── main.yml
│   │   │       ├── config_slave.yml
│   │   │       └── config_master.yml
│   │   ├── zookeeper_server_deletedata
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       └── delete.yml
│   │   ├── hcatalog
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── install.yml
│   │   │   │   └── config.yml
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   └── templates
│   │   │       └── hcat-env.sh.j2
│   │   ├── spark
│   │   │   ├── tasks
│   │   │   │   ├── main.yml
│   │   │   │   ├── config.yml
│   │   │   │   ├── principal.yml
│   │   │   │   └── install.yml
│   │   │   ├── files
│   │   │   │   └── env_keep_sparkhome
│   │   │   ├── meta
│   │   │   │   └── main.yml
│   │   │   ├── templates
│   │   │   │   ├── command-wrapper.sh.j2
│   │   │   │   ├── spark.conf.j2
│   │   │   │   ├── spark.sh.j2
│   │   │   │   ├── fairscheduler.xml.j2
│   │   │   │   ├── log4j.properties.j2
│   │   │   │   ├── spark-defaults.conf.j2
│   │   │   │   └── spark-env.sh.j2
│   │   │   └── defaults
│   │   │       └── main.yml
│   │   ├── f500.dumpall
│   │   │   ├── meta
│   │   │   │   ├── .galaxy_install_info
│   │   │   │   └── main.yml
│   │   │   ├── defaults
│   │   │   │   └── main.yml
│   │   │   ├── tasks
│   │   │   │   └── main.yml
│   │   │   ├── templates
│   │   │   │   └── dumpall.j2
│   │   │   └── README.md
│   │   ├── kerberos-client
│   │   │   ├── handlers
│   │   │   │   └── main.yml
│   │   │   ├── templates
│   │   │   │   └── krb5.conf.j2
│   │   │   └── tasks
│   │   │       ├── main.yml
│   │   │       └── host_principal.yml
│   │   └── site-defaults
│   │       └── defaults
│   │           └── main.yml
│   ├── conf_base.retry
│   ├── start_namenode.retry
│   ├── conf_base.yml
│   ├── install_hue.yml
│   ├── install_hive.yml
│   ├── install_httpfs.yml
│   ├── install_pig.yml
│   ├── install_client.yml
│   ├── install_hcatalog.yml
│   ├── install_spark.yml
│   ├── install_namenode.yml
│   ├── install_slavenode.yml
│   ├── install-base.yml
│   ├── install_journalnode.yml
│   ├── install_hbase_master.yml
│   ├── conf_tez.yml
│   ├── install_zookeeper.yml
│   ├── install_resourcemanager.yml
│   ├── install_spark_historyserver.yml
│   ├── install_timelineservice.yml
│   ├── install_hbase_regionserver.yml
│   ├── install_mapreduce_history.yml
│   ├── conf_hdfs_tez.yml
│   ├── conf_hdfs_base.yml
│   ├── conf_hdfs_spark.yml
│   ├── conf_hdfs_yarn.yml
│   ├── start_hue.yml
│   ├── install_kerberos.yml
│   ├── start_httpfs.yml
│   ├── start_zookeeper-server.yml
│   ├── format_namenode.yml
│   ├── start_journalnode.yml
│   ├── stop_spark_historyserver.yml
│   ├── conf_namenode_bootstrapstandby.yml
│   ├── sync_kdc.yml
│   ├── start_resourcemanager.yml
│   ├── stop_timelineservice.yml
│   ├── start_timelineservice.yml
│   ├── stop_hbase_master.yml
│   ├── start_mapreduce_historyserver.yml
│   ├── stop_datanode.yml
│   ├── stop_mapreduce_historyserver.yml
│   ├── stop_nodemanager.yml
│   ├── start_datanode.yml
│   ├── enter_hdfs_safemode.yml
│   ├── start_spark_historyserver.yml
│   ├── stop_journalnode.yml
│   ├── stop_resourcemanager.yml
│   ├── start_nodemanager.yml
│   ├── stop_zookeeper-server.yml
│   ├── start_hbase_regionserver.yml
│   ├── stop_hbase_regionserver.yml
│   ├── start_hbase_master.yml
│   ├── upgrade_namenode.yml
│   ├── start_namenode.yml
│   ├── stop_hcatalog.yml
│   ├── start_hcatalog.yml
│   └── stop_namenode.yml
├── images
│   ├── structure.png
│   └── list-hadoop-notebooks.png
├── LICENSE.txt
├── hosts.csv
└── scripts
    ├── loader.py
    └── nova.py
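The layout separates reusable Ansible roles (playbooks/roles/) from the thin install_* / conf_* / start_* / stop_* playbooks that apply them to inventory groups; hosts.csv and scripts/loader.py suggest the inventory itself is generated separately. As a minimal usage sketch, assuming an inventory file named hosts that defines the hadoop_all group (the file name is an assumption, not part of this tree):

    ansible-playbook -i hosts playbooks/install-base.yml
    ansible-playbook -i hosts playbooks/conf_base.yml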
/group_vars/.gitkeep:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.ipynb_checkpoints/
--------------------------------------------------------------------------------
/playbooks/group_vars/all/hdfs_base:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/group_vars/all/hdfs_spark:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/group_vars/all/hdfs_yarn:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/os/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/conf_base.retry:
--------------------------------------------------------------------------------
157.1.141.64
--------------------------------------------------------------------------------
/playbooks/group_vars/all/mapreduce_history:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/java11/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/client/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/hdfs_base/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/hdfs_yarn/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/namenode/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/slavenode/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/start_namenode.retry:
--------------------------------------------------------------------------------
157.1.141.64
--------------------------------------------------------------------------------
/playbooks/group_vars/all/namenode_bootstrapstandby:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/hbase_master/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/hdfs_spark/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/journalnode/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/hbase_regionserver/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/mapreduce_history/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/namenode_format/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/resourcemanager/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/timelineservice/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/group_vars/all/namenode_format:
--------------------------------------------------------------------------------
dfs_namenode_name_dirs:
--------------------------------------------------------------------------------
/playbooks/group_vars/all/spark_user:
--------------------------------------------------------------------------------
spark_user_id: '511'
--------------------------------------------------------------------------------
/playbooks/roles/base/templates/zk-acl.txt.j2:
--------------------------------------------------------------------------------
sasl:hdfs:rwcda
--------------------------------------------------------------------------------
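The ACL above grants the SASL-authenticated hdfs identity all five ZooKeeper permissions: read, write, create, delete, and admin. A sketch of applying the same ACL by hand from zkCli.sh, with /hadoop-ha as a purely hypothetical znode path:

    setAcl /hadoop-ha sasl:hdfs:rwcda
    getAcl /hadoop-ha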
/playbooks/roles/hbase/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
--------------------------------------------------------------------------------
/playbooks/roles/client/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
--------------------------------------------------------------------------------
/playbooks/roles/hadoop/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_base/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_spark/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_yarn/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/java11/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
--------------------------------------------------------------------------------
/playbooks/roles/journalnode/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/namenode/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/namenode_bootstrapstandby/defaults/main.yml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/playbooks/roles/slavenode/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/spark_user/tasks/main.yml:
--------------------------------------------------------------------------------
- include: user.yml
--------------------------------------------------------------------------------
/playbooks/group_vars/all/hbase_master:
--------------------------------------------------------------------------------
hbase_master_heap_size: '1000'
--------------------------------------------------------------------------------
/playbooks/roles/hbase_master/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/namenode_format/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/resourcemanager/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/spark_history/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/timelineservice/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/conf_base.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_all
  roles:
    - base
--------------------------------------------------------------------------------
/playbooks/group_vars/all/spark_history:
--------------------------------------------------------------------------------
spark_log_dir: '/var/log/spark'
--------------------------------------------------------------------------------
/playbooks/install_hue.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hue
  roles:
    - hue
--------------------------------------------------------------------------------
/playbooks/roles/hadoop/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: java11}
--------------------------------------------------------------------------------
/playbooks/roles/hbase/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: java11}
--------------------------------------------------------------------------------
/playbooks/roles/hbase_regionserver/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/mapreduce_history/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/spark_user/defaults/main.yml:
--------------------------------------------------------------------------------

# spark_user_id: '511'
--------------------------------------------------------------------------------
/playbooks/group_vars/all/hbase_regionserver:
--------------------------------------------------------------------------------
hbase_regionserver_heap_size: '1000'
--------------------------------------------------------------------------------
/playbooks/group_vars/all/hcatalog:
--------------------------------------------------------------------------------
hcatalog_log_dir: '/var/log/hive-hcatalog'
--------------------------------------------------------------------------------
/playbooks/group_vars/all/java11:
--------------------------------------------------------------------------------
java_home: '/usr/lib/jvm/java-11-openjdk-amd64'
--------------------------------------------------------------------------------
/playbooks/install_hive.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hive
  roles:
    - hive
--------------------------------------------------------------------------------
/playbooks/install_httpfs.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_httpfs
  roles:
    - httpfs
--------------------------------------------------------------------------------
/playbooks/install_pig.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_pig
  roles:
    - pig
--------------------------------------------------------------------------------
/playbooks/roles/collect/tasks/main.yml:
--------------------------------------------------------------------------------
---
# tasks file for roles/collect
--------------------------------------------------------------------------------
/playbooks/roles/collect/vars/main.yml:
--------------------------------------------------------------------------------
---
# vars file for roles/collect
--------------------------------------------------------------------------------
/playbooks/roles/journalnode_server_createdir/tasks/main.yml:
--------------------------------------------------------------------------------
- include: conf.yml
--------------------------------------------------------------------------------
/playbooks/roles/namenode_bootstrapstandby/tasks/main.yml:
--------------------------------------------------------------------------------
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/os/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server/templates/myid.j2:
--------------------------------------------------------------------------------
{{ zookeeper_server_id }}
--------------------------------------------------------------------------------
/playbooks/install_client.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_client
  roles:
    - client
--------------------------------------------------------------------------------
/playbooks/install_hcatalog.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hive
  roles:
    - hcatalog
--------------------------------------------------------------------------------
/playbooks/install_spark.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_spark
  roles:
    - spark
--------------------------------------------------------------------------------
/playbooks/roles/cgroups/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
--------------------------------------------------------------------------------
/playbooks/roles/collect/handlers/main.yml:
--------------------------------------------------------------------------------
---
# handlers file for roles/collect
--------------------------------------------------------------------------------
/playbooks/roles/datanode_server_deletedata/tasks/main.yml:
--------------------------------------------------------------------------------
- include: delete.yml
--------------------------------------------------------------------------------
/playbooks/roles/java11/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
--------------------------------------------------------------------------------
/playbooks/roles/journalnode_server_deletedata/tasks/main.yml:
--------------------------------------------------------------------------------
- include: delete.yml
--------------------------------------------------------------------------------
/playbooks/roles/kerberos-server/templates/kadm5.acl.j2:
--------------------------------------------------------------------------------
*/admin@{{ kerberos_realm }} *
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server_deletedata/tasks/main.yml:
--------------------------------------------------------------------------------
- include: delete.yml
--------------------------------------------------------------------------------
/playbooks/group_vars/all/journalnode:
--------------------------------------------------------------------------------
journalnode_heap_size: ''
journalnode_java_opts: ''
--------------------------------------------------------------------------------
/playbooks/roles/cgroups/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
- include: conf.yml
--------------------------------------------------------------------------------
/playbooks/roles/hcatalog/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/spark/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/roles/spark_history/defaults/main.yml:
--------------------------------------------------------------------------------

# spark_log_dir: '/var/log/spark'
--------------------------------------------------------------------------------
/playbooks/roles/spark_user/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
--------------------------------------------------------------------------------
/playbooks/install_namenode.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_namenode
  roles:
    - namenode
--------------------------------------------------------------------------------
/playbooks/install_slavenode.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_slavenode
  roles:
    - slavenode
--------------------------------------------------------------------------------
/playbooks/roles/hcatalog/defaults/main.yml:
--------------------------------------------------------------------------------

# hcatalog_log_dir: '/var/log/hive-hcatalog'
--------------------------------------------------------------------------------
/playbooks/install-base.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_all
  become: yes
  roles:
    - os
    - cgroups
--------------------------------------------------------------------------------
/playbooks/install_journalnode.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_journalnode
  roles:
    - journalnode
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server/tasks/main.yml:
--------------------------------------------------------------------------------
- include: install.yml
- include: config.yml
--------------------------------------------------------------------------------
/playbooks/group_vars/all/resourcemanager:
--------------------------------------------------------------------------------
resourcemanager_heap_size: ''
resourcemanager_java_opts: ''
--------------------------------------------------------------------------------
/playbooks/install_hbase_master.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hbase_master
  roles:
    - hbase_master
--------------------------------------------------------------------------------
/playbooks/roles/os/tasks/main.yml:
--------------------------------------------------------------------------------
- include: limits.yml
- include: thp.yml
- include: kernel.yml
--------------------------------------------------------------------------------
/playbooks/conf_tez.yml:
--------------------------------------------------------------------------------
## HDFS must be available (reads and writes succeed)

- hosts: hadoop_tez
  roles:
    - tez
--------------------------------------------------------------------------------
/playbooks/install_zookeeper.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_zookeeperserver
  roles:
    - zookeeper_server
--------------------------------------------------------------------------------
/playbooks/roles/base/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_base/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: java11}
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_spark/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/hdfs_yarn/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/spark_history/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: spark}
--------------------------------------------------------------------------------
/playbooks/install_resourcemanager.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_resourcemanager
  roles:
    - resourcemanager
--------------------------------------------------------------------------------
/playbooks/install_spark_historyserver.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_spark_history
  roles:
    - spark_history
--------------------------------------------------------------------------------
/playbooks/install_timelineservice.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_timelineservice
  roles:
    - timelineservice
--------------------------------------------------------------------------------
/playbooks/roles/f500.dumpall/meta/.galaxy_install_info:
--------------------------------------------------------------------------------
{install_date: 'Tue Feb 3 11:02:12 2015', version: v1.0.0}
--------------------------------------------------------------------------------
/images/structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NII-cloud-operation/Literate-computing-Hadoop/master/images/structure.png
--------------------------------------------------------------------------------
/playbooks/roles/namenode_format/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: namenode}
--------------------------------------------------------------------------------
/playbooks/install_hbase_regionserver.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hbase_regionserver
  roles:
    - hbase_regionserver
--------------------------------------------------------------------------------
/playbooks/install_mapreduce_history.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_mapreduce_historyserver
  roles:
    - mapreduce_history
--------------------------------------------------------------------------------
/playbooks/roles/base/templates/hosts.list.j2:
--------------------------------------------------------------------------------
{% for server in hadoop_slavenode_servers %}
{{ server }}
{% endfor %}
--------------------------------------------------------------------------------
/playbooks/roles/client/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: java11}
--------------------------------------------------------------------------------
/playbooks/roles/kerberos-server/files/kprop_all:
--------------------------------------------------------------------------------
#!/bin/bash

for slave in "$@"
do
    kprop "$slave"
done
--------------------------------------------------------------------------------
/playbooks/group_vars/all/namenode:
--------------------------------------------------------------------------------
namenode_heap_size: ''
namenode_java_opts: ''

zkfc_heap_size: ''
zkfc_java_opts: ''
--------------------------------------------------------------------------------
/playbooks/roles/base/tasks/main.yml:
--------------------------------------------------------------------------------
- include: conf.yml

- include: kerberos.yml
  when: kerberos_realm is defined
--------------------------------------------------------------------------------
/playbooks/roles/hbase_master/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hbase}
--------------------------------------------------------------------------------
/playbooks/roles/hbase_master/templates/regionservers.j2:
--------------------------------------------------------------------------------
{% for server in hadoop_slavenode_servers %}
{{ server }}
{% endfor %}
--------------------------------------------------------------------------------
/playbooks/roles/hcatalog/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: java11}
--------------------------------------------------------------------------------
/playbooks/roles/journalnode/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/namenode/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/namenode_bootstrapstandby/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: namenode}
--------------------------------------------------------------------------------
/playbooks/roles/slavenode/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/spark/files/env_keep_sparkhome:
--------------------------------------------------------------------------------
Defaults env_keep += "SPARK_HOME"
Defaults env_keep += "SPARK_PID_DIR"
--------------------------------------------------------------------------------
/playbooks/conf_hdfs_tez.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: HDFS must be available (reads and writes succeed)

- hosts: hadoop_namenode_primary
  roles:
    - hdfs_tez
--------------------------------------------------------------------------------
/playbooks/roles/resourcemanager/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/spark/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: java11}
  - {role: spark_user}
--------------------------------------------------------------------------------
/playbooks/roles/timelineservice/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: java11}
  - {role: base}
--------------------------------------------------------------------------------
/playbooks/conf_hdfs_base.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: HDFS must be available (reads and writes succeed)

- hosts: hadoop_namenode_primary
  roles:
    - hdfs_base
--------------------------------------------------------------------------------
/playbooks/conf_hdfs_spark.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: HDFS must be available (reads and writes succeed)

- hosts: hadoop_namenode_primary
  roles:
    - hdfs_spark
--------------------------------------------------------------------------------
/playbooks/conf_hdfs_yarn.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: HDFS must be available (reads and writes succeed)

- hosts: hadoop_namenode_primary
  roles:
    - hdfs_yarn
--------------------------------------------------------------------------------
/playbooks/roles/hbase_regionserver/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hbase}
--------------------------------------------------------------------------------
/playbooks/roles/hbase_regionserver/templates/regionservers.j2:
--------------------------------------------------------------------------------
{% for server in hadoop_slavenode_servers %}
{{ server }}
{% endfor %}
--------------------------------------------------------------------------------
/playbooks/roles/mapreduce_history/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - {role: site-defaults}
  - {role: base}
  - {role: hadoop}
--------------------------------------------------------------------------------
/images/list-hadoop-notebooks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NII-cloud-operation/Literate-computing-Hadoop/master/images/list-hadoop-notebooks.png
--------------------------------------------------------------------------------
/playbooks/group_vars/all/slavenode:
--------------------------------------------------------------------------------
datanode_heap_size: ''
datanode_java_opts: ''

nodemanager_heap_size: ''
nodemanager_java_opts: ''
--------------------------------------------------------------------------------
/playbooks/roles/spark_user/tasks/user.yml:
--------------------------------------------------------------------------------
- name: create_spark_user
  become: yes
  user: name=spark createhome=no uid={{ spark_user_id }}
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server/defaults/main.yml:
--------------------------------------------------------------------------------

# zookeeper_log_dir: '/var/log/zookeeper'
# zookeeper_pid_dir: '/var/run/zookeeper'
--------------------------------------------------------------------------------
/playbooks/start_hue.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_hue
  become: yes
  tasks:
    - name: start_hue_server
      service: name=hue state=started
--------------------------------------------------------------------------------
/playbooks/roles/hbase/templates/command-wrapper.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash

. /etc/hbase/conf/hbase-env.sh

{{ hbase_dir }}/current/bin/{{ item }} "$@"
--------------------------------------------------------------------------------
/playbooks/roles/spark/templates/command-wrapper.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash

. /etc/spark/conf/spark-env.sh

{{ spark_dir }}/current/bin/{{ item }} "$@"
--------------------------------------------------------------------------------
/playbooks/install_kerberos.yml:
--------------------------------------------------------------------------------
- hosts: kerberos_server
  roles:
    - kerberos-server

- hosts: hadoop_all
  roles:
    - kerberos-client
--------------------------------------------------------------------------------
/playbooks/roles/hadoop/templates/command-wrapper.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash

. /etc/hadoop/conf/hadoop-env.sh

{{ hadoop_dir }}/current/bin/{{ item }} "$@"
--------------------------------------------------------------------------------
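The hbase, spark, and hadoop command-wrapper.sh.j2 templates share one pattern: source the service's env script, then run the requested binary out of the current symlink. Because the binary name arrives as {{ item }}, each wrapper is presumably rendered once per command inside a loop. A sketch of what such a task could look like — the destination path and command list are assumptions for illustration, not taken from this repository:

    - name: install_command_wrappers
      become: yes
      template:
        src: command-wrapper.sh.j2        # the role's wrapper template
        dest: /usr/local/bin/{{ item }}   # assumed install location
        mode: '0755'
      with_items:
        - hadoop
        - hdfs
        - yarn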
/playbooks/roles/java11/tasks/install.yml:
--------------------------------------------------------------------------------
- name: install_openjdk
  become: yes
  apt:
    name:
      - openjdk-11-jdk
    state: present
--------------------------------------------------------------------------------
/playbooks/roles/spark/templates/spark.conf.j2:
--------------------------------------------------------------------------------
SPARK_HOME={{ spark_dir }}/current/
SPARK_PID_DIR={{ spark_pid_dir }}
PATH={{ spark_dir }}/current/bin:$PATH
--------------------------------------------------------------------------------
/playbooks/group_vars/all/f500.dumpall:
--------------------------------------------------------------------------------
---
dumpall_flat_mode: yes
dumpall_guest_destination: /tmp/ansible.all
# dumpall_host_destination: /somewhere/local/
--------------------------------------------------------------------------------
/playbooks/roles/kerberos-server/templates/kpropd.acl.j2:
--------------------------------------------------------------------------------
{% for kerberos_kdc_host in kerberos_kdc %}
host/{{ kerberos_kdc_host }}@{{ kerberos_realm }}
{% endfor %}
--------------------------------------------------------------------------------
/playbooks/start_httpfs.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_httpfs
  become: yes
  tasks:
    - name: start_hadoop-httpfs
      service: name=hadoop-httpfs state=started
--------------------------------------------------------------------------------
/playbooks/roles/hcatalog/templates/hcat-env.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash

export HADOOP_HOME=/usr/hdp/current/hadoop-client
export HIVE_HOME=/usr/hdp/current/hive-client
--------------------------------------------------------------------------------
/playbooks/roles/zookeeper_server/templates/command-wrapper.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash

. /etc/zookeeper/conf/zookeeper-env.sh

{{ zookeeper_dir }}/current/bin/{{ item }} "$@"
--------------------------------------------------------------------------------
/playbooks/group_vars/all/collect:
--------------------------------------------------------------------------------
dumpall_flat_mode: yes
dumpall_guest_destination: /tmp/ansible.all
dumpall_host_destination: /tmp/ansible_vars/{{ ansible_hostname }}
--------------------------------------------------------------------------------
/playbooks/roles/hcatalog/tasks/install.yml:
--------------------------------------------------------------------------------
- name: install_hcatalog_packages
  become: yes
  yum: name={{ item }} state=installed
  with_items:
    - hive-hcatalog
--------------------------------------------------------------------------------
/playbooks/roles/namenode_bootstrapstandby/tasks/config.yml:
--------------------------------------------------------------------------------
- name: BootstrapStandby_namenode
  become: yes
  become_user: hdfs
  shell: hdfs namenode -bootstrapstandby
--------------------------------------------------------------------------------
/playbooks/roles/spark/templates/spark.sh.j2:
--------------------------------------------------------------------------------
export SPARK_HOME={{ spark_dir }}/current/
export SPARK_PID_DIR={{ spark_pid_dir }}
export PATH={{ spark_dir }}/current/bin:$PATH
--------------------------------------------------------------------------------
/playbooks/roles/f500.dumpall/defaults/main.yml:
--------------------------------------------------------------------------------

# ---
# dumpall_flat_mode: yes
# dumpall_guest_destination: /tmp/ansible.all
# # dumpall_host_destination: /somewhere/local/
--------------------------------------------------------------------------------
/playbooks/start_zookeeper-server.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_zookeeperserver
  become: yes
  tasks:
    - name: start_zookeeper-server
      service: name=zookeeper-server state=started
--------------------------------------------------------------------------------
/playbooks/format_namenode.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: Installation of the NameNode (primary side) is complete
## Prerequisite 2: The NameNode (primary side) has not been started

- hosts: hadoop_namenode_primary
  roles:
    - namenode_format
--------------------------------------------------------------------------------
/playbooks/roles/base/templates/merge-keytabs.ktutil.j2:
--------------------------------------------------------------------------------
rkt {{ kerberos_princ_src_keytab_1 }}

rkt {{ kerberos_princ_src_keytab_2 }}

wkt {{ kerberos_princ_dest_keytab }}

quit
--------------------------------------------------------------------------------
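Once rendered, the script above drives ktutil non-interactively: rkt reads every key from a source keytab into memory, and wkt writes the accumulated set out as the merged keytab. A usage sketch, assuming the rendered script was written to /tmp/merge-keytabs.ktutil (the path is hypothetical):

    ktutil < /tmp/merge-keytabs.ktutil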
/playbooks/roles/zookeeper_server_deletedata/tasks/delete.yml:
--------------------------------------------------------------------------------
- name: delete zookeeper data directory
  file: path={{ item }} state=absent
  with_items:
    - "{{ zookeeper_data_dir }}"
--------------------------------------------------------------------------------
/playbooks/start_journalnode.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_journalnode
  become: yes
  tasks:
    - name: start_hadoop-hdfs-journalnode
      service: name=hadoop-hdfs-journalnode state=started
--------------------------------------------------------------------------------
/playbooks/roles/base/templates/hosts.exclude.j2:
--------------------------------------------------------------------------------
{% if datanode_decommission_nodes is defined %}
{% for server in datanode_decommission_nodes %}
{{ server }}
{% endfor %}
{% endif %}
--------------------------------------------------------------------------------
/playbooks/roles/collect/defaults/main.yml:
--------------------------------------------------------------------------------

# dumpall_flat_mode: yes
# dumpall_guest_destination: /tmp/ansible.all
# dumpall_host_destination: /tmp/ansible_vars/{{ ansible_hostname }}
#
--------------------------------------------------------------------------------
/playbooks/roles/spark/defaults/main.yml:
--------------------------------------------------------------------------------

# spark_md5sum: '303624669c204ca78188e647ea0d899e'
# spark_tmp_path: '/tmp'
# spark_history_server_port: '18080'
# spark_scheduler_mode: 'FAIR'
--------------------------------------------------------------------------------
/playbooks/stop_spark_historyserver.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_spark_history
  become: yes
  tasks:
    - name: stop_spark-history-server
      service: name=spark-history-server state=stopped
--------------------------------------------------------------------------------
/playbooks/conf_namenode_bootstrapstandby.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: The NameNode (primary side) is running
## Prerequisite 2: The NameNode (backup side) is not running

- hosts: hadoop_namenode
  roles:
    - namenode_bootstrapstandby
--------------------------------------------------------------------------------
/playbooks/roles/client/tasks/install.yml:
--------------------------------------------------------------------------------
- name: install_hadoop-client_packages
  become: yes
  yum: name={{ item }} state=installed
  with_items:
    - hadoop-client
    - hadoop-mapreduce
--------------------------------------------------------------------------------
/playbooks/sync_kdc.yml:
--------------------------------------------------------------------------------
- hosts: kerberos_server
  become: yes
  tasks:
    - command: kdb5_util dump /var/lib/krb5kdc/replica_datatrans
    - command: kprop {{ kerberos_kdc_slaves | join(' ') }}
--------------------------------------------------------------------------------
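The two tasks above mirror the manual MIT Kerberos master-to-replica propagation procedure: dump the KDC database, then push it with kprop, one replica host per invocation (which is what the kprop_all helper under roles/kerberos-server/files wraps). A rough shell equivalent on the primary KDC, with a placeholder hostname; depending on the krb5 build, kprop may also need -f to point at the dump file:

    kdb5_util dump /var/lib/krb5kdc/replica_datatrans
    kprop kdc2.example.org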
/playbooks/start_resourcemanager.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_resourcemanager
  become: yes
  tasks:
    - name: start_hadoop-yarn-resourcemanager
      service: name=hadoop-yarn-resourcemanager state=started
--------------------------------------------------------------------------------
/playbooks/stop_timelineservice.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_timelineservice
  become: yes
  tasks:
    - name: stop_hadoop-yarn-timelineserver
      service: name=hadoop-yarn-timelineserver state=stopped
--------------------------------------------------------------------------------
/playbooks/start_timelineservice.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_timelineservice
  become: yes
  tasks:
    - name: start_hadoop-yarn-timelineserver
      service: name=hadoop-yarn-timelineserver state=started
--------------------------------------------------------------------------------
/playbooks/stop_hbase_master.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: No queries or other processing are running on HBase

- hosts: hadoop_hbase_master
  become: yes
  tasks:
    - name: stop_hbase-master
      service: name=hbase-master state=stopped
--------------------------------------------------------------------------------
/playbooks/start_mapreduce_historyserver.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_mapreduce_historyserver
  become: yes
  tasks:
    - name: start_mapreduce-historyserver
      service: name=hadoop-mapreduce-historyserver state=started
--------------------------------------------------------------------------------
/playbooks/stop_datanode.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: The NameNodes and ZKFCs are stopped

- hosts: hadoop_slavenode
  become: yes
  tasks:
    - name: stop_hadoop-hdfs-datanode
      service: name=hadoop-hdfs-datanode state=stopped
--------------------------------------------------------------------------------
/playbooks/stop_mapreduce_historyserver.yml:
--------------------------------------------------------------------------------
- hosts: hadoop_mapreduce_historyserver
  become: yes
  tasks:
    - name: stop_mapreduce-historyserver
      service: name=hadoop-mapreduce-historyserver state=stopped
--------------------------------------------------------------------------------
/playbooks/roles/cgroups/tasks/install.yml:
--------------------------------------------------------------------------------
- name: install_cgroups
  become: yes
  apt:
    name:
      - cgroup-tools
      - cgroup-lite
    state: present
  register: result_install_cgroups
--------------------------------------------------------------------------------
/playbooks/roles/kerberos-client/handlers/main.yml:
--------------------------------------------------------------------------------
- name: Reload sshd
  become: yes
  service: name=sshd state=reloaded
  when: not (ansible_distribution == "Ubuntu" and ansible_distribution_version is version("24.04", ">="))
--------------------------------------------------------------------------------
/playbooks/stop_nodemanager.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: No applications are running on YARN

- hosts: hadoop_slavenode
  become: yes
  tasks:
    - name: stop_hadoop-yarn-nodemanager
      service: name=hadoop-yarn-nodemanager state=stopped
--------------------------------------------------------------------------------
/playbooks/start_datanode.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: At least one NameNode is up and in the Active state

- hosts: hadoop_slavenode
  become: yes
  tasks:
    - name: start_hadoop-hdfs-datanode
      service: name=hadoop-hdfs-datanode state=started
--------------------------------------------------------------------------------
/playbooks/enter_hdfs_safemode.yml:
--------------------------------------------------------------------------------
## Prerequisite 1: No HDFS access (reads or writes) is in progress

- hosts: hadoop_namenode
  become: yes
  become_user: hdfs
  tasks:
    - name: HDFS_enter_safemode
      shell: hdfs dfsadmin -safemode enter
--------------------------------------------------------------------------------
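There is no matching leave playbook in this tree; a counterpart, if one were wanted, would presumably swap in the opposite dfsadmin subcommand (a sketch, not part of this repository):

    hdfs dfsadmin -safemode leave
    hdfs dfsadmin -safemode get    # report the current state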
-------------------------------------------------------------------------------- /playbooks/enter_hdfs_safemode.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: No HDFS access (reads or writes) is taking place 2 | 3 | - hosts: hadoop_namenode 4 | become: yes 5 | become_user: hdfs 6 | tasks: 7 | - name: HDFS_enter_safemode 8 | shell: hdfs dfsadmin -safemode enter 9 | -------------------------------------------------------------------------------- /playbooks/start_spark_historyserver.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: HDFS is available (reads and writes succeed) 2 | 3 | - hosts: hadoop_spark_history 4 | become: yes 5 | tasks: 6 | - name: start_spark-history-server 7 | service: name=spark-history-server state=started 8 | -------------------------------------------------------------------------------- /playbooks/stop_journalnode.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: The NameNodes, ZKFCs, and DataNodes have all been stopped 2 | 3 | - hosts: hadoop_journalnode 4 | become: yes 5 | tasks: 6 | - name: stop_hadoop-hdfs-journalnode 7 | service: name=hadoop-hdfs-journalnode state=stopped 8 | -------------------------------------------------------------------------------- /playbooks/roles/journalnode_server_createdir/tasks/conf.yml: -------------------------------------------------------------------------------- 1 | - name: create_journalnode_data_dir 2 | become: yes 3 | file: path={{ dfs_journalnode_edits_dir }}/{{ dfs_nameservices }}/current/paxos state=directory owner=hdfs group=hdfs mode=755 4 | 5 | -------------------------------------------------------------------------------- /playbooks/stop_resourcemanager.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: No applications are running on YARN 2 | 3 | - hosts: hadoop_resourcemanager 4 | become: yes 5 | tasks: 6 | - name: stop_hadoop-yarn-resourcemanager 7 | service: name=hadoop-yarn-resourcemanager state=stopped 8 | -------------------------------------------------------------------------------- /playbooks/start_nodemanager.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: At least one ResourceManager is running and Active 2 | 3 | - hosts: hadoop_slavenode 4 | become: yes 5 | tasks: 6 | - name: start_hadoop-yarn-nodemanager 7 | service: name=hadoop-yarn-nodemanager state=started 8 | -------------------------------------------------------------------------------- /playbooks/stop_zookeeper-server.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: All services that use ZooKeeper (HDFS, YARN, HBase) have been stopped 2 | 3 | - hosts: hadoop_zookeeperserver 4 | become: yes 5 | tasks: 6 | - name: stop_zookeeper-server 7 | service: name=zookeeper-server state=stopped 8 | -------------------------------------------------------------------------------- /playbooks/group_vars/all/os: -------------------------------------------------------------------------------- 1 | nofile_soft_limit: '65536' 2 | nofile_hard_limit: '65536' 3 | core_soft_limit: 'unlimited' 4 | core_hard_limit: 'unlimited' 5 | 6 | ipv4_ip_local_port_range_min: '32768' 7 | ipv4_ip_local_port_range_max: '65000' 8 | net_core_somaxconn: '256' 9 | -------------------------------------------------------------------------------- /playbooks/start_hbase_regionserver.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: ZooKeeper is running 2 | ## Prerequisite 2: HDFS is available (reads and writes succeed) 3 | 4 | - hosts: hadoop_hbase_regionserver 5 | become: yes 6 | tasks: 7 | - name: start_hbase-regionserver 8 | service: name=hbase-regionserver state=started 9 |
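The ZooKeeper and HDFS prerequisites noted in these start playbooks can be spot-checked beforehand. A sketch of an ad-hoc pre-flight task, assuming the same hdfs user and client configuration used elsewhere in this repository:

    - hosts: hadoop_namenode
      become: yes
      become_user: hdfs
      tasks:
        - name: verify_hdfs_is_usable
          # Fails if safemode cannot be queried or the root directory is missing
          shell: hdfs dfsadmin -safemode get && hdfs dfs -test -d /
          changed_when: false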
-------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/zk-jaas.conf.j2: -------------------------------------------------------------------------------- 1 | Client { 2 | com.sun.security.auth.module.Krb5LoginModule required 3 | useKeyTab=true 4 | useTicketCache=false 5 | keyTab="/etc/hbase/conf/hbase.keytab" 6 | principal="hbase/{{ ansible_fqdn }}@{{ kerberos_realm }}"; 7 | }; -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/zk-jaas.conf.j2: -------------------------------------------------------------------------------- 1 | Client { 2 | com.sun.security.auth.module.Krb5LoginModule required 3 | useKeyTab=true 4 | useTicketCache=false 5 | keyTab="/etc/hbase/conf/hbase.keytab" 6 | principal="hbase/{{ ansible_fqdn }}@{{ kerberos_realm }}"; 7 | }; -------------------------------------------------------------------------------- /playbooks/stop_hbase_regionserver.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: The HBase Master has been stopped 2 | ## Do not stop the RegionServers first: doing so triggers region splits and similar activity 3 | 4 | - hosts: hadoop_hbase_regionserver 5 | become: yes 6 | tasks: 7 | - name: stop_hbase-regionserver 8 | service: name=hbase-regionserver state=stopped 9 | -------------------------------------------------------------------------------- /playbooks/roles/base/templates/container-executor.cfg.j2: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.local-dirs=/hadoop/tmp/hadoop-yarn/nm-local-dir 2 | yarn.nodemanager.linux-container-executor.group=yarn 3 | yarn.nodemanager.log-dirs=/var/log/hadoop-yarn/userlogs 4 | banned.users=hdfs,yarn,mapred,bin 5 | min.user.id=1000 6 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/handlers/main.yml: -------------------------------------------------------------------------------- 1 | - name: Restart kdc 2 | service: name=krb5kdc state=restarted 3 | 4 | - name: Restart kadmin 5 | service: name=kadmin state=restarted 6 | when: kdc_role == 'master' 7 | 8 | - name: Restart kprop 9 | service: name=kprop state=restarted 10 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/jaas-hdfs.conf.j2: -------------------------------------------------------------------------------- 1 | Client { 2 | com.sun.security.auth.module.Krb5LoginModule required 3 | useKeyTab=true 4 | keyTab="/etc/hadoop/conf/hdfs.keytab" 5 | storeKey=true 6 | useTicketCache=false 7 | principal="hdfs/{{ ansible_fqdn }}@{{ kerberos_realm }}"; 8 | }; 9 | -------------------------------------------------------------------------------- /playbooks/start_hbase_master.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: ZooKeeper is running 2 | ## Prerequisite 2: HDFS is available (reads and writes succeed) 3 | ## Prerequisite 3: All RegionServers the cluster will use are running 4 | 5 | - hosts: hadoop_hbase_master 6 | become: yes 7 | tasks: 8 | - name: start_hbase-master 9 | service: name=hbase-master state=started 10 |
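Whether the Master actually came up can be checked once the playbook finishes. A hand-run sketch, reusing the hbase_dir variable from this repository's group_vars (the -n non-interactive flag is from HBase 2.x):

    - hosts: hadoop_hbase_master
      become: yes
      tasks:
        - name: check_hbase_status
          shell: echo "status 'simple'" | {{ hbase_dir }}/current/bin/hbase shell -n
          changed_when: false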
-------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/templates/jaas.conf.j2: -------------------------------------------------------------------------------- 1 | Server { 2 | com.sun.security.auth.module.Krb5LoginModule required 3 | useKeyTab=true 4 | keyTab="/etc/zookeeper/conf/zookeeper.keytab" 5 | storeKey=true 6 | useTicketCache=false 7 | principal="zookeeper/{{ ansible_fqdn }}@{{ kerberos_realm }}"; 8 | }; 9 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Literate-computing-Hadoop (c) by National Institute of Informatics 2 | 3 | Literate-computing-Hadoop is licensed under a 4 | Creative Commons Attribution 4.0 International License. 5 | 6 | You should have received a copy of the license along with this 7 | work. If not, see <http://creativecommons.org/licenses/by/4.0/>. -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hbase-master.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache HBase master Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hbase-master 7 | User=hbase 8 | ExecStart={{ hbase_dir }}/current/bin/hbase --config /etc/hbase/conf master start 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/hadoop-hdfs-zkfc.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop HDFS ZKFC Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-hdfs-zkfc 7 | User=hdfs 8 | ExecStart={{ hadoop_dir }}/current/bin/hdfs --config /etc/hadoop/conf zkfc 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/upgrade_namenode.yml: -------------------------------------------------------------------------------- 1 | - hosts: hadoop_namenode 2 | become: yes 3 | tasks: 4 | - name: start_hadoop-hdfs-zkfc 5 | service: name=hadoop-hdfs-zkfc state=started 6 | when: hadoop_namenode_servers|length > 1 7 | 8 | - name: start_hadoop-hdfs-namenode 9 | command: /etc/init.d/hadoop-hdfs-namenode upgrade 10 | -------------------------------------------------------------------------------- /playbooks/roles/os/tasks/thp.yml: -------------------------------------------------------------------------------- 1 | - name: set_transparent_hugepage 2 | become: yes 3 | lineinfile: dest=/etc/rc.local line="echo never > /sys/kernel/mm/transparent_hugepage/defrag" regexp="echo .+ > /sys/kernel/mm/transparent_hugepage/defrag" create=yes 4 | 5 | - name: ensure_rc_local 6 | become: yes 7 | service: name=rc-local state=started enabled=yes 8 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/hadoop-hdfs-namenode.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop HDFS NameNode Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-hdfs-namenode 7 | User=hdfs 8 | ExecStart={{ hadoop_dir }}/current/bin/hdfs --config /etc/hadoop/conf namenode 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/group_vars/all/hbase: -------------------------------------------------------------------------------- 1 | hbase_release_url: 'https://ftp.jaist.ac.jp/pub/apache/hbase/2.4.2/hbase-2.4.2-bin.tar.gz' 2 | hbase_release_checksum: 'sha512:1C3399EC C0F4E6EA B990F5A1 A6B869B0 9850E8BD 94EABE5F CFAB62CC E83B0368 0CC00FE6 D601D5C0
A356A03D 08D56556 E214198E BE6333E8 19CCED57 E2FECD9B' 3 | hbase_package_filename: 'hbase-2.4.2' 4 | 5 | hbase_dir: '/opt/hbase' 6 | -------------------------------------------------------------------------------- /playbooks/roles/journalnode/templates/hadoop-hdfs-journalnode.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop JournalNode Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-hdfs-journalnode 7 | User=hdfs 8 | ExecStart={{ hadoop_dir }}/current/bin/hdfs --config /etc/hadoop/conf journalnode 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/hbase-regionserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache HBase regionserver Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hbase-regionserver 7 | User=hbase 8 | ExecStart={{ hbase_dir }}/current/bin/hbase --config /etc/hbase/conf regionserver start 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/timelineservice/templates/hadoop-yarn-timelineserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop YARN TimelineServer Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-yarn-timelineserver 7 | User=yarn 8 | ExecStart={{ hadoop_dir }}/current/bin/yarn --config /etc/hadoop/conf timelineserver 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/resourcemanager/templates/hadoop-yarn-resourcemanager.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop YARN ResourceManager Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-yarn-resourcemanager 7 | User=yarn 8 | ExecStart={{ hadoop_dir }}/current/bin/yarn --config /etc/hadoop/conf resourcemanager 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/templates/zookeeper-server.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Zookeeper Service 3 | 4 | [Service] 5 | Type=forking 6 | User=zookeeper 7 | ExecStart={{ zookeeper_dir }}/current/bin/zkServer.sh --config /etc/zookeeper/conf start 8 | ExecStop={{ zookeeper_dir }}/current/bin/zkServer.sh --config /etc/zookeeper/conf stop 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/mapreduce_history/templates/hadoop-mapreduce-historyserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop MapReduce HistoryServer Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-mapreduce-historyserver 7 | User=mapred 8 | ExecStart={{ hadoop_dir }}/current/bin/mapred --config /etc/hadoop/conf historyserver 9 | 10 | [Install] 11 | WantedBy=multi-user.target 
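All of these unit templates are rendered into /etc/systemd/system by the corresponding role's config.yml, which then triggers a systemd daemon-reload. Enabling the services at boot is not part of the tasks shown here; a sketch of doing so for one of them:

    - name: enable_historyserver_at_boot
      become: yes
      systemd:
        name: hadoop-mapreduce-historyserver
        enabled: yes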
-------------------------------------------------------------------------------- /playbooks/roles/spark/templates/fairscheduler.xml.j2: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <allocations> 3 | <pool name="production"> 4 | <schedulingMode>FAIR</schedulingMode> 5 | <weight>1</weight> 6 | <minShare>2</minShare> 7 | </pool> 8 | <pool name="test"> 9 | <schedulingMode>FIFO</schedulingMode> 10 | <weight>2</weight> 11 | <minShare>3</minShare> 12 | </pool> 13 | </allocations> 14 | -------------------------------------------------------------------------------- /playbooks/roles/spark_history/templates/spark-history-server.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Spark HistoryServer Service 3 | 4 | [Service] 5 | Type=forking 6 | EnvironmentFile=/etc/default/spark-history-server 7 | User=spark 8 | ExecStart={{ spark_dir }}/current/sbin/start-history-server.sh 9 | ExecStop={{ spark_dir }}/current/sbin/stop-history-server.sh 10 | 11 | [Install] 12 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/group_vars/all/hive: -------------------------------------------------------------------------------- 1 | hive_release_url: 'https://mirrors.ocf.berkeley.edu/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz' 2 | hive_release_checksum: 'sha512:612101b6fb6f98c3b4b2608a6f744a5052b0edbe2118dcba5a95019486e9bd5393705e058e571138dc909b91ee415f05c47f2912e065042fc5d5615211fcbd37' 3 | hive_package_filename: 'apache-hive-3.1.2-bin' 4 | 5 | hive_dir: '/opt/hive' 6 | 7 | hive_default_execution_engine: 'spark' 8 | -------------------------------------------------------------------------------- /playbooks/roles/os/tasks/kernel.yml: -------------------------------------------------------------------------------- 1 | - name: set_local_port_range 2 | become: yes 3 | lineinfile: dest=/etc/sysctl.conf line="net.ipv4.ip_local_port_range = {{ ipv4_ip_local_port_range_min }} {{ ipv4_ip_local_port_range_max }}" regexp="net.ipv4.ip_local_port_range" 4 | 5 | - name: set_somaxconn 6 | become: yes 7 | lineinfile: dest=/etc/sysctl.conf line="net.core.somaxconn = {{ net_core_somaxconn }}" regexp="net.core.somaxconn" 8 | -------------------------------------------------------------------------------- /playbooks/roles/slavenode/templates/hadoop-hdfs-datanode.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop DataNode Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-hdfs-datanode 7 | {% if kerberos_realm is defined %} 8 | User=root 9 | {% else %} 10 | User=hdfs 11 | {% endif %} 12 | ExecStart={{ hadoop_dir }}/current/bin/hdfs --config /etc/hadoop/conf datanode 13 | 14 | [Install] 15 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/roles/base/templates/ssl-client.xml.j2: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | 6 | <property> 7 | <name>ssl.client.truststore.location</name> 8 | <value>{{ https_truststore_path }}</value> 9 | </property> 10 | 11 | <property> 12 | <name>ssl.client.truststore.password</name> 13 | <value>{{ https_truststore_pass }}</value> 14 | </property> 15 | 16 | </configuration>
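The https_truststore_path and https_keystore_path variables these templates reference must point at existing JKS stores; none of the tasks shown here create them. One way to produce a per-host keystore and import its certificate into a truststore (aliases, file names, and passwords below are placeholders):

    keytool -genkeypair -alias "$(hostname -f)" -keyalg RSA -keysize 2048 \
      -dname "CN=$(hostname -f)" -keystore keystore.jks -storepass changeit
    keytool -exportcert -alias "$(hostname -f)" -keystore keystore.jks \
      -storepass changeit -file host.crt
    keytool -importcert -noprompt -alias "$(hostname -f)" -file host.crt \
      -keystore truststore.jks -storepass changeit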
-------------------------------------------------------------------------------- /playbooks/roles/namenode_format/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: check_format_state 2 | become: yes 3 | stat: path={{ item }}/current 4 | with_items: "{{ dfs_namenode_name_dirs }}" 5 | 6 | - name: FORMAT_namenode 7 | become: yes 8 | become_user: hdfs 9 | shell: hdfs namenode -format -force 10 | 11 | - name: FORMAT_ZKFC 12 | become: yes 13 | become_user: hdfs 14 | shell: hdfs zkfc -formatZK -force 15 | when: hadoop_namenode_servers|length > 1 16 | -------------------------------------------------------------------------------- /playbooks/roles/slavenode/templates/hadoop-yarn-nodemanager.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Apache Hadoop YARN NodeManager Service 3 | 4 | [Service] 5 | Type=simple 6 | EnvironmentFile=/etc/default/hadoop-yarn-nodemanager 7 | {% if kerberos_realm is defined %} 8 | User=root 9 | {% else %} 10 | User=yarn 11 | {% endif %} 12 | ExecStart={{ hadoop_dir }}/current/bin/yarn --config /etc/hadoop/conf nodemanager 13 | 14 | [Install] 15 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /playbooks/start_namenode.yml: -------------------------------------------------------------------------------- 1 | ## Prerequisite 1: The primary NameNode has been formatted and the backup NameNode has been synchronized (bootstrapped) 2 | ## Prerequisite 2: A majority of the ZooKeeper servers and a majority of the JournalNodes are running 3 | 4 | - hosts: hadoop_namenode 5 | become: yes 6 | tasks: 7 | - name: start_hadoop-hdfs-zkfc 8 | service: name=hadoop-hdfs-zkfc state=started 9 | when: hadoop_namenode_servers|length > 1 10 | 11 | - name: start_hadoop-hdfs-namenode 12 | service: name=hadoop-hdfs-namenode state=started 13 | -------------------------------------------------------------------------------- /playbooks/roles/f500.dumpall/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Dump all vars 4 | template: src=dumpall.j2 dest={{ dumpall_guest_destination }} 5 | 6 | - name: Retrieve dumpfile 7 | fetch: "src=/tmp/ansible.all dest={{ dumpall_host_destination }} flat={{ dumpall_flat_mode }}" 8 | when: "dumpall_host_destination is defined" 9 | 10 | - name: Remove dumpfile on the guest 11 | file: path={{ dumpall_guest_destination }} state=absent 12 | when: "dumpall_host_destination is defined" 13 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/templates/kdc.conf.j2: -------------------------------------------------------------------------------- 1 | [kdcdefaults] 2 | kdc_ports = 88 3 | kdc_tcp_ports = 88 4 | 5 | [realms] 6 | {{ kerberos_realm }} = { 7 | #master_key_type = aes256-cts 8 | acl_file = /etc/krb5kdc/kadm5.acl 9 | dict_file = /usr/share/dict/words 10 | admin_keytab = /var/lib/krb5kdc/kadm5.keytab 11 | supported_enctypes = aes128-cts:normal des3-hmac-sha1:normal arcfour-hmac:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal 12 | max_life = 30d 13 | } -------------------------------------------------------------------------------- /hosts.csv: -------------------------------------------------------------------------------- 1 | Cluster,Type,Name,Internal IP,Service IP,VCPUs,Memory(MiB),DFS Volumes,YARN VCPUs,YARN Total Memory(MB),Ganglia,ZooKeeper,JournalNode,NameNode,DataNode,ResourceManager,NodeManager,TimelineService,MapReduce HistoryServer,HBase Master,HBase RegionServer,Tez,Hive,Pig,Client,Spark,Spark HistoryServer,KDC Master,KDC Slave,Docker 2 | TestCluster01,testbaremetal,test01,,xxx.xxx.xxx.64,12,98304,1,6,32768,x,x,,x,x,x,x,x,x,x,x,x,x,x,x,x,x,,, 3 | TestCluster02,testbaremetal,test02,,xxx.xxx.xxx.65,12,98304,1,6,32768,x,x,,x,x,x,x,x,x,x,x,x,x,x,x,x,x,,, 4 | -------------------------------------------------------------------------------- /playbooks/roles/cgroups/tasks/resource.yml:
-------------------------------------------------------------------------------- 1 | - name: create_settings_cgroups_group_settings 2 | become: yes 3 | template: src=settings.sh.j2 dest={{ cgroups_scripts_dir }}/settings/group/{{ item.group }} owner=root group=root mode=755 4 | with_items: "{{ cgroups_group_settings }}" 5 | 6 | - name: create_settings_cgroups_process_settings 7 | become: yes 8 | template: src=settings.sh.j2 dest={{ cgroups_scripts_dir }}/settings/process/{{ item.process }} owner=root group=root mode=755 9 | with_items: "{{ cgroups_process_settings }}" 10 | 11 | -------------------------------------------------------------------------------- /playbooks/group_vars/all/spark: -------------------------------------------------------------------------------- 1 | spark_release_url: 'https://ftp.jaist.ac.jp/pub/apache/spark/spark-3.1.1/spark-3.1.1-bin-hadoop3.2.tgz' 2 | spark_release_checksum: 'sha512:E90B31E5 8F6D95A4 2900BA4D 288261D7 1F6C19FA 39C1CB71 862B792D 1B556494 1A320227 F6AB0E09 D946F16B 8C1969ED 2DEA2A36 9EC8F9D2 D7099189 234DE1BE' 3 | spark_package_filename: 'spark-3.1.1-bin-hadoop3.2' 4 | 5 | spark_dir: '/opt/spark' 6 | 7 | spark_md5sum: '303624669c204ca78188e647ea0d899e' 8 | spark_tmp_path: '/tmp' 9 | spark_history_server_port: '18080' 10 | spark_scheduler_mode: 'FAIR' 11 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-client/templates/krb5.conf.j2: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = {{ kerberos_realm }} 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | ticket_lifetime = 30d 11 | renew_lifetime = 30d 12 | forwardable = true 13 | 14 | [realms] 15 | {{ kerberos_realm }} = { 16 | {% for kerberos_kdc_host in kerberos_kdc %} 17 | kdc = {{ kerberos_kdc_host }} 18 | {% endfor %} 19 | admin_server = {{ kerberos_kadmin }} 20 | } 21 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/templates/krb5.conf.j2: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = {{ kerberos_realm }} 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | ticket_lifetime = 30d 11 | renew_lifetime = 30d 12 | forwardable = true 13 | 14 | [realms] 15 | {{ kerberos_realm }} = { 16 | {% for kerberos_kdc_host in kerberos_kdc %} 17 | kdc = {{ kerberos_kdc_host }} 18 | {% endfor %} 19 | admin_server = {{ kerberos_kadmin }} 20 | } 21 | -------------------------------------------------------------------------------- /playbooks/stop_hcatalog.yml: -------------------------------------------------------------------------------- 1 | - hosts: hadoop_hive 2 | become: yes 3 | become_user: hive 4 | tasks: 5 | - name: check_status_hcatalog 6 | shell: '[ -s /usr/hdp/current/hive-webhcat/var/log/hcat.pid ] && [ -x /proc/$(cat /usr/hdp/current/hive-webhcat/var/log/hcat.pid) ]' 7 | register: check_status_hcatalog 8 | changed_when: false 9 | failed_when: check_status_hcatalog.rc not in [0, 1] 10 | check_mode: no 11 | 12 | - name: stop_hcatalog_server 13 | shell: /usr/hdp/current/hive-webhcat/bin/hcat_server.sh stop 14 | when: check_status_hcatalog.rc == 0 15 | 
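A start counterpart can reuse the same pid-file probe with the condition inverted; a sketch built from stop_hcatalog.yml above, assuming hcat_server.sh start is the standard counterpart action:

    - hosts: hadoop_hive
      become: yes
      become_user: hive
      tasks:
        - name: check_status_hcatalog
          # rc == 0 means a live HCatalog process; rc == 1 means not running
          shell: '[ -s /usr/hdp/current/hive-webhcat/var/log/hcat.pid ] && [ -x /proc/$(cat /usr/hdp/current/hive-webhcat/var/log/hcat.pid) ]'
          register: check_status_hcatalog
          changed_when: false
          failed_when: check_status_hcatalog.rc not in [0, 1]
          check_mode: no

        - name: start_hcatalog_server
          shell: /usr/hdp/current/hive-webhcat/bin/hcat_server.sh start
          when: check_status_hcatalog.rc != 0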
-------------------------------------------------------------------------------- /playbooks/roles/base/tasks/keytab.yml: -------------------------------------------------------------------------------- 1 | - name: check keytab 2 | become: yes 3 | stat: path={{ kerberos_princ_dest_keytab }} 4 | register: kerberos_princ_dest_keytab_stat 5 | 6 | - name: prepare_script 7 | become: yes 8 | template: src=merge-keytabs.ktutil.j2 dest=/root/merge-keytabs.ktutil 9 | when: kerberos_princ_dest_keytab_stat.stat.exists == False 10 | 11 | - name: run_script 12 | become: yes 13 | shell: ktutil < /root/merge-keytabs.ktutil […] if len(row) > 2 and len(row[0]) > 0 and len(row[1]) > 0: 16 | m = dict(zip(header, row)) 17 | for col in header[5:10]: 18 | m[col] = int(m[col].strip()) if m[col].strip() != '-' else None 19 | for col in header[10:]: 20 | m[col] = (len(m[col].strip()) > 0) 21 | machines.append(m) 22 | return (header, machines) 23 | -------------------------------------------------------------------------------- /playbooks/roles/hcatalog/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_hcatalog_conf_dir 2 | become: yes 3 | file: path=/etc/hive-hcatalog/conf state=directory owner=root group=root mode=755 4 | 5 | - name: copy_conf_files 6 | become: yes 7 | template: src={{ item }}.j2 dest=/etc/hive-hcatalog/conf/{{ item }} owner=hive group=hadoop mode=755 8 | with_items: 9 | - hcat-env.sh 10 | 11 | - name: create_hcatalog_log_dir 12 | become: yes 13 | file: path={{ hcatalog_log_dir }} state=directory owner=hive group=hadoop mode=755 14 | 15 | - name: create_hcatalog_var_dir 16 | become: yes 17 | file: path=/usr/hdp/current/hive-webhcat/var state=directory owner=hive group=hive mode=775 18 | 19 | - name: create_symbolic_link_to_log_dir 20 | become: yes 21 | file: path=/usr/hdp/current/hive-webhcat/var/log state=link src={{ hcatalog_log_dir }} 22 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-client/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: Install kerberos 2 | become: yes 3 | apt: name=krb5-user 4 | 5 | - name: Modify krb5.conf 6 | become: yes 7 | template: src=krb5.conf.j2 dest=/etc/krb5.conf 8 | 9 | - name: Prepare keytab for admin 10 | become: yes 11 | copy: src="{{ kerberos_admin_keytab_file }}" dest="{{ kerberos_admin_keytab }}" owner={{ ansible_ssh_user }} group={{ ansible_ssh_user }} 12 | 13 | - include: host_principal.yml 14 | 15 | - name: Change ssh config 16 | become: yes 17 | lineinfile: dest=/etc/ssh/ssh_config line="GSSAPIDelegateCredentials yes" regexp="^\s+GSSAPIDelegateCredentials" 18 | notify: Reload sshd 19 | 20 | - name: Change sshd config 21 | become: yes 22 | lineinfile: dest=/etc/ssh/sshd_config line="GSSAPIAuthentication yes" regexp=^GSSAPIAuthentication 23 | notify: Reload sshd
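After this client role has run, the host principal and keytab it prepared can be exercised directly to confirm the KDC round trip works:

    # Should list host/<fqdn>, then obtain a ticket for it
    klist -k /etc/krb5.keytab
    kinit -kt /etc/krb5.keytab "host/$(hostname -f)"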
-------------------------------------------------------------------------------- /playbooks/roles/hdfs_spark/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: check_exists_hdfs_dir 2 | become: yes 3 | become_user: hdfs 4 | shell: hdfs dfs -ls {{ spark_event_log_dir }} 5 | register: check_exists_hdfs_dir 6 | changed_when: false 7 | failed_when: false 8 | check_mode: no 9 | 10 | - name: create_hdfs_dir 11 | become: yes 12 | become_user: hdfs 13 | shell: hdfs dfs -mkdir -p {{ spark_event_log_dir }} 14 | when: check_exists_hdfs_dir.rc != 0 15 | 16 | - name: change_owner_and_group_of_hdfs_dir 17 | become: yes 18 | become_user: hdfs 19 | shell: hdfs dfs -chown -R spark:spark {{ spark_event_log_dir }} 20 | when: check_exists_hdfs_dir.rc != 0 21 | 22 | - name: change_mode_of_hdfs_dir 23 | become: yes 24 | become_user: hdfs 25 | shell: hdfs dfs -chmod -R 1777 {{ spark_event_log_dir }} 26 | when: check_exists_hdfs_dir.rc != 0 27 | -------------------------------------------------------------------------------- /playbooks/roles/base/templates/ssl-server.xml.j2: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | 4 | <configuration> 5 | 6 | <property> 7 | <name>ssl.server.truststore.location</name> 8 | <value>{{ https_truststore_path }}</value> 9 | </property> 10 | 11 | <property> 12 | <name>ssl.server.truststore.password</name> 13 | <value>{{ https_truststore_pass }}</value> 14 | </property> 15 | 16 | <property> 17 | <name>ssl.server.keystore.location</name> 18 | <value>{{ https_keystore_path }}</value> 19 | </property> 20 | 21 | <property> 22 | <name>ssl.server.keystore.password</name> 23 | <value>{{ https_keystore_pass }}</value> 24 | </property> 25 | 26 | <property> 27 | <name>ssl.server.keystore.keypassword</name> 28 | <value>{{ https_privkey_pass }}</value> 29 | </property> 30 | 31 | </configuration> -------------------------------------------------------------------------------- /playbooks/roles/resourcemanager/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: prepare_systemd_service 2 | become: yes 3 | template: 4 | src: "{{ item }}.service.j2" 5 | dest: "/etc/systemd/system/{{ item }}.service" 6 | with_items: 7 | - hadoop-yarn-resourcemanager 8 | 9 | - name: reload_systemd_service 10 | become: yes 11 | systemd: 12 | daemon_reload: yes 13 | 14 | - name: create_yarn_pid_dir 15 | become: yes 16 | file: path={{ yarn_pid_dir }} state=directory mode=755 owner=yarn group=yarn 17 | 18 | - name: create_yarn_log_dir 19 | become: yes 20 | file: path={{ yarn_log_dir }} state=directory mode=755 owner=yarn group=hadoop 21 | 22 | - name: copy_default_file 23 | become: yes 24 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=root group=root 25 | with_items: 26 | - hadoop-yarn-resourcemanager 27 | -------------------------------------------------------------------------------- /playbooks/roles/mapreduce_history/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: prepare_systemd_service 2 | become: yes 3 | template: 4 | src: "{{ item }}.service.j2" 5 | dest: "/etc/systemd/system/{{ item }}.service" 6 | with_items: 7 | - hadoop-mapreduce-historyserver 8 | 9 | - name: reload_systemd_service 10 | become: yes 11 | systemd: 12 | daemon_reload: yes 13 | 14 | - name: create_pid_dir 15 | become: yes 16 | file: path={{ mapred_pid_dir }} state=directory mode=775 owner=mapred group=mapred 17 | 18 | - name: create_log_dir 19 | become: yes 20 | file: path={{ mapred_log_dir }} state=directory mode=755 owner=mapred group=hadoop 21 | 22 | - name: copy_default_file 23 | become: yes 24 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=root group=root 25 | with_items: 26 | - hadoop-mapreduce-historyserver 27 |
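Once the HistoryServer is configured and started, probing its web UI is a cheap smoke test. A sketch using Ansible's uri module, assuming the stock JobHistory web port 19888:

    - name: wait_for_historyserver_ui
      uri:
        url: "http://{{ inventory_hostname }}:19888/"
      register: jhs_ui
      until: jhs_ui.status == 200
      retries: 12
      delay: 5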
-------------------------------------------------------------------------------- /playbooks/roles/base/tasks/conf.yml: -------------------------------------------------------------------------------- 1 | - name: create_hadoop_conf_dir 2 | become: yes 3 | file: path={{ hadoop_conf_dir }} state=directory owner=root group=root mode=755 4 | 5 | - name: copy_conf_files 6 | become: yes 7 | template: src={{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} 8 | with_items: 9 | - core-site.xml 10 | - hdfs-site.xml 11 | - yarn-site.xml 12 | - mapred-site.xml 13 | - hadoop-env.sh 14 | - yarn-env.sh 15 | - mapred-env.sh 16 | - hadoop-metrics.properties 17 | - hadoop-metrics2.properties 18 | - log4j.properties 19 | - capacity-scheduler.xml 20 | - hosts.exclude 21 | - hosts.list 22 | 23 | - name: copy_secure_conf_files 24 | become: yes 25 | template: src={{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} 26 | with_items: 27 | - ssl-server.xml 28 | - ssl-client.xml 29 | - zk-acl.txt 30 | - container-executor.cfg 31 | when: kerberos_realm is defined 32 | -------------------------------------------------------------------------------- /playbooks/roles/spark/templates/spark-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # Default system properties included when running spark-submit. 2 | # This is useful for setting default environmental settings. 3 | 4 | {% if hadoop_namenode_servers is defined %} 5 | spark.eventLog.enabled true 6 | spark.eventLog.dir hdfs://{{ dfs_nameservices }}{{ spark_event_log_dir }} 7 | spark.history.fs.logDirectory hdfs://{{ dfs_nameservices }}{{ spark_event_log_dir }} 8 | {% endif %} 9 | {% if spark_history_server_host is defined %} 10 | spark.yarn.historyServer.address {{ spark_history_server_host }}:{{ spark_history_server_port }} 11 | {% endif %} 12 | spark.scheduler.mode {{ spark_scheduler_mode }} 13 | 14 | {% if kerberos_realm is defined %} 15 | spark.history.kerberos.enabled true 16 | spark.history.kerberos.principal spark/{{ ansible_fqdn }}@{{ kerberos_realm }} 17 | spark.history.kerberos.keytab /etc/spark/conf/spark.keytab 18 | {% endif %} 19 | -------------------------------------------------------------------------------- /playbooks/group_vars/all/site-defaults: -------------------------------------------------------------------------------- 1 | # Hadoop 2 | hadoop_conf_dir: '/etc/hadoop/conf' 3 | 4 | # HDFS 5 | dfs_nameservices: 'hdfs-cluster' 6 | hdfs_log_dir: '/var/log/hadoop-hdfs' 7 | hdfs_pid_dir: '/var/run/hadoop-hdfs' 8 | 9 | # YARN 10 | yarn_rm_cluster_id: 'yarn-cluster' 11 | yarn_log_dir: '/var/log/hadoop-yarn' 12 | yarn_pid_dir: '/var/run/hadoop-yarn' 13 | yarn_rm_webapp_port: '8088' 14 | 15 | # HBase 16 | hbase_log_dir: '/var/log/hbase' 17 | hbase_pid_dir: '/var/run/hbase' 18 | hbase_opts: '' 19 | hbase_server_gc_opts: '' 20 | hbase_client_gc_opts: '' 21 | 22 | # Spark 23 | spark_event_log_dir: '/var/log/spark' 24 | 25 | ## Depends on: spark_version, spark_md5sum 26 | spark_hadoop_version: 'hadoop2.4' 27 | 28 | spark_install_path: '/usr/local/lib' 29 | 30 | spark_pid_dir: '/var/run/spark' 31 | 32 | ## Depends on: spark_hadoop_version, spark_md5sum 33 | spark_version: '1.2.0' 34 | 35 | 36 | # Tez 37 | tez_hdfs_dir: '/apps/tez' 38 |
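These site-defaults are ordinary group_vars, so files in group_vars/all that sort after this one override its values; per-cluster tweaks can live alongside it rather than editing this file. A sketch (the override file name is hypothetical):

    # playbooks/group_vars/all/site-overrides
    yarn_rm_webapp_port: '8090'
    hdfs_log_dir: '/data/log/hadoop-hdfs'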
-------------------------------------------------------------------------------- /playbooks/roles/journalnode/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_journalnode_data_dir 2 | become: yes 3 | file: path={{ dfs_journalnode_edits_dir }} state=directory owner=hdfs group=hadoop mode=700 4 | 5 | - name: prepare_systemd_service 6 | become: yes 7 | template: 8 | src: hadoop-hdfs-journalnode.service.j2 9 | dest: /etc/systemd/system/hadoop-hdfs-journalnode.service 10 | 11 | - name: reload_systemd_service 12 | become: yes 13 | systemd: 14 | daemon_reload: yes 15 | 16 | - name: prepare_pid_dir 17 | become: yes 18 | file: 19 | path: "{{ hdfs_pid_dir }}" 20 | state: directory 21 | owner: hdfs 22 | 23 | - name: create_hdfs_log_dir 24 | become: yes 25 | file: path={{ hdfs_log_dir }} state=directory mode=755 owner=hdfs group=hadoop 26 | 27 | - name: copy_journalnode_defaults_file 28 | become: yes 29 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=hdfs group=hdfs 30 | with_items: 31 | - hadoop-hdfs-journalnode 32 | 33 | -------------------------------------------------------------------------------- /playbooks/roles/mapreduce_history/templates/default_hadoop-mapreduce-historyserver.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | HADOOP_MAPRED_PID_DIR={{ mapred_pid_dir }} 16 | HADOOP_MAPRED_LOG_DIR={{ mapred_log_dir }} 17 | -------------------------------------------------------------------------------- /playbooks/roles/f500.dumpall/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: "Jasper N. Brouwer, Ramon de la Fuente" 4 | description: Dumps all the ansible variables into a file for inspection 5 | company: Future500 6 | license: LGPL 7 | min_ansible_version: 1.4 8 | platforms: 9 | - name: EL 10 | versions: 11 | - all 12 | - name: GenericUNIX 13 | versions: 14 | - all 15 | - name: Fedora 16 | versions: 17 | - all 18 | - name: opensuse 19 | versions: 20 | - all 21 | - name: GenericBSD 22 | versions: 23 | - all 24 | - name: FreeBSD 25 | versions: 26 | - all 27 | - name: Ubuntu 28 | versions: 29 | - all 30 | - name: SLES 31 | versions: 32 | - all 33 | - name: GenericLinux 34 | versions: 35 | - all 36 | - name: Debian 37 | versions: 38 | - all 39 | categories: 40 | - system 41 | - development 42 | dependencies: [] 43 | -------------------------------------------------------------------------------- /playbooks/roles/timelineservice/templates/default_hadoop-yarn-timelineserver.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | YARN_IDENT_STRING=yarn 16 | YARN_PID_DIR={{ yarn_pid_dir }} 17 | YARN_LOG_DIR={{ yarn_log_dir }} 18 | YARN_CONF_DIR={{ hadoop_conf_dir }} 19 | -------------------------------------------------------------------------------- /playbooks/roles/timelineservice/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: prepare_systemd_service 2 | become: yes 3 | template: 4 | src: "{{ item }}.service.j2" 5 | dest: "/etc/systemd/system/{{ item }}.service" 6 | with_items: 7 | - hadoop-yarn-timelineserver 8 | 9 | - name: reload_systemd_service 10 | become: yes 11 | systemd: 12 | daemon_reload: yes 13 | 14 | - name: create_yarn_pid_dir 15 | become: yes 16 | file: path={{ yarn_pid_dir }} state=directory mode=755 owner=yarn group=yarn 17 | 18 | - name: create_yarn_log_dir 19 | become: yes 20 | file: path={{ yarn_log_dir }} state=directory mode=755 owner=yarn group=hadoop 21 | 22 | - name: create_timeline_dir 23 | become: yes 24 | file: path={{ hadoop_tmp_dir[0:hadoop_tmp_dir.rfind('/')] }}/hadoop-yarn/yarn/timeline state=directory mode=755 owner=yarn group=yarn 25 | 26 | - name: copy_default_file 27 | become: yes 28 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=root group=root 29 | with_items: 30 | - hadoop-yarn-timelineserver 31 | -------------------------------------------------------------------------------- /playbooks/roles/spark/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: set_SPARK_HOME 2 | become: yes 3 | template: src=spark.sh.j2 dest=/etc/profile.d/spark.sh owner=root group=root mode=644 4 | 5 | - name: set_environment 6 | become: yes 7 | template: src=spark.conf.j2 dest=/etc/environment.d/50-spark.conf owner=root group=root mode=644 8 | 9 | - name: create_spark_conf_dir 10 | become: yes 11 | file: path=/etc/spark/conf/ state=directory owner=root group=root mode=755 12 | 13 | - include: principal.yml 14 | vars: { kerberos_princ_username: spark, kerberos_princ_keytab_path: /etc/spark/conf/spark.keytab, kerberos_princ_keytab_owner: spark } 15 | when: kerberos_realm is defined 16 | 17 | - name: copy_spark_conf_files 18 | become: yes 19 | template: src={{ item }}.j2 dest=/etc/spark/conf/{{ item }} owner=root group=root mode=755 20 | with_items: 21 | - spark-defaults.conf 22 | - spark-env.sh 23 | - metrics.properties 24 | - log4j.properties 25 | - fairscheduler.xml 26 | 27 | - name: copy_sudoers_conf_of_SPARK_HOME 28 | become: yes 29 | copy: src=env_keep_sparkhome dest=/etc/sudoers.d/ owner=root mode=440 30 | -------------------------------------------------------------------------------- /playbooks/roles/spark_history/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_spark_log_dir 2 | become: yes 3 | file: path={{ spark_log_dir }} state=directory owner=spark group=hadoop mode=755 4 | 5 | - name: remove_spark_log_dir 6 | become: yes 7 | file: path={{ spark_dir }}/logs state=absent 8 | 9 | - name: create_symbolic_link_to_spark_log_dir 10 | become: yes 11 | file: path={{ spark_dir }}/logs state=link src={{ spark_log_dir }} 12 | register: md5sum_spark 13 | 14 | - name: create_spark_pid_dir 15 | become: yes 16 | file: path={{ spark_pid_dir }} state=directory owner=spark group=spark mode=775 17 | 18 | - name: prepare_systemd_service 19 | become: yes 20 | template: 21 | src: "{{ item }}.service.j2" 22 | dest: "/etc/systemd/system/{{ item }}.service" 23 | with_items: 24 | - 
spark-history-server 25 | 26 | - name: reload_systemd_service 27 | become: yes 28 | systemd: 29 | daemon_reload: yes 30 | 31 | - name: copy_spark_history_defaults_file 32 | become: yes 33 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=spark group=spark 34 | with_items: 35 | - spark-history-server -------------------------------------------------------------------------------- /playbooks/roles/slavenode/templates/default_hadoop-yarn-nodemanager.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | YARN_IDENT_STRING=yarn 16 | YARN_PID_DIR={{ yarn_pid_dir }} 17 | YARN_LOG_DIR={{ yarn_log_dir }} 18 | YARN_CONF_DIR={{ hadoop_conf_dir }} 19 | 20 | YARN_NODEMANAGER_HEAPSIZE="{{ nodemanager_heap_size }}" 21 | YARN_NODEMANAGER_OPTS="{{ nodemanager_java_opts }}" 22 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-client/tasks/host_principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: host_principals 5 | changed_when: host_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: host_principals is defined and host_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k /etc/krb5.keytab 15 | register: host_keytabs 16 | changed_when: host_keytabs.rc != 0 or host_keytabs.stdout.find('host/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytabs 20 | become: yes 21 | command: kadmin -q "ktadd -k /etc/krb5.keytab host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: host_keytabs is defined and host_keytabs.rc != 0 or host_keytabs.stdout.find('host/' + ansible_fqdn) == -1 23 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/tasks/host_principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: host_principals 5 | changed_when: host_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | 
become: yes 9 | command: kadmin -q "addprinc -randkey host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: host_principals is defined and host_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k /etc/krb5.keytab 15 | register: host_keytabs 16 | changed_when: host_keytabs.rc != 0 or host_keytabs.stdout.find('host/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytabs 20 | become: yes 21 | command: kadmin -q "ktadd -k /etc/krb5.keytab host/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: host_keytabs is defined and host_keytabs.rc != 0 or host_keytabs.stdout.find('host/' + ansible_fqdn) == -1 23 | -------------------------------------------------------------------------------- /playbooks/roles/resourcemanager/templates/default_hadoop-yarn-resourcemanager.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | YARN_IDENT_STRING=yarn 16 | YARN_PID_DIR={{ yarn_pid_dir }} 17 | YARN_LOG_DIR={{ yarn_log_dir }} 18 | YARN_CONF_DIR={{ hadoop_conf_dir }} 19 | 20 | YARN_RESOURCEMANAGER_HEAPSIZE="{{ resourcemanager_heap_size }}" 21 | YARN_RESOURCEMANAGER_OPTS="{{ resourcemanager_java_opts }}" 22 | 23 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hadoop-metrics2-hbase.properties.j2: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period 6 | *.period=10 7 | 8 | # Below are some examples of sinks that could be used 9 | # to monitor different hbase daemons.
10 | 11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink 12 | # hbase.sink.file-all.filename=all.metrics 13 | 14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink 15 | # hbase.sink.file0.context=hmaster 16 | # hbase.sink.file0.filename=master.metrics 17 | 18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink 19 | # hbase.sink.file1.context=thrift-one 20 | # hbase.sink.file1.filename=thrift-one.metrics 21 | 22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink 23 | # hbase.sink.file2.context=thrift-two 24 | # hbase.sink.file2.filename=thrift-one.metrics 25 | 26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink 27 | # hbase.sink.file3.context=rest 28 | # hbase.sink.file3.filename=rest.metrics 29 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/hadoop-metrics2-hbase.properties.j2: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | *.sink.file*.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period 6 | *.period=10 7 | 8 | # Below are some examples of sinks that could be used 9 | # to monitor different hbase daemons. 10 | 11 | # hbase.sink.file-all.class=org.apache.hadoop.metrics2.sink.FileSink 12 | # hbase.sink.file-all.filename=all.metrics 13 | 14 | # hbase.sink.file0.class=org.apache.hadoop.metrics2.sink.FileSink 15 | # hbase.sink.file0.context=hmaster 16 | # hbase.sink.file0.filename=master.metrics 17 | 18 | # hbase.sink.file1.class=org.apache.hadoop.metrics2.sink.FileSink 19 | # hbase.sink.file1.context=thrift-one 20 | # hbase.sink.file1.filename=thrift-one.metrics 21 | 22 | # hbase.sink.file2.class=org.apache.hadoop.metrics2.sink.FileSink 23 | # hbase.sink.file2.context=thrift-two 24 | # hbase.sink.file2.filename=thrift-one.metrics 25 | 26 | # hbase.sink.file3.class=org.apache.hadoop.metrics2.sink.FileSink 27 | # hbase.sink.file3.context=rest 28 | # hbase.sink.file3.filename=rest.metrics 29 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: Install kerberos 2 | become: yes 3 | apt: name=krb5-kdc,krb5-admin-server,krb5-config,krb5-kpropd state=latest 4 | 5 | - name: Modify krb5.conf 6 | become: yes 7 | template: src=krb5.conf.j2 dest=/etc/krb5.conf 8 | notify: 9 | - Restart kdc 10 | - Restart kadmin 11 | 12 | - name: Modify kdc.conf 13 | become: yes 14 | template: src=kdc.conf.j2 dest=/var/lib/krb5kdc/kdc.conf 15 | notify: 16 | - Restart kdc 17 | - Restart kadmin 18 | 19 | - name: Modify kadm5.acl 20 | become: yes 21 | template: src=kadm5.acl.j2 dest=/etc/krb5kdc/kadm5.acl 22 | notify: 23 | - Restart kdc 24 | - Restart kadmin 25 | when: kdc_role == 'master' 26 | 27 | - include: config_master.yml 28 | when: kdc_role == 'master' 29 | 30 | - include: config_slave.yml 31 | when: kdc_role == 'slave' 32 | 33 | - name: Prepare script 34 | become: yes 35 | copy: src=kprop_all dest=/usr/local/bin/kprop_all mode=0755 36 | 37 | - name: Prepare crontab 38 | become: yes 39 | cron: name="sync kdc" minute=10 hour=3 user=root 40 | job="kdb5_util dump /var/lib/krb5kdc/replica_datatrans; /usr/local/bin/kprop_all {{ kerberos_kdc_slaves | join(' ') }}" 41 | when: kdc_role 
== 'master' -------------------------------------------------------------------------------- /playbooks/roles/base/tasks/principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: target_principals 5 | changed_when: target_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: target_principals is defined and target_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k {{ kerberos_princ_keytab_path }} 15 | register: target_keytabs 16 | changed_when: target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytab 20 | become: yes 21 | command: kadmin -q "ktadd -k {{ kerberos_princ_keytab_path }} {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: target_keytabs is defined and target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 23 | 24 | -------------------------------------------------------------------------------- /playbooks/roles/site-defaults/defaults/main.yml: -------------------------------------------------------------------------------- 1 | 2 | # # Java 3 | # jdk8_version: '31' 4 | # 5 | # # HDP 6 | # hdp_version: '2.2.0.0-2041' 7 | # 8 | # # Hadoop 9 | # hadoop_conf_dir: '/etc/hadoop/conf' 10 | # 11 | # # HDFS 12 | # dfs_nameservices: 'hdfs-cluster' 13 | # hdfs_log_dir: '/var/log/hadoop-hdfs' 14 | # hdfs_pid_dir: '/var/run/hadoop-hdfs' 15 | # 16 | # # YARN 17 | # yarn_rm_cluster_id: 'yarn-cluster' 18 | # yarn_log_dir: '/var/log/hadoop-yarn' 19 | # yarn_pid_dir: '/var/run/hadoop-yarn' 20 | # yarn_rm_webapp_port: '8088' 21 | # 22 | # # HBase 23 | # hbase_log_dir: '/var/log/hbase' 24 | # hbase_pid_dir: '/var/run/hbase' 25 | # 26 | # # Presto 27 | # presto_cluster_id: 'prestocluster' 28 | # presto_data_dir: '' 29 | # presto_hive_metastore_port: '9083' 30 | # presto_http_port: '9090' 31 | # presto_install_path: '/usr/local/lib' 32 | # presto_log_dir: '/var/log/presto' 33 | # presto_md5sum: 'c0bc337c2fc15daa284bd8063cb31738' 34 | # presto_pid_dir: '/var/run/presto' 35 | # presto_task_max_memory: '1GB' 36 | # presto_tmp_path: '/tmp' 37 | # presto_version: '0.89' 38 | # 39 | # # Spark 40 | # spark_event_log_dir: '/var/log/spark' 41 | # spark_hadoop_version: 'hadoop2.4' 42 | # spark_install_path: '/usr/local/lib' 43 | # spark_pid_dir: '/var/run/spark' 44 | # spark_version: '1.2.0' 45 | # 46 | # # Tez 47 | # tez_hdfs_dir: '/apps/tez' 48 | -------------------------------------------------------------------------------- /playbooks/roles/journalnode/templates/default_hadoop-hdfs-journalnode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | HADOOP_HOME={{ hadoop_dir }}/current/ 16 | HADOOP_PID_DIR={{ hdfs_pid_dir }} 17 | HADOOP_LOG_DIR={{ hdfs_log_dir }} 18 | HADOOP_NAMENODE_USER=hdfs 19 | HADOOP_SECONDARYNAMENODE_USER=hdfs 20 | HADOOP_DATANODE_USER=hdfs 21 | HADOOP_IDENT_STRING=hdfs 22 | 23 | HADOOP_HEAPSIZE="{{ journalnode_heap_size }}" 24 | HADOOP_JOURNALNODE_OPTS="{{ journalnode_java_opts }}" 25 | 26 | # HADOOP_SECURE_DN_USER=hdfs 27 | # HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 28 | # HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 29 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/default_hadoop-hdfs-namenode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | HADOOP_HOME={{ hadoop_dir }}/current/ 16 | HADOOP_PID_DIR={{ hdfs_pid_dir }} 17 | HADOOP_LOG_DIR={{ hdfs_log_dir }} 18 | HADOOP_NAMENODE_USER=hdfs 19 | HADOOP_SECONDARYNAMENODE_USER=hdfs 20 | HADOOP_DATANODE_USER=hdfs 21 | HADOOP_IDENT_STRING=hdfs 22 | 23 | HADOOP_HEAPSIZE="{{ namenode_heap_size }}" 24 | HADOOP_NAMENODE_OPTS="{{ namenode_java_opts }}" 25 | 26 | # export HADOOP_SECURE_DN_USER=hdfs 27 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 28 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 29 | -------------------------------------------------------------------------------- /playbooks/roles/f500.dumpall/README.md: -------------------------------------------------------------------------------- 1 | Dumpall 2 | ======== 3 | 4 | Dump all remote variables and optionally copy the result to a destination on the host. 5 | 6 | Based on the excellent work by [Lester Wade](https://coderwall.com/p/13lh6w)! 7 | 8 | Requirements 9 | ------------ 10 | 11 | None. 
12 | 13 | Role Variables 14 | -------------- 15 | 16 | dumpall_flat_mode: yes 17 | dumpall_guest_destination: /tmp/ansible.all 18 | dumpall_host_destination: /somewhere/local/ 19 | 20 | Example Playbook 21 | ------------------------- 22 | 23 | Example without a host_destination will result in a dumpfile /tmp/ansible.all on the guest: 24 | 25 | - hosts: servers 26 | roles: 27 | - f500.dumpall 28 | 29 | Example with a host_destination will result in a dumpfile /examine/ansible.all on the host machine: 30 | (the dumpfile on the guest is removed) 31 | 32 | - hosts: servers 33 | roles: 34 | - { role: f500.dumpall, dumpall_host_destination: /examine/ } 35 | 36 | If you also set the flat_mode to false, the local filename will be the entire path of the guest_destination, 37 | prepended by the hostname of the current play. See the Ansible _fetch_ module for more information. 38 | 39 | License 40 | ------- 41 | 42 | LGPL 43 | 44 | Author Information 45 | ------------------ 46 | 47 | Jasper N. Brouwer, jasper@nerdsweide.nl 48 | 49 | Ramon de la Fuente, ramon@delafuente.nl 50 | -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: prepare_package_dir 2 | become: yes 3 | file: 4 | path: "{{ zookeeper_dir }}/package" 5 | state: directory 6 | 7 | - name: download_zookeeper_package 8 | become: yes 9 | get_url: 10 | url: "{{ zookeeper_release_url }}" 11 | dest: "{{ zookeeper_dir }}/package/{{ zookeeper_package_filename }}.tar.gz" 12 | checksum: "{{ zookeeper_release_checksum }}" 13 | 14 | - name: extract_zookeeper_package 15 | become: yes 16 | unarchive: 17 | src: "{{ zookeeper_dir }}/package/{{ zookeeper_package_filename }}.tar.gz" 18 | dest: "{{ zookeeper_dir }}" 19 | remote_src: yes 20 | 21 | - name: link_latest_package 22 | become: yes 23 | file: 24 | path: "{{ zookeeper_dir }}/current" 25 | src: "{{ zookeeper_dir }}/{{ zookeeper_package_filename }}" 26 | state: link 27 | 28 | - name: ensure_hadoop_group 29 | become: yes 30 | group: 31 | name: hadoop 32 | 33 | - name: create_zookeeper_user 34 | become: yes 35 | user: 36 | name: zookeeper 37 | groups: 38 | - hadoop 39 | 40 | - name: prepare_commands 41 | become: yes 42 | template: 43 | src: "command-wrapper.sh.j2" 44 | dest: "/usr/local/bin/{{ item }}" 45 | mode: '755' 46 | owner: root 47 | group: root 48 | with_items: 49 | - zkCli.sh 50 | - zkTxnLogToolkit.sh 51 | - zkCleanup.sh 52 | - zkServer-initialize.sh 53 | - zkSnapShotToolkit.sh 54 | - zkEnv.sh 55 | - zkServer.sh -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/tasks/config_slave.yml: -------------------------------------------------------------------------------- 1 | - name: Prepare keytab for admin 2 | become: yes 3 | copy: src="{{ kerberos_admin_keytab_file }}" dest="{{ kerberos_admin_keytab }}" owner={{ ansible_ssh_user }} group={{ ansible_ssh_user }} 4 | 5 | - include: host_principal.yml 6 | 7 | - name: Modify kpropd.acl 8 | become: yes 9 | template: src=kpropd.acl.j2 dest=/etc/krb5kdc/kpropd.acl 10 | notify: Restart kprop 11 | 12 | - name: Start kprop 13 | become: yes 14 | service: name=krb5-kpropd state=started enabled=yes 15 | 16 | - name: Check admin principal 17 | become: yes 18 | command: kadmin.local -q "getprinc {{ kerberos_admin_principal }}" 19 | register: localadmin_principals 20 | changed_when: localadmin_principals.rc != 0 or 
localadmin_principals.stderr.find('Principal does not exist') != -1 21 | ignore_errors: yes 22 | 23 | - name: Propagate Master DB 24 | become: yes 25 | shell: "kdb5_util dump /var/lib/krb5kdc/replica_datatrans; kprop {{ inventory_hostname }}" 26 | delegate_to: "{{ kerberos_kdc_master }}" 27 | when: localadmin_principals is defined and (localadmin_principals.rc != 0 or localadmin_principals.stderr.find('Principal does not exist') != -1) 28 | 29 | - name: Prepare master key 30 | become: yes 31 | copy: src="{{ kerberos_mkey_file }}" dest="/etc/krb5kdc/.k5.{{ kerberos_realm }}" owner=root group=root mode=0600 32 | 33 | - name: Start kdc 34 | become: yes 35 | service: name=krb5-kdc state=started enabled=yes 36 | -------------------------------------------------------------------------------- /playbooks/roles/slavenode/templates/default_hadoop-hdfs-datanode.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | HADOOP_HOME={{ hadoop_dir }}/current/ 16 | HADOOP_PID_DIR={{ hdfs_pid_dir }} 17 | HADOOP_LOG_DIR={{ hdfs_log_dir }} 18 | HADOOP_NAMENODE_USER=hdfs 19 | HADOOP_SECONDARYNAMENODE_USER=hdfs 20 | HADOOP_DATANODE_USER=hdfs 21 | HADOOP_IDENT_STRING=hdfs 22 | 23 | HADOOP_HEAPSIZE='{{ datanode_heap_size }}' 24 | HADOOP_DATANODE_OPTS='{{ datanode_java_opts }}' 25 | 26 | {% if kerberos_realm is defined %} 27 | HADOOP_SECURE_DN_USER=hdfs 28 | HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 29 | HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 30 | JSVC_HOME=/usr/lib/bigtop-utils/ 31 | {% endif %} 32 | -------------------------------------------------------------------------------- /playbooks/roles/collect/README.md: -------------------------------------------------------------------------------- 1 | Role Name 2 | ========= 3 | 4 | A brief description of the role goes here. 5 | 6 | Requirements 7 | ------------ 8 | 9 | Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. 10 | 11 | Role Variables 12 | -------------- 13 | 14 | A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. 
15 | 16 | Dependencies 17 | ------------ 18 | 19 | A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. 20 | 21 | Example Playbook 22 | ---------------- 23 | 24 | Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: 25 | 26 | - hosts: servers 27 | roles: 28 | - { role: username.rolename, x: 42 } 29 | 30 | License 31 | ------- 32 | 33 | BSD 34 | 35 | Author Information 36 | ------------------ 37 | 38 | An optional section for the role authors to include contact information, or a website (HTML is not allowed). 39 | -------------------------------------------------------------------------------- /playbooks/roles/spark/tasks/principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: target_principals 5 | changed_when: target_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: target_principals is defined and target_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k {{ kerberos_princ_keytab_path }} 15 | register: target_keytabs 16 | changed_when: target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytab 20 | become: yes 21 | command: kadmin -q "ktadd -k {{ kerberos_princ_keytab_path }} {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: target_keytabs is defined and target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 23 | 24 | - name: Modify permissions of keytab 25 | become: yes 26 | file: path={{ kerberos_princ_keytab_path }} owner={{ kerberos_princ_keytab_owner }} group=hadoop mode=0400 27 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/tasks/principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: target_principals 5 | changed_when: target_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: target_principals is defined and target_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k {{ kerberos_princ_keytab_path }} 15 | register: target_keytabs 16 | changed_when: target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytab 20 | 
become: yes 21 | command: kadmin -q "ktadd -k {{ kerberos_princ_keytab_path }} {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: target_keytabs is defined and target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 23 | 24 | - name: Modify permissions of keytab 25 | become: yes 26 | file: path={{ kerberos_princ_keytab_path }} owner={{ kerberos_princ_keytab_owner }} group=hadoop mode=0400 27 | -------------------------------------------------------------------------------- /playbooks/roles/spark/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: prepare_package_dir 2 | become: yes 3 | file: 4 | path: "{{ spark_dir }}/package" 5 | state: directory 6 | 7 | - name: download_spark_package 8 | become: yes 9 | get_url: 10 | url: "{{ spark_release_url }}" 11 | dest: "{{ spark_dir }}/package/{{ spark_package_filename }}.tgz" 12 | checksum: "{{ spark_release_checksum }}" 13 | 14 | - name: extract_spark_package 15 | become: yes 16 | unarchive: 17 | src: "{{ spark_dir }}/package/{{ spark_package_filename }}.tgz" 18 | dest: "{{ spark_dir }}" 19 | remote_src: yes 20 | 21 | - name: link_latest_package 22 | become: yes 23 | file: 24 | path: "{{ spark_dir }}/current" 25 | src: "{{ spark_dir }}/{{ spark_package_filename }}" 26 | state: link 27 | 28 | - name: ensure_spark_group 29 | become: yes 30 | group: 31 | name: spark 32 | 33 | - name: create_spark_user 34 | become: yes 35 | user: 36 | name: "{{ item.name }}" 37 | groups: 38 | - spark 39 | with_items: 40 | - { name: spark } 41 | 42 | - name: prepare_commands 43 | become: yes 44 | template: 45 | src: "command-wrapper.sh.j2" 46 | dest: "/usr/local/bin/{{ item }}" 47 | mode: '755' 48 | owner: root 49 | group: root 50 | with_items: 51 | - beeline 52 | - run-example 53 | - sparkR 54 | - load-spark-env.sh 55 | - spark-sql 56 | - docker-image-tool.sh 57 | - pyspark 58 | - spark-class 59 | - find-spark-home 60 | - spark-shell 61 | - spark-submit -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/tasks/principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: target_principals 5 | changed_when: target_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: target_principals is defined and target_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k {{ kerberos_princ_keytab_path }} 15 | register: target_keytabs 16 | changed_when: target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytab 20 | become: yes 21 | command: kadmin -q "ktadd -k {{ kerberos_princ_keytab_path }} {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: target_keytabs is defined and target_keytabs.rc != 0 or 
target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 23 | 24 | - name: Modify permissions of keytab 25 | become: yes 26 | file: path={{ kerberos_princ_keytab_path }} owner={{ kerberos_princ_keytab_owner }} group=hadoop mode=0400 27 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hbase-master.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | JAVA_HOME={{ java_home }} 17 | 18 | HBASE_HOME={{ hbase_dir }}/current/ 19 | HBASE_CONF_DIR="/etc/hbase/conf" 20 | 21 | HBASE_PID_DIR={{ hbase_pid_dir }} 22 | HBASE_LOG_DIR={{ hbase_log_dir }} 23 | HBASE_IDENT_STRING=hbase 24 | 25 | HADOOP_HOME={{ hadoop_dir }}/current/ 26 | HADOOP_CONF_DIR=/etc/hadoop/conf 27 | 28 | # Up to 100 region servers can be run on a single host by specifying offsets 29 | # here or as CLI args when using init scripts. Each offset identifies an 30 | # instance and is used to determine the network ports it uses. Each instance 31 | # will have have its own log and pid files. 32 | # 33 | # REGIONSERVER_OFFSETS="1 2 3" 34 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/hbase-regionserver.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | JAVA_HOME={{ java_home }} 17 | 18 | HBASE_HOME={{ hbase_dir }}/current/ 19 | HBASE_CONF_DIR="/etc/hbase/conf" 20 | 21 | HBASE_PID_DIR={{ hbase_pid_dir }} 22 | HBASE_LOG_DIR={{ hbase_log_dir }} 23 | HBASE_IDENT_STRING=hbase 24 | 25 | HADOOP_HOME={{ hadoop_dir }}/current/ 26 | HADOOP_CONF_DIR=/etc/hadoop/conf 27 | 28 | # Up to 100 region servers can be run on a single host by specifying offsets 29 | # here or as CLI args when using init scripts. 
Each offset identifies an 30 | # instance and is used to determine the network ports it uses. Each instance 31 | # will have have its own log and pid files. 32 | # 33 | # REGIONSERVER_OFFSETS="1 2 3" 34 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/default_hadoop-hdfs-zkfc.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | HADOOP_HOME={{ hadoop_dir }}/current/ 16 | HADOOP_PID_DIR={{ hdfs_pid_dir }} 17 | HADOOP_LOG_DIR={{ hdfs_log_dir }} 18 | HADOOP_NAMENODE_USER=hdfs 19 | HADOOP_SECONDARYNAMENODE_USER=hdfs 20 | HADOOP_DATANODE_USER=hdfs 21 | HADOOP_IDENT_STRING=hdfs 22 | 23 | HADOOP_HEAPSIZE="{{ zkfc_heap_size }}" 24 | {% if kerberos_realm is defined %} 25 | ZKFC_JAAS_CONF='-Djava.security.auth.login.config=/etc/hadoop/conf/jaas-hdfs.conf ' 26 | {% endif %} 27 | HADOOP_ZKFC_OPTS=${ZKFC_JAAS_CONF}"{{ zkfc_java_opts }}" 28 | 29 | # export HADOOP_SECURE_DN_USER=hdfs 30 | # export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop-hdfs 31 | # export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop-hdfs 32 | -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/tasks/principal.yml: -------------------------------------------------------------------------------- 1 | - name: Check principal 2 | become: yes 3 | command: kadmin -q "getprinc {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 4 | register: target_principals 5 | changed_when: target_principals.stderr.find('Principal does not exist') != -1 6 | 7 | - name: Add principal 8 | become: yes 9 | command: kadmin -q "addprinc -randkey {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 10 | when: target_principals is defined and target_principals.stderr.find('Principal does not exist') != -1 11 | 12 | - name: Check keytab 13 | become: yes 14 | command: klist -k {{ kerberos_princ_keytab_path }} 15 | register: target_keytabs 16 | changed_when: target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 17 | ignore_errors: yes 18 | 19 | - name: Prepare keytab 20 | become: yes 21 | command: kadmin -q "ktadd -k {{ kerberos_princ_keytab_path }} {{ kerberos_princ_username }}/{{ ansible_fqdn }}" -p {{ kerberos_admin_principal }} -k -t {{ kerberos_admin_keytab }} 22 | when: target_keytabs is defined and target_keytabs.rc != 0 or target_keytabs.stdout.find(kerberos_princ_username + '/' + ansible_fqdn) == -1 23 | 24 | - name: Modify permissions of keytab 25 | become: yes 26 | file: path={{ 
kerberos_princ_keytab_path }} owner={{ kerberos_princ_keytab_owner }} group={{ kerberos_princ_keytab_owner }} mode=0400 27 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_hbase_conf_dir 2 | become: yes 3 | file: path=/etc/hbase/conf state=directory owner=root group=root mode=755 4 | 5 | - include: principal.yml 6 | vars: { kerberos_princ_username: hbase, kerberos_princ_keytab_path: /etc/hbase/conf/hbase.keytab, kerberos_princ_keytab_owner: hbase } 7 | when: kerberos_realm is defined 8 | 9 | - name: copy_jaas_conf 10 | become: yes 11 | template: src=zk-jaas.conf.j2 dest=/etc/hbase/conf/zk-jaas.conf 12 | when: kerberos_realm is defined 13 | 14 | - name: copy_hbase_conf_files 15 | become: yes 16 | template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }} owner=hbase group=hadoop mode=755 17 | with_items: 18 | - hbase-site.xml 19 | - hbase-env.sh 20 | - hbase-policy.xml 21 | - regionservers 22 | - log4j.properties 23 | - hadoop-metrics2-hbase.properties 24 | 25 | - name: prepare_systemd_service 26 | become: yes 27 | template: 28 | src: "{{ item }}.service.j2" 29 | dest: "/etc/systemd/system/{{ item }}.service" 30 | with_items: 31 | - hbase-regionserver 32 | 33 | - name: reload_systemd_service 34 | become: yes 35 | systemd: 36 | daemon_reload: yes 37 | 38 | - name: copy_hbase_default_file 39 | become: yes 40 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} owner=hbase group=hadoop mode=755 41 | with_items: 42 | - hbase-regionserver 43 | 44 | - name: create_hbase_log_dir 45 | become: yes 46 | file: path={{ hbase_log_dir }} state=directory mode=755 owner=hbase group=hadoop 47 | -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/templates/zookeeper-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
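15 | 16 | # Illustrative sketch of the JVM flags this template produces, assuming 17 | # zookeeper_heap_size='1024', a Kerberos realm defined, and 18 | # zookeeper_jvm_flags='-XX:+UseG1GC' (all hypothetical values): 19 | # 20 | #   SERVER_JVMFLAGS='-Xmx1024m -Djava.security.auth.login.config=/etc/zookeeper/conf/jaas.conf -XX:+UseG1GC'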
21 | 22 | export JAVA_HOME={{ java_home }} 23 | # SERVER_JVMFLAGS is assembled at the bottom of this file from the heap size and JAAS settings. 24 | export ZOO_LOG_DIR={{ zookeeper_log_dir }} 25 | export ZOOPIDFILE={{ zookeeper_pid_dir }}/zookeeper_server.pid 26 | export DAEMON_SCRIPT=/usr/hdp/{{ hdp_version }}/zookeeper/bin/zookeeper-server 27 | 28 | {% if zookeeper_heap_size != '' %} 29 | export ZOOKEEPER_HEAPSIZE='-Xmx{{ zookeeper_heap_size }}m ' 30 | {% endif %} 31 | {% if kerberos_realm is defined %} 32 | export ZOOKEEPER_JAAS_CONF='-Djava.security.auth.login.config=/etc/zookeeper/conf/jaas.conf ' 33 | {% endif %} 34 | export SERVER_JVMFLAGS=${ZOOKEEPER_HEAPSIZE}${ZOOKEEPER_JAAS_CONF}{{ zookeeper_jvm_flags }} 35 | -------------------------------------------------------------------------------- /playbooks/roles/base/tasks/kerberos.yml: -------------------------------------------------------------------------------- 1 | - include: principal.yml 2 | vars: { kerberos_princ_username: hdfs, kerberos_princ_keytab_path: /etc/hadoop/conf/hdfs-unmerged.keytab, kerberos_princ_keytab_owner: hdfs } 3 | 4 | - include: principal.yml 5 | vars: { kerberos_princ_username: mapred, kerberos_princ_keytab_path: /etc/hadoop/conf/mapred-unmerged.keytab, kerberos_princ_keytab_owner: mapred } 6 | 7 | - include: principal.yml 8 | vars: { kerberos_princ_username: yarn, kerberos_princ_keytab_path: /etc/hadoop/conf/yarn-unmerged.keytab, kerberos_princ_keytab_owner: yarn } 9 | 10 | - include: principal.yml 11 | vars: { kerberos_princ_username: HTTP, kerberos_princ_keytab_path: /etc/hadoop/conf/http.keytab, kerberos_princ_keytab_owner: hdfs } 12 | 13 | - include: keytab.yml 14 | vars: { kerberos_princ_src_keytab_1: /etc/hadoop/conf/hdfs-unmerged.keytab, kerberos_princ_src_keytab_2: /etc/hadoop/conf/http.keytab, kerberos_princ_dest_keytab: /etc/hadoop/conf/hdfs.keytab, kerberos_princ_keytab_owner: hdfs } 15 | 16 | - include: keytab.yml 17 | vars: { kerberos_princ_src_keytab_1: /etc/hadoop/conf/mapred-unmerged.keytab, kerberos_princ_src_keytab_2: /etc/hadoop/conf/http.keytab, kerberos_princ_dest_keytab: /etc/hadoop/conf/mapred.keytab, kerberos_princ_keytab_owner: mapred } 18 | 19 | - include: keytab.yml 20 | vars: { kerberos_princ_src_keytab_1: /etc/hadoop/conf/yarn-unmerged.keytab, kerberos_princ_src_keytab_2: /etc/hadoop/conf/http.keytab, kerberos_princ_dest_keytab: /etc/hadoop/conf/yarn.keytab, kerberos_princ_keytab_owner: yarn } 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /playbooks/roles/slavenode/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_datanode_data_dir 2 | become: yes 3 | file: path={{ item }} state=directory owner=hdfs group=hadoop mode=700 4 | with_items: "{{ dfs_datanode_data_dirs }}" 5 | 6 | - name: prepare_systemd_service 7 | become: yes 8 | template: 9 | src: "{{ item }}.service.j2" 10 | dest: "/etc/systemd/system/{{ item }}.service" 11 | with_items: 12 | - hadoop-hdfs-datanode 13 | - hadoop-yarn-nodemanager 14 | 15 | - name: reload_systemd_service 16 | become: yes 17 | systemd: 18 | daemon_reload: yes 19 | 20 | - name: create_holder_directory_for_hadoop_tmp_dir 21 | become: yes 22 | file: path={{ hadoop_tmp_dir[0:hadoop_tmp_dir.rfind('/')] }} state=directory mode=755 owner=yarn group=yarn 23 | 24 | - name: create_hdfs_pid_dir 25 | become: yes 26 | file: path={{ hdfs_pid_dir }} state=directory mode=755 owner=hdfs group=hdfs 27 | 28 | - name: create_yarn_pid_dir 29 | become: yes 30 | file: path={{ yarn_pid_dir }} 
state=directory mode=755 owner=yarn group=yarn 31 | 32 | - name: create_hdfs_log_dir 33 | become: yes 34 | file: path={{ hdfs_log_dir }} state=directory mode=755 owner=hdfs group=hadoop 35 | 36 | - name: create_yarn_log_dir 37 | become: yes 38 | file: path={{ yarn_log_dir }} state=directory mode=755 owner=yarn group=hadoop 39 | 40 | - name: copy_defaults_file 41 | become: yes 42 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=root group=root 43 | with_items: 44 | - hadoop-hdfs-datanode 45 | - hadoop-yarn-nodemanager 46 | -------------------------------------------------------------------------------- /playbooks/roles/hdfs_yarn/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: check_exists_hdfs_dir 2 | become: yes 3 | become_user: hdfs 4 | shell: hdfs dfs -ls {{ item.path }} 5 | register: check_exists_hdfs_dir 6 | changed_when: false 7 | failed_when: false 8 | check_mode: no 9 | with_items: 10 | - {path: '/var', mode: '1777', owner: 'hdfs', group: 'hadoop'} 11 | - {path: '/var/log', mode: '1777', owner: 'hdfs', group: 'hadoop'} 12 | - {path: '/mapred', mode: '755', owner: 'mapred', group: 'hadoop'} 13 | - {path: '/mapred/staging', mode: '1777', owner: 'mapred', group: 'hadoop'} 14 | - {path: '/mapred/system', mode: '700', owner: 'mapred', group: 'hadoop'} 15 | - {path: '/tmp/hadoop-yarn', mode: '1777', owner: 'yarn', group: 'hadoop'} 16 | - {path: '/tmp/hadoop-yarn/staging', mode: '1777', owner: 'mapred', group: 'hadoop'} 17 | 18 | - name: create_hdfs_dir 19 | become: yes 20 | become_user: hdfs 21 | shell: hdfs dfs -mkdir -p {{ item.item.path }} 22 | when: item.rc != 0 23 | with_items: "{{ check_exists_hdfs_dir.results }}" 24 | 25 | - name: change_owner_and_group_of_hdfs_dir 26 | become: yes 27 | become_user: hdfs 28 | shell: hdfs dfs -chown -R {{ item.item.owner }}:{{ item.item.group }} {{ item.item.path }} 29 | when: item.rc != 0 30 | with_items: "{{ check_exists_hdfs_dir.results }}" 31 | 32 | - name: change_mode_of_hdfs_dir 33 | become: yes 34 | become_user: hdfs 35 | shell: hdfs dfs -chmod -R {{ item.item.mode }} {{ item.item.path }} 36 | when: item.rc != 0 37 | with_items: "{{ check_exists_hdfs_dir.results }}" 38 | 39 | -------------------------------------------------------------------------------- /playbooks/roles/hdfs_base/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: check_hdfs_root_permission 2 | become: yes 3 | become_user: hdfs 4 | shell: echo "$(hdfs dfs -ls -d /)" | grep "drwxrwxrwx" 5 | register: check_hdfs_root_permission 6 | changed_when: false 7 | failed_when: false 8 | check_mode: no 9 | 10 | - name: chmod_hdfs_root 11 | become: yes 12 | become_user: hdfs 13 | shell: hdfs dfs -chmod 777 / 14 | when: check_hdfs_root_permission.rc != 0 15 | 16 | - name: check_exists_hdfs_dir 17 | become: yes 18 | become_user: hdfs 19 | shell: hdfs dfs -ls {{ item.path }} 20 | register: check_exists_hdfs_dir 21 | changed_when: false 22 | failed_when: false 23 | check_mode: no 24 | with_items: 25 | - {path: '/tmp', mode: '1777', owner: 'hdfs', group: 'hadoop'} 26 | - {path: '/user', mode: '1777', owner: 'hdfs', group: 'hadoop'} 27 | 28 | - name: create_hdfs_dir 29 | become: yes 30 | become_user: hdfs 31 | shell: hdfs dfs -mkdir -p {{ item.item.path }} 32 | when: item.rc != 0 33 | with_items: "{{ check_exists_hdfs_dir.results }}" 34 | 35 | - name: change_owner_and_group_of_hdfs_dir 36 | become: yes 37 | become_user: hdfs 38 | shell: 
hdfs dfs -chown -R {{ item.item.owner }}:{{ item.item.group }} {{ item.item.path }} 39 | when: item.rc != 0 40 | with_items: "{{ check_exists_hdfs_dir.results }}" 41 | 42 | - name: change_mode_of_hdfs_dir 43 | become: yes 44 | become_user: hdfs 45 | shell: hdfs dfs -chmod -R {{ item.item.mode }} {{ item.item.path }} 46 | when: item.rc != 0 47 | with_items: "{{ check_exists_hdfs_dir.results }}" 48 | 49 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_namenode_data_dir 2 | become: yes 3 | file: path={{ item }} state=directory owner=hdfs group=hadoop mode=700 4 | with_items: "{{ dfs_namenode_name_dirs }}" 5 | 6 | - name: prepare_systemd_service 7 | become: yes 8 | template: 9 | src: "{{ item }}.service.j2" 10 | dest: "/etc/systemd/system/{{ item }}.service" 11 | with_items: 12 | - hadoop-hdfs-namenode 13 | - hadoop-hdfs-zkfc 14 | 15 | - name: reload_systemd_service 16 | become: yes 17 | systemd: 18 | daemon_reload: yes 19 | 20 | - name: prepare_pid_dir 21 | become: yes 22 | file: 23 | path: "{{ hdfs_pid_dir }}" 24 | state: directory 25 | owner: hdfs 26 | 27 | - name: deploy kick script for balancer. 28 | become: yes 29 | template: src={{ item }}.j2 dest=/usr/local/bin/{{ item }} mode=755 30 | with_items: 31 | - hdfs-balancer.sh 32 | 33 | - name: create_hdfs_log_dir 34 | become: yes 35 | file: path={{ hdfs_log_dir }} state=directory mode=755 owner=hdfs group=hadoop 36 | 37 | - name: create_hdfs_pid_dir 38 | become: yes 39 | file: path={{ hdfs_pid_dir }} state=directory mode=755 owner=hdfs group=hdfs 40 | 41 | - name: create_jaas_conf 42 | become: yes 43 | template: src=jaas-hdfs.conf.j2 dest=/etc/hadoop/conf/jaas-hdfs.conf owner=hdfs group=hdfs 44 | when: kerberos_realm is defined 45 | 46 | - name: copy_defaults_file 47 | become: yes 48 | template: src=default_{{ item }}.j2 dest=/etc/default/{{ item }} mode=755 owner=hdfs group=hdfs 49 | with_items: 50 | - hadoop-hdfs-zkfc 51 | - hadoop-hdfs-namenode 52 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hbase-service-test.rb.j2: -------------------------------------------------------------------------------- 1 | # This script should be called in hbase-shell. 2 | 3 | def is_table_present(name) 4 | # command "exists" can do the same check, 5 | # but as it doesn't return any result, 6 | # we need this function in ruby scripts. 
7 | existing_tables = list 8 | table_already_exists = existing_tables.find_index(name) 9 | if table_already_exists == nil then 10 | return false 11 | end 12 | return true 13 | end 14 | 15 | def delete_table_if_exists(name) 16 | if is_table_present(name) 17 | disable name 18 | drop name 19 | end 20 | end 21 | 22 | 23 | tablename = 'servicetesttable' 24 | 25 | print("*** Delete table for service test if already exists.\n") 26 | delete_table_if_exists(tablename) 27 | 28 | print("*** Create table.\n") 29 | create tablename, 'f1', 'f2' 30 | 31 | print("*** Verify table.\n") 32 | if not is_table_present(tablename) 33 | print("*** Failure when create table.\n") 34 | exit 1 35 | end 36 | 37 | print("*** Put data\n") 38 | put tablename, 'row1', 'f1:c1', 'value1' 39 | put tablename, 'row2', 'f1:c1', 'value2' 40 | put tablename, 'row2', 'f2:c2', 'value3' 41 | 42 | print("*** Count and verify data\n") 43 | num = count tablename 44 | 45 | if num != 2 then 46 | print("*** Failure when put data.\n") 47 | exit 1 48 | end 49 | 50 | print("*** Get data.\n") 51 | print("*** You can see the record data stored in servicetesttable.\n") 52 | get tablename, 'row2' 53 | 54 | print("*** Delete table.\n") 55 | delete_table_if_exists(tablename) 56 | 57 | print ("*** Service test finished successfully.\n") 58 | exit 0 59 | 60 | -------------------------------------------------------------------------------- /playbooks/roles/hbase/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: create_hbase_groups 2 | become: yes 3 | group: 4 | name: "{{ item.name }}" 5 | gid: "{{ item.gid | default(omit) }}" 6 | with_items: 7 | - { name: hbase, gid: "{{ hbase_gid | default(omit) }}" } 8 | - { name: hadoop, gid: "{{ hadoop_gid | default(omit) }}" } 9 | 10 | - name: create_hbase_user 11 | become: yes 12 | user: 13 | name: "{{ item.name }}" 14 | uid: "{{ item.uid | default(omit) }}" 15 | group: "{{ item.name }}" 16 | groups: 17 | - hadoop 18 | with_items: 19 | - { name: hbase, uid: "{{ hbase_uid | default(omit) }}" } 20 | 21 | - name: prepare_package_dir 22 | become: yes 23 | file: 24 | path: "{{ hbase_dir }}/package" 25 | state: directory 26 | 27 | - name: download_hbase_package 28 | become: yes 29 | get_url: 30 | url: "{{ hbase_release_url }}" 31 | dest: "{{ hbase_dir }}/package/{{ hbase_package_filename }}.tar.gz" 32 | checksum: "{{ hbase_release_checksum }}" 33 | 34 | - name: extract_hbase_package 35 | become: yes 36 | unarchive: 37 | src: "{{ hbase_dir }}/package/{{ hbase_package_filename }}.tar.gz" 38 | dest: "{{ hbase_dir }}" 39 | remote_src: yes 40 | 41 | - name: link_latest_package 42 | become: yes 43 | file: 44 | path: "{{ hbase_dir }}/current" 45 | src: "{{ hbase_dir }}/{{ hbase_package_filename }}" 46 | state: link 47 | 48 | - name: prepare_commands 49 | become: yes 50 | template: 51 | src: "command-wrapper.sh.j2" 52 | dest: "/usr/local/bin/{{ item }}" 53 | mode: '755' 54 | owner: root 55 | group: root 56 | with_items: 57 | - hbase 58 | - hbase-jruby -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_config_dir 2 | become: yes 3 | file: path=/etc/zookeeper/conf state=directory mode=755 4 | 5 | - include: principal.yml 6 | vars: { kerberos_princ_username: zookeeper, kerberos_princ_keytab_path: /etc/zookeeper/conf/zookeeper.keytab, kerberos_princ_keytab_owner: zookeeper } 7 | when: 
kerberos_realm is defined 8 | 9 | - name: copy_jaas_conf 10 | become: yes 11 | template: src=jaas.conf.j2 dest=/etc/zookeeper/conf/jaas.conf 12 | when: kerberos_realm is defined 13 | 14 | - name: copy_config_files 15 | become: yes 16 | template: src={{ item }}.j2 dest=/etc/zookeeper/conf/{{ item }} 17 | with_items: 18 | - zoo.cfg 19 | - log4j.properties 20 | 21 | - name: create_zookeeper_data_dir 22 | become: yes 23 | file: path={{ zookeeper_data_dir }} state=directory owner=zookeeper mode=755 24 | 25 | - name: copy_myid_file 26 | become: yes 27 | template: src=myid.j2 dest={{zookeeper_data_dir}}/myid 28 | 29 | - name: copy_zookeeper-env.sh 30 | become: yes 31 | template: src={{ item }}.j2 dest=/etc/zookeeper/conf/{{ item }} 32 | with_items: 33 | - zookeeper-env.sh 34 | 35 | - name: create_zookeeper_log_dir 36 | become: yes 37 | file: path={{ zookeeper_log_dir }} state=directory owner=zookeeper mode=755 38 | 39 | - name: prepare_systemd_service 40 | become: yes 41 | template: 42 | src: zookeeper-server.service.j2 43 | dest: /etc/systemd/system/zookeeper-server.service 44 | 45 | - name: reload_systemd_service 46 | become: yes 47 | systemd: 48 | daemon_reload: yes 49 | 50 | - name: prepare_pid_dir 51 | become: yes 52 | file: 53 | path: "{{ zookeeper_pid_dir }}" 54 | state: directory 55 | owner: zookeeper 56 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_master/tasks/config.yml: -------------------------------------------------------------------------------- 1 | - name: create_hbase_conf_dir 2 | become: yes 3 | file: path=/etc/hbase/conf state=directory owner=root group=root mode=755 4 | 5 | - include: principal.yml 6 | vars: { kerberos_princ_username: hbase, kerberos_princ_keytab_path: /etc/hbase/conf/hbase.keytab, kerberos_princ_keytab_owner: hbase } 7 | when: kerberos_realm is defined 8 | 9 | - name: copy_jaas_conf 10 | become: yes 11 | template: src=zk-jaas.conf.j2 dest=/etc/hbase/conf/zk-jaas.conf 12 | when: kerberos_realm is defined 13 | 14 | - name: copy_hbase_conf_files 15 | become: yes 16 | template: src={{ item }}.j2 dest=/etc/hbase/conf/{{ item }} owner=hbase group=hadoop mode=755 17 | with_items: 18 | - hbase-site.xml 19 | - hbase-env.sh 20 | - hbase-policy.xml 21 | - regionservers 22 | - log4j.properties 23 | - hadoop-metrics2-hbase.properties 24 | 25 | - name: prepare_systemd_service 26 | become: yes 27 | template: 28 | src: "{{ item }}.service.j2" 29 | dest: "/etc/systemd/system/{{ item }}.service" 30 | with_items: 31 | - hbase-master 32 | 33 | - name: reload_systemd_service 34 | become: yes 35 | systemd: 36 | daemon_reload: yes 37 | 38 | - name: copy_hbase_default_file 39 | become: yes 40 | template: src={{ item }}.j2 dest=/etc/default/{{ item }} owner=hbase group=hadoop mode=755 41 | with_items: 42 | - hbase-master 43 | 44 | - name: create_hbase_log_dir 45 | become: yes 46 | file: path={{ hbase_log_dir }} state=directory mode=755 owner=hbase group=hadoop 47 | 48 | - name: deploy_service_test_script 49 | become: yes 50 | template: src={{ item }}.j2 dest=/usr/local/bin/{{ item }} owner=hbase group=hadoop mode=644 51 | with_items: 52 | - hbase-service-test.rb 53 | -------------------------------------------------------------------------------- /playbooks/roles/base/templates/mapred-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ 17 | 18 | export JAVA_HOME={{ java_home }} 19 | export HADOOP_MAPRED_HOME={{ hadoop_dir }}/current/ 20 | export HADOOP_MAPRED_ROOT_LOGGER={{mapred_root_logger }} 21 | export HADOOP_MAPRED_LOG_DIR={{ mapred_log_dir }} 22 | export HADOOP_MAPRED_PID_DIR={{ mapred_pid_dir }} 23 | export HADOOP_MAPRED_IDENT_STRING={{ mapred_ident_string }} 24 | export HADOOP_JOB_HISTORYSERVER_HEAPSIZE={{ job_historyserver_heapsize }} 25 | 26 | #export HADOOP_JOB_HISTORYSERVER_OPTS= 27 | #export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. 28 | #export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. 29 | #export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. 30 | #export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default 31 | #export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. 32 | -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/templates/zoo.cfg.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | maxClientCnxns={{ zookeeper_max_client_connections }} 17 | # The number of milliseconds of each tick 18 | tickTime={{ zookeeper_tick_time }} 19 | # The number of ticks that the initial 20 | # synchronization phase can take 21 | initLimit={{ zookeeper_init_limit }} 22 | # The number of ticks that can pass between 23 | # sending a request and getting an acknowledgement 24 | syncLimit={{ zookeeper_sync_limit }} 25 | # the directory where the snapshot is stored. 
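26 | # For example, assuming zookeeper_data_dir='/var/lib/zookeeper' (purely an 27 | # illustrative value), the dataDir line below renders as dataDir=/var/lib/zookeeper. 28 | # Note that initLimit and syncLimit above are counted in ticks: with 29 | # tickTime=2000 and initLimit=10, a follower gets 20000 ms for its initial sync.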
30 | dataDir={{ zookeeper_data_dir }} 31 | # the port at which the clients will connect 32 | clientPort={{ zookeeper_client_port }} 33 | 34 | {% for server in zookeeper_servers %} 35 | server.{{ loop.index }}={{ server }}:2888:3888 36 | {% endfor %} 37 | {% if kerberos_realm is defined %} 38 | authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider 39 | jaasLoginRenew=3600000 40 | kerberos.removeHostFromPrincipal=true 41 | kerberos.removeRealmFromPrincipal=true 42 | {% endif %} 43 | -------------------------------------------------------------------------------- /playbooks/roles/cgroups/templates/cgroups.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # return code list 4 | RETURN_SUCCESS=0 5 | RETURN_ILLEGAL_ARGUMENTS=1 6 | RETURN_USER_NOT_ROOT=2 7 | RETURN_DIRECTORY_NOT_EXIST=3 8 | RETURN_PID_NOT_EXIST=4 9 | RETURN_CGROUPS_REGIST_FAILED=5 10 | RETURN_GROUP_NOT_DEFINED=6 11 | RETURN_PROCESS_NOT_DEFINED=7 12 | 13 | # argument check 14 | if [ $# -ne 3 ]; then 15 | echo "Usage: $0 <group> <process> <pid>" 1>&2 16 | exit ${RETURN_ILLEGAL_ARGUMENTS} 17 | fi 18 | 19 | if [ $(id -u) -ne 0 ]; then 20 | echo "This script requires root privileges." 1>&2 21 | exit ${RETURN_USER_NOT_ROOT} 22 | fi 23 | 24 | # variables 25 | PID=${3} 26 | CGROUPS_GROUP_NAME=${1} 27 | CGROUPS_PROCESS_NAME=${2} 28 | 29 | for resource in "blkio" "cpu" "cpuacct" "cpuset" "devices" "freezer" "memory" "net_cls"; 30 | do 31 | CGROUPS_ROOT_DIR=/cgroup/${resource} 32 | CGROUPS_HADOOP_DIR=${CGROUPS_ROOT_DIR}/{{ cgroups_root_group_name }} 33 | CGROUPS_GROUP_DIR=${CGROUPS_HADOOP_DIR}/${CGROUPS_GROUP_NAME} 34 | CGROUPS_PROCESS_DIR=${CGROUPS_GROUP_DIR}/${CGROUPS_PROCESS_NAME} 35 | CGROUPS_SCRIPTS_DIR={{ cgroups_scripts_dir }} 36 | 37 | # cgroups directory check 38 | if [ ! -e ${CGROUPS_HADOOP_DIR} ]; then 39 | continue 40 | fi 41 | 42 | if [ ! -e ${CGROUPS_GROUP_DIR} ]; then 43 | continue 44 | fi 45 | 46 | if [ ! -e ${CGROUPS_PROCESS_DIR} ]; then 47 | continue 48 | fi 49 | 50 | # pid exist check 51 | kill -0 ${PID} > /dev/null 2>&1 52 | if [ $? -ne 0 ]; then 53 | echo "Process ${PID} is not found." 1>&2 54 | continue 55 | fi 56 | 57 | # register pid to cgroups 58 | echo ${PID} >> ${CGROUPS_PROCESS_DIR}/tasks 59 | if [ $? 
-eq 0 ]; then 60 | echo "Process ${PID} is registered to cgroups hadoop/${CGROUPS_GROUP_NAME}/${CGROUPS_PROCESS_NAME}" 61 | else 62 | echo "Failed to register Process ${PID} to hadoop/${CGROUPS_GROUP_NAME}/${CGROUPS_PROCESS_NAME}" 63 | continue 64 | fi 65 | done 66 | 67 | -------------------------------------------------------------------------------- /playbooks/roles/kerberos-server/tasks/config_master.yml: -------------------------------------------------------------------------------- 1 | - name: Create master db 2 | become: yes 3 | command: creates=/var/lib/krb5kdc/principal kdb5_util create -s -P {{ kerberos_master_key_password }} 4 | 5 | - name: Check admin principal 6 | become: yes 7 | command: kadmin.local -q "getprinc {{ kerberos_admin_principal }}" 8 | register: admin_principals 9 | changed_when: admin_principals.stderr.find('Principal does not exist') != -1 10 | 11 | - name: Add admin principal 12 | become: yes 13 | command: kadmin.local -q "addprinc -randkey {{ kerberos_admin_principal }}" 14 | when: admin_principals is defined and admin_principals.stderr.find('Principal does not exist') != -1 15 | 16 | - name: Start kdc 17 | become: yes 18 | service: name=krb5-kdc state=started enabled=yes 19 | 20 | - name: Start kadmin 21 | become: yes 22 | service: name=krb5-admin-server state=started enabled=yes 23 | 24 | - name: Check admin keytab for ansible 25 | become: yes 26 | command: klist -k {{ kerberos_admin_keytab }} 27 | register: admin_keytabs 28 | changed_when: admin_keytabs.rc != 0 or admin_keytabs.stdout.find(kerberos_admin_principal) == -1 29 | ignore_errors: yes 30 | 31 | - name: Prepare admin keytab 32 | become: yes 33 | command: kadmin.local -q "ktadd -k {{ kerberos_admin_keytab }} {{ kerberos_admin_principal }}" 34 | when: admin_keytabs is defined and admin_keytabs.rc != 0 or admin_keytabs.stdout.find(kerberos_admin_principal) == -1 35 | 36 | - name: Change owner of admin keytab 37 | become: yes 38 | file: path={{ kerberos_admin_keytab }} owner={{ ansible_ssh_user }} group={{ ansible_ssh_user }} state=file 39 | 40 | - include: host_principal.yml 41 | 42 | - name: Download admin keytab 43 | become: yes 44 | fetch: src="{{ kerberos_admin_keytab }}" dest="{{ kerberos_admin_keytab_file }}" flat=yes 45 | 46 | - name: Download master key 47 | become: yes 48 | fetch: src="/etc/krb5kdc/.k5.{{ kerberos_realm }}" dest="{{ kerberos_mkey_file }}" flat=yes 49 | -------------------------------------------------------------------------------- /playbooks/group_vars/all/cgroups: -------------------------------------------------------------------------------- 1 | cgroups_root_dir: 'resource' 2 | cgroups_root_group_name: 'hadoop' 3 | 4 | cgroups_scripts_dir: '/var/local/cgroups' 5 | 6 | cgroups_configure: 7 | - group: 'hadoop' 8 | params: 9 | 10 | - group: 'hadoop/zookeeper' 11 | params: 12 | 13 | - group: 'hadoop/zookeeper/zookeeper-server' 14 | params: 15 | 16 | - group: 'hadoop/hdfs' 17 | params: 18 | 19 | - group: 'hadoop/hdfs/namenode' 20 | params: 21 | 22 | - group: 'hadoop/hdfs/datanode' 23 | params: 24 | 25 | - group: 'hadoop/hdfs/journalnode' 26 | params: 27 | 28 | - group: 'hadoop/hdfs/zkfc' 29 | params: 30 | 31 | - group: 'hadoop/hbase' 32 | params: 33 | 34 | - group: 'hadoop/hbase/hbase_master' 35 | params: 36 | 37 | - group: 'hadoop/hbase/hbase_regionserver' 38 | params: 39 | 40 | - group: 'hadoop/yarn' 41 | params: 42 | 43 | - group: 'hadoop/yarn/resourcemanager' 44 | params: 45 | 46 | - group: 'hadoop/yarn/nodemanager' 47 | params: 48 | 49 | - group: 'hadoop/mapreduce' 50 | 
params: 51 | 52 | - group: 'hadoop/mapreduce/historyserver' 53 | params: 54 | 55 | - group: 'hadoop/hive' 56 | params: 57 | 58 | - group: 'hadoop/hive/hcatalog' 59 | params: 60 | 61 | - group: 'hadoop/spark' 62 | params: 63 | 64 | - group: 'hadoop/spark/historyserver' 65 | params: 66 | 67 | - group: 'hadoop/presto' 68 | params: 69 | 70 | - group: 'hadoop/presto/presto_server' 71 | params: 72 | 73 | - group: 'hadoop/presto/postgresql' 74 | params: 75 | 76 | - group: 'hadoop/presto/prestogres' 77 | params: 78 | 79 | - group: 'hadoop/storm' 80 | params: 81 | 82 | - group: 'hadoop/storm/nimbus' 83 | params: 84 | 85 | - group: 'hadoop/storm/supervisor' 86 | params: 87 | 88 | - group: 'hadoop/storm/ui' 89 | params: 90 | 91 | - group: 'hadoop/hue' 92 | params: 93 | 94 | - group: 'hadoop/hue/hue' 95 | params: 96 | 97 | - group: 'hadoop/hue/httpfs' 98 | params: 99 | 100 | -------------------------------------------------------------------------------- /playbooks/roles/namenode/templates/hdfs-balancer.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Verify if user is hdfs. 4 | if [ "$USER" != "hdfs" ]; then 5 | echo "Run hdfs-balancer.sh as the hdfs user." 6 | exit 1 7 | fi 8 | 9 | # Determine PID and LOG directory path. 10 | CONF_DIR="/etc/hadoop/conf" 11 | . $CONF_DIR/hadoop-env.sh 12 | PIDFILE="$HADOOP_PID_DIR/hadoop-hdfs-balancer.pid" 13 | LOGFILE="$HADOOP_LOG_DIR/hadoop-hdfs-balancer.log" 14 | PREVPID=-1 15 | 16 | # return 1 if balancer is not running. 17 | # return 0 if balancer is running. 18 | function isRunning(){ 19 | # Check if another balancer is running. 20 | if [ -f $PIDFILE ]; then 21 | PREVPID=$(cat $PIDFILE) 22 | kill -s 0 $PREVPID >/dev/null 2>&1 23 | if [ $? -eq 0 ]; then 24 | return 0 25 | fi 26 | fi 27 | return 1 28 | } 29 | 30 | function start(){ 31 | # Check if another balancer is running. 32 | isRunning 33 | if [ $? -eq 0 ]; then 34 | echo "HDFS balancer is already running (PID:$PREVPID)" 35 | exit 1 36 | fi 37 | 38 | # Start balancer 39 | nohup hdfs balancer >>$LOGFILE 2>&1 & 40 | PID=$! 41 | echo $PID > $PIDFILE 42 | 43 | # Output start message. 44 | echo "HDFS balancer started (PID:$PID)" 45 | echo "HDFS balancer started (PID:$PID) at $(date)" >$LOGFILE 46 | } 47 | 48 | function stop(){ 49 | # Check if another balancer is running. 50 | isRunning 51 | if [ $? -ne 0 ]; then 52 | echo "no HDFS balancer to stop" 53 | exit 1 54 | fi 55 | 56 | # kill 57 | kill $PREVPID 58 | 59 | # Output kill message. 60 | echo "Killed HDFS balancer (PID:$PREVPID)" 61 | echo "Killed HDFS balancer (PID:$PREVPID) at $(date)" >>$LOGFILE 62 | } 63 | 64 | function status(){ 65 | # Check if another balancer is running. 66 | isRunning 67 | if [ $? 
-eq 0 ]; then 68 | echo "HDFS balancer is running (PID:$PREVPID)" 69 | exit 0 70 | else 71 | echo "HDFS balancer is not running" 72 | exit 3 73 | fi 74 | } 75 | 76 | case "$1" in 77 | start) 78 | start 79 | ;; 80 | stop) 81 | stop 82 | ;; 83 | status) 84 | status 85 | ;; 86 | *) 87 | echo "Usage: $0 {start|stop|status}" 88 | exit 2 89 | ;; 90 | esac 91 | -------------------------------------------------------------------------------- /playbooks/roles/cgroups/defaults/main.yml: -------------------------------------------------------------------------------- 1 | 2 | # cgroups_root_dir: 'resource' 3 | # cgroups_root_group_name: 'hadoop' 4 | # 5 | # cgroups_configure: 6 | # - group: 'hadoop' 7 | # params: 8 | # 9 | # - group: 'hadoop/zookeeper' 10 | # params: 11 | # 12 | # - group: 'hadoop/zookeeper/zookeeper-server' 13 | # params: 14 | # memory: 15 | # - { param: 'memory.limit_in_bytes', value: '11111111' } 16 | # 17 | # - group: 'hadoop/hdfs' 18 | # params: 19 | # 20 | # - group: 'hadoop/hdfs/namenode' 21 | # params: 22 | # 23 | # - group: 'hadoop/hdfs/datanode' 24 | # params: 25 | # 26 | # - group: 'hadoop/hdfs/journalnode' 27 | # params: 28 | # 29 | # - group: 'hadoop/hdfs/zkfc' 30 | # params: 31 | # 32 | # - group: 'hadoop/hbase' 33 | # params: 34 | # 35 | # - group: 'hadoop/hbase/hbase_master' 36 | # params: 37 | # 38 | # - group: 'hadoop/hbase/hbase_regionserver' 39 | # params: 40 | # 41 | # - group: 'hadoop/yarn' 42 | # params: 43 | # 44 | # - group: 'hadoop/yarn/resourcemanager' 45 | # params: 46 | # 47 | # - group: 'hadoop/yarn/nodemanager' 48 | # params: 49 | # 50 | # - group: 'hadoop/mapreduce' 51 | # params: 52 | # 53 | # - group: 'hadoop/mapreduce/historyserver' 54 | # params: 55 | # 56 | # - group: 'hadoop/hive' 57 | # params: 58 | # 59 | # - group: 'hadoop/hive/hcatalog' 60 | # params: 61 | # 62 | # - group: 'hadoop/spark' 63 | # params: 64 | # 65 | # - group: 'hadoop/spark/historyserver' 66 | # params: 67 | # 68 | # - group: 'hadoop/presto' 69 | # params: 70 | # 71 | # - group: 'hadoop/presto/presto_server' 72 | # params: 73 | # 74 | # - group: 'hadoop/presto/postgresql' 75 | # params: 76 | # 77 | # - group: 'hadoop/presto/prestogres' 78 | # params: 79 | # 80 | # - group: 'hadoop/storm' 81 | # params: 82 | # 83 | # - group: 'hadoop/storm/nimbus' 84 | # params: 85 | # 86 | # - group: 'hadoop/storm/supervisor' 87 | # params: 88 | # 89 | # - group: 'hadoop/storm/ui' 90 | # params: 91 | # 92 | # - group: 'hadoop/hue' 93 | # params: 94 | # 95 | # - group: 'hadoop/hue/hue' 96 | # params: 97 | # 98 | # - group: 'hadoop/hue/httpfs' 99 | # params: 100 | # 101 | -------------------------------------------------------------------------------- /playbooks/roles/base/defaults/main.yml: -------------------------------------------------------------------------------- 1 | 2 | # # yum repository 3 | # # hdp.repo.j2 4 | # base_hdp_hdp_baseurl: 'http://share/archive/hdp2.2/centos6/2.x/HDP-2.2.0.0' 5 | # base_hdp_hdp_gpgcheck: '1' 6 | # base_hdp_hdp_gpgkeyurl: 'http://share/archive/hdp2.2/centos6/2.x/rpm-gpg-key/RPM-GPG-KEY-Jenkins' 7 | # base_hdp_util_baseurl: 'http://share/archive/hdp2.2/centos6/2.x/HDP-UTILS-1.1.0.20' 8 | # base_hdp_util_gpgcheck: '1' 9 | # base_hdp_util_gpgkeyurl: 'http://share/archive/hdp2.2/centos6/2.x/rpm-gpg-key/RPM-GPG-KEY-Jenkins' 10 | # 11 | # # Hadoop Configuration 12 | # 13 | # # HDFS 14 | # namenode_rpc_port: '8020' 15 | # namenode_http_port: '50070' 16 | # dfs_datanode_balance_bandwidthPerSec: '1048576' 17 | # dfs_datanode_balance_max_concurrent_moves: '100' 18 | # 
dfs_namenode_replication_max_streams: '2' 19 | # dfs_namenode_replication_max_streams_hard_limit: '4' 20 | # dfs_namenode_replication_work_multiplier_per_iteration: '2' 21 | # ipc_server_read_threadpool_size: '1' 22 | # dfs_namenode_safemode_extension: '10000' 23 | # dfs_namenode_checkpoint_period: '1800' 24 | # dfs_namenode_handler_count: '32' 25 | # dfs_datanode_handler_count: '10' 26 | # dfs_datanode_max_transfer_threads: '4098' 27 | # dfs_datanode_du_reserved: '0' 28 | # dfs_namenode_checkpoint_txns: '400000' 29 | # 30 | # # YARN 31 | # yarn_app_log_dir: '/var/log/yarn' 32 | # yarn_rm_rpc_port: '8032' 33 | # yarn_rm_scheduler_port: '8030' 34 | # yarn_rm_admin_port: '8033' 35 | # yarn_rm_resource_tracker_port: '8031' 36 | # 37 | # # MapReduce 38 | # mapreduce_am_resource_mb: '1024' 39 | # mapreduce_am_command_opts: '-Xmx768m' 40 | # mapreduce_map_cpu_vcores: '1' 41 | # mapreduce_map_memory_mb: '1024' 42 | # mapreduce_map_java_opts: '-Xmx768m' 43 | # mapreduce_reduce_cpu_vcores: '1' 44 | # mapreduce_reduce_memory_mb: '1024' 45 | # mapreduce_reduce_java_opts: '-Xmx768m' 46 | # 47 | # # hadoop-env 48 | # hdfs_ident_string: 'hdfs' 49 | # yarn_ident_string: 'yarn' 50 | # tez_conf_dir: '/etc/tez/conf' 51 | # 52 | # # mapred-env 53 | # job_historyserver_heapsize: '1000' 54 | # mapred_root_logger: 'INFO,RFA' 55 | # mapred_log_dir: '/var/log/hadoop-mapreduce' 56 | # mapred_pid_dir: '/var/run/hadoop-mapreduce' 57 | # mapred_ident_string: 'mapred' 58 | -------------------------------------------------------------------------------- /playbooks/roles/zookeeper_server/templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir=. 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=. 
8 | zookeeper.tracelog.file=zookeeper_trace.log
9 |
10 | #
11 | # ZooKeeper Logging Configuration
12 | #
13 |
14 | # Format is "<default threshold> (, <appender>)+
15 |
16 | # DEFAULT: console appender only
17 | log4j.rootLogger=${zookeeper.root.logger}
18 |
19 | # Example with rolling log file
20 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE
21 |
22 | # Example with rolling log file and tracing
23 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE
24 |
25 | #
26 | # Log INFO level and above messages to the console
27 | #
28 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
29 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
30 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
31 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
32 |
33 | #
34 | # Add ROLLINGFILE to rootLogger to get log file output
35 | # Log DEBUG level and above messages to a log file
36 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
37 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold}
38 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file}
39 |
40 | # Max log file size of 10MB
41 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB
42 | # uncomment the next line to limit number of backup files
43 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10
44 |
45 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
46 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
47 |
48 |
49 | #
50 | # Add TRACEFILE to rootLogger to get log file output
51 | # Log DEBUG level and above messages to a log file
52 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender
53 | log4j.appender.TRACEFILE.Threshold=TRACE
54 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file}
55 |
56 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout
57 | ### Notice we are including log4j's NDC here (%x)
58 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n
59 |
-------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hbase-policy.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>

  <property>
    <name>security.client.protocol.acl</name>
    <value>*</value>
    <description>ACL for ClientProtocol and AdminProtocol implementations (ie.
    clients talking to HRegionServers)
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>
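  <!-- Illustrative example (hypothetical principals): following the syntax
       documented above, tightening this ACL to a value of
       "hbase,alice supergroup" would admit the users hbase and alice plus
       any member of the supergroup group, instead of everyone. -->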
  <property>
    <name>security.admin.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterInterface protocol implementation (ie.
    clients talking to HMaster for admin operations).
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.masterregion.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterRegionInterface protocol implementations
    (for HRegionServers communicating with HMaster)
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>

</configuration>
-------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/hbase-policy.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>

  <property>
    <name>security.client.protocol.acl</name>
    <value>*</value>
    <description>ACL for ClientProtocol and AdminProtocol implementations (ie.
    clients talking to HRegionServers)
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.admin.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterInterface protocol implementation (ie.
    clients talking to HMaster for admin operations).
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.masterregion.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterRegionInterface protocol implementations
    (for HRegionServers communicating with HMaster)
    The ACL is a comma-separated list of user and group names. The user and
    group list is separated by a blank. For e.g. "alice,bob users,wheel".
    A special value of "*" means all users are allowed.</description>
  </property>

</configuration>
-------------------------------------------------------------------------------- /scripts/nova.py: --------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from keystoneclient.v2_0 import client as kc
4 | from novaclient import client as nc
5 | from quantumclient.v2_0 import client as qc
6 | from glanceclient.v2 import client as gc
7 | import requests
8 | import subprocess
9 |
10 | kv_pattern = re.compile(r'\s*export\s+([A-Za-z_]+)=(.*?)\s*$')
11 |
12 |
13 | class BaremetalService(object):
14 |     def __init__(self, keystone):
15 |         self.keystone = keystone
16 |
17 |     def list_nodes(self):
18 |         nova_url = self.keystone.service_catalog.get_endpoints()['compute'][0]['adminURL']
19 |         return requests.get(nova_url + '/os-baremetal-nodes', headers={ 'X-Auth-Token': self.keystone.auth_token }).json()['nodes']
20 |
21 | class ClientFactory(object):
22 |     def __init__(self, authinfo):
23 |         self.authinfo = authinfo
24 |
25 |     def keystone(self):
26 |         return kc.Client(auth_url=self.authinfo['OS_AUTH_URL'], username=self.authinfo['OS_USERNAME'],
27 |                          password=self.authinfo['OS_PASSWORD'], tenant_name=self.authinfo['OS_TENANT_NAME'])
28 |
29 |     def nova(self):
30 |         return nc.Client('2', self.authinfo['OS_USERNAME'], self.authinfo['OS_PASSWORD'], self.authinfo['OS_TENANT_NAME'],
31 |                          auth_url=self.authinfo['OS_AUTH_URL'])
32 |
33 |     def quantum(self):
34 |         return qc.Client(auth_url=self.authinfo['OS_AUTH_URL'], username=self.authinfo['OS_USERNAME'],
35 |                          password=self.authinfo['OS_PASSWORD'], tenant_name=self.authinfo['OS_TENANT_NAME'])
36 |
37 |     def glance(self):
38 |         keystone = self.keystone()
39 |         return gc.Client(endpoint=keystone.service_catalog.get_endpoints()['image'][0]['publicURL'], token=keystone.auth_token)
40 |
41 |     def nova_baremetal(self):
42 |         return BaremetalService(self.keystone())
43 |
44 | def get_client_factory(aic_auth_path):
45 |     authinfo = {}
46 |     with open(os.path.expanduser(aic_auth_path), 'r') as f:
47 |         for line in f.readlines():
48 |             m = kv_pattern.match(line)
49 |             if m:
50 |                 authinfo[m.group(1)] = m.group(2)
51 |     return ClientFactory(authinfo)
52 |
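# Illustrative usage (hypothetical RC-file path; assumes a standard OpenStack
# RC file made of `export OS_AUTH_URL=...`-style lines as parsed above):
#
#   factory = get_client_factory('~/aicrc')
#   nova = factory.nova()
#   names = [server.name for server in nova.servers.list()]
#   nodes = factory.nova_baremetal().list_nodes()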
53 | def run_client(envfile, cmd): 54 | env = os.environ.copy() 55 | with open(os.path.expanduser(envfile), 'r') as f: 56 | for line in f.readlines(): 57 | m = kv_pattern.match(line) 58 | if m: 59 | env[m.group(1)] = m.group(2) 60 | return subprocess.check_output(cmd, env=env).split('\n') 61 | -------------------------------------------------------------------------------- /playbooks/roles/base/templates/hadoop-metrics2.properties.j2: -------------------------------------------------------------------------------- 1 | # syntax: [prefix].[source|sink].[instance].[options] 2 | # See javadoc of package-info.java for org.apache.hadoop.metrics2 for details 3 | 4 | #*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink 5 | # default sampling period, in seconds 6 | *.period=10 7 | 8 | # The namenode-metrics.out will contain metrics from all context 9 | #namenode.sink.file.filename=namenode-metrics.out 10 | # Specifying a special sampling period for namenode: 11 | #namenode.sink.*.period=8 12 | 13 | #datanode.sink.file.filename=datanode-metrics.out 14 | 15 | #resourcemanager.sink.file.filename=resourcemanager-metrics.out 16 | 17 | #nodemanager.sink.file.filename=nodemanager-metrics.out 18 | 19 | #mrappmaster.sink.file.filename=mrappmaster-metrics.out 20 | 21 | #jobhistoryserver.sink.file.filename=jobhistoryserver-metrics.out 22 | 23 | # the following example split metrics of different 24 | # context to different sinks (in this case files) 25 | #nodemanager.sink.file_jvm.class=org.apache.hadoop.metrics2.sink.FileSink 26 | #nodemanager.sink.file_jvm.context=jvm 27 | #nodemanager.sink.file_jvm.filename=nodemanager-jvm-metrics.out 28 | #nodemanager.sink.file_mapred.class=org.apache.hadoop.metrics2.sink.FileSink 29 | #nodemanager.sink.file_mapred.context=mapred 30 | #nodemanager.sink.file_mapred.filename=nodemanager-mapred-metrics.out 31 | 32 | # 33 | # Below are for sending metrics to Ganglia 34 | # 35 | # for Ganglia 3.0 support 36 | # *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30 37 | # 38 | # for Ganglia 3.1 support 39 | *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31 40 | 41 | *.sink.ganglia.period=10 42 | 43 | # default for supportsparse is false 44 | # *.sink.ganglia.supportsparse=true 45 | 46 | *.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both 47 | *.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40 48 | 49 | # Tag values to use for the ganglia prefix. If not defined no tags are used. 50 | # If '*' all tags are used. If specifiying multiple tags separate them with 51 | # commas. Note that the last segment of the property name is the context name. 52 | # 53 | #*.sink.ganglia.tagsForPrefix.jvm=ProcesName 54 | #*.sink.ganglia.tagsForPrefix.dfs= 55 | #*.sink.ganglia.tagsForPrefix.rpc= 56 | #*.sink.ganglia.tagsForPrefix.mapred= 57 | 58 | namenode.sink.ganglia.servers={{ gmond }}:8649 59 | 60 | datanode.sink.ganglia.servers={{ gmond }}:8649 61 | 62 | resourcemanager.sink.ganglia.servers={{ gmond }}:8649 63 | 64 | nodemanager.sink.ganglia.servers={{ gmond }}:8649 65 | 66 | mrappmaster.sink.ganglia.servers={{ gmond }}:8649 67 | 68 | jobhistoryserver.sink.ganglia.servers={{ gmond }}:8649 69 | -------------------------------------------------------------------------------- /playbooks/roles/cgroups/templates/cgconfig.conf.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright IBM Corporation. 
2007 3 | # 4 | # Authors: Balbir Singh 5 | # This program is free software; you can redistribute it and/or modify it 6 | # under the terms of version 2.1 of the GNU Lesser General Public License 7 | # as published by the Free Software Foundation. 8 | # 9 | # This program is distributed in the hope that it would be useful, but 10 | # WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | # 13 | # See man cgconfig.conf for further details. 14 | # 15 | # By default, mount all controllers to /cgroup/ 16 | 17 | mount { 18 | cpuset = /cgroup/cpuset; 19 | cpu = /cgroup/cpu; 20 | cpuacct = /cgroup/cpuacct; 21 | memory = /cgroup/memory; 22 | devices = /cgroup/devices; 23 | freezer = /cgroup/freezer; 24 | net_cls = /cgroup/net_cls; 25 | blkio = /cgroup/blkio; 26 | } 27 | 28 | {% for subgroup in cgroups_configure %} 29 | group {{ subgroup.group }} { 30 | {% if subgroup.params.cpuset is defined %} 31 | cpuset { 32 | {% for configure in subgroup.params.cpuset %} 33 | {{ configure.param }} = {{ configure.value }}; 34 | {% endfor %} 35 | } 36 | {% endif %} 37 | {% if subgroup.params.cpu is defined %} 38 | cpu { 39 | {% for configure in subgroup.params.cpu %} 40 | {{ configure.param }} = {{ configure.value }}; 41 | {% endfor %} 42 | } 43 | {% endif %} 44 | {% if subgroup.params.cpuacct is defined %} 45 | cpuacct { 46 | {% for configure in subgroup.params.cpuacct %} 47 | {{ configure.param }} = {{ configure.value }}; 48 | {% endfor %} 49 | } 50 | {% endif %} 51 | memory { 52 | {% if subgroup.params.memory is defined %} 53 | {% for configure in subgroup.params.memory %} 54 | {{ configure.param }} = {{ configure.value }}; 55 | {% endfor %} 56 | {% endif %} 57 | } 58 | {% if subgroup.params.devices is defined %} 59 | devices { 60 | {% for configure in subgroup.params.devices %} 61 | {{ configure.param }} = {{ configure.value }}; 62 | {% endfor %} 63 | } 64 | {% endif %} 65 | {% if subgroup.params.freezer is defined %} 66 | freezer { 67 | {% for configure in subgroup.params.freezer %} 68 | {{ configure.param }} = {{ configure.value }}; 69 | {% endfor %} 70 | } 71 | {% endif %} 72 | {% if subgroup.params.net_cls is defined %} 73 | net_cls { 74 | {% for configure in subgroup.params.net_cls %} 75 | {{ configure.param }} = {{ configure.value }}; 76 | {% endfor %} 77 | } 78 | {% endif %} 79 | {% if subgroup.params.blkio is defined %} 80 | blkio { 81 | {% for configure in subgroup.params.blkio %} 82 | {{ configure.param }} = {{ configure.value }}; 83 | {% endfor %} 84 | } 85 | {% endif %} 86 | } 87 | 88 | {% endfor %} 89 | 90 | -------------------------------------------------------------------------------- /playbooks/roles/base/templates/hadoop-metrics.properties.j2: -------------------------------------------------------------------------------- 1 | # Configuration of the "dfs" context for null 2 | dfs.class=org.apache.hadoop.metrics.spi.NullContext 3 | 4 | # Configuration of the "dfs" context for file 5 | #dfs.class=org.apache.hadoop.metrics.file.FileContext 6 | #dfs.period=10 7 | #dfs.fileName=/tmp/dfsmetrics.log 8 | 9 | # Configuration of the "dfs" context for ganglia 10 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 11 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext 12 | # dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 13 | # dfs.period=10 14 | # dfs.servers=localhost:8649 15 | 16 | 17 | # Configuration of the "mapred" context for null 18 | mapred.class=org.apache.hadoop.metrics.spi.NullContext 19 | 20 
| # Configuration of the "mapred" context for file 21 | #mapred.class=org.apache.hadoop.metrics.file.FileContext 22 | #mapred.period=10 23 | #mapred.fileName=/tmp/mrmetrics.log 24 | 25 | # Configuration of the "mapred" context for ganglia 26 | # Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) 27 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext 28 | # mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 29 | # mapred.period=10 30 | # mapred.servers=localhost:8649 31 | 32 | 33 | # Configuration of the "jvm" context for null 34 | #jvm.class=org.apache.hadoop.metrics.spi.NullContext 35 | 36 | # Configuration of the "jvm" context for file 37 | #jvm.class=org.apache.hadoop.metrics.file.FileContext 38 | #jvm.period=10 39 | #jvm.fileName=/tmp/jvmmetrics.log 40 | 41 | # Configuration of the "jvm" context for ganglia 42 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext 43 | # jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 44 | # jvm.period=10 45 | # jvm.servers=localhost:8649 46 | 47 | # Configuration of the "rpc" context for null 48 | rpc.class=org.apache.hadoop.metrics.spi.NullContext 49 | 50 | # Configuration of the "rpc" context for file 51 | #rpc.class=org.apache.hadoop.metrics.file.FileContext 52 | #rpc.period=10 53 | #rpc.fileName=/tmp/rpcmetrics.log 54 | 55 | # Configuration of the "rpc" context for ganglia 56 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext 57 | # rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 58 | # rpc.period=10 59 | # rpc.servers=localhost:8649 60 | 61 | 62 | # Configuration of the "ugi" context for null 63 | ugi.class=org.apache.hadoop.metrics.spi.NullContext 64 | 65 | # Configuration of the "ugi" context for file 66 | #ugi.class=org.apache.hadoop.metrics.file.FileContext 67 | #ugi.period=10 68 | #ugi.fileName=/tmp/ugimetrics.log 69 | 70 | # Configuration of the "ugi" context for ganglia 71 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext 72 | # ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 73 | # ugi.period=10 74 | # ugi.servers=localhost:8649 75 | 76 | -------------------------------------------------------------------------------- /playbooks/roles/collect/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: your name 4 | description: 5 | company: your company (optional) 6 | # Some suggested licenses: 7 | # - BSD (default) 8 | # - MIT 9 | # - GPLv2 10 | # - GPLv3 11 | # - Apache 12 | # - CC-BY 13 | license: license (GPLv2, CC-BY, etc) 14 | min_ansible_version: 1.2 15 | # 16 | # Below are all platforms currently available. Just uncomment 17 | # the ones that apply to your role. If you don't see your 18 | # platform on this list, let us know and we'll get it added! 
19 | # 20 | #platforms: 21 | #- name: EL 22 | # versions: 23 | # - all 24 | # - 5 25 | # - 6 26 | # - 7 27 | #- name: GenericUNIX 28 | # versions: 29 | # - all 30 | # - any 31 | #- name: Fedora 32 | # versions: 33 | # - all 34 | # - 16 35 | # - 17 36 | # - 18 37 | # - 19 38 | # - 20 39 | #- name: SmartOS 40 | # versions: 41 | # - all 42 | # - any 43 | #- name: opensuse 44 | # versions: 45 | # - all 46 | # - 12.1 47 | # - 12.2 48 | # - 12.3 49 | # - 13.1 50 | # - 13.2 51 | #- name: Amazon 52 | # versions: 53 | # - all 54 | # - 2013.03 55 | # - 2013.09 56 | #- name: GenericBSD 57 | # versions: 58 | # - all 59 | # - any 60 | #- name: FreeBSD 61 | # versions: 62 | # - all 63 | # - 8.0 64 | # - 8.1 65 | # - 8.2 66 | # - 8.3 67 | # - 8.4 68 | # - 9.0 69 | # - 9.1 70 | # - 9.1 71 | # - 9.2 72 | #- name: Ubuntu 73 | # versions: 74 | # - all 75 | # - lucid 76 | # - maverick 77 | # - natty 78 | # - oneiric 79 | # - precise 80 | # - quantal 81 | # - raring 82 | # - saucy 83 | # - trusty 84 | #- name: SLES 85 | # versions: 86 | # - all 87 | # - 10SP3 88 | # - 10SP4 89 | # - 11 90 | # - 11SP1 91 | # - 11SP2 92 | # - 11SP3 93 | #- name: GenericLinux 94 | # versions: 95 | # - all 96 | # - any 97 | #- name: Debian 98 | # versions: 99 | # - all 100 | # - etch 101 | # - lenny 102 | # - squeeze 103 | # - wheezy 104 | # 105 | # Below are all categories currently available. Just as with 106 | # the platforms above, uncomment those that apply to your role. 107 | # 108 | #categories: 109 | #- cloud 110 | #- cloud:ec2 111 | #- cloud:gce 112 | #- cloud:rax 113 | #- clustering 114 | #- database 115 | #- database:nosql 116 | #- database:sql 117 | #- development 118 | #- monitoring 119 | #- networking 120 | #- packaging 121 | #- system 122 | #- web 123 | dependencies: [] 124 | # List your role dependencies here, one per line. Only 125 | # dependencies available via galaxy should be listed here. 126 | # Be sure to remove the '[]' above if you add dependencies 127 | # to this list. 
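# (Illustrative only: a filled-in dependencies list for this repository might
#  look like the following, assuming the role should run after the os and
#  java11 roles defined elsewhere in this repo:
#  dependencies:
#    - { role: os }
#    - { role: java11 })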
128 |
129 |
-------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/hbase-site.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>

{% if kerberos_realm is defined %}
  <property>
    <name>hbase.security.authentication</name>
    <value>kerberos</value>
  </property>

  <property>
    <name>hbase.rpc.engine</name>
    <value>org.apache.hadoop.hbase.ipc.SecureRpcEngine</value>
  </property>

  <property>
    <name>hbase.master.kerberos.principal</name>
    <value>hbase/_HOST@{{ kerberos_realm }}</value>
  </property>

  <property>
    <name>hbase.master.keytab.file</name>
    <value>/etc/hbase/conf/hbase.keytab</value>
  </property>

  <property>
    <name>hbase.regionserver.kerberos.principal</name>
    <value>hbase/_HOST@{{ kerberos_realm }}</value>
  </property>

  <property>
    <name>hbase.regionserver.keytab.file</name>
    <value>/etc/hbase/conf/hbase.keytab</value>
  </property>

  <property>
    <name>hbase.security.authorization</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.coprocessor.master.classes</name>
    <value>org.apache.hadoop.hbase.security.access.AccessController</value>
  </property>

  <property>
    <name>hbase.coprocessor.region.classes</name>
    <value>org.apache.hadoop.hbase.security.token.TokenProvider,org.apache.hadoop.hbase.security.access.AccessController</value>
  </property>
{% endif %}

  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://{{ dfs_nameservices }}{{ hbase_root_dir }}</value>
  </property>

  <property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
  </property>

  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>{{ zookeeper_servers | join(',') }}</value>
  </property>

</configuration>
-------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/hbase-site.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>

{% if kerberos_realm is defined %}
  <property>
    <name>hbase.security.authentication</name>
    <value>kerberos</value>
  </property>

  <property>
    <name>hbase.rpc.engine</name>
    <value>org.apache.hadoop.hbase.ipc.SecureRpcEngine</value>
  </property>

  <property>
    <name>hbase.master.kerberos.principal</name>
    <value>hbase/_HOST@{{ kerberos_realm }}</value>
  </property>

  <property>
    <name>hbase.master.keytab.file</name>
    <value>/etc/hbase/conf/hbase.keytab</value>
  </property>

  <property>
    <name>hbase.regionserver.kerberos.principal</name>
    <value>hbase/_HOST@{{ kerberos_realm }}</value>
  </property>

  <property>
    <name>hbase.regionserver.keytab.file</name>
    <value>/etc/hbase/conf/hbase.keytab</value>
  </property>

  <property>
    <name>hbase.security.authorization</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.coprocessor.master.classes</name>
    <value>org.apache.hadoop.hbase.security.access.AccessController</value>
  </property>

  <property>
    <name>hbase.coprocessor.region.classes</name>
    <value>org.apache.hadoop.hbase.security.token.TokenProvider,org.apache.hadoop.hbase.security.access.AccessController</value>
  </property>
{% endif %}

  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://{{ dfs_nameservices }}{{ hbase_root_dir }}</value>
  </property>

  <property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
  </property>

  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>{{ zookeeper_servers | join(',') }}</value>
  </property>

</configuration>
-------------------------------------------------------------------------------- /playbooks/group_vars/all/base: --------------------------------------------------------------------------------
1 | # yum repository
2 | # hdp.repo.j2
3 | base_hdp_hdp_baseurl: 'http://share/archive/hdp2.2/centos6/2.x/HDP-2.2.0.0'
4 |
5 | base_hdp_hdp_gpgcheck: '1'
6 |
7 | base_hdp_hdp_gpgkeyurl: 'http://share/archive/hdp2.2/centos6/2.x/rpm-gpg-key/RPM-GPG-KEY-Jenkins'
8 |
9 | base_hdp_util_baseurl: 'http://share/archive/hdp2.2/centos6/2.x/HDP-UTILS-1.1.0.20'
10 |
11 | base_hdp_util_gpgcheck: '1'
12 |
13 | base_hdp_util_gpgkeyurl: 'http://share/archive/hdp2.2/centos6/2.x/rpm-gpg-key/RPM-GPG-KEY-Jenkins'
14 |
15 | # Hadoop Configuration
16 | hadoop_tmp_dir: '/hadoop/tmp/hadoop-${user.name}'
17 |
18 | net_topology_script_file_name: ''
19 |
20 | fs_trash_interval: '0'
21 |
22 | dfs_namenode_heartbeat_recheck_interval: '300000'
23 |
24 | io_file_buffer_size: '4096'
25 |
26 | # HDFS
27 | namenode_rpc_port: '8020'
28 |
29 | namenode_http_port: '50070'
30 |
31 | namenode_https_port: '50470'
32 |
33 | datanode_http_policy: 'HTTPS_ONLY'
34 | datanode_https_port: '50470'
35 |
36 | # https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.5/bk_command-line-installation/content/configuring-namenode-heap-size.html
37 | namenode_total_java_heap: '1126m'
38 | namenode_young_generation_size: '128m'
39 |
40 | dfs_datanode_balance_bandwidthPerSec: '1048576'
41 |
42 | dfs_namenode_safemode_extension: '10000'
43 |
44 | dfs_namenode_checkpoint_period: '1800'
45 |
46 | dfs_namenode_handler_count: '32'
47 |
48 | dfs_datanode_handler_count: '10'
49 |
50 | dfs_datanode_max_transfer_threads: '4098'
51 |
52 | dfs_datanode_du_reserved: '0'
53 |
54 | dfs_namenode_checkpoint_txns: '400000'
55 |
56 | dfs_replication: '2'
57 |
58 | # YARN
59 | yarn_app_log_dir: '/var/log/yarn'
60 |
61 | yarn_rm_rpc_port: '8032'
62 |
63 | yarn_rm_scheduler_port: '8030'
64 |
65 | yarn_rm_admin_port: '8033'
66 |
67 | yarn_rm_resource_tracker_port: '8031'
68 |
69 | # MapReduce
70 | ## Depends on: mapreduce_am_resource_mb
71 | mapreduce_am_resource_mb: '1024'
72 |
73 | ## Depends on: mapreduce_am_resource_mb
74 | mapreduce_am_command_opts: '-Xmx768m'
75 |
76 | mapreduce_map_cpu_vcores: '1'
77 |
78 | ## Depends on: mapreduce_map_java_opts
79 | mapreduce_map_memory_mb: '1024'
80 |
81 | ## Depends on: mapreduce_map_memory_mb
82 | mapreduce_map_java_opts: '-Xmx768m'
83 |
84 | mapreduce_reduce_cpu_vcores: '1'
85 |
86 | ## Depends on: mapreduce_reduce_java_opts
87 | mapreduce_reduce_memory_mb: '1024'
88 |
89 | ## Depends on: mapreduce_reduce_memory_mb
90 | mapreduce_reduce_java_opts: '-Xmx768m'
91 |
92 | mapreduce_http_policy: 'HTTPS_ONLY'
93 |
94 | yarn_http_policy: 'HTTPS_ONLY'
95 |
96 | # hadoop-env
97 | hdfs_ident_string: 'hdfs'
98 | yarn_ident_string: 'yarn'
99 | tez_conf_dir: '/etc/tez/conf'
100 |
101 | # mapred-env
102 | job_historyserver_heapsize: '1000'
103 | mapred_root_logger: 'INFO,RFA'
104 | mapred_log_dir: '/var/log/hadoop-mapreduce'
105 | mapred_pid_dir: '/var/run/hadoop-mapreduce'
106 | mapred_ident_string: 'mapred'
107 |
-------------------------------------------------------------------------------- /playbooks/roles/base/templates/core-site.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

{% if kerberos_realm is defined %}
  <property>
    <name>hadoop.security.authentication</name>
    <value>kerberos</value>
  </property>

  <property>
    <name>hadoop.security.authorization</name>
    <value>true</value>
  </property>
{% endif %}

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://{{ dfs_nameservices }}</value>
  </property>

  <property>
    <name>hadoop.tmp.dir</name>
    <value>{{ hadoop_tmp_dir }}</value>
  </property>

  <property>
    <name>net.topology.script.file.name</name>
    <value>{{ net_topology_script_file_name }}</value>
  </property>

  <property>
    <name>fs.trash.interval</name>
    <value>{{ fs_trash_interval }}</value>
  </property>

  <property>
    <name>dfs.namenode.heartbeat.recheck-interval</name>
    <value>{{ dfs_namenode_heartbeat_recheck_interval }}</value>
  </property>

  <property>
    <name>io.file.buffer.size</name>
    <value>{{ io_file_buffer_size }}</value>
  </property>

  <property>
    <name>ha.zookeeper.quorum</name>
    <value>{{ zookeeper_servers | join(':2181,') }}:2181</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hue.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hue.groups</name>
    <value>*</value>
  </property>

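  <!-- Illustrative note: each hadoop.proxyuser.<user>.hosts/.groups pair here
       controls impersonation, i.e. from which hosts <user> may proxy requests
       and which groups of end users it may impersonate; '*' disables the
       restriction. A hardened variant might use hypothetical values such as
       hue01.example.com for hosts and hadoop-users for groups. -->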
  <property>
    <name>hadoop.proxyuser.hcat.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hcat.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.httpfs.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.httpfs.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hdfs.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hdfs.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
  </property>

</configuration>
-------------------------------------------------------------------------------- /playbooks/roles/base/templates/mapred-site.xml.j2: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

{% if kerberos_realm is defined %}
  <property>
    <name>mapreduce.jobhistory.keytab</name>
    <value>/etc/hadoop/conf/mapred.keytab</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.principal</name>
    <value>mapred/_HOST@{{ kerberos_realm }}</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.http.policy</name>
    <value>{{ mapreduce_http_policy }}</value>
  </property>
{% endif %}

  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>{{ mapreduce_historyserver_address }}:10020</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>{{ mapreduce_historyserver_address }}:19888</value>
  </property>

  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>{{ mapreduce_am_resource_mb }}</value>
  </property>

  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>{{ mapreduce_am_command_opts }}</value>
  </property>

  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <property>
    <name>mapreduce.map.cpu.vcores</name>
    <value>{{ mapreduce_map_cpu_vcores }}</value>
  </property>

  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>{{ mapreduce_map_memory_mb }}</value>
  </property>

  <property>
    <name>mapreduce.map.java.opts</name>
    <value>{{ mapreduce_map_java_opts }}</value>
  </property>

  <property>
    <name>mapreduce.reduce.cpu.vcores</name>
    <value>{{ mapreduce_reduce_cpu_vcores }}</value>
  </property>

  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>{{ mapreduce_reduce_memory_mb }}</value>
  </property>

  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>{{ mapreduce_reduce_java_opts }}</value>
  </property>

  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME={{ hadoop_dir }}/current/</value>
  </property>

  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME={{ hadoop_dir }}/current/</value>
  </property>

  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME={{ hadoop_dir }}/current/</value>
  </property>

</configuration>
-------------------------------------------------------------------------------- /playbooks/roles/hbase_master/templates/log4j.properties.j2: --------------------------------------------------------------------------------
1 | # Define some default values that can be overridden by system properties
2 | hbase.root.logger=INFO,console
3 | hbase.security.logger=INFO,console
4 | hbase.log.dir=.
5 | hbase.log.file=hbase.log
6 |
7 | # Define the root logger to the system property "hbase.root.logger".
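# (Illustrative aside: log4j resolves the ${...} placeholders below from Java
#  system properties, so launching a daemon with -Dhbase.root.logger=INFO,RFA
#  would route logging to the rolling-file appender defined further down
#  without editing this template.)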
8 | log4j.rootLogger=${hbase.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 28 | 29 | # Rolling File Appender properties 30 | hbase.log.maxfilesize=256MB 31 | hbase.log.maxbackupindex=20 32 | 33 | # Rolling File Appender 34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file} 36 | 37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize} 38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex} 39 | 40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 42 | 43 | # 44 | # Security audit appender 45 | # 46 | hbase.security.log.file=SecurityAuth.audit 47 | hbase.security.log.maxfilesize=256MB 48 | hbase.security.log.maxbackupindex=20 49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file} 51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize} 52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex} 53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 55 | log4j.category.SecurityLogger=${hbase.security.logger} 56 | log4j.additivity.SecurityLogger=false 57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE 58 | 59 | # 60 | # Null Appender 61 | # 62 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 63 | 64 | # 65 | # console 66 | # Add "console" to rootlogger above if you want to use this 67 | # 68 | log4j.appender.console=org.apache.log4j.ConsoleAppender 69 | log4j.appender.console.target=System.err 70 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 72 | log4j.appender.console.threshold=WARN 73 | 74 | # Custom Logging levels 75 | 76 | log4j.logger.org.apache.zookeeper=INFO 77 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 78 | log4j.logger.org.apache.hadoop.hbase=DEBUG 79 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug. 80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO 81 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO 82 | #log4j.logger.org.apache.hadoop.dfs=DEBUG 83 | # Set this class to log INFO only otherwise its OTT 84 | # Enable this to get detailed connection error/retry logging. 85 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE 86 | 87 | 88 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output) 89 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG 90 | 91 | # Uncomment the below if you want to remove logging of client region caching' 92 | # and scan of .META. 
messages 93 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO 94 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO 95 | -------------------------------------------------------------------------------- /playbooks/roles/hbase_regionserver/templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | hbase.root.logger=INFO,console 3 | hbase.security.logger=INFO,console 4 | hbase.log.dir=. 5 | hbase.log.file=hbase.log 6 | 7 | # Define the root logger to the system property "hbase.root.logger". 8 | log4j.rootLogger=${hbase.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hbase.log.dir}/${hbase.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 28 | 29 | # Rolling File Appender properties 30 | hbase.log.maxfilesize=256MB 31 | hbase.log.maxbackupindex=20 32 | 33 | # Rolling File Appender 34 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 35 | log4j.appender.RFA.File=${hbase.log.dir}/${hbase.log.file} 36 | 37 | log4j.appender.RFA.MaxFileSize=${hbase.log.maxfilesize} 38 | log4j.appender.RFA.MaxBackupIndex=${hbase.log.maxbackupindex} 39 | 40 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 41 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 42 | 43 | # 44 | # Security audit appender 45 | # 46 | hbase.security.log.file=SecurityAuth.audit 47 | hbase.security.log.maxfilesize=256MB 48 | hbase.security.log.maxbackupindex=20 49 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 50 | log4j.appender.RFAS.File=${hbase.log.dir}/${hbase.security.log.file} 51 | log4j.appender.RFAS.MaxFileSize=${hbase.security.log.maxfilesize} 52 | log4j.appender.RFAS.MaxBackupIndex=${hbase.security.log.maxbackupindex} 53 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 54 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 55 | log4j.category.SecurityLogger=${hbase.security.logger} 56 | log4j.additivity.SecurityLogger=false 57 | #log4j.logger.SecurityLogger.org.apache.hadoop.hbase.security.access.AccessController=TRACE 58 | 59 | # 60 | # Null Appender 61 | # 62 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 63 | 64 | # 65 | # console 66 | # Add "console" to rootlogger above if you want to use this 67 | # 68 | log4j.appender.console=org.apache.log4j.ConsoleAppender 69 | log4j.appender.console.target=System.err 70 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 71 | log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n 72 | log4j.appender.console.threshold=WARN 73 | 74 | # Custom Logging levels 75 | 76 | log4j.logger.org.apache.zookeeper=INFO 77 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 78 | log4j.logger.org.apache.hadoop.hbase=DEBUG 79 | # Make these two classes INFO-level. Make them DEBUG to see more zk debug. 
80 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZKUtil=INFO 81 | log4j.logger.org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher=INFO 82 | #log4j.logger.org.apache.hadoop.dfs=DEBUG 83 | # Set this class to log INFO only otherwise its OTT 84 | # Enable this to get detailed connection error/retry logging. 85 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=TRACE 86 | 87 | 88 | # Uncomment this line to enable tracing on _every_ RPC call (this can be a lot of output) 89 | #log4j.logger.org.apache.hadoop.ipc.HBaseServer.trace=DEBUG 90 | 91 | # Uncomment the below if you want to remove logging of client region caching' 92 | # and scan of .META. messages 93 | # log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation=INFO 94 | # log4j.logger.org.apache.hadoop.hbase.client.MetaScanner=INFO 95 | -------------------------------------------------------------------------------- /playbooks/roles/spark/templates/spark-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file is sourced when running various Spark programs. 4 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 5 | 6 | # Options read when launching programs locally with 7 | # ./bin/run-example or ./bin/spark-submit 8 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 9 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 10 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 11 | # - SPARK_CLASSPATH, default classpath entries to append 12 | 13 | # Options read by executors and drivers running inside the cluster 14 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 15 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 16 | # - SPARK_CLASSPATH, default classpath entries to append 17 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 18 | # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos 19 | 20 | # Options read in YARN client mode 21 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 22 | # - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2) 23 | # - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1). 24 | # - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G) 25 | # - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb) 26 | # - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) 27 | # - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’) 28 | # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. 29 | # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. 30 | 31 | # Options for the daemons used in the standalone deploy mode 32 | # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname 33 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 34 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 35 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 36 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 
1000m, 2g) 37 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 38 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 39 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 40 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 41 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 42 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 43 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 44 | 45 | # Generic options for the daemons used in the standalone deploy mode 46 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 47 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 48 | # - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) 49 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 50 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 51 | 52 | export JAVA_HOME={{ java_home }} 53 | 54 | export HADOOP_CONF_DIR={{ hadoop_conf_dir }} 55 | export YARN_CONF_DIR={{ hadoop_conf_dir }} 56 | export HADOOP_HOME={{ hadoop_dir }}/current/ 57 | export SPARK_CLASSPATH=$SPARK_CLASSPATH:$HADOOP_HOME/lib/jackson-mapper-asl-1.9.13.jar 58 | export SPARK_PID_DIR={{ spark_pid_dir }} 59 | export SPARK_YARN_USER_ENV="CLASSPATH=/etc/hadoop/conf:{{ hadoop_dir }}/current/:{{ hadoop_dir }}/current/lib/" 60 | 61 | export SPARK_CONF_DIR=/etc/spark/conf 62 | export SPARK_LOG_DIR={{ spark_log_dir }} 63 | 64 | {% if kerberos_realm is defined %} 65 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.kerberos.enabled=true " 66 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.kerberos.principal=spark/{{ ansible_fqdn }}@{{ kerberos_realm }} " 67 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.kerberos.keytab=/etc/spark/conf/spark.keytab " 68 | {% endif %} 69 | -------------------------------------------------------------------------------- /playbooks/roles/hadoop/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: create_hadoop_groups 2 | become: yes 3 | group: 4 | name: "{{ item.name }}" 5 | gid: "{{ item.gid | default(omit) }}" 6 | with_items: 7 | - { name: hdfs, gid: "{{ hdfs_gid | default(omit) }}" } 8 | - { name: yarn, gid: "{{ yarn_gid | default(omit) }}" } 9 | - { name: mapred, gid: "{{ mapred_gid | default(omit) }}" } 10 | - { name: hadoop, gid: "{{ hadoop_gid | default(omit) }}" } 11 | 12 | - name: create_hdfs_user 13 | become: yes 14 | user: 15 | name: "{{ item.name }}" 16 | uid: "{{ item.uid | default(omit) }}" 17 | group: "{{ item.name }}" 18 | groups: 19 | - hadoop 20 | with_items: 21 | - { name: hdfs, uid: "{{ hdfs_uid | default(omit) }}" } 22 | - { name: yarn, uid: "{{ yarn_uid | default(omit) }}" } 23 | - { name: mapred, uid: "{{ mapred_uid | default(omit) }}" } 24 | 25 | - name: prepare_package_dir 26 | become: yes 27 | file: 28 | path: "{{ hadoop_dir }}/package" 29 | state: directory 30 | 31 | - name: download_hadoop_package 32 | become: yes 33 | get_url: 34 | url: "{{ hadoop_release_url }}" 35 | dest: "{{ hadoop_dir }}/package/{{ hadoop_package_filename }}.tar.gz" 36 | checksum: "{{ hadoop_release_checksum }}" 37 | owner: root 38 | group: root 39 | 40 | - name: extract_hadoop_package 41 | become: yes 42 | unarchive: 43 | src: "{{ hadoop_dir }}/package/{{ 
hadoop_package_filename }}.tar.gz"
44 |     dest: "{{ hadoop_dir }}"
45 |     remote_src: yes
46 |     owner: root
47 |     group: root
48 |
49 | - name: fix_etc
50 |   become: yes
51 |   command:
52 |     cmd: "mv hadoop hadoop.backup"
53 |     chdir: "{{ hadoop_dir }}/{{ hadoop_package_filename }}/etc"
54 |     creates: "{{ hadoop_dir }}/{{ hadoop_package_filename }}/etc/hadoop.backup"
55 |
56 | - name: fix_etc
57 |   become: yes
58 |   file:
59 |     path: "{{ hadoop_dir }}/{{ hadoop_package_filename }}/etc/hadoop"
60 |     state: absent
61 |
62 | - name: link_etc
63 |   become: yes
64 |   file:
65 |     path: "{{ hadoop_dir }}/{{ hadoop_package_filename }}/etc/hadoop"
66 |     src: "/etc/hadoop/conf"
67 |     state: link
68 |
69 | - name: link_latest_package
70 |   become: yes
71 |   file:
72 |     path: "{{ hadoop_dir }}/current"
73 |     src: "{{ hadoop_dir }}/{{ hadoop_package_filename }}"
74 |     state: link
75 |
76 | - name: fix_permission_of_container_executor
77 |   become: yes
78 |   file:
79 |     path: "{{ hadoop_dir }}/{{ hadoop_package_filename }}/bin/container-executor"
80 |     owner: root
81 |     group: yarn
82 |     mode: "u=rwx,g=r,o=r,u+s"
83 |
84 | - name: prepare_package_dir
85 |   become: yes
86 |   file:
87 |     path: "{{ jsvc_dir }}/package"
88 |     state: directory
89 |
90 | - name: download_jsvc_package
91 |   become: yes
92 |   get_url:
93 |     url: "{{ jsvc_release_url }}"
94 |     dest: "{{ jsvc_dir }}/package/{{ jsvc_package_filename }}.tar.gz"
95 |     checksum: "{{ jsvc_release_checksum }}"
96 |
97 | - name: extract_jsvc_package
98 |   become: yes
99 |   unarchive:
100 |     src: "{{ jsvc_dir }}/package/{{ jsvc_package_filename }}.tar.gz"
101 |     dest: "{{ jsvc_dir }}"
102 |     owner: "{{ ansible_ssh_user }}"
103 |     group: "{{ ansible_ssh_user }}"
104 |     remote_src: yes
105 |
106 | - name: install_build_tools
107 |   become: yes
108 |   apt: name=build-essential,autoconf state=latest
109 |
110 | - name: build_jsvc_package
111 |   shell:
112 |     cmd: "sh support/buildconf.sh && ./configure --with-java={{ java_home }} && make"
113 |     chdir: "{{ jsvc_dir }}/{{ jsvc_package_filename }}/src/native/unix"
114 |     creates: "{{ jsvc_dir }}/{{ jsvc_package_filename }}/src/native/unix/jsvc"
115 |
116 | - name: copy_jsvc_binary
117 |   become: yes
118 |   copy:
119 |     src: "{{ jsvc_dir }}/{{ jsvc_package_filename }}/src/native/unix/jsvc"
120 |     dest: /usr/bin/jsvc
121 |     remote_src: yes
122 |     owner: root
123 |     group: root
124 |     mode: '0755'
125 |
126 | - name: prepare_commands
127 |   become: yes
128 |   template:
129 |     src: "command-wrapper.sh.j2"
130 |     dest: "/usr/local/bin/{{ item }}"
131 |     mode: '755'
132 |     owner: root
133 |     group: root
134 |   with_items:
135 |     - container-executor
136 |     - test-container-executor
137 |     - hadoop
138 |     - hdfs
139 |     - mapred
140 |     - oom-listener
141 |     - yarn
-------------------------------------------------------------------------------- /playbooks/roles/base/templates/capacity-scheduler.xml.j2: --------------------------------------------------------------------------------
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.1</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>
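  <!-- Illustrative arithmetic (hypothetical cluster size): with the 0.1 limit
       above, a cluster offering 40960 MB to YARN reserves at most ~4096 MB
       for application masters; at the 1024 MB AM size configured in
       group_vars/all/base, roughly four applications can hold AM containers
       at the same time. -->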

  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default</value>
    <description>
      The queues at this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>100</value>
    <description>Default queue target capacity.</description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit, a percentage from 0.0 to 1.0.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>40</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler
      attempts to schedule rack-local containers.
      Typically this should be set to the number of nodes in the cluster. By
      default it is set to 40, approximately the number of nodes in one rack.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to assign jobs to queues.
      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
      Typically this list will be used to map users to queues,
      for example, u:%user:%user maps all users to queues with the same name
      as the user.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a queue mapping is present, will it override the value specified
      by the user? This can be used by administrators to place jobs in queues
      that are different than the one specified by the user.
      The default is false.
    </description>
  </property>

</configuration>
--------------------------------------------------------------------------------