├── .gitignore ├── .travis.yml ├── .yamllint ├── LICENSE ├── README.md ├── ansible.cfg ├── defaults └── main.yml ├── handlers └── main.yml ├── inventory ├── meta └── main.yml ├── molecule └── default │ ├── Dockerfile.j2 │ ├── INSTALL.rst │ ├── create.yml │ ├── destroy.yml │ ├── molecule.yml │ ├── playbook.yml │ ├── prepare.yml │ ├── templates │ └── hosts.j2 │ └── tests │ ├── test_default.py │ └── test_namenodes.py ├── requirements.yml ├── tasks ├── base.yml ├── bootstrap_ha.yml ├── bootstrap_spof.yml ├── config.yml ├── datanode.yml ├── journalnode.yml ├── main.yml ├── namenode.yml ├── native.yml ├── scripts.yml ├── secondarynamenode.yml ├── ssh_fence.yml ├── upgrade.yml ├── upgrade_datanode.yml ├── upgrade_journalnode.yml ├── upgrade_namenode.yml └── user.yml └── templates ├── audit-compress-rotate.sh.j2 ├── core-site.xml.j2 ├── dfs.hosts.exclude.j2 ├── hadoop-env.sh.j2 ├── hadoop_sudoers.j2 ├── hdfs-service.j2 ├── hdfs-site.xml.j2 ├── hdfs.service.j2 ├── log4j.properties.j2 ├── mapred-site.xml.j2 ├── rack-awareness.sh.j2 └── slaves.j2 /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac OS 2 | .DS_Store 3 | 4 | # IntelliJ Idea generated files 5 | *.iml 6 | *.ipr 7 | .idea_modules/ 8 | .idea/ 9 | *.iws 10 | .bundle/ 11 | vendor/ 12 | *.retry 13 | .molecule 14 | .cache 15 | __pycache__ 16 | rsa_key/ 17 | **/*.pyc 18 | hadoop-* 19 | pytestdebug.log 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | sudo: required 3 | language: python 4 | services: 5 | - docker 6 | 7 | install: 8 | - pip install ansible 9 | - pip install molecule 10 | - pip install docker-py 11 | 12 | script: 13 | - molecule --version 14 | - ansible --version 15 | - molecule test 16 | notifications: 17 | webhooks: https://galaxy.ansible.com/api/v1/notifications/ 18 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | extends: default 2 | 3 | rules: 4 | braces: 5 | max-spaces-inside: 1 6 | level: error 7 | brackets: 8 | max-spaces-inside: 1 9 | level: error 10 | line-length: disable 11 | truthy: disable 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## HDFS 2 | [![Build Status](https://travis-ci.org/teralytics/ansible-hdfs.svg?branch=master)](https://travis-ci.org/teralytics/ansible-hdfs) 3 | 4 | 5 | ### Introduction 6 | This role installs HDFS on Ubuntu/Debian Linux servers. 7 | 8 | ### Role dependencies 9 | * The role requires Java and ZooKeeper to be installed, configured and running. 10 | 11 | ### Example 12 | 13 | ```yml 14 | - hosts: hadoop_hosts 15 | become: True 16 | roles: 17 | - hdfs 18 | ``` 19 | 20 | For an example inventory please check the [inventory](https://github.com/teralytics/ansible-hdfs/blob/master/inventory) file. 21 | 22 | If ```hdfs_ssh_fence``` is set to ```true```, the playbook has to be run with the ```-K``` option of ansible-playbook! 23 | 24 | 25 | ### Configuration 26 | This role supports two different modes of installation: 27 | 28 | * Single Namenode with Secondary Namenode 29 | * Two Namenodes in HA mode 30 | 31 | The number of *namenodes* determines the mode: if two namenodes are specified, HDFS will be installed in HA mode. 32 | 33 | 34 | For documentation details of HDFS please refer to the official [Hadoop documentation](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html). 35 | 36 | #### Preparation of your Inventory 37 | This role uses inventory groups to figure out which server needs which installation. The groups are listed below: 38 | 39 | * namenodes 40 | * datanodes 41 | * secondarynamenode (Single NN setup only) 42 | * zookeeper_hosts (High availability mode only) 43 | * journalnodes (High availability mode only) 44 | 45 | Alternatively, variables like ```hdfs_namenodes``` can be overwritten (see [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml)). 46 | 47 | #### Important variables: 48 | The following is a list of important variables that have to be set for a specific deployment. Most variables can be set in *group_vars* or *host_vars*. 49 | 50 | * ```hdfs_cluster_name```: Name of your cluster 51 | * ```hdfs_parent_dir```: Where to install HDFS to 52 | * ```hdfs_version```: Hadoop version to use 53 | * ```hdfs_tmpdir```: Where to write HDFS tmp files 54 | * ```hdfs_namenode_dir_list```: Directories where the namenode stores its metadata 55 | * ```hdfs_datanode_dir_list```: Directories where datanodes store their data 56 | * ```hdfs_namenode_checkpoint_dir_list```: Directories where the secondary namenode stores checkpoint images 57 | * ```hdfs_distribution_method```: How to obtain the Hadoop tar.gz: 'download', 'local_file' or 'compile' 58 | * ```hdfs_bootstrap```: Should the cluster be formatted? (Not recommended if you already have an existing installation) 59 | * ```hdfs_host_domain_name```: Only set this variable if your host entries are not FQDNs. E.g. value: "node.dns.example.com" 60 | * ```hdfs_upgrade```: Only set this variable to perform an upgrade (given that hdfs_version is changed) 61 | * ```hdfs_upgrade_force```: Only set this variable to force an upgrade (the playbook will run even if the version hasn't changed. Useful when something went wrong and a node has already been upgraded) 62 | 63 | For more configuration variables see the documentation in [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml). 64 | 65 | If ```hdfs_upgrade``` is set to ```true```, the playbook will assume an upgrade is taking place and some input from the user might be required.
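A minimal *group_vars* sketch tying these variables together might look like the following. The variable names come from [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml); the values are purely illustrative (several simply repeat the role defaults, and the `/data/dfs/...` paths are an assumed dedicated data mount):

```yml
# group_vars/hadoop_hosts.yml -- illustrative values only, adjust to your environment
hdfs_cluster_name: cluster1
hdfs_version: 2.8.2
hdfs_parent_dir: /usr/local
hdfs_tmpdir: /tmp
hdfs_distribution_method: download
hdfs_namenode_dir_list:
  - /data/dfs/name             # assumed mount point
hdfs_namenode_checkpoint_dir_list:
  - /data/dfs/secondaryname    # assumed mount point
hdfs_datanode_dir_list:
  - /data/dfs/data             # assumed mount point
hdfs_bootstrap: False          # set to True only when formatting a fresh cluster
```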
66 | 67 | #### Additional HDFS configuration 68 | Additional configuration for ```hdfs-site.xml``` and ```core-site.xml``` can be added by overwriting the following variables: 69 | 70 | - ```hdfs_site_additional_properties``` 71 | - ```core_site_additional_properties``` 72 | 73 | #### Description of playbooks 74 | This section gives a brief description of what each playbook does. 75 | 76 | ##### Native (experimental) 77 | CURRENTLY ONLY WORKS WITH Ubuntu 14.04. (16.04. has a newer protobuf version and compilation fails) 78 | 79 | This playbook will compile hadoop on the server *hdfs_compile_node* to enable [hadoop native libraries](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/NativeLibraries.html) (compression codecs and [HDFS Short-Circuit Local Reads](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html)). 80 | This playbook will install the development tools necessary to compile hadoop. (Download and compilation may take a while depending on your internet connection and server power (10-20 min)) 81 | 82 | To activate this playbook, set ```hdfs_distribution_method``` to ```compile```. 83 | 84 | Known issues: 85 | 86 | * Sometimes the git download fails the first time. Just run it again. 87 | 88 | Options: 89 | 90 | * ```hdfs_compile_node```: Server to compile on 91 | * ```hdfs_compile_from_git```: True if it should download the latest version from github.com 92 | * ```hdfs_compile_version```: Version to download from github (tags usable, e.g. 'tags/rel/release-2.7.2' or 'HEAD') 93 | * ```hdfs_fetch_folder```: Local folder to download the compiled tar to. 94 | 95 | ##### base 96 | This playbook installs the hadoop binaries and creates links for easy usage. 97 | 98 | ##### config 99 | This playbook writes the configuration files. 100 | 101 | ##### upgrade 102 | This playbook upgrades HDFS in a controlled way (applicable only to HA mode). It follows a no-downtime procedure that can be summarized as follows: 103 | 104 | 1. Prepare rolling upgrade, wait for "Proceed with rolling upgrade" 105 | ..1. Perform upgrade of the active namenode (by means of failover to the standby) 106 | ..2. Failover to the newly upgraded namenode, upgrade the second namenode 107 | 2. Perform upgrade of the datanodes in a rolling fashion 108 | ..1. Stop the running datanode (check if running) 109 | ..2. Install the new version 110 | ..3. Restart it with the new program version (check if running) 111 | 3. Finalize the rolling upgrade 112 | 113 | Be prepared to react to prompts from the playbook, especially when services are being started and stopped. 114 | If anything goes wrong and some nodes were already upgraded, run the playbook again with ```hdfs_upgrade_force``` set to ```True```. This process is idempotent. 115 | 116 | ##### user 117 | This playbook will create the user ```hdfs_user```, generate an ssh-key for it, distribute the key and register all servers in each other's known_hosts file. 118 | 119 | ##### ssh_fence 120 | This playbook sets up SSH access for the ```hdfs_user``` between the namenode servers. Use this if SSH fencing is the preferred fencing method. (See the [HA Documentation](https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html)) 121 | 122 | ##### namenode 123 | This playbook writes configuration files needed only by the namenode, creates folders and sets up services for the namenode and zkfc.
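The services registered here (and by the datanode/journalnode playbooks below) are plain init.d/systemd units following the `hdfs-<component>` naming used throughout the role. As a rough illustration only (not part of the role), an ad-hoc play to verify they are running on the namenode hosts could look like:

```yml
- hosts: namenodes
  become: True
  tasks:
    - name: Ensure namenode services are running
      service:
        name: "{{ item }}"
        state: started
      with_items:
        - hdfs-namenode
        - hdfs-zkfc   # only deployed when HA is enabled
```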
124 | 125 | ##### datanode 126 | This playbook creates the folders specified in ```hdfs_datanode_dir_list``` and registers the hdfs-datanode service. 127 | 128 | ##### journalnode 129 | This playbook will install the journalnode service. 130 | 131 | ##### secondarynamenode 132 | This playbook will install and register the hdfs-secondarynamenode service. 133 | 134 | ##### bootstrap_ha 135 | This playbook bootstraps a cluster in HA mode. 136 | 137 | ##### bootstrap_spof 138 | This playbook bootstraps a cluster in SPOF mode. (One namenode and one secondary namenode) 139 | 140 | ### Testing 141 | The tests are run using [molecule](https://github.com/metacloud/molecule) and a Docker container. 142 | 143 | #### Requirements 144 | - Docker 145 | - molecule (pip module) 146 | - docker-py (pip module) 147 | 148 | #### Running tests 149 | 150 | From the root folder run ```molecule test```. 151 | 152 | ### License 153 | Apache 2.0 154 | 155 | ### Author information 156 | 157 | - Bertrand Bossy 158 | - Florian Froese 159 | - Laurent Hoss 160 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | roles_path = ../:../../ 3 | hostfile = inventory 4 | -------------------------------------------------------------------------------- /defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # ------------------------------------------------------------------------------ 3 | # General cluster settings 4 | # ------------------------------------------------------------------------------ 5 | hdfs_cluster_name: cluster1 6 | hdfs_user: hdfs 7 | hdfs_group: hadoop 8 | hdfs_user_home: "/var/lib/{{ hdfs_user }}" 9 | hdfs_version: 2.8.2 10 | hdfs_upgrade: False 11 | hdfs_upgrade_force: False 12 | 13 | hdfs_java_home: /usr/lib/jvm/java-1.8.0-openjdk-amd64 14 | 15 | hdfs_extra_classpath: [] 16 | 17 | # Bootstraps the cluster (format namenodes, zkfc, journalnodes, start all services) 18 | # Please read the code before you activate this option. 19 | # Especially if you already have a hadoop setup in place. 20 | hdfs_bootstrap: False 21 | 22 | # Use ansible handlers? 23 | hdfs_ansible_handlers: True 24 | # Redistribute ssh keys every time?
25 | hdfs_redistribute_ssh_keys: False 26 | 27 | hdfs_parent_dir: /usr/local # hadoop binaries will be copied here 28 | hdfs_ssh_known_hosts_file: "{{ hdfs_user_home }}/.ssh/known_hosts" 29 | 30 | # ------------------------------------------------------------------------------ 31 | # Hadoop installation source 32 | # ------------------------------------------------------------------------------ 33 | hdfs_distribution_method: "download" # Method to use for archive installation ("download", "local_file" or "compile") 34 | hdfs_download_url: "https://archive.apache.org/dist/hadoop/core/hadoop-{{ hdfs_version }}/hadoop-{{ hdfs_version }}.tar.gz" 35 | hdfs_local_archive_path: "./" 36 | 37 | # ------------------------------------------------------------------------------ 38 | # Hadoop host variables 39 | # ------------------------------------------------------------------------------ 40 | hdfs_namenodes: "{{ groups.namenodes }}" 41 | hdfs_hadoop_hosts: "{{ groups.hadoop_hosts }}" 42 | hdfs_journalnodes: "{{ groups.journalnodes }}" 43 | hdfs_secondary_namenode: "{{ groups.secondarynamenode if groups.secondarynamenode is defined else [] }}" 44 | hdfs_datanodes: "{{ groups.datanodes }}" 45 | hdfs_zookeeper_hosts: "{{ groups.zookeeper_hosts }}" 46 | 47 | # ------------------------------------------------------------------------------ 48 | # Hadoop native libraries (experimental) 49 | # ------------------------------------------------------------------------------ 50 | hdfs_compile_from_source: "{{ hdfs_distribution_method == 'compile' }}" 51 | hdfs_compile_node: "{{ hdfs_namenodes[0] }}" 52 | hdfs_compile_from_git: True 53 | hdfs_compile_version: "tags/rel/release-{{hdfs_version}}" 54 | hdfs_fetch_folder: /tmp/ansible_fetch 55 | 56 | # ------------------------------------------------------------------------------ 57 | # HA specific setup 58 | # ------------------------------------------------------------------------------ 59 | # Use ssh as fencing method (other option is shell(/bin/true) 60 | hdfs_ssh_fence: True 61 | 62 | hdfs_ha_enabled: "{{hdfs_namenodes | count > 1}}" 63 | hdfs_default_fs: "hdfs://{{ hdfs_nameservices if hdfs_ha_enabled else hdfs_namenodes[0] + ':8020' }}" 64 | hdfs_nameservices: "{{ hdfs_cluster_name }}" 65 | hdfs_zookeeper_client_port: 2181 66 | hdfs_zookeeper_quorum: "{{ hdfs_zookeeper_hosts | join(':' + (hdfs_zookeeper_client_port | string) + ',') }}:{{ hdfs_zookeeper_client_port | string }}" 67 | 68 | 69 | # ------------------------------------------------------------------------------ 70 | # Non-HA specific setup 71 | # ------------------------------------------------------------------------------ 72 | hdfs_secondary_namenode_http_address: "0.0.0.0:50090" 73 | 74 | # ------------------------------------------------------------------------------ 75 | # Hadoop configuration 76 | # ------------------------------------------------------------------------------ 77 | 78 | # Symlink for hadoop to the version you are installing 79 | hdfs_hadoop_home: "{{hdfs_parent_dir}}/hadoop" 80 | hdfs_conf_dir: "{{hdfs_hadoop_home}}/etc/hadoop" 81 | hdfs_bin_dir: "{{hdfs_hadoop_home}}/bin" 82 | hdfs_log_dir: /var/log/hadoop 83 | hdfs_tmpdir: "/tmp" 84 | 85 | # Directories where namenode should store metadata 86 | hdfs_namenode_dir_list: 87 | - "/tmp/dfs/name" 88 | # Directories where secondary namenode should store temporary images to merge 89 | hdfs_namenode_checkpoint_dir_list: 90 | - "/tmp/dfs/secondaryname" 91 | # Directories where datanodes should store data 92 | hdfs_datanode_dir_list: 
93 | - "/tmp/dfs/data" 94 | 95 | # Directories where journal nodes should store edits 96 | hdfs_dfs_journalnode_edits_dir: "/tmp/dfs/journaldir" 97 | hdfs_dfs_journalnode_edits_dir_perm: "700" 98 | 99 | hdfs_enable_short_circuit_reads: true # IMPORTANT: this property should be 'true' or 'false' 100 | 101 | # ------------------------------------------------------------------------------ 102 | # Extended core-site.xml 103 | # ------------------------------------------------------------------------------ 104 | hdfs_tmpdir_user: "{{hdfs_tmpdir}}/hadoop-${user.name}" 105 | hdfs_fs_trash_interval: 0 106 | hdfs_fs_trash_checkpoint_interval: 0 # If 0 this is set to the value of hdfs_fs_trash_interval by hadoop 107 | 108 | # ------------------------------------------------------------------------------ 109 | # Extended hdfs-site.xml 110 | # ------------------------------------------------------------------------------ 111 | 112 | hdfs_fs_permissions_umask_mode: "002" 113 | hdfs_dfs_permissions_superusergroup: "{{hdfs_group}}" 114 | hdfs_dfs_blocksize: 134217728 115 | hdfs_dfs_namenode_write_stale_datanode_ratio: "0.5f" 116 | hdfs_dfs_datanode_du_reserved: "1073741824" 117 | hdfs_dfs_datanode_data_dir_perm: "700" 118 | hdfs_dfs_datanode_max_transfer_threads: 4096 119 | hdfs_dfs_replication: 3 120 | hdfs_dfs_replication_max: 50 121 | hdfs_dfs_namenode_replication_min: 1 122 | hdfs_dfs_namenode_checkpoint_period: 3600 123 | # the recommended 'namenode handler count' is best defined by formula: lb(#datanodes) * 20 124 | # and recommended 'service handler count' 50% of the previous value 125 | # Ref: https://community.hortonworks.com/articles/43839/scaling-the-hdfs-namenode-part-2.html 126 | # -> for an average cluster 10-20 DNs the value 64 is a good average (for 32+ DNs -> 100+) 127 | hdfs_dfs_namenode_handler_count: 32 128 | hdfs_dfs_namenode_service_handler_count: "{{ (hdfs_dfs_namenode_handler_count / 2)|int}}" 129 | hdfs_dfs_namenode_avoid_read_stale_datanode: true 130 | hdfs_dfs_namenode_avoid_write_stale_datanode: true 131 | hdfs_dfs_namenode_audit_log_async: false 132 | hdfs_dfs_client_file_block_storage_locations_num_threads: 10 133 | hdfs_dfs_client_file_block_storage_locations_timeout_millis: 1000 134 | hdfs_dfs_domain_socket_path_folder: /var/lib/hadoop-hdfs 135 | 136 | # ------------------------------------------------------------------------------ 137 | # log4j.properties vars 138 | # ------------------------------------------------------------------------------ 139 | 140 | hadoop_log_maxfilesize: "256MB" 141 | hadoop_log_maxbackupindex: 20 142 | 143 | # ------------------------------------------------------------------------------ 144 | # hadoop-env.sh vars 145 | # ------------------------------------------------------------------------------ 146 | hdfs_namenode_heap_size: "2048m" 147 | hdfs_namenode_javaOpts: "-Xmx{{hdfs_namenode_heap_size}}" 148 | hdfs_datanode_javaOpts: "" 149 | 150 | # default logger selection used in hadoop-env.sh 151 | hadoop_security_logger: "INFO,RFAS" 152 | hadoop_audit_logger: "INFO,NullAppender" 153 | 154 | # ------------------------------------------------------------------------------ 155 | # Rack specific 156 | # ------------------------------------------------------------------------------ 157 | 158 | # rack awareness script: see https://bigdataprocessing.wordpress.com/2013/07/30/hadoop-rack-awareness-and-configuration/) 159 | # and templates/rack-awareness.sh.j2 160 | # if this is not defined, the hdfs will not be rack aware. 
DO NOT USE SINGLE QUOTES (or make sure it works) 161 | # hdfs_rack_script_awk: '"{if ($4 < 3) print "rack-1"; else print "rack-2" }"' 162 | hdfs_rack_script_path: "{{hdfs_conf_dir}}/rack-awareness.sh" 163 | 164 | # ------------------------------------------------------------------------------ 165 | # Custom scripts 166 | # ------------------------------------------------------------------------------ 167 | hdfs_audit_rotate_days: 90 # ISO 27001 compliance 168 | -------------------------------------------------------------------------------- /handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Force systemd to reread configs 3 | systemd: daemon_reload=yes 4 | when: ansible_service_mgr == "systemd" 5 | 6 | - name: Restart namenode 7 | service: name=hdfs-namenode state=restarted 8 | when: 9 | - inventory_hostname in hdfs_namenodes 10 | - hdfs_ansible_handlers|bool 11 | 12 | - name: Restart datanode 13 | service: name=hdfs-datanode state=restarted 14 | when: 15 | - inventory_hostname in hdfs_datanodes 16 | - hdfs_ansible_handlers|bool 17 | 18 | - name: Restart journalnode 19 | service: name=hdfs-journalnode state=restarted 20 | when: 21 | - hdfs_ha_enabled 22 | - inventory_hostname in hdfs_journalnodes 23 | - hdfs_ansible_handlers|bool 24 | 25 | - name: Restart zkfc 26 | service: name=hdfs-zkfc state=restarted 27 | when: 28 | - hdfs_ha_enabled 29 | - inventory_hostname in hdfs_namenodes 30 | - hdfs_ansible_handlers|bool 31 | 32 | - name: Restart secondary namenode 33 | service: name=hdfs-secondarynamenode state=restarted 34 | when: 35 | - not hdfs_ha_enabled 36 | - inventory_hostname in hdfs_secondary_namenode 37 | - hdfs_ansible_handlers|bool 38 | -------------------------------------------------------------------------------- /inventory: -------------------------------------------------------------------------------- 1 | [hadoop_hosts] 2 | hdfs0[1:3] 3 | 4 | [namenodes] 5 | hdfs0[1:2] 6 | 7 | [journalnodes] 8 | hdfs0[1:3] 9 | 10 | [datanodes] 11 | hdfs0[1:3] 12 | 13 | [zookeeper_hosts] 14 | hdfs0[1:3] 15 | -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | galaxy_info: 3 | author: Florian Froese 4 | description: Installing HDFS for Ubuntu. 
5 | company: Teralytics AG 6 | license: Apache 2.0 7 | min_ansible_version: 2.4 8 | platforms: 9 | - name: Ubuntu 10 | versions: 11 | - trusty 12 | - saucy 13 | - raring 14 | - vivid 15 | - wily 16 | - xenial 17 | - name: Debian 18 | versions: 19 | - squeeze 20 | - wheezy 21 | - jessie 22 | categories: 23 | - hdfs 24 | galaxy_tags: 25 | - hdfs 26 | - hadoop 27 | - native 28 | - installer 29 | -------------------------------------------------------------------------------- /molecule/default/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | # Molecule managed 2 | 3 | FROM {{ item.image }} 4 | 5 | RUN if [ $(command -v apt-get) ]; then apt-get update && apt-get upgrade -y && apt-get install -y python sudo bash ca-certificates && apt-get clean; \ 6 | elif [ $(command -v dnf) ]; then dnf makecache && dnf --assumeyes install python sudo python-devel python2-dnf bash && dnf clean all; \ 7 | elif [ $(command -v yum) ]; then yum makecache fast && yum update -y && yum install -y python sudo yum-plugin-ovl bash && sed -i 's/plugins=0/plugins=1/g' /etc/yum.conf && yum clean all; \ 8 | elif [ $(command -v zypper) ]; then zypper refresh && zypper update -y && zypper install -y python sudo bash python-xml && zypper clean -a; \ 9 | elif [ $(command -v apk) ]; then apk update && apk add --no-cache python sudo bash ca-certificates; fi 10 | -------------------------------------------------------------------------------- /molecule/default/INSTALL.rst: -------------------------------------------------------------------------------- 1 | ******* 2 | Install 3 | ******* 4 | 5 | Requirements 6 | ============ 7 | 8 | * Docker Engine 9 | * docker-py 10 | 11 | Install 12 | ======= 13 | 14 | .. code-block:: bash 15 | 16 | $ sudo pip install docker-py 17 | -------------------------------------------------------------------------------- /molecule/default/create.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create 3 | hosts: localhost 4 | connection: local 5 | gather_facts: false 6 | no_log: "{{ not lookup('env', 'MOLECULE_DEBUG') | bool }}" 7 | vars: 8 | molecule_file: "{{ lookup('env', 'MOLECULE_FILE') }}" 9 | molecule_ephemeral_directory: "{{ lookup('env', 'MOLECULE_EPHEMERAL_DIRECTORY') }}" 10 | molecule_scenario_directory: "{{ lookup('env', 'MOLECULE_SCENARIO_DIRECTORY') }}" 11 | molecule_yml: "{{ lookup('file', molecule_file) | molecule_from_yaml }}" 12 | tasks: 13 | - name: Create Dockerfiles from image names 14 | template: 15 | src: "{{ molecule_scenario_directory }}/Dockerfile.j2" 16 | dest: "{{ molecule_ephemeral_directory }}/Dockerfile_{{ item.image | regex_replace('[^a-zA-Z0-9_]', '_') }}" 17 | with_items: "{{ molecule_yml.platforms }}" 18 | register: platforms 19 | 20 | - name: Discover local Docker images 21 | docker_image_facts: 22 | name: "molecule_local/{{ item.item.name }}" 23 | with_items: "{{ platforms.results }}" 24 | register: docker_images 25 | 26 | - name: Build an Ansible compatible image 27 | docker_image: 28 | path: "{{ molecule_ephemeral_directory }}" 29 | name: "molecule_local/{{ item.item.image }}" 30 | dockerfile: "{{ item.item.dockerfile | default(item.invocation.module_args.dest) }}" 31 | force: "{{ item.item.force | default(true) }}" 32 | with_items: "{{ platforms.results }}" 33 | when: platforms.changed or docker_images.results | map(attribute='images') | select('equalto', []) | list | count >= 0 34 | 35 | - name: Create molecule instance(s) 36 | docker_container: 37 | name: "{{ 
item.name }}" 38 | hostname: "{{ item.name }}" 39 | image: "molecule_local/{{ item.image }}" 40 | state: started 41 | recreate: false 42 | log_driver: json-file 43 | command: "{{ item.command | default('bash -c \"while true; do sleep 10000; done\"') }}" 44 | privileged: "{{ item.privileged | default(omit) }}" 45 | volumes: "{{ item.volumes | default(omit) }}" 46 | capabilities: "{{ item.capabilities | default(omit) }}" 47 | ports: "{{ item.exposed_ports | default(omit) }}" 48 | ulimits: "{{ item.ulimits | default(omit) }}" 49 | register: server 50 | with_items: "{{ molecule_yml.platforms }}" 51 | async: 7200 52 | poll: 0 53 | 54 | - name: Wait for instance(s) creation to complete 55 | async_status: 56 | jid: "{{ item.ansible_job_id }}" 57 | register: docker_jobs 58 | until: docker_jobs.finished 59 | retries: 300 60 | with_items: "{{ server.results }}" 61 | -------------------------------------------------------------------------------- /molecule/default/destroy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Destroy 3 | hosts: localhost 4 | connection: local 5 | gather_facts: false 6 | no_log: "{{ not lookup('env', 'MOLECULE_DEBUG') | bool }}" 7 | vars: 8 | molecule_file: "{{ lookup('env', 'MOLECULE_FILE') }}" 9 | molecule_yml: "{{ lookup('file', molecule_file) | molecule_from_yaml }}" 10 | tasks: 11 | - name: Destroy molecule instance(s) 12 | docker_container: 13 | name: "{{ item.name }}" 14 | state: absent 15 | force_kill: "{{ item.force_kill | default(true) }}" 16 | register: server 17 | with_items: "{{ molecule_yml.platforms }}" 18 | async: 7200 19 | poll: 0 20 | 21 | - name: Wait for instance(s) deletion to complete 22 | async_status: 23 | jid: "{{ item.ansible_job_id }}" 24 | register: docker_jobs 25 | until: docker_jobs.finished 26 | retries: 300 27 | with_items: "{{ server.results }}" 28 | -------------------------------------------------------------------------------- /molecule/default/molecule.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dependency: 3 | name: galaxy 4 | options: 5 | ignore-certs: True 6 | ignore-errors: True 7 | role-file: requirements.yml 8 | lint: 9 | name: yamllint 10 | driver: 11 | name: docker 12 | platforms: 13 | - name: hdfs1 14 | hostname: "{{ item.name }}" 15 | image: solita/ubuntu-systemd:16.04 16 | command: /sbin/init 17 | privileged: True 18 | groups: 19 | - namenodes 20 | - hadoop_hosts 21 | - journalnodes 22 | - datanodes 23 | - zookeeper_hosts 24 | - name: hdfs2 25 | hostname: "{{ item.name }}" 26 | image: solita/ubuntu-systemd:16.04 27 | command: /sbin/init 28 | privileged: True 29 | groups: 30 | - namenodes 31 | - hadoop_hosts 32 | - journalnodes 33 | - datanodes 34 | - zookeeper_hosts 35 | - name: hdfs3 36 | hostname: "{{ item.name }}" 37 | image: solita/ubuntu-systemd:16.04 38 | command: /sbin/init 39 | privileged: True 40 | groups: 41 | - hadoop_hosts 42 | - journalnodes 43 | - datanodes 44 | - zookeeper_hosts 45 | provisioner: 46 | name: ansible 47 | inventory: 48 | group_vars: 49 | all: 50 | java_home: /usr/lib/jvm/java-8-openjdk-amd64 51 | java_packages: 52 | - openjdk-8-jdk 53 | java_openjdk_ppa_repos_support: true 54 | 55 | hdfs_bootstrap: True 56 | hdfs_ansible_handlers: False 57 | hdfs_ssh_fence: False 58 | zookeeper_hosts: 59 | zookeeper_version: 3.4.8 60 | zookeeper_client_port: "2181" 61 | # used by other role defaults (just grep for it) 62 | zookeeper_hostnames: "{{ groups.zookeeper_hosts | join(':' + zookeeper_client_port 
+ ',') }}:{{ zookeeper_client_port }}" 63 | # used in the role: ansible-zookeepr 64 | zookeeper_hosts: "{{ groups.zookeeper_hosts }}" 65 | zookeeper_register_path_env: true 66 | zookeeper_debian_apt_install: true 67 | zookeeper_debian_apt_repositories: 68 | - repository_url: "ppa:ufscar/zookeeper" 69 | distro_version: "14.04" 70 | 71 | # custom zookeeper (bin) dir (role default is in /opt ) 72 | zookeeper_dir: /usr/local/zookeeper-{{zookeeper_version}} 73 | zookeeper_tarball_dir: /tmp/ 74 | lint: 75 | name: ansible-lint 76 | scenario: 77 | name: default 78 | verifier: 79 | name: testinfra 80 | lint: 81 | name: flake8 82 | -------------------------------------------------------------------------------- /molecule/default/playbook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: all 3 | become: True 4 | vars: 5 | hostgroup: 'all' # Which group in the ansible inventory should be considered 6 | ipaddrnr: 0 7 | hosts_file: /etc/hosts 8 | tasks: 9 | - name: Install dnsmasq 10 | apt: name=dnsmasq state=present 11 | - name: Redo setup again after dnsmasq was installed 12 | setup: 13 | - name: Overwrite nameserver in /etc/resolv.conf 14 | shell: bash -c "echo 'nameserver 127.0.0.1' > /etc/resolv.conf" && touch /etc/changedResolv 15 | args: 16 | creates: /etc/changedResolv 17 | - name: Generate /etc/hosts from group '{{ hostgroup }}' 18 | template: src=hosts.j2 dest=/tmp/hosts owner=root group=root mode=0644 backup=yes 19 | - name: Add /tmp/hosts to dnsmasq.conf 20 | lineinfile: dest=/etc/dnsmasq.conf regexp='^addn-hosts=' line='addn-hosts=/tmp/hosts' 21 | register: dnsmasqconf_changed 22 | - name: Add server 8.8.8.8 to dnsmasq.conf 23 | lineinfile: dest=/etc/dnsmasq.conf regexp='^server=8.8.8.8' line='server=8.8.8.8' 24 | - name: Restart dnsmasq 25 | service: name=dnsmasq state=restarted 26 | when: dnsmasqconf_changed.changed 27 | tags: 28 | - skip_ansible_lint 29 | - name: Install netstat for unit tests 30 | apt: name=net-tools state=present 31 | 32 | ## End of docker hack 33 | 34 | - hosts: all 35 | become: True 36 | roles: 37 | - role: "lhoss.java" 38 | 39 | - hosts: zookeeper_hosts 40 | become: True 41 | roles: 42 | - { role: "teralytics.zookeeper" } 43 | 44 | - hosts: hadoop_hosts 45 | become: True 46 | roles: 47 | - ansible-hdfs 48 | serial: "{{ serial|default(0) }}" 49 | -------------------------------------------------------------------------------- /molecule/default/prepare.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Prepare 3 | hosts: all 4 | gather_facts: false 5 | tasks: [] 6 | -------------------------------------------------------------------------------- /molecule/default/templates/hosts.j2: -------------------------------------------------------------------------------- 1 | 127.0.0.1 localhost 2 | 3 | # The following lines are desirable for IPv6 capable hosts 4 | #::1 ip6-localhost ip6-loopback 5 | #fe00::0 ip6-localnet 6 | #ff00::0 ip6-mcastprefix 7 | #ff02::1 ip6-allnodes 8 | #ff02::2 ip6-allrouters 9 | #ff02::3 ip6-allhosts 10 | 11 | ## {{ ansible_managed }} 12 | 13 | {% for host in groups[ hostgroup ] %} 14 | {{ hostvars[host]["ansible_eth0"]["ipv4"]["address"] }} {{ host }} 15 | {% endfor %} 16 | 17 | {% if additional_host_lines is defined %} 18 | ## Additional hosts 19 | {% for host_line in additional_host_lines %} 20 | {{ host_line }} 21 | {% endfor %} 22 | {% endif %} 23 | -------------------------------------------------------------------------------- 
/molecule/default/tests/test_default.py: -------------------------------------------------------------------------------- 1 | import testinfra.utils.ansible_runner 2 | 3 | testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner( 4 | '.molecule/inventory').get_hosts('all') 5 | 6 | 7 | def get(e, nodeName): 8 | arg = r"./property[name='{nodename}']".format(nodename=nodeName) 9 | return e.find(arg)[1].text 10 | 11 | 12 | def test_hosts_file(File): 13 | f = File('/etc/hosts') 14 | 15 | assert f.exists 16 | assert f.user == 'root' 17 | assert f.group == 'root' 18 | 19 | 20 | def test_hdfs_site(File): 21 | f = File('/usr/local/hadoop/etc/hadoop/hdfs-site.xml') 22 | 23 | import xml.etree.ElementTree 24 | e = xml.etree.ElementTree.fromstring(f.content_string) 25 | assert e.tag == 'configuration' 26 | assert get(e, 'dfs.nameservices') == 'cluster1' 27 | assert get(e, 'dfs.nameservice.id') == 'cluster1' 28 | assert set(get(e, 'dfs.ha.namenodes.cluster1').split( 29 | ',')) == set('hdfs1,hdfs2'.split(',')) 30 | 31 | assert f.exists 32 | assert f.user == 'hdfs' 33 | assert f.group == 'hadoop' 34 | assert f.mode == 0o755 35 | 36 | 37 | def test_core_site(File): 38 | f = File('/usr/local/hadoop/etc/hadoop/core-site.xml') 39 | 40 | import xml.etree.ElementTree 41 | e = xml.etree.ElementTree.fromstring(f.content_string) 42 | assert e.tag == 'configuration' 43 | assert get(e, 'fs.defaultFS') == 'hdfs://cluster1' 44 | assert set(get(e, 'ha.zookeeper.quorum').split(',')) == set( 45 | 'hdfs1:2181,hdfs2:2181,hdfs3:2181'.split(',')) 46 | 47 | assert f.exists 48 | assert f.user == 'hdfs' 49 | assert f.group == 'hadoop' 50 | assert f.mode == 0o755 51 | 52 | 53 | def test_hdfs_datanode_running(Service): 54 | service = Service('hdfs-datanode') 55 | 56 | assert service.is_running 57 | assert service.is_enabled 58 | 59 | 60 | def test_zookeeper_running(Service): 61 | service = Service('zookeeper') 62 | 63 | assert service.is_running 64 | assert service.is_enabled 65 | 66 | 67 | def test_hdfs_journal_running(Service): 68 | service = Service('hdfs-journalnode') 69 | 70 | assert service.is_running 71 | 72 | 73 | def test_hdfs_datanode_web_listening(Socket): 74 | socket = Socket('tcp://0.0.0.0:50075') 75 | 76 | assert socket.is_listening 77 | -------------------------------------------------------------------------------- /molecule/default/tests/test_namenodes.py: -------------------------------------------------------------------------------- 1 | import testinfra.utils.ansible_runner 2 | 3 | testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner( 4 | '.molecule/inventory').get_hosts('namenodes') 5 | 6 | 7 | def test_hdfs_printTopology_command(Sudo, Command): 8 | with Sudo("hdfs"): 9 | c = Command("/usr/local/hadoop/bin/hdfs dfsadmin -printTopology") 10 | 11 | assert len(c.stdout.rstrip().split('\n')) == 4 12 | assert c.rc == 0 13 | 14 | 15 | def test_hdfs_check_safemode_is_off(Sudo, Command): 16 | with Sudo("hdfs"): 17 | c = Command("/usr/local/hadoop/bin/hdfs dfsadmin -safemode get") 18 | 19 | assert len(c.stdout.rstrip().split('\n')) == 2 20 | for row in c.stdout.rstrip().split('\n'): 21 | assert row.find("OFF") != -1 22 | assert c.rc == 0 23 | 24 | 25 | def test_hdfs_is_empty(Sudo, Command): 26 | with Sudo("hdfs"): 27 | c = Command("/usr/local/hadoop/bin/hdfs dfs -ls /") 28 | 29 | assert c.stdout.rstrip() == '' 30 | assert c.rc == 0 31 | 32 | 33 | def test_hdfs_namenode_running(Service): 34 | service = Service('hdfs-namenode') 35 | 36 | assert service.is_running 37 | assert service.is_enabled 38 | 39 | 
40 | def test_hdfs_zkfc_running(Service): 41 | service = Service('hdfs-zkfc') 42 | 43 | assert service.is_running 44 | assert service.is_enabled 45 | 46 | 47 | def test_hdfs_listening(Socket): 48 | socket = Socket('tcp://0.0.0.0:8020') 49 | assert socket.is_listening 50 | 51 | 52 | def test_hdfs_web_listening(Socket): 53 | socket = Socket('tcp://0.0.0.0:50070') 54 | assert socket.is_listening 55 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Java Open JDK 3 | - name: lhoss.java 4 | src: git+https://github.com/lhoss/ansible-role-java 5 | version: remotes/origin/support_configurable_ppa_repos 6 | 7 | ## Zookeeper 8 | - src: git+https://github.com/teralytics/ansible-zookeeper 9 | name: teralytics.zookeeper 10 | version: tags/v0.17.0 11 | -------------------------------------------------------------------------------- /tasks/base.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install some packages needed for native use 3 | apt: name={{item}} state=present update_cache=yes cache_valid_time=3600 4 | with_items: 5 | - "libssl-dev" 6 | - "libsnappy-dev" 7 | 8 | - name: Make sure parent directory exists 9 | file: path={{hdfs_parent_dir}} state=directory owner=root group=root mode=0755 follow=yes 10 | 11 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it (from local archive) 12 | unarchive: src=hadoop-{{hdfs_version}}.tar.gz dest="{{hdfs_parent_dir}}/" owner={{hdfs_user}} group={{hdfs_group}} mode=0755 creates="{{hdfs_hadoop_home}}-{{hdfs_version}}" 13 | when: hdfs_distribution_method == "local_file" 14 | 15 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it (from native compiled archive) 16 | unarchive: src="{{hdfs_fetch_folder}}/hadoop-{{hdfs_version}}.tar.gz" dest="{{hdfs_parent_dir}}/" owner={{hdfs_user}} group={{hdfs_group}} mode=0755 creates="{{hdfs_hadoop_home}}-{{hdfs_version}}" 17 | when: hdfs_distribution_method == "compile" 18 | 19 | - name: Download Hadoop .tgz to {{hdfs_parent_dir}} 20 | get_url: url={{hdfs_download_url}} dest="{{hdfs_parent_dir}}/hadoop-{{ hdfs_version }}.tar.gz" validate_certs=no 21 | when: hdfs_distribution_method == "download" 22 | 23 | - name: Unarchive downloaded Hadoop 24 | unarchive: src="{{hdfs_parent_dir}}/hadoop-{{ hdfs_version }}.tar.gz" dest="{{hdfs_parent_dir}}" remote_src=true creates="{{hdfs_hadoop_home}}-{{ hdfs_version }}" 25 | when: hdfs_distribution_method == "download" 26 | 27 | - name: Link hadoop version to {{hdfs_hadoop_home}} 28 | file: src={{hdfs_hadoop_home}}-{{hdfs_version}} dest={{hdfs_hadoop_home}} owner={{hdfs_user}} group={{hdfs_group}} state=link 29 | 30 | - name: Create folder /etc/hadoop 31 | file: path=/etc/hadoop state=directory owner={{hdfs_user}} group={{hdfs_group}} 32 | 33 | - name: Create hadoop link for conf to /etc/hadoop 34 | file: src={{hdfs_conf_dir}} dest=/etc/hadoop/conf owner={{hdfs_user}} group={{hdfs_group}} state=link 35 | 36 | - name: Create link for hdfs to /usr/local/bin 37 | file: src="{{hdfs_bin_dir}}/hdfs" dest=/usr/local/bin/hdfs owner={{hdfs_user}} group={{hdfs_group}} mode=0755 state=link 38 | 39 | - name: Create link for hadoop to /usr/local/bin 40 | file: src="{{hdfs_bin_dir}}/hadoop" dest=/usr/local/bin/hadoop owner={{hdfs_user}} group={{hdfs_group}} mode=0755 state=link 41 | 42 | - name: Export hadoop variables 43 | copy: content="export 
HADOOP_HOME={{hdfs_hadoop_home}}\nexport HADOOP_PREFIX={{hdfs_hadoop_home}}\nexport HADOOP_CONF_DIR={{hdfs_conf_dir}}\nexport HADOOP_LIBEXEC_DIR={{hdfs_hadoop_home}}/libexec\nexport HADOOP_CLASSPATH=`/usr/local/bin/hadoop classpath`" dest="/etc/profile.d/hadoop_exports.sh" mode=0755 44 | 45 | - name: Allow hadoop variables keeping for sudoers 46 | template: src=hadoop_sudoers.j2 dest=/etc/sudoers.d/hadoop owner=root group=root mode=0644 47 | 48 | - name: Create rack awareness script 49 | template: src=rack-awareness.sh.j2 dest={{hdfs_rack_script_path}} owner={{hdfs_user}} group={{hdfs_group}} mode=0755 50 | when: hdfs_rack_script_awk is defined 51 | 52 | - name: Create hadoop tmp dir 53 | file: path={{hdfs_tmpdir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=1777 54 | tags: 55 | - skip_ansible_lint 56 | 57 | - name: Create hadoop log dir 58 | file: path={{hdfs_log_dir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0755 59 | 60 | - name: Create directory for unix sockets 61 | file: path={{hdfs_dfs_domain_socket_path_folder}} state=directory owner={{hdfs_user}} group=root mode=0755 62 | when: hdfs_enable_short_circuit_reads 63 | -------------------------------------------------------------------------------- /tasks/bootstrap_ha.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Pause - Bootstrapping is about to begin 3 | pause: prompt="Are you sure that you want to continue bootstrapping HDFS?" seconds=10 4 | 5 | - name: Ensure that zookeeper is running 6 | service: name=zookeeper state=started 7 | when: inventory_hostname in hdfs_zookeeper_hosts 8 | 9 | - name: Ensure that journal nodes are running 10 | service: name=hdfs-journalnode state=started 11 | when: inventory_hostname in hdfs_journalnodes 12 | 13 | - name: Format namenode {{hdfs_namenodes[0]}} 14 | command: bash -lc "./hdfs namenode -format {{ hdfs_cluster_name }} -force" 15 | args: 16 | creates: "{{hdfs_namenode_dir_list[0]}}/current/VERSION" 17 | chdir: "{{ hdfs_bin_dir }}" 18 | become: true 19 | become_user: "{{hdfs_user}}" 20 | when: inventory_hostname == hdfs_namenodes[0] 21 | 22 | - name: Start namenode {{hdfs_namenodes[0]}} 23 | service: name=hdfs-namenode state=started 24 | when: inventory_hostname == hdfs_namenodes[0] 25 | 26 | - name: Wait for the namenode {{ hdfs_namenodes[0] }} to come online 27 | wait_for: host={{ hdfs_namenodes[0] }} port=50070 28 | when: inventory_hostname in hdfs_namenodes 29 | 30 | - name: Bootstrap the standby namenode ({{hdfs_namenodes[1]}}) 31 | command: bash -lc "./hdfs namenode -bootstrapStandby && touch {{hdfs_namenode_dir_list[0]}}/.bootstrapped" 32 | args: 33 | creates: "{{hdfs_namenode_dir_list[0]}}/.bootstrapped" 34 | chdir: "{{ hdfs_bin_dir }}" 35 | become: true 36 | become_user: "{{hdfs_user}}" 37 | when: inventory_hostname == hdfs_namenodes[1] 38 | 39 | - name: Start namenode {{hdfs_namenodes[1]}} 40 | service: name=hdfs-namenode state=started 41 | when: inventory_hostname == hdfs_namenodes[1] 42 | 43 | - name: Format ZK for zkfc 44 | command: bash -lc "./hdfs zkfc -formatZK -force && touch {{hdfs_namenode_dir_list[0]}}/.zkFormatted" 45 | args: 46 | creates: "{{hdfs_namenode_dir_list[0]}}/.zkFormatted" 47 | chdir: "{{ hdfs_bin_dir }}" 48 | become: true 49 | become_user: "{{hdfs_user}}" 50 | when: inventory_hostname == hdfs_namenodes[0] 51 | 52 | - name: Start zkfc services 53 | service: name=hdfs-zkfc state=started 54 | when: inventory_hostname in hdfs_namenodes 55 | 56 | - name: Start data nodes 
57 | service: name=hdfs-datanode state=started 58 | when: inventory_hostname in hdfs_datanodes 59 | 60 | - name: Bootstrapping complete 61 | debug: "msg='New namenode can be opened at http://{{ inventory_hostname }}:50070/'" 62 | when: inventory_hostname in hdfs_namenodes 63 | -------------------------------------------------------------------------------- /tasks/bootstrap_spof.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Pause - Bootstrapping is about to begin 3 | pause: prompt="Are you sure that you want to continue bootstrapping HDFS?" seconds=10 4 | 5 | - name: Format namenode {{hdfs_namenodes[0]}} 6 | command: "./hdfs namenode -format {{ hdfs_cluster_name }} -force" 7 | args: 8 | creates: "{{hdfs_namenode_dir_list[0]}}/current/VERSION" 9 | chdir: "{{ hdfs_bin_dir }}" 10 | become: true 11 | become_user: "{{hdfs_user}}" 12 | when: inventory_hostname == hdfs_namenodes[0] 13 | 14 | - name: Start namenode {{hdfs_namenodes[0]}} 15 | service: name=hdfs-namenode state=started 16 | when: inventory_hostname == hdfs_namenodes[0] 17 | 18 | - name: Wait for the namenode {{ hdfs_namenodes[0] }} to come online 19 | wait_for: host={{ hdfs_namenodes[0] }} port=50070 20 | when: inventory_hostname in hdfs_secondary_namenode 21 | 22 | - name: Start secondary namenode 23 | service: name=hdfs-secondarynamenode state=started 24 | when: inventory_hostname in hdfs_secondary_namenode 25 | 26 | - name: Start data nodes 27 | service: name=hdfs-datanode state=started 28 | when: inventory_hostname in hdfs_datanodes 29 | 30 | - name: Bootstrapping complete 31 | debug: "msg='New namenode can be opened at http://{{ inventory_hostname }}:50070/'" 32 | when: inventory_hostname in hdfs_namenodes 33 | -------------------------------------------------------------------------------- /tasks/config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure hadoop-env.sh 3 | template: src=hadoop-env.sh.j2 dest={{hdfs_conf_dir}}/hadoop-env.sh owner={{hdfs_user}} group={{hdfs_group}} mode=0755 4 | notify: 5 | - Restart namenode 6 | - Restart secondary namenode 7 | - Restart datanode 8 | - Restart journalnode 9 | - Restart zkfc 10 | tags: 11 | - hdfs-env 12 | - hdfs-log4j-config 13 | 14 | - name: Configure core-site.xml 15 | template: src=core-site.xml.j2 dest={{hdfs_conf_dir}}/core-site.xml owner={{hdfs_user}} group={{hdfs_group}} mode=0755 16 | notify: 17 | - Restart namenode 18 | - Restart secondary namenode 19 | - Restart datanode 20 | - Restart journalnode 21 | - Restart zkfc 22 | tags: 23 | - hdfs-core-config 24 | 25 | - name: Configure hdfs-site.xml 26 | template: src=hdfs-site.xml.j2 dest={{hdfs_conf_dir}}/hdfs-site.xml owner={{hdfs_user}} group={{hdfs_group}} mode=0755 27 | notify: 28 | - Restart namenode 29 | - Restart secondary namenode 30 | - Restart datanode 31 | - Restart journalnode 32 | - Restart zkfc 33 | tags: 34 | - hdfs-site-config 35 | 36 | - name: Configure log4j.properties 37 | template: src=log4j.properties.j2 dest={{hdfs_conf_dir}}/log4j.properties owner={{hdfs_user}} group={{hdfs_group}} mode=0755 38 | notify: 39 | - Restart namenode 40 | - Restart secondary namenode 41 | - Restart datanode 42 | - Restart journalnode 43 | - Restart zkfc 44 | tags: 45 | - hdfs-log4j-config 46 | -------------------------------------------------------------------------------- /tasks/datanode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create 
datanode directories 3 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode={{ hdfs_dfs_datanode_data_dir_perm }} 4 | with_items: "{{ hdfs_datanode_dir_list }}" 5 | 6 | - name: Set program variable to 'datanode' 7 | set_fact: program="datanode" 8 | 9 | - name: Deploy init.d service for datanode 10 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-datanode owner=root group=root mode=0755 11 | when: ansible_service_mgr != "systemd" 12 | 13 | - name: Deploy systemd service for datanode 14 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-datanode.service owner=root group=root mode=0644 15 | register: datanode_systemd_unit_altered 16 | when: ansible_service_mgr == "systemd" 17 | 18 | # daemon-reload cannot be sent to handler because it is needed before the end of play 19 | # also, we cannot use flush_handlers before the end of play, because we have other handlers 20 | - name: Reload systemd daemon 21 | command: "systemctl daemon-reload" 22 | when: ansible_service_mgr == "systemd" and datanode_systemd_unit_altered.changed 23 | tags: 24 | - skip_ansible_lint 25 | 26 | - name: Register datanode service 27 | service: name=hdfs-datanode enabled=yes 28 | -------------------------------------------------------------------------------- /tasks/journalnode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create journalnode edits dir 3 | file: path={{hdfs_dfs_journalnode_edits_dir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode={{ hdfs_dfs_journalnode_edits_dir_perm }} 4 | 5 | - name: Set program variable to 'journalnode' 6 | set_fact: program="journalnode" 7 | 8 | - name: Deploy init.d service for journalnode 9 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-journalnode owner=root group=root mode=0755 10 | when: ansible_service_mgr != "systemd" 11 | 12 | - name: Deploy systemd service for journalnode 13 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-journalnode.service owner=root group=root mode=0644 14 | register: journalnode_systemd_unit_altered 15 | when: ansible_service_mgr == "systemd" 16 | 17 | # daemon-reload cannot be sent to handler because it is needed before the end of play 18 | # also, we cannot use flush_handlers before the end of play, because we have other handlers 19 | - name: Reload systemd daemon 20 | command: "systemctl daemon-reload" 21 | when: ansible_service_mgr == "systemd" and journalnode_systemd_unit_altered.changed 22 | tags: 23 | - skip_ansible_lint 24 | 25 | - name: Register journalnode service 26 | service: name=hdfs-journalnode enabled=yes 27 | -------------------------------------------------------------------------------- /tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - import_tasks: user.yml 3 | tags: user 4 | 5 | - import_tasks: native.yml 6 | when: hdfs_compile_from_source and inventory_hostname == hdfs_compile_node 7 | tags: native 8 | 9 | - import_tasks: base.yml 10 | tags: base 11 | 12 | - import_tasks: config.yml 13 | tags: base,config 14 | 15 | - import_tasks: datanode.yml 16 | when: inventory_hostname in hdfs_datanodes 17 | tags: datanode 18 | 19 | - import_tasks: namenode.yml 20 | when: inventory_hostname in hdfs_namenodes 21 | tags: namenode 22 | 23 | - import_tasks: secondarynamenode.yml 24 | when: inventory_hostname in hdfs_secondary_namenode and not hdfs_ha_enabled 25 | tags: secondarynamenode 26 | 27 | - import_tasks: journalnode.yml 28 | when:
inventory_hostname in hdfs_journalnodes and hdfs_ha_enabled 29 | tags: journalnode 30 | 31 | - import_tasks: bootstrap_spof.yml 32 | when: hdfs_bootstrap and not hdfs_ha_enabled 33 | tags: bootstrap 34 | 35 | - import_tasks: bootstrap_ha.yml 36 | when: hdfs_bootstrap and hdfs_ha_enabled 37 | tags: bootstrap 38 | 39 | - import_tasks: upgrade.yml 40 | any_errors_fatal: True 41 | when: hdfs_upgrade 42 | tags: upgrade 43 | 44 | - import_tasks: scripts.yml 45 | tags: scripts 46 | -------------------------------------------------------------------------------- /tasks/namenode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Configure mapred-site.xml 3 | template: src=mapred-site.xml.j2 dest="{{hdfs_conf_dir}}/mapred-site.xml" owner={{hdfs_user}} group={{hdfs_group}} 4 | 5 | - name: Configure slaves 6 | template: src=slaves.j2 dest="{{hdfs_conf_dir}}/slaves" owner={{hdfs_user}} group={{hdfs_group}} mode=0744 7 | 8 | - name: Create namenode directories 9 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700 10 | with_items: "{{ hdfs_namenode_dir_list }}" 11 | 12 | - name: Create exclude file 13 | template: src=dfs.hosts.exclude.j2 dest={{hdfs_conf_dir}}/dfs.hosts.exclude owner={{hdfs_user}} group={{hdfs_group}} mode=0700 14 | notify: Restart namenode 15 | 16 | - name: Set program variable to 'namenode' 17 | set_fact: program="namenode" 18 | 19 | - name: Deploy init.d service for namenode 20 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-namenode owner=root group=root mode=0755 21 | when: ansible_service_mgr != "systemd" 22 | 23 | - name: Deploy systemd service for namenode 24 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-namenode.service owner=root group=root mode=0644 25 | register: namenode_systemd_unit_altered 26 | when: ansible_service_mgr == "systemd" 27 | 28 | - name: Set program variable to 'zkfc' 29 | set_fact: program="zkfc" 30 | when: hdfs_ha_enabled 31 | 32 | - name: Deploy init.d service for zkfc 33 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-zkfc owner=root group=root mode=0755 34 | when: hdfs_ha_enabled and ansible_service_mgr != "systemd" 35 | 36 | - name: Deploy systemd service for zkfc 37 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-zkfc.service owner=root group=root mode=0644 38 | register: zkfc_systemd_unit_altered 39 | when: hdfs_ha_enabled and ansible_service_mgr == "systemd" 40 | 41 | # daemon-reload cannot be sent to handler because it is needed before the end of play 42 | # also, we cannot use flush_handlers before the end of play, because we have other handlers 43 | - name: Reload systemd daemon 44 | command: "systemctl daemon-reload" 45 | when: ansible_service_mgr == "systemd" and ( namenode_systemd_unit_altered.changed or zkfc_systemd_unit_altered.changed ) 46 | tags: 47 | - skip_ansible_lint 48 | 49 | - name: Register namenode service 50 | service: name=hdfs-namenode enabled=yes 51 | 52 | - name: Register zkfc service 53 | service: name=hdfs-zkfc enabled=yes 54 | when: hdfs_ha_enabled 55 | -------------------------------------------------------------------------------- /tasks/native.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Make sure base dir exists 3 | file: path={{hdfs_parent_dir}} state=directory owner=root group=root mode=0755 follow=yes 4 | 5 | - name: Install build dependencies 6 | apt: name={{item}} state=present update_cache=yes 7 | with_items: 8 | - "git" 9 | -
"maven" 10 | - "gcc" 11 | - "g++" 12 | - "make" 13 | - "cmake" 14 | - "zlib1g-dev" 15 | - "libcurl4-openssl-dev" 16 | - "libprotobuf8" 17 | - "protobuf-compiler" 18 | - "libssl-dev" 19 | - "bzip2" 20 | - "libbz2-dev" 21 | - "libsnappy-dev" 22 | 23 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it 24 | unarchive: src=hadoop-{{hdfs_version}}-src.tar.gz dest={{hdfs_parent_dir}} owner={{hdfs_user}} group={{hdfs_group}} creates="{{hdfs_hadoop_home}}-{{hdfs_version}}-src" 25 | when: not hdfs_compile_from_git 26 | 27 | - name: Clone hadoop git repo 28 | git: repo="https://github.com/apache/hadoop.git" dest="{{hdfs_hadoop_home}}-{{hdfs_version}}-src" version="{{hdfs_compile_version}}" depth=1 29 | when: hdfs_compile_from_git 30 | 31 | - name: Fix ownerships for git repo 32 | file: path="{{hdfs_hadoop_home}}-{{hdfs_version}}-src" owner={{hdfs_user}} group={{hdfs_group}} recurse=yes 33 | 34 | - name: Compile 35 | command: mvn package -Pdist,native -DskipTests -Dtar -e -Drequire.snappy -Drequire.bzip2 36 | args: 37 | chdir: "{{hdfs_hadoop_home}}-{{hdfs_version}}-src" 38 | creates: "{{hdfs_hadoop_home}}-{{hdfs_version}}-src/hadoop-dist/target/hadoop-{{hdfs_version}}.tar.gz" 39 | become: true 40 | become_user: "{{hdfs_user}}" 41 | 42 | - name: Fetch hadoop-{{hdfs_version}}.tar.gz to {{hdfs_fetch_folder}} 43 | fetch: src="{{hdfs_hadoop_home}}-{{hdfs_version}}-src/hadoop-dist/target/hadoop-{{hdfs_version}}.tar.gz" dest="{{hdfs_fetch_folder}}/" flat=yes 44 | -------------------------------------------------------------------------------- /tasks/scripts.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create log compress and rotate script on {{ hdfs_bin_dir }} 3 | template: 4 | src: "audit-compress-rotate.sh.j2" 5 | dest: "{{ hdfs_bin_dir }}/audit-compress-rotate.sh" 6 | owner: "{{ hdfs_user }}" 7 | group: "{{ hdfs_group }}" 8 | mode: "0750" 9 | when: inventory_hostname in hdfs_namenodes 10 | 11 | # run compress and rotate after the linux log rotate (default 6:25 AM) 12 | - cron: 13 | name: Log compress and rotate cronjob 14 | state: present 15 | minute: "0" 16 | hour: "7" 17 | job: "{{ hdfs_bin_dir }}/audit-compress-rotate.sh 2>&1 | /usr/bin/logger -t hdfs" 18 | -------------------------------------------------------------------------------- /tasks/secondarynamenode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set program variable to 'secondarynamenode' 3 | set_fact: program="secondarynamenode" 4 | 5 | - name: Create directories for namenode checkpoints 6 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700 7 | with_items: "{{ hdfs_namenode_checkpoint_dir_list }}" 8 | 9 | - name: Deploy init.d service for secondarynamenode 10 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-secondarynamenode owner=root group=root mode=0755 11 | when: ansible_service_mgr != "systemd" 12 | 13 | - name: Deploy systemd service for secondarynamenode 14 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-secondarynamenode.service owner=root group=root mode=0644 15 | register: secondarynamenode_systemd_unit_altered 16 | when: ansible_service_mgr == "systemd" 17 | 18 | # daemon-reload cannot be sent to handler because it is needed before the end of play 19 | # also, we cannot use flush_handlers before the end of play, because we have other handlers 20 | - name: Reload systemd daemon 21 | command: "systemctl daemon-reload" 22 | when: 
ansible_service_mgr == "systemd" and secondarynamenode_systemd_unit_altered.changed 23 | tags: 24 | - skip_ansible_lint 25 | 26 | - name: Register secondarynamenode service 27 | service: name=hdfs-secondarynamenode enabled=yes 28 | -------------------------------------------------------------------------------- /tasks/ssh_fence.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #### only for ssh fence 3 | - name: Check if ssh keys should be distributed 4 | stat: path={{ hdfs_ssh_known_hosts_file }} 5 | register: host_file_status 6 | 7 | - name: Set distribute keys variable 8 | set_fact: distribute_keys={{not host_file_status.stat.exists or hdfs_redistribute_ssh_keys or created_user.changed}} 9 | 10 | - name: Fetch private key 11 | fetch: src={{ hdfs_user_home }}/.ssh/id_rsa dest=rsa_key 12 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys 13 | check_mode: no 14 | 15 | - name: Fetch public key 16 | fetch: src={{ hdfs_user_home }}/.ssh/id_rsa.pub dest=rsa_key 17 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys 18 | check_mode: no 19 | 20 | - name: Create .ssh directory for {{hdfs_user}} 21 | file: path={{ hdfs_user_home }}/.ssh state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700 22 | when: distribute_keys 23 | 24 | - name: Copy private key to all machines 25 | copy: src=rsa_key/{{hdfs_namenodes[0]}}{{ hdfs_user_home }}/.ssh/id_rsa dest={{ hdfs_user_home }}/.ssh/id_rsa owner={{hdfs_user}} group={{hdfs_group}} mode=0600 26 | when: distribute_keys 27 | 28 | - name: Add pubkeys to master server 29 | authorized_key: user={{hdfs_user}} key="{{ lookup('file', 'rsa_key/{{hdfs_namenodes[0]}}{{ hdfs_user_home }}/.ssh/id_rsa.pub') }}" 30 | when: distribute_keys 31 | 32 | - name: Make sure the known hosts file exists 33 | file: path={{ hdfs_ssh_known_hosts_file }} state=touch owner={{hdfs_user}} group={{hdfs_group}} mode=0644 34 | when: (not host_file_status.stat.exists) or (distribute_keys and host_file_status.stat.mode == "0644") 35 | changed_when: (not host_file_status.stat.exists) or host_file_status.stat.mode != "0644" 36 | 37 | - set_fact: hdfs_namenodes_long={{ hdfs_namenodes }} 38 | 39 | - name: Add long names to namenodes for proper key deployment 40 | set_fact: hdfs_namenodes_long={{ hdfs_namenodes_long | map('regex_replace', '(.*)', '\\1.' + hdfs_host_domain_name ) | list}} 41 | when: hdfs_host_domain_name is defined 42 | 43 | - name: Check host name availability 44 | shell: "ssh-keygen -f {{ hdfs_ssh_known_hosts_file }} -F {{ item }}" 45 | with_items: "{{ hdfs_namenodes_long }}" 46 | when: distribute_keys 47 | register: ssh_known_host_results 48 | ignore_errors: yes 49 | changed_when: False 50 | tags: 51 | - skip_ansible_lint 52 | 53 | - name: Scan the public key 54 | shell: "ssh-keyscan -H -T 10 {{ item.item }} >> {{ hdfs_ssh_known_hosts_file }}" 55 | with_items: "{{ ssh_known_host_results.results }}" 56 | when: item.stdout is defined and item.stdout == "" 57 | no_log: True 58 | 59 | - name: Delete key locally 60 | local_action: file path=rsa_key state=absent 61 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys 62 | become: no 63 | -------------------------------------------------------------------------------- /tasks/upgrade.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | - name: End play if not HA mode 4 | debug: 5 | msg: "Nothing to upgrade, ending play. This installation is not HA mode. 
No support for single instance." 6 | - meta: end_play 7 | when: hdfs_namenodes | length != 2 8 | 9 | - name: Check current version 10 | shell: "./hdfs version 2> /dev/null | grep ^Hadoop | cut -c8-" 11 | args: 12 | chdir: "{{ hdfs_bin_dir }}" 13 | become: yes 14 | become_user: "{{ hdfs_user }}" 15 | register: current_hdfs_version 16 | changed_when: False 17 | 18 | - debug: 19 | msg: "Current HDFS version is: {{ current_hdfs_version.stdout }}" 20 | verbosity: 1 21 | 22 | - block: 23 | - name: End play if nothing to upgrade 24 | debug: 25 | msg: "Nothing to upgrade, ending play. Current version is >= than upgrading version." 26 | - meta: end_play 27 | when: 28 | - current_hdfs_version.stdout | version_compare(hdfs_version, '>=') 29 | - not hdfs_upgrade_force 30 | 31 | - name: Set if should upgrade HDFS 32 | set_fact: 33 | hdfs_uptodate: "{{ current_hdfs_version.stdout | version_compare(hdfs_version, '>=') }}" 34 | 35 | - name: Should upgrade hdfs 36 | debug: 37 | msg: "Variable is {{ hdfs_uptodate }}" 38 | verbosity: 2 39 | 40 | - name: Prepare HDFS upgrade (create an fsimage for rollback) 41 | command: "./hdfs dfsadmin -rollingUpgrade prepare" 42 | args: 43 | chdir: "{{ hdfs_bin_dir }}" 44 | become: yes 45 | become_user: "{{ hdfs_user }}" 46 | when: 47 | - inventory_hostname == hdfs_namenodes[0] 48 | - not hdfs_uptodate 49 | 50 | - name: Check HDFS upgrade preparation (retry for 10 minutes) 51 | command: "./hdfs dfsadmin -rollingUpgrade query" 52 | args: 53 | chdir: "{{ hdfs_bin_dir }}" 54 | register: result 55 | become: yes 56 | become_user: "{{ hdfs_user }}" 57 | until: result.stdout.find("Proceed with rolling upgrade") != -1 58 | retries: 72 59 | delay: 10 60 | when: 61 | - inventory_hostname == hdfs_namenodes[0] 62 | - not hdfs_uptodate 63 | 64 | - name: Failover namenode "{{ hdfs_namenodes[0] }}" to "{{ hdfs_namenodes[1] }} if HA mode" 65 | command: "./hdfs haadmin -failover {{ hdfs_namenodes[0] }} {{ hdfs_namenodes[1] }}" 66 | args: 67 | chdir: "{{ hdfs_bin_dir }}" 68 | become: yes 69 | become_user: "{{ hdfs_user }}" 70 | when: 71 | - inventory_hostname == hdfs_namenodes[0] 72 | - hdfs_ha_enabled 73 | - not hdfs_uptodate 74 | 75 | - name: Upgrade namenode "{{ hdfs_namenodes[0] }}" 76 | include_tasks: upgrade_namenode.yml 77 | loop_control: 78 | loop_var: hdfs_namenode 79 | with_items: 80 | - "{{ hdfs_namenodes[0] }}" 81 | 82 | - name: Ask for confirmation if namenode is up 83 | pause: 84 | prompt: "Make sure namenode service on {{ hdfs_namenodes[0] }} is FULLY up! Check the startup progress on the web GUI." 85 | run_once: true 86 | delegate_to: hdfs_namenodes[0] 87 | when: 88 | - not hdfs_uptodate 89 | 90 | - name: Ask for confirmation if safe mode is off 91 | pause: 92 | prompt: "Make sure SAFE MODE is OFF. You might have to wait a couple of seconds (usually 30 seconds). Follow the progress on the web GUI." 
93 | run_once: true 94 | delegate_to: hdfs_namenodes[0] 95 | when: 96 | - not hdfs_uptodate 97 | 98 | - name: HDFS namenode on "{{ hdfs_namenodes[0] }}" is running 99 | command: "./hdfs haadmin -checkHealth {{ hdfs_namenodes[0] }}" 100 | args: 101 | chdir: "{{ hdfs_bin_dir }}" 102 | become: yes 103 | become_user: "{{ hdfs_user }}" 104 | any_errors_fatal: true 105 | when: 106 | - inventory_hostname == hdfs_namenodes[0] 107 | - not hdfs_uptodate 108 | 109 | - name: Failover namenode "{{ hdfs_namenodes[1] }}" to "{{ hdfs_namenodes[0] }} if HA mode" 110 | command: "./hdfs haadmin -failover {{ hdfs_namenodes[1] }} {{ hdfs_namenodes[0] }}" 111 | args: 112 | chdir: "{{ hdfs_bin_dir }}" 113 | become: yes 114 | become_user: "{{ hdfs_user }}" 115 | when: 116 | - inventory_hostname == hdfs_namenodes[1] 117 | - hdfs_ha_enabled 118 | 119 | - name: Upgrade namenode "{{ hdfs_namenodes[1] }}" 120 | include_tasks: upgrade_namenode.yml 121 | with_items: 122 | - "{{ hdfs_namenodes[1] }}" 123 | loop_control: 124 | loop_var: hdfs_namenode 125 | when: 126 | - not hdfs_uptodate 127 | 128 | - name: Ask for confirmation if namenode is up 129 | pause: 130 | prompt: "Make sure namenode service on {{ hdfs_namenodes[1] }} is FULLY up! Check the startup progress on the web GUI." 131 | run_once: true 132 | delegate_to: hdfs_namenodes[1] 133 | when: 134 | - not hdfs_uptodate 135 | 136 | - name: Ask for confirmation if safe mode is off 137 | pause: 138 | prompt: "Make sure SAFE MODE is OFF. You might have to wait a couple of seconds (usually 30 seconds). Follow the progress on the web GUI." 139 | run_once: true 140 | delegate_to: hdfs_namenodes[1] 141 | when: 142 | - not hdfs_uptodate 143 | 144 | - name: Upgrade datanodes 145 | include_tasks: upgrade_datanode.yml 146 | with_items: 147 | - "{{ hdfs_datanodes }}" 148 | loop_control: 149 | loop_var: hdfs_datanode 150 | 151 | - name: Upgrade journalnodes 152 | include_tasks: upgrade_journalnode.yml 153 | with_items: 154 | - "{{ hdfs_journalnodes }}" 155 | loop_control: 156 | loop_var: hdfs_journalnode 157 | 158 | - name: Finalize HDFS upgrade 159 | command: "./hdfs dfsadmin -rollingUpgrade finalize" 160 | args: 161 | chdir: "{{ hdfs_bin_dir }}" 162 | become: yes 163 | become_user: "{{ hdfs_user }}" 164 | when: inventory_hostname == hdfs_namenodes[0] 165 | -------------------------------------------------------------------------------- /tasks/upgrade_datanode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | - name: Shutdown HDFS datanode 4 | command: "./hdfs dfsadmin -shutdownDatanode {{ hdfs_datanode }}:50020" 5 | args: 6 | chdir: "{{ hdfs_bin_dir }}" 7 | become: yes 8 | become_user: "{{ hdfs_user }}" 9 | register: datanode_shutdown 10 | 11 | - name: Output from shutdown 12 | debug: 13 | msg: "Output from shutdown: {{ datanode_shutdown.stdout }}" 14 | verbosity: 1 15 | 16 | - name: Check HDFS datanode is running 17 | command: "./hdfs dfsadmin -getDatanodeInfo {{ hdfs_datanode }}:50020" 18 | args: 19 | chdir: "{{ hdfs_bin_dir }}" 20 | register: result 21 | become: yes 22 | become_user: "{{ hdfs_user }}" 23 | until: result.stderr.find("Connection refused") != -1 24 | retries: 2 25 | delay: 10 26 | failed_when: result.rc == 0 27 | when: 28 | - inventory_hostname == hdfs_datanode 29 | - not hdfs_uptodate 30 | tags: upgrade_datanode 31 | 32 | # The pause runs on the first (single one) node on the inventory list 33 | - name: Ask for confirmation if datanode is down 34 | pause: 35 | prompt: "Make sure hdfs-datanode 
service on {{ hdfs_datanode }} is down" 36 | when: not hdfs_uptodate 37 | 38 | - block: 39 | - import_tasks: base.yml 40 | - import_tasks: config.yml 41 | - import_tasks: datanode.yml 42 | - name: Restart datanode 43 | service: name=hdfs-datanode state=restarted 44 | when: 45 | - inventory_hostname == hdfs_datanode 46 | - not hdfs_uptodate 47 | tags: upgrade_datanode 48 | 49 | # The pause runs on the first (single one) node on the inventory list 50 | - name: Ask for confirmation if datanode is up 51 | pause: 52 | prompt: "Make sure hdfs-datanode service on {{ hdfs_datanode }} is up" 53 | when: not hdfs_uptodate 54 | -------------------------------------------------------------------------------- /tasks/upgrade_journalnode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | - name: Restart journalnode 4 | service: name=hdfs-journalnode state=restarted 5 | - name: Wait for Journal Node port to become open, don't start checking for 10 seconds 6 | wait_for: 7 | port: 8485 8 | delay: 10 9 | - name: Pause for 20 seconds until the JournalNode is properly restarted 10 | pause: 11 | seconds: 20 12 | when: 13 | - inventory_hostname == hdfs_journalnode 14 | tags: upgrade_journalnode 15 | -------------------------------------------------------------------------------- /tasks/upgrade_namenode.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | - import_tasks: base.yml 4 | - import_tasks: config.yml 5 | - import_tasks: namenode.yml 6 | - name: Restart namenode 7 | service: name=hdfs-namenode state=restarted 8 | when: 9 | - inventory_hostname == hdfs_namenode 10 | - not hdfs_uptodate 11 | tags: upgrade_namenode 12 | -------------------------------------------------------------------------------- /tasks/user.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Add hadoop group on all machines 3 | group: name={{hdfs_group}} state=present 4 | 5 | - name: Add hadoop user on first namenode only and generate an ssh key 6 | user: name={{hdfs_user}} comment="Hadoop superuser" uid=1040 group={{hdfs_group}} createhome=yes generate_ssh_key="{{ hdfs_ssh_fence }}" 7 | when: inventory_hostname == hdfs_namenodes[0] 8 | 9 | - name: Create user on all machines 10 | user: name={{hdfs_user}} comment="Hadoop superuser" uid=1040 group={{hdfs_group}} home={{ hdfs_user_home }} createhome=yes 11 | register: created_user 12 | 13 | - import_tasks: ssh_fence.yml 14 | when: hdfs_ssh_fence and inventory_hostname in hdfs_namenodes 15 | -------------------------------------------------------------------------------- /templates/audit-compress-rotate.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | LOGDIR={{ hdfs_log_dir }} 3 | TODAY=$(date +'%Y%m%d') 4 | 5 | cd $LOGDIR 6 | 7 | # compress any hdfs-audit files that are not still being used (already rotated) 8 | for LOGFILE in $(ls | egrep '[[:digit:]]{4}\-[[:digit:]]{2}\-[[:digit:]]{2}$'); do 9 | LOGFILEDATE=$(echo $LOGFILE | cut -d'.' 
-f3 | sed -e 's/-//g'); 10 | [ $LOGFILEDATE -lt $TODAY ] && gzip $LOGFILE 11 | done 12 | 13 | # rotate files older than {{ hdfs_audit_rotate_days }} days 14 | find -iname "hdfs-audit*" -mtime +{{ hdfs_audit_rotate_days }} -delete 15 | -------------------------------------------------------------------------------- /templates/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | {{ hdfs_default_fs }} 8 | 9 | 10 | hadoop.tmp.dir 11 | {{ hdfs_tmpdir_user }} 12 | 13 | 14 | io.serializations 15 | org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization 16 | 17 | 18 | fs.trash.interval 19 | {{ hdfs_fs_trash_interval }} 20 | 21 | 22 | fs.trash.checkpoint.interval 23 | {{ hdfs_fs_trash_checkpoint_interval }} 24 | 25 | {% if hdfs_ha_enabled %} 26 | 27 | ha.zookeeper.quorum 28 | {{ hdfs_zookeeper_quorum }} 29 | 30 | {% endif %} 31 | {% if hdfs_rack_script_awk is defined %} 32 | 33 | topology.node.switch.mapping.impl 34 | org.apache.hadoop.net.ScriptBasedMapping 35 | 36 | 37 | topology.script.file.name 38 | {{ hdfs_rack_script_path }} 39 | 40 | {% endif %} 41 | {% if core_site_additional_properties is defined %} 42 | {% for property in core_site_additional_properties %} 43 | 44 | {{ property.name }} 45 | {{ property.value }} 46 | {% if property.final is defined %} 47 | true 48 | {% endif %} 49 | 50 | {% endfor %} 51 | {% endif %} 52 | 53 | -------------------------------------------------------------------------------- /templates/dfs.hosts.exclude.j2: -------------------------------------------------------------------------------- 1 | # This file contains a list of hosts that will be excluded from the DFS 2 | {% if dfs_hosts_exclude is defined %} 3 | {% for host in dfs_hosts_exclude %} 4 | {{ host }} 5 | {% endfor %} 6 | {% endif %} 7 | -------------------------------------------------------------------------------- /templates/hadoop-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Set Hadoop-specific environment variables here. 18 | 19 | # The only required environment variable is JAVA_HOME. All others are 20 | # optional. When running a distributed configuration it is best to 21 | # set JAVA_HOME in this file, so that it is correctly defined on 22 | # remote nodes. 23 | 24 | # The java implementation to use. 25 | export JAVA_HOME=${JAVA_HOME:-"{{ hdfs_java_home }}"} 26 | 27 | # The jsvc implementation to use. 
Jsvc is required to run secure datanodes 28 | # that bind to privileged ports to provide authentication of data transfer 29 | # protocol. Jsvc is not required if SASL is configured for authentication of 30 | # data transfer protocol using non-privileged ports. 31 | #export JSVC_HOME=${JSVC_HOME} 32 | 33 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} 34 | 35 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 36 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar {{ hdfs_extra_classpath | join(" ") }} ; do 37 | if [ "$HADOOP_CLASSPATH" ]; then 38 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f 39 | else 40 | export HADOOP_CLASSPATH=$f 41 | fi 42 | done 43 | 44 | # The maximum amount of heap to use, in MB. Default is 1000. 45 | #export HADOOP_HEAPSIZE= 46 | #export HADOOP_NAMENODE_INIT_HEAPSIZE="" 47 | 48 | # Extra Java runtime options. Empty by default. 49 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" 50 | 51 | # Command specific options appended to HADOOP_OPTS when specified 52 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:{{hadoop_security_logger}} } -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-{{hadoop_audit_logger}} } {{hdfs_namenode_javaOpts}} $HADOOP_NAMENODE_OPTS" 53 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS {{hdfs_datanode_javaOpts}} $HADOOP_DATANODE_OPTS" 54 | 55 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-{{hadoop_security_logger}}} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-{{hadoop_audit_logger}}} $HADOOP_SECONDARYNAMENODE_OPTS" 56 | 57 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS" 58 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS" 59 | 60 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc) 61 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" 62 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" 63 | 64 | # On secure datanodes, user to run the datanode as after dropping privileges. 65 | # This **MUST** be uncommented to enable secure HDFS if using privileged ports 66 | # to provide authentication of data transfer protocol. This **MUST NOT** be 67 | # defined if SASL is configured for authentication of data transfer protocol 68 | # using non-privileged ports. 69 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} 70 | 71 | # Where log files are stored. $HADOOP_HOME/logs by default. 72 | export HADOOP_LOG_DIR={{ hdfs_log_dir }} 73 | 74 | # Where log files are stored in the secure data environment. 75 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} 76 | 77 | ### 78 | # HDFS Mover specific parameters 79 | ### 80 | # Specify the JVM options to be used when starting the HDFS Mover. 81 | # These options will be appended to the options specified as HADOOP_OPTS 82 | # and therefore may override any similar flags set in HADOOP_OPTS 83 | # 84 | # export HADOOP_MOVER_OPTS="" 85 | 86 | ### 87 | # Advanced Users Only! 88 | ### 89 | 90 | # The directory where pid files are stored. /tmp by default. 91 | # NOTE: this should be set to a directory that can only be written to by 92 | # the user that will run the hadoop daemons. Otherwise there is the 93 | # potential for a symlink attack. 94 | export HADOOP_PID_DIR=${HADOOP_PID_DIR} 95 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} 96 | 97 | # A string representing this instance of hadoop. $USER by default. 
98 | export HADOOP_IDENT_STRING=$USER 99 | -------------------------------------------------------------------------------- /templates/hadoop_sudoers.j2: -------------------------------------------------------------------------------- 1 | # Keep environment variables so sudo users can sudo to hdfs user without worrying about environment variables 2 | Defaults env_keep += "HADOOP_HOME" 3 | Defaults env_keep += "HADOOP_PREFIX" 4 | Defaults env_keep += "HADOOP_CONF_DIR" 5 | Defaults env_keep += "HADOOP_LIBEXEC_DIR" 6 | Defaults env_keep += "HADOOP_CLASSPATH" 7 | -------------------------------------------------------------------------------- /templates/hdfs-service.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ### BEGIN INIT INFO 4 | # Provides: hdfs-{{program}} 5 | # Required-Start: $remote_fs $network 6 | # Required-Stop: $remote_fs 7 | # Default-Start: 2 3 4 5 8 | # Default-Stop: 0 1 6 9 | # Short-Description: Start hdfs-{{program}} daemon 10 | # Description: Start hdfs-{{program}} daemon 11 | ### END INIT INFO 12 | 13 | HADOOP_HOME={{hdfs_hadoop_home}} 14 | HADOOP_CONF_DIR={{hdfs_conf_dir}} 15 | 16 | . $HADOOP_CONF_DIR/hadoop-env.sh 17 | 18 | HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/} 19 | NAME="hdfs-{{program}}" 20 | PIDFILE="$HADOOP_PID_DIR/hadoop-${NAME}.pid" 21 | 22 | . /lib/lsb/init-functions 23 | 24 | case "$1" in 25 | start) 26 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start {{program}}" 27 | ;; 28 | stop) 29 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop {{program}}" 30 | ;; 31 | status) 32 | status_of_proc -p $PIDFILE "$NAME" "$NAME" && exit 0 || exit $? 
33 | ;; 34 | restart) 35 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop {{program}}" 36 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start {{program}}" 37 | ;; 38 | *) 39 | echo "Usage: $0 {start|stop|restart}" 40 | exit 1 41 | esac 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /templates/hdfs-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {% if hdfs_ha_enabled %} 6 | 7 | dfs.nameservices 8 | {{ hdfs_nameservices }} 9 | 10 | 11 | dfs.nameservice.id 12 | {{ hdfs_nameservices }} 13 | 14 | 15 | dfs.ha.namenodes.{{ hdfs_nameservices }} 16 | {{ hdfs_namenodes | join(',') }} 17 | 18 | {% if hdfs_ha_enabled and inventory_hostname in hdfs_namenodes -%} 19 | 20 | dfs.ha.namenode.id 21 | {{ inventory_hostname }} 22 | 23 | {% endif -%} 24 | {% for host in hdfs_namenodes -%} 25 | 26 | dfs.namenode.rpc-address.{{ hdfs_nameservices }}.{{ host }} 27 | {{ host }}{%- if hdfs_host_domain_name is defined and hdfs_host_domain_name | length > 0 -%}.{{ hdfs_host_domain_name }}{%- endif -%}:8020 28 | 29 | 30 | dfs.namenode.rpc-bind-host.{{ hdfs_nameservices }}.{{ host }} 31 | 0.0.0.0 32 | 33 | {% endfor -%} 34 | {% for host in hdfs_namenodes -%} 35 | 36 | dfs.namenode.http-address.{{ hdfs_nameservices }}.{{ host }} 37 | {{ host }}{%- if hdfs_host_domain_name is defined and hdfs_host_domain_name | length > 0 -%}.{{ hdfs_host_domain_name }}{%- endif -%}:50070 38 | 39 | 40 | dfs.namenode.http-bind-host.{{ hdfs_nameservices }}.{{ host }} 41 | 0.0.0.0 42 | 43 | {% endfor -%} 44 | 45 | dfs.namenode.shared.edits.dir 46 | qjournal://{{ hdfs_journalnodes | join(':8485' + ';') }}:8485/{{ hdfs_nameservices }} 47 | 48 | 49 | dfs.journalnode.edits.dir 50 | {{ hdfs_dfs_journalnode_edits_dir }} 51 | 52 | 53 | dfs.client.failover.proxy.provider.{{ hdfs_nameservices }} 54 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 55 | 56 | {% if hdfs_ssh_fence -%} 57 | 58 | dfs.ha.fencing.methods 59 | sshfence 60 | 61 | 62 | dfs.ha.fencing.ssh.private-key-files 63 | {{ hdfs_user_home }}/.ssh/id_rsa 64 | 65 | {% else -%} 66 | 67 | dfs.ha.fencing.methods 68 | shell(/bin/true) 69 | 70 | {% endif -%} 71 | 72 | dfs.ha.automatic-failover.enabled 73 | true 74 | 75 | {% else %} 76 | 77 | dfs.namenode.secondary.http-address 78 | {{ hdfs_secondary_namenode_http_address }} 79 | 80 | 81 | dfs.namenode.checkpoint.dir 82 | {{ hdfs_namenode_checkpoint_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }} 83 | 84 | {% endif %} 85 | 86 | dfs.replication 87 | {{ hdfs_dfs_replication }} 88 | 89 | 90 | dfs.datanode.data.dir 91 | {{ hdfs_datanode_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }} 92 | 93 | {% if inventory_hostname in hdfs_namenodes %} 94 | 95 | dfs.namenode.name.dir 96 | {{ hdfs_namenode_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }} 97 | 98 | {% endif %} 99 | 100 | dfs.permissions.superusergroup 101 | {{ hdfs_dfs_permissions_superusergroup }} 102 | 103 | 104 | fs.permissions.umask-mode 105 | {{ hdfs_fs_permissions_umask_mode }} 106 | 107 | 108 | dfs.hosts.exclude 109 | {{ hdfs_conf_dir }}/dfs.hosts.exclude 110 | 111 | 112 | dfs.blocksize 113 | {{ hdfs_dfs_blocksize }} 114 | true 115 | 116 | 117 | dfs.namenode.avoid.read.stale.datanode 118 | {{ hdfs_dfs_namenode_avoid_read_stale_datanode | lower }} 119 | 120 | 
121 | dfs.namenode.avoid.write.stale.datanode 122 | {{ hdfs_dfs_namenode_avoid_write_stale_datanode | lower }} 123 | 124 | 125 | dfs.support.append 126 | true 127 | 128 | 129 | dfs.namenode.write.stale.datanode.ratio 130 | {{ hdfs_dfs_namenode_write_stale_datanode_ratio }} 131 | 132 | 133 | dfs.namenode.handler.count 134 | {{ hdfs_dfs_namenode_handler_count }} 135 | 136 | 137 | dfs.namenode.service.handler.count 138 | {{ hdfs_dfs_namenode_service_handler_count }} 139 | 140 | 141 | dfs.datanode.du.reserved 142 | {{ hdfs_dfs_datanode_du_reserved }} 143 | 144 | 145 | dfs.datanode.data.dir.perm 146 | {{ hdfs_dfs_datanode_data_dir_perm }} 147 | 148 | 149 | dfs.datanode.max.transfer.threads 150 | {{ hdfs_dfs_datanode_max_transfer_threads }} 151 | 152 | 153 | dfs.datanode.fsdataset.volume.choosing.policy 154 | org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy 155 | 156 | 157 | 158 | dfs.replication.max 159 | {{ hdfs_dfs_replication_max }} 160 | 161 | 162 | dfs.namenode.replication.min 163 | {{ hdfs_dfs_namenode_replication_min }} 164 | 165 | 166 | dfs.namenode.checkpoint.period 167 | {{ hdfs_dfs_namenode_checkpoint_period }} 168 | 169 | 170 | dfs.namenode.audit.log.async 171 | {{ hdfs_dfs_namenode_audit_log_async | lower }} 172 | 173 | 174 | dfs.client.file-block-storage-locations.num-threads 175 | {{ hdfs_dfs_client_file_block_storage_locations_num_threads }} 176 | 177 | 178 | dfs.client.file-block-storage-locations.timeout.millis 179 | {{ hdfs_dfs_client_file_block_storage_locations_timeout_millis }} 180 | 181 | 182 | dfs.client.read.shortcircuit 183 | {{ hdfs_enable_short_circuit_reads | lower }} 184 | 185 | {% if hdfs_enable_short_circuit_reads is defined %} 186 | 187 | dfs.domain.socket.path 188 | {{ hdfs_dfs_domain_socket_path_folder }}/dn._PORT 189 | 190 | {% endif %} 191 | {% if hdfs_site_additional_properties is defined %} 192 | {% for property in hdfs_site_additional_properties -%} 193 | 194 | {{property.name}} 195 | {{property.value}} 196 | {% if property_final is defined -%} 197 | true 198 | {% endif %} 199 | 200 | {% endfor -%} 201 | {% endif %} 202 | 203 | -------------------------------------------------------------------------------- /templates/hdfs.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=HDFS {{ program }} 3 | 4 | [Service] 5 | Type=forking 6 | User={{hdfs_user}} 7 | Group={{hdfs_group}} 8 | ExecStart={{ hdfs_hadoop_home }}/sbin/hadoop-daemon.sh --config {{ hdfs_conf_dir }} --script hdfs start {{ program }} 9 | ExecStop={{ hdfs_hadoop_home }}/sbin/hadoop-daemon.sh --config {{ hdfs_conf_dir }} --script hdfs stop {{ program }} 10 | RemainAfterExit=no 11 | Restart=on-failure 12 | PIDFile=/tmp/hadoop-hdfs-{{program}}.pid 13 | 14 | TimeoutSec=300 15 | 16 | [Install] 17 | WantedBy=multi-user.target 18 | -------------------------------------------------------------------------------- /templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | ### original file taken from hadoop 2.7.2 tarball ## 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Define some default values that can be overridden by system properties 20 | hadoop.root.logger=INFO,console 21 | hadoop.log.dir=. 22 | hadoop.log.file=hadoop.log 23 | 24 | # Define the root logger to the system property "hadoop.root.logger". 25 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 26 | 27 | # Logging Threshold 28 | log4j.threshold=ALL 29 | #log4j.debug=true 30 | 31 | # Null Appender 32 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 33 | 34 | # 35 | # Rolling File Appender - cap space usage at 5gb. 36 | # 37 | hadoop.log.maxfilesize={{hadoop_log_maxfilesize}} 38 | hadoop.log.maxbackupindex={{hadoop_log_maxbackupindex}} 39 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 40 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 41 | 42 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} 43 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} 44 | 45 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 46 | 47 | # Pattern format: Date LogLevel LoggerName LogMessage 48 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 49 | # Debugging Pattern format 50 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 51 | 52 | 53 | # 54 | # Daily Rolling File Appender 55 | # 56 | 57 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 58 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 59 | 60 | # Rollover at midnight 61 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 62 | 63 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 64 | 65 | # Pattern format: Date LogLevel LoggerName LogMessage 66 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 67 | # Debugging Pattern format 68 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 69 | 70 | 71 | # 72 | # console 73 | # Add "console" to rootlogger above if you want to use this 74 | # 75 | 76 | log4j.appender.console=org.apache.log4j.ConsoleAppender 77 | log4j.appender.console.target=System.err 78 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 79 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 80 | 81 | # 82 | # TaskLog Appender 83 | # 84 | 85 | #Default values 86 | hadoop.tasklog.taskid=null 87 | hadoop.tasklog.iscleanup=false 88 | hadoop.tasklog.noKeepSplits=4 89 | hadoop.tasklog.totalLogFileSize=100 90 | hadoop.tasklog.purgeLogSplits=true 91 | hadoop.tasklog.logsRetainHours=12 92 | 93 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 94 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} 95 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} 96 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} 97 | 98 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout 99 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 100 | 101 | # 102 | # HDFS block state change log from block manager 103 | # 104 | # Uncomment the following to suppress normal block state change 105 | # messages 
from BlockManager in NameNode. 106 | #log4j.logger.BlockStateChange=WARN 107 | 108 | # 109 | #Security appender 110 | # 111 | hadoop.security.logger=INFO,NullAppender 112 | hadoop.security.log.maxfilesize=100MB 113 | hadoop.security.log.maxbackupindex={{hadoop_log_maxbackupindex}} 114 | log4j.category.SecurityLogger=${hadoop.security.logger} 115 | hadoop.security.log.file=SecurityAuth-${user.name}.audit 116 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 117 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 118 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 119 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 120 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} 121 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} 122 | 123 | # 124 | # Daily Rolling Security appender 125 | # 126 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 127 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 128 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 129 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 130 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 131 | 132 | # 133 | # hadoop configuration logging 134 | # 135 | 136 | # Uncomment the following line to turn off configuration deprecation warnings. 137 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN 138 | 139 | # 140 | # hdfs audit logging 141 | # 142 | hdfs.audit.logger=INFO,NullAppender 143 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 144 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 145 | log4j.appender.RFAAUDIT=org.apache.log4j.DailyRollingFileAppender 146 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log 147 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout 148 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 149 | log4j.appender.RFAAUDIT.DatePattern=.yyyy-MM-dd 150 | log4j.appender.RFAAUDIT.rollingPolicy.ActiveFileName=${hadoop.log.dir}/hdfs-audit.log 151 | 152 | # 153 | # mapred audit logging 154 | # 155 | mapred.audit.logger=INFO,NullAppender 156 | mapred.audit.log.maxfilesize=256MB 157 | mapred.audit.log.maxbackupindex=20 158 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} 159 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false 160 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender 161 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log 162 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout 163 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 164 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} 165 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} 166 | 167 | # Custom Logging levels 168 | 169 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG 170 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG 171 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG 172 | 173 | # Jets3t library 174 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 175 | 176 | # AWS SDK & S3A FileSystem 177 | log4j.logger.com.amazonaws=ERROR 178 | log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR 179 | log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN 180 | 181 | # 182 | # Event Counter Appender 183 | # Sends 
counts of logging messages at different severity levels to Hadoop Metrics. 184 | # 185 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 186 | 187 | # 188 | # Job Summary Appender 189 | # 190 | # Use following logger to send summary to separate file defined by 191 | # hadoop.mapreduce.jobsummary.log.file : 192 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA 193 | # 194 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} 195 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log 196 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB 197 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20 198 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender 199 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} 200 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} 201 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} 202 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout 203 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 204 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} 205 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false 206 | 207 | # 208 | # Yarn ResourceManager Application Summary Log 209 | # 210 | # Set the ResourceManager summary log filename 211 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log 212 | # Set the ResourceManager summary log level and appender 213 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} 214 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY 215 | 216 | # To enable AppSummaryLogging for the RM, 217 | # set yarn.server.resourcemanager.appsummary.logger to 218 | # ,RMSUMMARY in hadoop-env.sh 219 | 220 | # Appender for ResourceManager Application Summary Log 221 | # Requires the following properties to be set 222 | # - hadoop.log.dir (Hadoop Log directory) 223 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) 224 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) 225 | 226 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} 227 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false 228 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender 229 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} 230 | log4j.appender.RMSUMMARY.MaxFileSize=256MB 231 | log4j.appender.RMSUMMARY.MaxBackupIndex=20 232 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout 233 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 234 | 235 | # HS audit log configs 236 | #mapreduce.hs.audit.logger=INFO,HSAUDIT 237 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger} 238 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false 239 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender 240 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log 241 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout 242 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 243 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd 244 | 245 | # Http Server Request Logs 
246 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog 247 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 248 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log 249 | #log4j.appender.namenoderequestlog.RetainDays=3 250 | 251 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog 252 | #log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 253 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log 254 | #log4j.appender.datanoderequestlog.RetainDays=3 255 | 256 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog 257 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 258 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log 259 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3 260 | 261 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog 262 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 263 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log 264 | #log4j.appender.jobhistoryrequestlog.RetainDays=3 265 | 266 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog 267 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 268 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log 269 | #log4j.appender.nodemanagerrequestlog.RetainDays=3 270 | 271 | -------------------------------------------------------------------------------- /templates/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | mapred.job.tracker 4 | {{ hdfs_namenodes[0] }}:8021 5 | 6 | 7 | -------------------------------------------------------------------------------- /templates/rack-awareness.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $@ | xargs -n 1 | awk -F '.' '{{ hdfs_rack_script_awk }}' -------------------------------------------------------------------------------- /templates/slaves.j2: -------------------------------------------------------------------------------- 1 | {% for slave in hdfs_datanodes %} 2 | {{ slave }} 3 | {% endfor %} 4 | --------------------------------------------------------------------------------
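The role above is driven almost entirely by group variables, so a few wiring sketches may help readers; none of the snippets that follow are files from this repository. This first one is a minimal play for an HA cluster: the role name "hdfs" and all host/group names are assumptions, while every variable name is one consumed by tasks/main.yml, bootstrap_ha.yml or the templates.

# site.yml - minimal sketch of a play applying this role in HA mode (role and
# host names are placeholders; the variable names appear in the tasks above).
- hosts: hadoop_cluster
  become: true
  vars:
    hdfs_cluster_name: cluster1           # passed to "hdfs namenode -format" during bootstrap
    hdfs_ha_enabled: true                 # main.yml then picks bootstrap_ha.yml over bootstrap_spof.yml
    hdfs_bootstrap: true                  # run the one-time format/bootstrap tasks
    hdfs_namenodes: [nn1, nn2]            # the HA flow expects exactly two entries
    hdfs_journalnodes: [nn1, nn2, dn1]
    hdfs_zookeeper_hosts: [nn1, nn2, dn1]
    hdfs_datanodes: [dn1, dn2, dn3]
  roles:
    - hdfs
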
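After bootstrap_ha.yml has started both namenodes and the ZKFC daemons, it can be worth confirming which namenode ZooKeeper elected as active. The sketch below is one way to do that and is not part of the role: "hdfs haadmin -getServiceState" is a standard Hadoop command, and the namenode IDs are simply the entries of hdfs_namenodes because hdfs-site.xml.j2 uses the inventory hostname as dfs.ha.namenode.id. The "namenodes" group name is a placeholder.

# check_ha_state.yml - illustrative post-bootstrap check, not part of the role.
- hosts: namenodes                        # placeholder group; any host with the HDFS binaries will do
  become: true
  become_user: "{{ hdfs_user }}"
  tasks:
    - name: Query the HA state of each namenode ID
      command: "./hdfs haadmin -getServiceState {{ item }}"
      args:
        chdir: "{{ hdfs_bin_dir }}"
      with_items: "{{ hdfs_namenodes }}"
      register: ha_state
      run_once: true
      changed_when: false

    - name: Report active/standby roles
      debug:
        msg: "{{ item.item }}: {{ item.stdout }}"
      with_items: "{{ ha_state.results }}"
      run_once: true
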
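base.yml only deploys rack-awareness.sh, and core-site.xml.j2 only emits the topology properties, when hdfs_rack_script_awk is defined. The template pipes each hostname or IP through awk -F '.', so the variable is expected to hold an awk program that prints one rack path per input; the mapping below (rack taken from the third octet of the address) is purely an example. Whatever program is used should print exactly one rack per argument, since ScriptBasedMapping expects a one-to-one answer.

# group_vars sketch: map 10.x.<rack>.y addresses to /rack-<rack>.
# The awk body is substituted into: echo $@ | xargs -n 1 | awk -F '.' '<awk body>'
hdfs_rack_script_awk: '{print "/rack-"$3}'
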
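Both XML templates accept an optional list of extra properties, so site-specific settings do not require editing the templates. The sketch below shows the expected shape; the property names and values are examples, not recommendations. Note that core-site.xml.j2 checks property.final while hdfs-site.xml.j2 checks property_final, so as written the final flag is only honoured for core-site entries.

# Illustrative extra properties for core-site.xml and hdfs-site.xml.
# Each entry needs "name" and "value"; "final" is optional.
core_site_additional_properties:
  - name: io.file.buffer.size
    value: 131072
hdfs_site_additional_properties:
  - name: dfs.datanode.balance.bandwidthPerSec
    value: 10485760
    final: true
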
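Short-circuit reads need both the client flag and a datanode domain socket: base.yml creates the socket directory only when hdfs_enable_short_circuit_reads is true, and hdfs-site.xml.j2 renders dfs.domain.socket.path as that folder plus /dn._PORT. The directory below is a common choice but is an assumption, not the role default.

# Illustrative short-circuit read settings.
hdfs_enable_short_circuit_reads: true
hdfs_dfs_domain_socket_path_folder: /var/run/hadoop-hdfs   # example path
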
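Datanode decommissioning is driven by the exclude template: namenode.yml renders the dfs_hosts_exclude list into hdfs_conf_dir/dfs.hosts.exclude and notifies a namenode restart when the file changes. The hostname below is a placeholder. On a running cluster, "hdfs dfsadmin -refreshNodes" re-reads the exclude file without restarting the namenode, which may be preferable to relying on the restart handler.

# Illustrative decommissioning input; one entry per datanode to drain.
dfs_hosts_exclude:
  - dn3
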
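upgrade.yml implements a rolling upgrade and is only imported when hdfs_upgrade is true; it ends the play unless exactly two namenodes are defined, skips the work when the installed version is already at least hdfs_version, and hdfs_upgrade_force overrides that comparison. The version number below is a placeholder. Because main.yml tags the import with "upgrade", the run can also be limited to that flow with Ansible's --tags option.

# Illustrative extra vars for a rolling-upgrade run of the role.
hdfs_upgrade: true
hdfs_upgrade_force: false
hdfs_version: 2.8.5        # placeholder target version
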
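When hdfs_ssh_fence is enabled, user.yml generates an SSH key for the hdfs user on the first namenode, ssh_fence.yml distributes it and maintains a known_hosts file, and hdfs-site.xml.j2 switches dfs.ha.fencing.methods from shell(/bin/true) to sshfence. Only the variable names below come from the role; the known_hosts location is an assumption.

# Illustrative fencing-related settings.
hdfs_ssh_fence: true
hdfs_redistribute_ssh_keys: false
hdfs_ssh_known_hosts_file: "{{ hdfs_user_home }}/.ssh/known_hosts"   # example location
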
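native.yml builds Hadoop with its native libraries (snappy, bzip2, openssl) on one designated node and fetches the resulting tarball back to the controller; main.yml only imports it on hdfs_compile_node when hdfs_compile_from_source is set. The host name, git ref and fetch folder below are placeholders.

# Illustrative switches for a from-source build on a single node.
hdfs_compile_from_source: true
hdfs_compile_node: nn1
hdfs_compile_from_git: true
hdfs_compile_version: branch-2.8     # placeholder git branch or tag
hdfs_fetch_folder: ./files           # the built tarball is fetched here on the controller
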
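Finally, scripts.yml installs audit-compress-rotate.sh on the namenodes and schedules it daily at 07:00; the script gzips already-rotated hdfs-audit files and deletes anything older than hdfs_audit_rotate_days. Retention is therefore a single variable; the value below is only an example.

# Illustrative audit-log retention window, in days.
hdfs_audit_rotate_days: 30
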