├── .gitignore
├── .travis.yml
├── .yamllint
├── LICENSE
├── README.md
├── ansible.cfg
├── defaults
│   └── main.yml
├── handlers
│   └── main.yml
├── inventory
├── meta
│   └── main.yml
├── molecule
│   └── default
│       ├── Dockerfile.j2
│       ├── INSTALL.rst
│       ├── create.yml
│       ├── destroy.yml
│       ├── molecule.yml
│       ├── playbook.yml
│       ├── prepare.yml
│       ├── templates
│       │   └── hosts.j2
│       └── tests
│           ├── test_default.py
│           └── test_namenodes.py
├── requirements.yml
├── tasks
│   ├── base.yml
│   ├── bootstrap_ha.yml
│   ├── bootstrap_spof.yml
│   ├── config.yml
│   ├── datanode.yml
│   ├── journalnode.yml
│   ├── main.yml
│   ├── namenode.yml
│   ├── native.yml
│   ├── scripts.yml
│   ├── secondarynamenode.yml
│   ├── ssh_fence.yml
│   ├── upgrade.yml
│   ├── upgrade_datanode.yml
│   ├── upgrade_journalnode.yml
│   ├── upgrade_namenode.yml
│   └── user.yml
└── templates
    ├── audit-compress-rotate.sh.j2
    ├── core-site.xml.j2
    ├── dfs.hosts.exclude.j2
    ├── hadoop-env.sh.j2
    ├── hadoop_sudoers.j2
    ├── hdfs-service.j2
    ├── hdfs-site.xml.j2
    ├── hdfs.service.j2
    ├── log4j.properties.j2
    ├── mapred-site.xml.j2
    ├── rack-awareness.sh.j2
    └── slaves.j2
/.gitignore:
--------------------------------------------------------------------------------
1 | # Mac OS
2 | .DS_Store
3 |
4 | # IntelliJ Idea generated files
5 | *.iml
6 | *.ipr
7 | .idea_modules/
8 | .idea/
9 | *.iws
10 | .bundle/
11 | vendor/
12 | *.retry
13 | .molecule
14 | .cache
15 | __pycache__
16 | rsa_key/
17 | **/*.pyc
18 | hadoop-*
19 | pytestdebug.log
20 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | ---
2 | sudo: required
3 | language: python
4 | services:
5 | - docker
6 |
7 | install:
8 | - pip install ansible
9 | - pip install molecule
10 | - pip install docker-py
11 |
12 | script:
13 | - molecule --version
14 | - ansible --version
15 | - molecule test
16 | notifications:
17 | webhooks: https://galaxy.ansible.com/api/v1/notifications/
18 |
--------------------------------------------------------------------------------
/.yamllint:
--------------------------------------------------------------------------------
1 | extends: default
2 |
3 | rules:
4 | braces:
5 | max-spaces-inside: 1
6 | level: error
7 | brackets:
8 | max-spaces-inside: 1
9 | level: error
10 | line-length: disable
11 | truthy: disable
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## HDFS
2 | [![Build Status](https://travis-ci.org/teralytics/ansible-hdfs.svg?branch=master)](https://travis-ci.org/teralytics/ansible-hdfs)
3 |
4 |
5 | ### Introduction
6 | This role installs HDFS on Ubuntu/Debian Linux servers.
7 |
8 | ### Role dependencies
9 | * The role requires Java and ZooKeeper to be installed, configured and running (see the sketch below).
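
One way to satisfy these dependencies is to apply a Java role and a ZooKeeper role before this one, as the molecule test playbook does with the roles listed in [requirements.yml](https://github.com/teralytics/ansible-hdfs/blob/master/requirements.yml). A sketch (any roles providing Java and ZooKeeper will do):

```yml
# Sketch only -- substitute whatever roles provide Java and ZooKeeper in your setup
- hosts: hadoop_hosts
  become: True
  roles:
    - lhoss.java

- hosts: zookeeper_hosts
  become: True
  roles:
    - teralytics.zookeeper
```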
10 |
11 | ### Example
12 |
13 | ```yml
14 | - hosts: hadoop_hosts
15 | become: True
16 | roles:
17 | - hdfs
18 | ```
19 |
20 | For an example inventory please check the [inventory](https://github.com/teralytics/ansible-hdfs/blob/master/inventory) file.
21 |
22 | If ```hdfs_ssh_fence``` is set to ```true```, the playbook has to be run with the ```-K``` option of ansible-playbook (to prompt for the privilege escalation password)!
23 |
24 |
25 | ### Configuration
26 | This role supports two different modes of installation:
27 |
28 | * Single Namenode with Secondary Namenode
29 | * Two Namenodes in HA mode
30 |
31 | The number of *namenodes* determines the mode: if two namenodes are specified, HDFS will be installed in HA mode.
32 |
33 |
34 | For documentation details of HDFS please refer to the official [Hadoop documentation](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html).
35 |
36 | #### Preparation of your Inventory
37 | This role uses inventory groups to determine which components are installed on each server. The groups are listed below:
38 |
39 | * namenodes
40 | * datanodes
41 | * secondarynamenode (Single NN setup only)
42 | * zookeeper_hosts (High availability mode only)
43 | * journalnodes (High availability mode only)
44 |
45 | Alternatively, variables like ```hdfs_namenodes``` can be overridden (see [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml)).
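
For example, if your inventory uses different group names, the host lists can be overridden in *group_vars* (a minimal sketch with hypothetical group and host names):

```yml
# group_vars/all.yml -- hypothetical group and host names
hdfs_namenodes:
  - hdfs01
  - hdfs02
hdfs_datanodes: "{{ groups.my_datanodes }}"
hdfs_journalnodes: "{{ groups.my_journalnodes }}"
```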
46 |
47 | #### Important variables:
48 | The following is a list of important variables that have to be set for a specific deployment. Most variables can be set in *group_vars* or *host_vars*; a sketch of such a file follows the list.
49 |
50 | * ```hdfs_cluster_name```: Name of your cluster
51 | * ```hdfs_parent_dir```: Where to install HDFS to
52 | * ```hdfs_version```: Hadoop version to use
53 | * ```hdfs_tmpdir```: Where to write HDFS tmp files
54 | * ```hdfs_namenode_dir_list```: Directories where the namenode stores its metadata
55 | * ```hdfs_datanode_dir_list```: Directories where the datanodes store their data
56 | * ```hdfs_namenode_checkpoint_dir_list```: Directories where the secondary namenode stores checkpoint images
57 | * ```hdfs_distribution_method```: How to obtain the Hadoop tar.gz: 'download', 'local_file' or 'compile'
58 | * ```hdfs_bootstrap```: Should the cluster be formatted? (Not recommended if you already have an existing installation)
59 | * ```hdfs_host_domain_name```: Only set this variable if your host entries are not FQDNs. E.g. value: "node.dns.example.com"
60 | * ```hdfs_upgrade```: Set this variable to perform an upgrade (requires that ```hdfs_version``` has been changed)
61 | * ```hdfs_upgrade_force```: Set this variable to force an upgrade (the playbook will run even if the version hasn't changed; useful when something went wrong and some nodes have already been upgraded)
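
A minimal *group_vars* sketch setting a few of these variables (paths and values are illustrative only; the defaults in [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml) remain the reference):

```yml
# group_vars/hadoop_hosts.yml -- illustrative values only
hdfs_cluster_name: cluster1
hdfs_version: 2.8.2
hdfs_parent_dir: /usr/local
hdfs_distribution_method: download
hdfs_namenode_dir_list:
  - /data/dfs/name          # hypothetical path, use your own disks
hdfs_datanode_dir_list:
  - /data/dfs/data          # hypothetical path, use your own disks
hdfs_bootstrap: False
```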
62 |
63 | For more configuration variables see the documentation in [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml).
64 |
65 | If ```hdfs_upgrade``` is set to ```true```, the playbook will assume an upgrade is taking place and some input from the user might be required.
66 |
67 | #### Additional HDFS configuration
68 | Additional configuration for ```hdfs-site.xml``` and ```core-site.xml``` can be added by overriding the following variables (a sketch follows the list):
69 |
70 | - ```hdfs_site_additional_properties```
71 | - ```core_site_additional_properties```
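
As a sketch of what this could look like in *group_vars*; the exact structure these variables must have is defined by the ```hdfs-site.xml.j2``` and ```core-site.xml.j2``` templates, so check those templates before copying this hypothetical shape:

```yml
# Hypothetical shape -- verify against templates/hdfs-site.xml.j2 and core-site.xml.j2
hdfs_site_additional_properties:
  dfs.datanode.balance.bandwidthPerSec: 10485760
core_site_additional_properties:
  io.file.buffer.size: 131072
```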
72 |
73 | #### Description of playbooks
74 | This section gives a brief description of what each playbook does.
75 |
76 | ##### Native (experimental)
77 | CURRENTLY ONLY WORKS WITH Ubuntu 14.04 (16.04 ships a newer protobuf version, so compilation fails).
78 |
79 | This playbook will compile Hadoop on the server *hdfs_compile_node* to enable [hadoop native libraries](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/NativeLibraries.html) (compression codecs and [HDFS Short-Circuit Local Reads](http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html)).
80 | It will also install the development tools necessary to compile Hadoop. (Download and compilation may take a while depending on your internet connection and server power, typically 10-20 min.)
81 |
82 | To activate this playbook, set ```hdfs_distribution_method``` to ```compile```.
83 |
84 | Known issues:
85 |
86 | * Sometimes the git download fails on the first attempt. Just run it again.
87 |
88 | Options:
89 |
90 | * ```hdfs_compile_node```: Server to compile on
91 | * ```hdfs_compile_from_git```: True if it should download the latest version from github.com
92 | * ```hdfs_compile_version```: Version to download from GitHub (tags can be used, e.g. 'tags/rel/release-2.7.2', or 'HEAD')
93 | * ```hdfs_fetch_folder```: Local folder to download the compiled tar to.
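
A sketch of what enabling the compile path could look like in *group_vars*; the values below simply mirror the role defaults from [defaults/main.yml](https://github.com/teralytics/ansible-hdfs/blob/master/defaults/main.yml):

```yml
# Mirrors the role defaults; shown here only to make the compile switch explicit
hdfs_distribution_method: compile
hdfs_compile_node: "{{ hdfs_namenodes[0] }}"
hdfs_compile_from_git: True
hdfs_compile_version: "tags/rel/release-{{ hdfs_version }}"
hdfs_fetch_folder: /tmp/ansible_fetch
```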
94 |
95 | ##### base
96 | This playbook installs the Hadoop binaries and creates symlinks for easy usage.
97 |
98 | ##### config
99 | This playbook writes the configuration files.
100 |
101 | ##### upgrade
102 | This playbook upgrades HDFS in a controlled way (applicable only to HA mode). It follows a no-downtime procedure that can be summarized as follows:
103 |
104 | 1. Prepare the rolling upgrade, wait for "Proceed with rolling upgrade"
105 |    1. Perform the upgrade of the active namenode (by means of a failover to the standby)
106 |    2. Fail over to the newly upgraded namenode, upgrade the second namenode
107 | 2. Perform the upgrade of the datanodes in a rolling fashion
108 |    1. Stop the running datanode (check if running)
109 |    2. Install the new version
110 |    3. Restart it with the new program version (check if running)
111 | 3. Finalize the rolling upgrade
112 |
113 | Be prepared to respond to prompts from the playbook, especially when dealing with starting and stopping of services.
114 | If anything goes wrong and some nodes were already upgraded, run the playbook again with ```hdfs_upgrade_force``` set to ```True```. This process is idempotent.
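
As a sketch, an upgrade run would typically just bump ```hdfs_version``` and enable the upgrade flag in *group_vars* (the version number below is hypothetical):

```yml
# Hypothetical upgrade settings
hdfs_version: 2.8.3        # new target version (hypothetical)
hdfs_upgrade: True         # tell the role an upgrade is taking place
hdfs_upgrade_force: False  # set to True to re-run after a partially completed upgrade
```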
115 |
116 | ##### user
117 | This playbook will create the user ```hdfs_user```, generate an SSH key for it, distribute the key, and register all servers in each other's known_hosts files.
118 |
119 | ##### ssh_fence
120 | This playbook sets up SSH access for the ```hdfs_user``` between the namenode servers. Used if SSH is the preferred fencing method. (See the [HA documentation](https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html))
121 |
122 | ##### namenode
123 | This playbook writes configuration files needed only by the namenode, creates folders and sets up the services for the namenode and zkfc.
124 |
125 | ##### datanode
126 | This playbook creates the folders specified in ```hdfs_datanode_dir_list``` and registers the hdfs-datanode service.
127 |
128 | ##### journalnode
129 | This playbook will install the journal node service.
130 |
131 | ##### secondarynamenode
132 | This playbook will install and register the hdfs-secondarynamenode service.
133 |
134 | ##### bootstrap_ha
135 | This playbook bootstraps a cluster in HA mode.
136 |
137 | ##### bootstrap_spof
138 | This playbook bootstraps a cluster in SPOF mode. (One namenode and one secondary namenode)
139 |
140 | ### Testing
141 | The tests are run using [molecule](https://github.com/metacloud/molecule) and Docker containers.
142 |
143 | #### Requirements:
144 | - Docker
145 | - molecule (pip module)
146 | - docker-py (pip module)
147 |
148 | #### Running tests
149 |
150 | From the root folder run ```molecule test```.
151 |
152 | ### License
153 | Apache 2.0
154 |
155 | ### Author information
156 |
157 | - Bertrand Bossy
158 | - Florian Froese
159 | - Laurent Hoss
160 |
--------------------------------------------------------------------------------
/ansible.cfg:
--------------------------------------------------------------------------------
1 | [defaults]
2 | roles_path = ../:../../
3 | inventory = inventory
4 |
--------------------------------------------------------------------------------
/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | # ------------------------------------------------------------------------------
3 | # General cluster settings
4 | # ------------------------------------------------------------------------------
5 | hdfs_cluster_name: cluster1
6 | hdfs_user: hdfs
7 | hdfs_group: hadoop
8 | hdfs_user_home: "/var/lib/{{ hdfs_user }}"
9 | hdfs_version: 2.8.2
10 | hdfs_upgrade: False
11 | hdfs_upgrade_force: False
12 |
13 | hdfs_java_home: /usr/lib/jvm/java-1.8.0-openjdk-amd64
14 |
15 | hdfs_extra_classpath: []
16 |
17 | # Bootstraps the cluster (format namenodes, zkfc, journalnodes, start all services)
18 | # Please read the code before you activate this option.
19 | # Especially if you already have a hadoop setup in place.
20 | hdfs_bootstrap: False
21 |
22 | # Use ansible handlers?
23 | hdfs_ansible_handlers: True
24 | # Redistribute ssh keys every time?
25 | hdfs_redistribute_ssh_keys: False
26 |
27 | hdfs_parent_dir: /usr/local # hadoop binaries will be copied here
28 | hdfs_ssh_known_hosts_file: "{{ hdfs_user_home }}/.ssh/known_hosts"
29 |
30 | # ------------------------------------------------------------------------------
31 | # Hadoop installation source
32 | # ------------------------------------------------------------------------------
33 | hdfs_distribution_method: "download" # Method to use for archive installation ("download", "local_file" or "compile")
34 | hdfs_download_url: "https://archive.apache.org/dist/hadoop/core/hadoop-{{ hdfs_version }}/hadoop-{{ hdfs_version }}.tar.gz"
35 | hdfs_local_archive_path: "./"
36 |
37 | # ------------------------------------------------------------------------------
38 | # Hadoop host variables
39 | # ------------------------------------------------------------------------------
40 | hdfs_namenodes: "{{ groups.namenodes }}"
41 | hdfs_hadoop_hosts: "{{ groups.hadoop_hosts }}"
42 | hdfs_journalnodes: "{{ groups.journalnodes }}"
43 | hdfs_secondary_namenode: "{{ groups.secondarynamenode if groups.secondarynamenode is defined else [] }}"
44 | hdfs_datanodes: "{{ groups.datanodes }}"
45 | hdfs_zookeeper_hosts: "{{ groups.zookeeper_hosts }}"
46 |
47 | # ------------------------------------------------------------------------------
48 | # Hadoop native libraries (experimental)
49 | # ------------------------------------------------------------------------------
50 | hdfs_compile_from_source: "{{ hdfs_distribution_method == 'compile' }}"
51 | hdfs_compile_node: "{{ hdfs_namenodes[0] }}"
52 | hdfs_compile_from_git: True
53 | hdfs_compile_version: "tags/rel/release-{{hdfs_version}}"
54 | hdfs_fetch_folder: /tmp/ansible_fetch
55 |
56 | # ------------------------------------------------------------------------------
57 | # HA specific setup
58 | # ------------------------------------------------------------------------------
59 | # Use ssh as the fencing method (the other option is shell(/bin/true))
60 | hdfs_ssh_fence: True
61 |
62 | hdfs_ha_enabled: "{{hdfs_namenodes | count > 1}}"
63 | hdfs_default_fs: "hdfs://{{ hdfs_nameservices if hdfs_ha_enabled else hdfs_namenodes[0] + ':8020' }}"
64 | hdfs_nameservices: "{{ hdfs_cluster_name }}"
65 | hdfs_zookeeper_client_port: 2181
66 | hdfs_zookeeper_quorum: "{{ hdfs_zookeeper_hosts | join(':' + (hdfs_zookeeper_client_port | string) + ',') }}:{{ hdfs_zookeeper_client_port | string }}"
67 |
68 |
69 | # ------------------------------------------------------------------------------
70 | # Non-HA specific setup
71 | # ------------------------------------------------------------------------------
72 | hdfs_secondary_namenode_http_address: "0.0.0.0:50090"
73 |
74 | # ------------------------------------------------------------------------------
75 | # Hadoop configuration
76 | # ------------------------------------------------------------------------------
77 |
78 | # Symlink for hadoop to the version you are installing
79 | hdfs_hadoop_home: "{{hdfs_parent_dir}}/hadoop"
80 | hdfs_conf_dir: "{{hdfs_hadoop_home}}/etc/hadoop"
81 | hdfs_bin_dir: "{{hdfs_hadoop_home}}/bin"
82 | hdfs_log_dir: /var/log/hadoop
83 | hdfs_tmpdir: "/tmp"
84 |
85 | # Directories where namenode should store metadata
86 | hdfs_namenode_dir_list:
87 | - "/tmp/dfs/name"
88 | # Directories where secondary namenode should store temporary images to merge
89 | hdfs_namenode_checkpoint_dir_list:
90 | - "/tmp/dfs/secondaryname"
91 | # Directories where datanodes should store data
92 | hdfs_datanode_dir_list:
93 | - "/tmp/dfs/data"
94 |
95 | # Directories where journal nodes should store edits
96 | hdfs_dfs_journalnode_edits_dir: "/tmp/dfs/journaldir"
97 | hdfs_dfs_journalnode_edits_dir_perm: "700"
98 |
99 | hdfs_enable_short_circuit_reads: true # IMPORTANT: this property should be 'true' or 'false'
100 |
101 | # ------------------------------------------------------------------------------
102 | # Extended core-site.xml
103 | # ------------------------------------------------------------------------------
104 | hdfs_tmpdir_user: "{{hdfs_tmpdir}}/hadoop-${user.name}"
105 | hdfs_fs_trash_interval: 0
106 | hdfs_fs_trash_checkpoint_interval: 0 # If 0 this is set to the value of hdfs_fs_trash_interval by hadoop
107 |
108 | # ------------------------------------------------------------------------------
109 | # Extended hdfs-site.xml
110 | # ------------------------------------------------------------------------------
111 |
112 | hdfs_fs_permissions_umask_mode: "002"
113 | hdfs_dfs_permissions_superusergroup: "{{hdfs_group}}"
114 | hdfs_dfs_blocksize: 134217728
115 | hdfs_dfs_namenode_write_stale_datanode_ratio: "0.5f"
116 | hdfs_dfs_datanode_du_reserved: "1073741824"
117 | hdfs_dfs_datanode_data_dir_perm: "700"
118 | hdfs_dfs_datanode_max_transfer_threads: 4096
119 | hdfs_dfs_replication: 3
120 | hdfs_dfs_replication_max: 50
121 | hdfs_dfs_namenode_replication_min: 1
122 | hdfs_dfs_namenode_checkpoint_period: 3600
123 | # the recommended 'namenode handler count' is best defined by the formula: log2(#datanodes) * 20
124 | # and recommended 'service handler count' 50% of the previous value
125 | # Ref: https://community.hortonworks.com/articles/43839/scaling-the-hdfs-namenode-part-2.html
126 | # -> for an average cluster 10-20 DNs the value 64 is a good average (for 32+ DNs -> 100+)
127 | hdfs_dfs_namenode_handler_count: 32
128 | hdfs_dfs_namenode_service_handler_count: "{{ (hdfs_dfs_namenode_handler_count / 2)|int}}"
129 | hdfs_dfs_namenode_avoid_read_stale_datanode: true
130 | hdfs_dfs_namenode_avoid_write_stale_datanode: true
131 | hdfs_dfs_namenode_audit_log_async: false
132 | hdfs_dfs_client_file_block_storage_locations_num_threads: 10
133 | hdfs_dfs_client_file_block_storage_locations_timeout_millis: 1000
134 | hdfs_dfs_domain_socket_path_folder: /var/lib/hadoop-hdfs
135 |
136 | # ------------------------------------------------------------------------------
137 | # log4j.properties vars
138 | # ------------------------------------------------------------------------------
139 |
140 | hadoop_log_maxfilesize: "256MB"
141 | hadoop_log_maxbackupindex: 20
142 |
143 | # ------------------------------------------------------------------------------
144 | # hadoop-env.sh vars
145 | # ------------------------------------------------------------------------------
146 | hdfs_namenode_heap_size: "2048m"
147 | hdfs_namenode_javaOpts: "-Xmx{{hdfs_namenode_heap_size}}"
148 | hdfs_datanode_javaOpts: ""
149 |
150 | # default logger selection used in hadoop-env.sh
151 | hadoop_security_logger: "INFO,RFAS"
152 | hadoop_audit_logger: "INFO,NullAppender"
153 |
154 | # ------------------------------------------------------------------------------
155 | # Rack specific
156 | # ------------------------------------------------------------------------------
157 |
158 | # rack awareness script: see https://bigdataprocessing.wordpress.com/2013/07/30/hadoop-rack-awareness-and-configuration/
159 | # and templates/rack-awareness.sh.j2
160 | # if hdfs_rack_script_awk is not defined, HDFS will not be rack aware. DO NOT USE SINGLE QUOTES (or make sure it works)
161 | # hdfs_rack_script_awk: '"{if ($4 < 3) print "rack-1"; else print "rack-2" }"'
162 | hdfs_rack_script_path: "{{hdfs_conf_dir}}/rack-awareness.sh"
163 |
164 | # ------------------------------------------------------------------------------
165 | # Custom scripts
166 | # ------------------------------------------------------------------------------
167 | hdfs_audit_rotate_days: 90 # ISO 27001 compliance
168 |
--------------------------------------------------------------------------------
/handlers/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Force systemd to reread configs
3 | systemd: daemon_reload=yes
4 | when: ansible_service_mgr == "systemd"
5 |
6 | - name: Restart namenode
7 | service: name=hdfs-namenode state=restarted
8 | when:
9 | - inventory_hostname in hdfs_namenodes
10 | - hdfs_ansible_handlers|bool
11 |
12 | - name: Restart datanode
13 | service: name=hdfs-datanode state=restarted
14 | when:
15 | - inventory_hostname in hdfs_datanodes
16 | - hdfs_ansible_handlers|bool
17 |
18 | - name: Restart journalnode
19 | service: name=hdfs-journalnode state=restarted
20 | when:
21 | - hdfs_ha_enabled
22 | - inventory_hostname in hdfs_journalnodes
23 | - hdfs_ansible_handlers|bool
24 |
25 | - name: Restart zkfc
26 | service: name=hdfs-zkfc state=restarted
27 | when:
28 | - hdfs_ha_enabled
29 | - inventory_hostname in hdfs_namenodes
30 | - hdfs_ansible_handlers|bool
31 |
32 | - name: Restart secondary namenode
33 | service: name=hdfs-secondarynamenode state=restarted
34 | when:
35 | - not hdfs_ha_enabled
36 | - inventory_hostname in hdfs_secondary_namenode
37 | - hdfs_ansible_handlers|bool
38 |
--------------------------------------------------------------------------------
/inventory:
--------------------------------------------------------------------------------
1 | [hadoop_hosts]
2 | hdfs0[1:3]
3 |
4 | [namenodes]
5 | hdfs0[1:2]
6 |
7 | [journalnodes]
8 | hdfs0[1:3]
9 |
10 | [datanodes]
11 | hdfs0[1:3]
12 |
13 | [zookeeper_hosts]
14 | hdfs0[1:3]
15 |
--------------------------------------------------------------------------------
/meta/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | galaxy_info:
3 | author: Florian Froese
4 | description: Installs HDFS on Ubuntu/Debian servers.
5 | company: Teralytics AG
6 | license: Apache 2.0
7 | min_ansible_version: 2.4
8 | platforms:
9 | - name: Ubuntu
10 | versions:
11 | - trusty
12 | - saucy
13 | - raring
14 | - vivid
15 | - wily
16 | - xenial
17 | - name: Debian
18 | versions:
19 | - squeeze
20 | - wheezy
21 | - jessie
22 | categories:
23 | - hdfs
24 | galaxy_tags:
25 | - hdfs
26 | - hadoop
27 | - native
28 | - installer
29 |
--------------------------------------------------------------------------------
/molecule/default/Dockerfile.j2:
--------------------------------------------------------------------------------
1 | # Molecule managed
2 |
3 | FROM {{ item.image }}
4 |
5 | RUN if [ $(command -v apt-get) ]; then apt-get update && apt-get upgrade -y && apt-get install -y python sudo bash ca-certificates && apt-get clean; \
6 | elif [ $(command -v dnf) ]; then dnf makecache && dnf --assumeyes install python sudo python-devel python2-dnf bash && dnf clean all; \
7 | elif [ $(command -v yum) ]; then yum makecache fast && yum update -y && yum install -y python sudo yum-plugin-ovl bash && sed -i 's/plugins=0/plugins=1/g' /etc/yum.conf && yum clean all; \
8 | elif [ $(command -v zypper) ]; then zypper refresh && zypper update -y && zypper install -y python sudo bash python-xml && zypper clean -a; \
9 | elif [ $(command -v apk) ]; then apk update && apk add --no-cache python sudo bash ca-certificates; fi
10 |
--------------------------------------------------------------------------------
/molecule/default/INSTALL.rst:
--------------------------------------------------------------------------------
1 | *******
2 | Install
3 | *******
4 |
5 | Requirements
6 | ============
7 |
8 | * Docker Engine
9 | * docker-py
10 |
11 | Install
12 | =======
13 |
14 | .. code-block:: bash
15 |
16 | $ sudo pip install docker-py
17 |
--------------------------------------------------------------------------------
/molecule/default/create.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Create
3 | hosts: localhost
4 | connection: local
5 | gather_facts: false
6 | no_log: "{{ not lookup('env', 'MOLECULE_DEBUG') | bool }}"
7 | vars:
8 | molecule_file: "{{ lookup('env', 'MOLECULE_FILE') }}"
9 | molecule_ephemeral_directory: "{{ lookup('env', 'MOLECULE_EPHEMERAL_DIRECTORY') }}"
10 | molecule_scenario_directory: "{{ lookup('env', 'MOLECULE_SCENARIO_DIRECTORY') }}"
11 | molecule_yml: "{{ lookup('file', molecule_file) | molecule_from_yaml }}"
12 | tasks:
13 | - name: Create Dockerfiles from image names
14 | template:
15 | src: "{{ molecule_scenario_directory }}/Dockerfile.j2"
16 | dest: "{{ molecule_ephemeral_directory }}/Dockerfile_{{ item.image | regex_replace('[^a-zA-Z0-9_]', '_') }}"
17 | with_items: "{{ molecule_yml.platforms }}"
18 | register: platforms
19 |
20 | - name: Discover local Docker images
21 | docker_image_facts:
22 | name: "molecule_local/{{ item.item.name }}"
23 | with_items: "{{ platforms.results }}"
24 | register: docker_images
25 |
26 | - name: Build an Ansible compatible image
27 | docker_image:
28 | path: "{{ molecule_ephemeral_directory }}"
29 | name: "molecule_local/{{ item.item.image }}"
30 | dockerfile: "{{ item.item.dockerfile | default(item.invocation.module_args.dest) }}"
31 | force: "{{ item.item.force | default(true) }}"
32 | with_items: "{{ platforms.results }}"
33 | when: platforms.changed or docker_images.results | map(attribute='images') | select('equalto', []) | list | count >= 0
34 |
35 | - name: Create molecule instance(s)
36 | docker_container:
37 | name: "{{ item.name }}"
38 | hostname: "{{ item.name }}"
39 | image: "molecule_local/{{ item.image }}"
40 | state: started
41 | recreate: false
42 | log_driver: json-file
43 | command: "{{ item.command | default('bash -c \"while true; do sleep 10000; done\"') }}"
44 | privileged: "{{ item.privileged | default(omit) }}"
45 | volumes: "{{ item.volumes | default(omit) }}"
46 | capabilities: "{{ item.capabilities | default(omit) }}"
47 | ports: "{{ item.exposed_ports | default(omit) }}"
48 | ulimits: "{{ item.ulimits | default(omit) }}"
49 | register: server
50 | with_items: "{{ molecule_yml.platforms }}"
51 | async: 7200
52 | poll: 0
53 |
54 | - name: Wait for instance(s) creation to complete
55 | async_status:
56 | jid: "{{ item.ansible_job_id }}"
57 | register: docker_jobs
58 | until: docker_jobs.finished
59 | retries: 300
60 | with_items: "{{ server.results }}"
61 |
--------------------------------------------------------------------------------
/molecule/default/destroy.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Destroy
3 | hosts: localhost
4 | connection: local
5 | gather_facts: false
6 | no_log: "{{ not lookup('env', 'MOLECULE_DEBUG') | bool }}"
7 | vars:
8 | molecule_file: "{{ lookup('env', 'MOLECULE_FILE') }}"
9 | molecule_yml: "{{ lookup('file', molecule_file) | molecule_from_yaml }}"
10 | tasks:
11 | - name: Destroy molecule instance(s)
12 | docker_container:
13 | name: "{{ item.name }}"
14 | state: absent
15 | force_kill: "{{ item.force_kill | default(true) }}"
16 | register: server
17 | with_items: "{{ molecule_yml.platforms }}"
18 | async: 7200
19 | poll: 0
20 |
21 | - name: Wait for instance(s) deletion to complete
22 | async_status:
23 | jid: "{{ item.ansible_job_id }}"
24 | register: docker_jobs
25 | until: docker_jobs.finished
26 | retries: 300
27 | with_items: "{{ server.results }}"
28 |
--------------------------------------------------------------------------------
/molecule/default/molecule.yml:
--------------------------------------------------------------------------------
1 | ---
2 | dependency:
3 | name: galaxy
4 | options:
5 | ignore-certs: True
6 | ignore-errors: True
7 | role-file: requirements.yml
8 | lint:
9 | name: yamllint
10 | driver:
11 | name: docker
12 | platforms:
13 | - name: hdfs1
14 | hostname: "{{ item.name }}"
15 | image: solita/ubuntu-systemd:16.04
16 | command: /sbin/init
17 | privileged: True
18 | groups:
19 | - namenodes
20 | - hadoop_hosts
21 | - journalnodes
22 | - datanodes
23 | - zookeeper_hosts
24 | - name: hdfs2
25 | hostname: "{{ item.name }}"
26 | image: solita/ubuntu-systemd:16.04
27 | command: /sbin/init
28 | privileged: True
29 | groups:
30 | - namenodes
31 | - hadoop_hosts
32 | - journalnodes
33 | - datanodes
34 | - zookeeper_hosts
35 | - name: hdfs3
36 | hostname: "{{ item.name }}"
37 | image: solita/ubuntu-systemd:16.04
38 | command: /sbin/init
39 | privileged: True
40 | groups:
41 | - hadoop_hosts
42 | - journalnodes
43 | - datanodes
44 | - zookeeper_hosts
45 | provisioner:
46 | name: ansible
47 | inventory:
48 | group_vars:
49 | all:
50 | java_home: /usr/lib/jvm/java-8-openjdk-amd64
51 | java_packages:
52 | - openjdk-8-jdk
53 | java_openjdk_ppa_repos_support: true
54 |
55 | hdfs_bootstrap: True
56 | hdfs_ansible_handlers: False
57 | hdfs_ssh_fence: False
58 | zookeeper_hosts:
59 | zookeeper_version: 3.4.8
60 | zookeeper_client_port: "2181"
61 | # used by other role defaults (just grep for it)
62 | zookeeper_hostnames: "{{ groups.zookeeper_hosts | join(':' + zookeeper_client_port + ',') }}:{{ zookeeper_client_port }}"
63 | # used in the role: ansible-zookeeper
64 | zookeeper_hosts: "{{ groups.zookeeper_hosts }}"
65 | zookeeper_register_path_env: true
66 | zookeeper_debian_apt_install: true
67 | zookeeper_debian_apt_repositories:
68 | - repository_url: "ppa:ufscar/zookeeper"
69 | distro_version: "14.04"
70 |
71 | # custom zookeeper (bin) dir (role default is in /opt )
72 | zookeeper_dir: /usr/local/zookeeper-{{zookeeper_version}}
73 | zookeeper_tarball_dir: /tmp/
74 | lint:
75 | name: ansible-lint
76 | scenario:
77 | name: default
78 | verifier:
79 | name: testinfra
80 | lint:
81 | name: flake8
82 |
--------------------------------------------------------------------------------
/molecule/default/playbook.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - hosts: all
3 | become: True
4 | vars:
5 | hostgroup: 'all' # Which group in the ansible inventory should be considered
6 | ipaddrnr: 0
7 | hosts_file: /etc/hosts
8 | tasks:
9 | - name: Install dnsmasq
10 | apt: name=dnsmasq state=present
11 | - name: Redo setup again after dnsmasq was installed
12 | setup:
13 | - name: Overwrite nameserver in /etc/resolv.conf
14 | shell: bash -c "echo 'nameserver 127.0.0.1' > /etc/resolv.conf" && touch /etc/changedResolv
15 | args:
16 | creates: /etc/changedResolv
17 | - name: Generate /etc/hosts from group '{{ hostgroup }}'
18 | template: src=hosts.j2 dest=/tmp/hosts owner=root group=root mode=0644 backup=yes
19 | - name: Add /tmp/hosts to dnsmasq.conf
20 | lineinfile: dest=/etc/dnsmasq.conf regexp='^addn-hosts=' line='addn-hosts=/tmp/hosts'
21 | register: dnsmasqconf_changed
22 | - name: Add server 8.8.8.8 to dnsmasq.conf
23 | lineinfile: dest=/etc/dnsmasq.conf regexp='^server=8.8.8.8' line='server=8.8.8.8'
24 | - name: Restart dnsmasq
25 | service: name=dnsmasq state=restarted
26 | when: dnsmasqconf_changed.changed
27 | tags:
28 | - skip_ansible_lint
29 | - name: Install netstat for unit tests
30 | apt: name=net-tools state=present
31 |
32 | ## End of docker hack
33 |
34 | - hosts: all
35 | become: True
36 | roles:
37 | - role: "lhoss.java"
38 |
39 | - hosts: zookeeper_hosts
40 | become: True
41 | roles:
42 | - { role: "teralytics.zookeeper" }
43 |
44 | - hosts: hadoop_hosts
45 | become: True
46 | roles:
47 | - ansible-hdfs
48 | serial: "{{ serial|default(0) }}"
49 |
--------------------------------------------------------------------------------
/molecule/default/prepare.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Prepare
3 | hosts: all
4 | gather_facts: false
5 | tasks: []
6 |
--------------------------------------------------------------------------------
/molecule/default/templates/hosts.j2:
--------------------------------------------------------------------------------
1 | 127.0.0.1 localhost
2 |
3 | # The following lines are desirable for IPv6 capable hosts
4 | #::1 ip6-localhost ip6-loopback
5 | #fe00::0 ip6-localnet
6 | #ff00::0 ip6-mcastprefix
7 | #ff02::1 ip6-allnodes
8 | #ff02::2 ip6-allrouters
9 | #ff02::3 ip6-allhosts
10 |
11 | ## {{ ansible_managed }}
12 |
13 | {% for host in groups[ hostgroup ] %}
14 | {{ hostvars[host]["ansible_eth0"]["ipv4"]["address"] }} {{ host }}
15 | {% endfor %}
16 |
17 | {% if additional_host_lines is defined %}
18 | ## Additional hosts
19 | {% for host_line in additional_host_lines %}
20 | {{ host_line }}
21 | {% endfor %}
22 | {% endif %}
23 |
--------------------------------------------------------------------------------
/molecule/default/tests/test_default.py:
--------------------------------------------------------------------------------
1 | import testinfra.utils.ansible_runner
2 |
3 | testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
4 | '.molecule/inventory').get_hosts('all')
5 |
6 |
7 | def get(e, nodeName):
8 | arg = r"./property[name='{nodename}']".format(nodename=nodeName)
9 | return e.find(arg)[1].text
10 |
11 |
12 | def test_hosts_file(File):
13 | f = File('/etc/hosts')
14 |
15 | assert f.exists
16 | assert f.user == 'root'
17 | assert f.group == 'root'
18 |
19 |
20 | def test_hdfs_site(File):
21 | f = File('/usr/local/hadoop/etc/hadoop/hdfs-site.xml')
22 |
23 | import xml.etree.ElementTree
24 | e = xml.etree.ElementTree.fromstring(f.content_string)
25 | assert e.tag == 'configuration'
26 | assert get(e, 'dfs.nameservices') == 'cluster1'
27 | assert get(e, 'dfs.nameservice.id') == 'cluster1'
28 | assert set(get(e, 'dfs.ha.namenodes.cluster1').split(
29 | ',')) == set('hdfs1,hdfs2'.split(','))
30 |
31 | assert f.exists
32 | assert f.user == 'hdfs'
33 | assert f.group == 'hadoop'
34 | assert f.mode == 0o755
35 |
36 |
37 | def test_core_site(File):
38 | f = File('/usr/local/hadoop/etc/hadoop/core-site.xml')
39 |
40 | import xml.etree.ElementTree
41 | e = xml.etree.ElementTree.fromstring(f.content_string)
42 | assert e.tag == 'configuration'
43 | assert get(e, 'fs.defaultFS') == 'hdfs://cluster1'
44 | assert set(get(e, 'ha.zookeeper.quorum').split(',')) == set(
45 | 'hdfs1:2181,hdfs2:2181,hdfs3:2181'.split(','))
46 |
47 | assert f.exists
48 | assert f.user == 'hdfs'
49 | assert f.group == 'hadoop'
50 | assert f.mode == 0o755
51 |
52 |
53 | def test_hdfs_datanode_running(Service):
54 | service = Service('hdfs-datanode')
55 |
56 | assert service.is_running
57 | assert service.is_enabled
58 |
59 |
60 | def test_zookeeper_running(Service):
61 | service = Service('zookeeper')
62 |
63 | assert service.is_running
64 | assert service.is_enabled
65 |
66 |
67 | def test_hdfs_journal_running(Service):
68 | service = Service('hdfs-journalnode')
69 |
70 | assert service.is_running
71 |
72 |
73 | def test_hdfs_datanode_web_listening(Socket):
74 | socket = Socket('tcp://0.0.0.0:50075')
75 |
76 | assert socket.is_listening
77 |
--------------------------------------------------------------------------------
/molecule/default/tests/test_namenodes.py:
--------------------------------------------------------------------------------
1 | import testinfra.utils.ansible_runner
2 |
3 | testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
4 | '.molecule/inventory').get_hosts('namenodes')
5 |
6 |
7 | def test_hdfs_printTopology_command(Sudo, Command):
8 | with Sudo("hdfs"):
9 | c = Command("/usr/local/hadoop/bin/hdfs dfsadmin -printTopology")
10 |
11 | assert len(c.stdout.rstrip().split('\n')) == 4
12 | assert c.rc == 0
13 |
14 |
15 | def test_hdfs_check_safemode_is_off(Sudo, Command):
16 | with Sudo("hdfs"):
17 | c = Command("/usr/local/hadoop/bin/hdfs dfsadmin -safemode get")
18 |
19 | assert len(c.stdout.rstrip().split('\n')) == 2
20 | for row in c.stdout.rstrip().split('\n'):
21 | assert row.find("OFF") != -1
22 | assert c.rc == 0
23 |
24 |
25 | def test_hdfs_is_empty(Sudo, Command):
26 | with Sudo("hdfs"):
27 | c = Command("/usr/local/hadoop/bin/hdfs dfs -ls /")
28 |
29 | assert c.stdout.rstrip() == ''
30 | assert c.rc == 0
31 |
32 |
33 | def test_hdfs_namenode_running(Service):
34 | service = Service('hdfs-namenode')
35 |
36 | assert service.is_running
37 | assert service.is_enabled
38 |
39 |
40 | def test_hdfs_zkfc_running(Service):
41 | service = Service('hdfs-zkfc')
42 |
43 | assert service.is_running
44 | assert service.is_enabled
45 |
46 |
47 | def test_hdfs_listening(Socket):
48 | socket = Socket('tcp://0.0.0.0:8020')
49 | assert socket.is_listening
50 |
51 |
52 | def test_hdfs_web_listening(Socket):
53 | socket = Socket('tcp://0.0.0.0:50070')
54 | assert socket.is_listening
55 |
--------------------------------------------------------------------------------
/requirements.yml:
--------------------------------------------------------------------------------
1 | ---
2 | ## Java Open JDK
3 | - name: lhoss.java
4 | src: git+https://github.com/lhoss/ansible-role-java
5 | version: remotes/origin/support_configurable_ppa_repos
6 |
7 | ## Zookeeper
8 | - src: git+https://github.com/teralytics/ansible-zookeeper
9 | name: teralytics.zookeeper
10 | version: tags/v0.17.0
11 |
--------------------------------------------------------------------------------
/tasks/base.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Install some packages needed for native use
3 | apt: name={{item}} state=present update_cache=yes cache_valid_time=3600
4 | with_items:
5 | - "libssl-dev"
6 | - "libsnappy-dev"
7 |
8 | - name: Make sure parent directory exists
9 | file: path={{hdfs_parent_dir}} state=directory owner=root group=root mode=0755 follow=yes
10 |
11 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it (from local archive)
12 | unarchive: src=hadoop-{{hdfs_version}}.tar.gz dest="{{hdfs_parent_dir}}/" owner={{hdfs_user}} group={{hdfs_group}} mode=0755 creates="{{hdfs_hadoop_home}}-{{hdfs_version}}"
13 | when: hdfs_distribution_method == "local_file"
14 |
15 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it (from native compiled archive)
16 | unarchive: src="{{hdfs_fetch_folder}}/hadoop-{{hdfs_version}}.tar.gz" dest="{{hdfs_parent_dir}}/" owner={{hdfs_user}} group={{hdfs_group}} mode=0755 creates="{{hdfs_hadoop_home}}-{{hdfs_version}}"
17 | when: hdfs_distribution_method == "compile"
18 |
19 | - name: Download Hadoop .tgz to {{hdfs_parent_dir}}
20 | get_url: url={{hdfs_download_url}} dest="{{hdfs_parent_dir}}/hadoop-{{ hdfs_version }}.tar.gz" validate_certs=no
21 | when: hdfs_distribution_method == "download"
22 |
23 | - name: Unarchive downloaded Hadoop
24 | unarchive: src="{{hdfs_parent_dir}}/hadoop-{{ hdfs_version }}.tar.gz" dest="{{hdfs_parent_dir}}" remote_src=true creates="{{hdfs_hadoop_home}}-{{ hdfs_version }}"
25 | when: hdfs_distribution_method == "download"
26 |
27 | - name: Link hadoop version to {{hdfs_hadoop_home}}
28 | file: src={{hdfs_hadoop_home}}-{{hdfs_version}} dest={{hdfs_hadoop_home}} owner={{hdfs_user}} group={{hdfs_group}} state=link
29 |
30 | - name: Create folder /etc/hadoop
31 | file: path=/etc/hadoop state=directory owner={{hdfs_user}} group={{hdfs_group}}
32 |
33 | - name: Create hadoop link for conf to /etc/hadoop
34 | file: src={{hdfs_conf_dir}} dest=/etc/hadoop/conf owner={{hdfs_user}} group={{hdfs_group}} state=link
35 |
36 | - name: Create link for hdfs to /usr/local/bin
37 | file: src="{{hdfs_bin_dir}}/hdfs" dest=/usr/local/bin/hdfs owner={{hdfs_user}} group={{hdfs_group}} mode=0755 state=link
38 |
39 | - name: Create link for hadoop to /usr/local/bin
40 | file: src="{{hdfs_bin_dir}}/hadoop" dest=/usr/local/bin/hadoop owner={{hdfs_user}} group={{hdfs_group}} mode=0755 state=link
41 |
42 | - name: Export hadoop variables
43 | copy: content="export HADOOP_HOME={{hdfs_hadoop_home}}\nexport HADOOP_PREFIX={{hdfs_hadoop_home}}\nexport HADOOP_CONF_DIR={{hdfs_conf_dir}}\nexport HADOOP_LIBEXEC_DIR={{hdfs_hadoop_home}}/libexec\nexport HADOOP_CLASSPATH=`/usr/local/bin/hadoop classpath`" dest="/etc/profile.d/hadoop_exports.sh" mode=0755
44 |
45 | - name: Allow hadoop variables keeping for sudoers
46 | template: src=hadoop_sudoers.j2 dest=/etc/sudoers.d/hadoop owner=root group=root mode=0644
47 |
48 | - name: Create rack awareness script
49 | template: src=rack-awareness.sh.j2 dest={{hdfs_rack_script_path}} owner={{hdfs_user}} group={{hdfs_group}} mode=0755
50 | when: hdfs_rack_script_awk is defined
51 |
52 | - name: Create hadoop tmp dir
53 | file: path={{hdfs_tmpdir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=1777
54 | tags:
55 | - skip_ansible_lint
56 |
57 | - name: Create hadoop log dir
58 | file: path={{hdfs_log_dir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0755
59 |
60 | - name: Create directory for unix sockets
61 | file: path={{hdfs_dfs_domain_socket_path_folder}} state=directory owner={{hdfs_user}} group=root mode=0755
62 | when: hdfs_enable_short_circuit_reads
63 |
--------------------------------------------------------------------------------
/tasks/bootstrap_ha.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Pause - Bootstrapping is about to begin
3 | pause: prompt="Are you sure that you want to continue bootstrapping HDFS?" seconds=10
4 |
5 | - name: Ensure that zookeeper is running
6 | service: name=zookeeper state=started
7 | when: inventory_hostname in hdfs_zookeeper_hosts
8 |
9 | - name: Ensure that journal nodes are running
10 | service: name=hdfs-journalnode state=started
11 | when: inventory_hostname in hdfs_journalnodes
12 |
13 | - name: Format namenode {{hdfs_namenodes[0]}}
14 | command: bash -lc "./hdfs namenode -format {{ hdfs_cluster_name }} -force"
15 | args:
16 | creates: "{{hdfs_namenode_dir_list[0]}}/current/VERSION"
17 | chdir: "{{ hdfs_bin_dir }}"
18 | become: true
19 | become_user: "{{hdfs_user}}"
20 | when: inventory_hostname == hdfs_namenodes[0]
21 |
22 | - name: Start namenode {{hdfs_namenodes[0]}}
23 | service: name=hdfs-namenode state=started
24 | when: inventory_hostname == hdfs_namenodes[0]
25 |
26 | - name: Wait for the namenode {{ hdfs_namenodes[0] }} to come online
27 | wait_for: host={{ hdfs_namenodes[0] }} port=50070
28 | when: inventory_hostname in hdfs_namenodes
29 |
30 | - name: Bootstrap the standby namenode ({{hdfs_namenodes[1]}})
31 | command: bash -lc "./hdfs namenode -bootstrapStandby && touch {{hdfs_namenode_dir_list[0]}}/.bootstrapped"
32 | args:
33 | creates: "{{hdfs_namenode_dir_list[0]}}/.bootstrapped"
34 | chdir: "{{ hdfs_bin_dir }}"
35 | become: true
36 | become_user: "{{hdfs_user}}"
37 | when: inventory_hostname == hdfs_namenodes[1]
38 |
39 | - name: Start namenode {{hdfs_namenodes[1]}}
40 | service: name=hdfs-namenode state=started
41 | when: inventory_hostname == hdfs_namenodes[1]
42 |
43 | - name: Format ZK for zkfc
44 | command: bash -lc "./hdfs zkfc -formatZK -force && touch {{hdfs_namenode_dir_list[0]}}/.zkFormatted"
45 | args:
46 | creates: "{{hdfs_namenode_dir_list[0]}}/.zkFormatted"
47 | chdir: "{{ hdfs_bin_dir }}"
48 | become: true
49 | become_user: "{{hdfs_user}}"
50 | when: inventory_hostname == hdfs_namenodes[0]
51 |
52 | - name: Start zkfc services
53 | service: name=hdfs-zkfc state=started
54 | when: inventory_hostname in hdfs_namenodes
55 |
56 | - name: Start data nodes
57 | service: name=hdfs-datanode state=started
58 | when: inventory_hostname in hdfs_datanodes
59 |
60 | - name: Bootstrapping complete
61 | debug: "msg='New namenode can be opened at http://{{ inventory_hostname }}:50070/'"
62 | when: inventory_hostname in hdfs_namenodes
63 |
--------------------------------------------------------------------------------
/tasks/bootstrap_spof.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Pause - Bootstrapping is about to begin
3 | pause: prompt="Are you sure that you want to continue bootstrapping HDFS?" seconds=10
4 |
5 | - name: Format namenode {{hdfs_namenodes[0]}}
6 | command: "./hdfs namenode -format {{ hdfs_cluster_name }} -force"
7 | args:
8 | creates: "{{hdfs_namenode_dir_list[0]}}/current/VERSION"
9 | chdir: "{{ hdfs_bin_dir }}"
10 | become: true
11 | become_user: "{{hdfs_user}}"
12 | when: inventory_hostname == hdfs_namenodes[0]
13 |
14 | - name: Start namenode {{hdfs_namenodes[0]}}
15 | service: name=hdfs-namenode state=started
16 | when: inventory_hostname == hdfs_namenodes[0]
17 |
18 | - name: Wait for the namenode {{ hdfs_namenodes[0] }} to come online
19 | wait_for: host={{ hdfs_namenodes[0] }} port=50070
20 | when: inventory_hostname in hdfs_secondary_namenode
21 |
22 | - name: Start secondary namenode
23 | service: name=hdfs-secondarynamenode state=started
24 | when: inventory_hostname in hdfs_secondary_namenode
25 |
26 | - name: Start data nodes
27 | service: name=hdfs-datanode state=started
28 | when: inventory_hostname in hdfs_datanodes
29 |
30 | - name: Bootstrapping complete
31 | debug: "msg='New namenode can be opened at http://{{ inventory_hostname }}:50070/'"
32 | when: inventory_hostname in hdfs_namenodes
33 |
--------------------------------------------------------------------------------
/tasks/config.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Configure hadoop-env.sh
3 | template: src=hadoop-env.sh.j2 dest={{hdfs_conf_dir}}/hadoop-env.sh owner={{hdfs_user}} group={{hdfs_group}} mode=0755
4 | notify:
5 | - Restart namenode
6 | - Restart secondary namenode
7 | - Restart datanode
8 | - Restart journalnode
9 | - Restart zkfc
10 | tags:
11 | - hdfs-env
12 | - hdfs-log4j-config
13 |
14 | - name: Configure core-site.xml
15 | template: src=core-site.xml.j2 dest={{hdfs_conf_dir}}/core-site.xml owner={{hdfs_user}} group={{hdfs_group}} mode=0755
16 | notify:
17 | - Restart namenode
18 | - Restart secondary namenode
19 | - Restart datanode
20 | - Restart journalnode
21 | - Restart zkfc
22 | tags:
23 | - hdfs-core-config
24 |
25 | - name: Configure hdfs-site.xml
26 | template: src=hdfs-site.xml.j2 dest={{hdfs_conf_dir}}/hdfs-site.xml owner={{hdfs_user}} group={{hdfs_group}} mode=0755
27 | notify:
28 | - Restart namenode
29 | - Restart secondary namenode
30 | - Restart datanode
31 | - Restart journalnode
32 | - Restart zkfc
33 | tags:
34 | - hdfs-site-config
35 |
36 | - name: Configure log4j.properties
37 | template: src=log4j.properties.j2 dest={{hdfs_conf_dir}}/log4j.properties owner={{hdfs_user}} group={{hdfs_group}} mode=0755
38 | notify:
39 | - Restart namenode
40 | - Restart secondary namenode
41 | - Restart datanode
42 | - Restart journalnode
43 | - Restart zkfc
44 | tags:
45 | - hdfs-log4j-config
46 |
--------------------------------------------------------------------------------
/tasks/datanode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Create datanode directories
3 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode={{ hdfs_dfs_datanode_data_dir_perm }}
4 | with_items: "{{ hdfs_datanode_dir_list }}"
5 |
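6 | # The shared hdfs-service.j2 / hdfs.service.j2 templates render whichever daemon the
7 | # 'program' fact points at, so each node type sets it before templating its service.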
6 | - name: Set program variable to 'datanode'
7 | set_fact: program="datanode"
8 |
9 | - name: Deploy init.d service for datanode
10 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-datanode owner=root group=root mode=0755
11 | when: ansible_service_mgr != "systemd"
12 |
13 | - name: Deploy systemd service for datanode
14 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-datanode.service owner=root group=root mode=0644
15 | register: datanode_systemd_unit_altered
16 | when: ansible_service_mgr == "systemd"
17 |
18 | # daemon-reload cannot be sent to handler because it is needed before the end of play
19 | # also, we cannot use flush_handlers before the end of play, because we have other handlers
20 | - name: Reload systemd daemon
21 | command: "systemctl daemon-reload"
22 | when: ansible_service_mgr == "systemd" and datanode_systemd_unit_altered.changed
23 | tags:
24 | - skip_ansible_lint
25 |
26 | - name: Register datanode service
27 | service: name=hdfs-datanode enabled=yes
28 |
--------------------------------------------------------------------------------
/tasks/journalnode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Create journalnode edits dir
3 | file: path={{hdfs_dfs_journalnode_edits_dir}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode={{ hdfs_dfs_journalnode_edits_dir_perm }}
4 |
5 | - name: Set program variable to 'journalnode'
6 | set_fact: program="journalnode"
7 |
8 | - name: Deploy init.d service for journalnode
9 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-journalnode owner=root group=root mode=0755
10 | when: ansible_service_mgr != "systemd"
11 |
12 | - name: Deploy systemd service for journalnode
13 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-journalnode.service owner=root group=root mode=0644
14 | register: journalnode_systemd_unit_altered
15 | when: ansible_service_mgr == "systemd"
16 |
17 | # daemon-reload cannot be sent to handler because it is needed before the end of play
18 | # also, we cannot use flush_handlers before the end of play, because we have other handlers
19 | - name: Reload systemd daemon
20 | command: "systemctl daemon-reload"
21 | when: ansible_service_mgr == "systemd" and journalnode_systemd_unit_altered.changed
22 | tags:
23 | - skip_ansible_lint
24 |
25 | - name: Register journalnode service
26 | service: name=hdfs-journalnode enabled=yes
27 |
--------------------------------------------------------------------------------
/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
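2 | # Task flow: create the hdfs user, optionally compile the native libraries, install the
3 | # base distribution and configuration, set up the per-node services (datanode, namenode,
4 | # secondary namenode or journalnodes), then bootstrap, upgrade and deploy helper scripts
5 | # as requested.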
2 | - import_tasks: user.yml
3 | tags: user
4 |
5 | - import_tasks: native.yml
6 | when: hdfs_compile_from_source and inventory_hostname == hdfs_compile_node
7 | tags: native
8 |
9 | - import_tasks: base.yml
10 | tags: base
11 |
12 | - import_tasks: config.yml
13 | tags: base,config
14 |
15 | - import_tasks: datanode.yml
16 | when: inventory_hostname in hdfs_datanodes
17 | tags: datanode
18 |
19 | - import_tasks: namenode.yml
20 | when: inventory_hostname in hdfs_namenodes
21 | tags: namenode
22 |
23 | - import_tasks: secondarynamenode.yml
24 | when: inventory_hostname in hdfs_secondary_namenode and not hdfs_ha_enabled
25 | tags: secondarynamenode
26 |
27 | - import_tasks: journalnode.yml
28 | when: inventory_hostname in hdfs_journalnodes and hdfs_ha_enabled
29 | tags: journalnode
30 |
31 | - import_tasks: bootstrap_spof.yml
32 | when: hdfs_bootstrap and not hdfs_ha_enabled
33 | tags: bootstrap
34 |
35 | - import_tasks: bootstrap_ha.yml
36 | when: hdfs_bootstrap and hdfs_ha_enabled
37 | tags: bootstrap
38 |
39 | - import_tasks: upgrade.yml
40 | any_errors_fatal: True
41 | when: hdfs_upgrade
42 | tags: upgrade
43 |
44 | - import_tasks: scripts.yml
45 | tags: scripts
46 |
--------------------------------------------------------------------------------
/tasks/namenode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Configure mapred-site.xml
3 | template: src=mapred-site.xml.j2 dest="{{hdfs_conf_dir}}/mapred-site.xml" owner={{hdfs_user}} group={{hdfs_group}}
4 |
5 | - name: Configure slaves
6 | template: src=slaves.j2 dest="{{hdfs_conf_dir}}/slaves" owner={{hdfs_user}} group={{hdfs_group}} mode=0744
7 |
8 | - name: Create namenode directories
9 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700
10 | with_items: "{{ hdfs_namenode_dir_list }}"
11 |
12 | - name: Create exclude file
13 | template: src=dfs.hosts.exclude.j2 dest={{hdfs_conf_dir}}/dfs.hosts.exclude owner={{hdfs_user}} group={{hdfs_group}} mode=0700
14 | notify: Restart namenode
15 |
16 | - name: Set program variable to 'namenode'
17 | set_fact: program="namenode"
18 |
19 | - name: Deploy init.d service for namenode
20 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-namenode owner=root group=root mode=0755
21 | when: ansible_service_mgr != "systemd"
22 |
23 | - name: Deploy systemd service for namenode
24 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-namenode.service owner=root group=root mode=0644
25 | register: namenode_systemd_unit_altered
26 | when: ansible_service_mgr == "systemd"
27 |
28 | - name: Set program variable to 'zkfc'
29 | set_fact: program="zkfc"
30 | when: hdfs_ha_enabled
31 |
32 | - name: Deploy init.d service for zkfc
33 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-zkfc owner=root group=root mode=0755
34 | when: hdfs_ha_enabled and ansible_service_mgr != "systemd"
35 |
36 | - name: Deploy systemd service for zkfc
37 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-zkfc.service owner=root group=root mode=0644
38 | register: zkfc_systemd_unit_altered
39 | when: hdfs_ha_enabled and ansible_service_mgr == "systemd"
40 |
41 | # daemon-reload cannot be sent to handler because it is needed before the end of play
42 | # also, we cannot use flush_handlers before the end of play, because we have other handlers
43 | - name: Reload systemd daemon
44 | command: "systemctl daemon-reload"
45 | when: ansible_service_mgr == "systemd" and ( namenode_systemd_unit_altered.changed or zkfc_systemd_unit_altered.changed )
46 | tags:
47 | - skip_ansible_lint
48 |
49 | - name: Register namenode service
50 | service: name=hdfs-namenode enabled=yes
51 |
52 | - name: Register zkfc service
53 | service: name=hdfs-zkfc enabled=yes
54 | when: hdfs_ha_enabled
55 |
--------------------------------------------------------------------------------
/tasks/native.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Make sure base dir exists
3 | file: path={{hdfs_parent_dir}} state=directory owner=root group=root mode=0755 follow=yes
4 |
5 | - name: Install some tools
6 | apt: name={{item}} state=present update_cache=yes
7 | with_items:
8 | - "git"
9 | - "maven"
10 | - "gcc"
11 | - "g++"
12 | - "make"
13 | - "cmake"
14 | - "zlib1g-dev"
15 | - "libcurl4-openssl-dev"
16 | - "libprotobuf8"
17 | - "protobuf-compiler"
18 | - "libssl-dev"
19 | - "bzip2"
20 | - "libbz2-dev"
21 | - "libsnappy-dev"
22 |
23 | - name: Copy Hadoop .tgz to {{hdfs_parent_dir}} and unpack it
24 | unarchive: src=hadoop-{{hdfs_version}}-src.tar.gz dest={{hdfs_parent_dir}} owner={{hdfs_user}} group={{hdfs_group}} creates="{{hdfs_hadoop_home}}-{{hdfs_version}}-src"
25 | when: not hdfs_compile_from_git
26 |
27 | - name: Clone hadoop git repo
28 | git: repo="https://github.com/apache/hadoop.git" dest="{{hdfs_hadoop_home}}-{{hdfs_version}}-src" version="{{hdfs_compile_version}}" depth=1
29 | when: hdfs_compile_from_git
30 |
31 | - name: Fix ownership of the Hadoop source tree
32 | file: path="{{hdfs_hadoop_home}}-{{hdfs_version}}-src" owner={{hdfs_user}} group={{hdfs_group}} recurse=yes
33 |
34 | - name: Compile
35 | command: mvn package -Pdist,native -DskipTests -Dtar -e -Drequire.snappy -Drequire.bzip2
36 | args:
37 | chdir: "{{hdfs_hadoop_home}}-{{hdfs_version}}-src"
38 | creates: "{{hdfs_hadoop_home}}-{{hdfs_version}}-src/hadoop-dist/target/hadoop-{{hdfs_version}}.tar.gz"
39 | become: true
40 | become_user: "{{hdfs_user}}"
41 |
42 | - name: Fetch hadoop-{{hdfs_version}}.tar.gz to {{hdfs_fetch_folder}}
43 | fetch: src="{{hdfs_hadoop_home}}-{{hdfs_version}}-src/hadoop-dist/target/hadoop-{{hdfs_version}}.tar.gz" dest="{{hdfs_fetch_folder}}/" flat=yes
44 |
--------------------------------------------------------------------------------
/tasks/scripts.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Create the log compress and rotate script in {{ hdfs_bin_dir }}
3 | template:
4 | src: "audit-compress-rotate.sh.j2"
5 | dest: "{{ hdfs_bin_dir }}/audit-compress-rotate.sh"
6 | owner: "{{ hdfs_user }}"
7 | group: "{{ hdfs_group }}"
8 | mode: "0750"
9 | when: inventory_hostname in hdfs_namenodes
10 |
11 | # run compress and rotate after the linux log rotate (default 6:25 AM)
12 | - name: Schedule the log compress and rotate cronjob
13 | cron:
14 | name: Log compress and rotate cronjob
15 | state: present
16 | minute: "0"
17 | hour: "7"
18 | job: "{{ hdfs_bin_dir }}/audit-compress-rotate.sh 2>&1 | /usr/bin/logger -t hdfs"
19 | when: inventory_hostname in hdfs_namenodes
18 |
--------------------------------------------------------------------------------
/tasks/secondarynamenode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Set program variable to 'secondarynamenode'
3 | set_fact: program="secondarynamenode"
4 |
5 | - name: Create directories for namenode checkpoints
6 | file: path={{item}} state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700
7 | with_items: "{{ hdfs_namenode_checkpoint_dir_list }}"
8 |
9 | - name: Deploy init.d service for secondarynamenode
10 | template: src=hdfs-service.j2 dest=/etc/init.d/hdfs-secondarynamenode owner=root group=root mode=0755
11 | when: ansible_service_mgr != "systemd"
12 |
13 | - name: Deploy systemd service for secondarynamenode
14 | template: src=hdfs.service.j2 dest=/lib/systemd/system/hdfs-secondarynamenode.service owner=root group=root mode=0644
15 | register: secondarynamenode_systemd_unit_altered
16 | when: ansible_service_mgr == "systemd"
17 |
18 | # daemon-reload cannot be sent to handler because it is needed before the end of play
19 | # also, we cannot use flush_handlers before the end of play, because we have other handlers
20 | - name: Reload systemd daemon
21 | command: "systemctl daemon-reload"
22 | when: ansible_service_mgr == "systemd" and secondarynamenode_systemd_unit_altered.changed
23 | tags:
24 | - skip_ansible_lint
25 |
26 | - name: Register secondarynamenode service
27 | service: name=hdfs-secondarynamenode enabled=yes
28 |
--------------------------------------------------------------------------------
/tasks/ssh_fence.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #### only for ssh fence
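3 | # When sshfence is enabled, the hdfs user's key pair generated on the first namenode is
4 | # fetched to the controller, pushed to every namenode, and known_hosts is pre-populated
5 | # so automatic failover can ssh between namenodes without prompts.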
3 | - name: Check if ssh keys should be distributed
4 | stat: path={{ hdfs_ssh_known_hosts_file }}
5 | register: host_file_status
6 |
7 | - name: Set distribute keys variable
8 | set_fact: distribute_keys={{not host_file_status.stat.exists or hdfs_redistribute_ssh_keys or created_user.changed}}
9 |
10 | - name: Fetch private key
11 | fetch: src={{ hdfs_user_home }}/.ssh/id_rsa dest=rsa_key
12 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys
13 | check_mode: no
14 |
15 | - name: Fetch public key
16 | fetch: src={{ hdfs_user_home }}/.ssh/id_rsa.pub dest=rsa_key
17 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys
18 | check_mode: no
19 |
20 | - name: Create .ssh directory for {{hdfs_user}}
21 | file: path={{ hdfs_user_home }}/.ssh state=directory owner={{hdfs_user}} group={{hdfs_group}} mode=0700
22 | when: distribute_keys
23 |
24 | - name: Copy private key to all machines
25 | copy: src=rsa_key/{{hdfs_namenodes[0]}}{{ hdfs_user_home }}/.ssh/id_rsa dest={{ hdfs_user_home }}/.ssh/id_rsa owner={{hdfs_user}} group={{hdfs_group}} mode=0600
26 | when: distribute_keys
27 |
28 | - name: Add pubkeys to master server
29 | authorized_key: user={{hdfs_user}} key="{{ lookup('file', 'rsa_key/' + hdfs_namenodes[0] + hdfs_user_home + '/.ssh/id_rsa.pub') }}"
30 | when: distribute_keys
31 |
32 | - name: Make sure the known hosts file exists
33 | file: path={{ hdfs_ssh_known_hosts_file }} state=touch owner={{hdfs_user}} group={{hdfs_group}} mode=0644
34 | when: (not host_file_status.stat.exists) or (distribute_keys and host_file_status.stat.mode == "0644")
35 | changed_when: (not host_file_status.stat.exists) or host_file_status.stat.mode != "0644"
36 |
37 | - set_fact: hdfs_namenodes_long={{ hdfs_namenodes }}
38 |
39 | - name: Add long names to namenodes for proper key deployment
40 | set_fact: hdfs_namenodes_long={{ hdfs_namenodes_long | map('regex_replace', '(.*)', '\\1.' + hdfs_host_domain_name ) | list}}
41 | when: hdfs_host_domain_name is defined
42 |
43 | - name: Check host name availability
44 | shell: "ssh-keygen -f {{ hdfs_ssh_known_hosts_file }} -F {{ item }}"
45 | with_items: "{{ hdfs_namenodes_long }}"
46 | when: distribute_keys
47 | register: ssh_known_host_results
48 | ignore_errors: yes
49 | changed_when: False
50 | tags:
51 | - skip_ansible_lint
52 |
53 | - name: Scan the public key
54 | shell: "ssh-keyscan -H -T 10 {{ item.item }} >> {{ hdfs_ssh_known_hosts_file }}"
55 | with_items: "{{ ssh_known_host_results.results }}"
56 | when: item.stdout is defined and item.stdout == ""
57 | no_log: True
58 |
59 | - name: Delete key locally
60 | local_action: file path=rsa_key state=absent
61 | when: inventory_hostname == hdfs_namenodes[0] and distribute_keys
62 | become: no
63 |
--------------------------------------------------------------------------------
/tasks/upgrade.yml:
--------------------------------------------------------------------------------
1 | ---
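2 | # Rolling upgrade flow (HA clusters only): prepare a rollback fsimage, fail over to the
3 | # second namenode, upgrade and restart the first namenode, fail back, upgrade the second
4 | # namenode, then roll the datanodes and journalnodes and finalize the upgrade.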
2 | - block:
3 | - name: End play if not HA mode
4 | debug:
5 | msg: "Ending play: this installation is not running in HA mode (exactly two namenodes are required). Upgrading a single-instance setup is not supported."
6 | - meta: end_play
7 | when: hdfs_namenodes | length != 2
8 |
9 | - name: Check current version
10 | shell: "./hdfs version 2> /dev/null | grep ^Hadoop | cut -c8-"
11 | args:
12 | chdir: "{{ hdfs_bin_dir }}"
13 | become: yes
14 | become_user: "{{ hdfs_user }}"
15 | register: current_hdfs_version
16 | changed_when: False
17 |
18 | - debug:
19 | msg: "Current HDFS version is: {{ current_hdfs_version.stdout }}"
20 | verbosity: 1
21 |
22 | - block:
23 | - name: End play if nothing to upgrade
24 | debug:
25 | msg: "Nothing to upgrade, ending play. The current version is already >= the target version."
26 | - meta: end_play
27 | when:
28 | - current_hdfs_version.stdout | version_compare(hdfs_version, '>=')
29 | - not hdfs_upgrade_force
30 |
31 | - name: Set if should upgrade HDFS
32 | set_fact:
33 | hdfs_uptodate: "{{ current_hdfs_version.stdout | version_compare(hdfs_version, '>=') }}"
34 |
35 | - name: Should upgrade hdfs
36 | debug:
37 | msg: "Variable is {{ hdfs_uptodate }}"
38 | verbosity: 2
39 |
40 | - name: Prepare HDFS upgrade (create an fsimage for rollback)
41 | command: "./hdfs dfsadmin -rollingUpgrade prepare"
42 | args:
43 | chdir: "{{ hdfs_bin_dir }}"
44 | become: yes
45 | become_user: "{{ hdfs_user }}"
46 | when:
47 | - inventory_hostname == hdfs_namenodes[0]
48 | - not hdfs_uptodate
49 |
50 | - name: Check HDFS upgrade preparation (retry for 10 minutes)
51 | command: "./hdfs dfsadmin -rollingUpgrade query"
52 | args:
53 | chdir: "{{ hdfs_bin_dir }}"
54 | register: result
55 | become: yes
56 | become_user: "{{ hdfs_user }}"
57 | until: result.stdout.find("Proceed with rolling upgrade") != -1
58 | retries: 72
59 | delay: 10
60 | when:
61 | - inventory_hostname == hdfs_namenodes[0]
62 | - not hdfs_uptodate
63 |
64 | - name: Failover namenode "{{ hdfs_namenodes[0] }}" to "{{ hdfs_namenodes[1] }}" if HA mode
65 | command: "./hdfs haadmin -failover {{ hdfs_namenodes[0] }} {{ hdfs_namenodes[1] }}"
66 | args:
67 | chdir: "{{ hdfs_bin_dir }}"
68 | become: yes
69 | become_user: "{{ hdfs_user }}"
70 | when:
71 | - inventory_hostname == hdfs_namenodes[0]
72 | - hdfs_ha_enabled
73 | - not hdfs_uptodate
74 |
75 | - name: Upgrade namenode "{{ hdfs_namenodes[0] }}"
76 | include_tasks: upgrade_namenode.yml
77 | loop_control:
78 | loop_var: hdfs_namenode
79 | with_items:
80 | - "{{ hdfs_namenodes[0] }}"
81 |
82 | - name: Ask for confirmation if namenode is up
83 | pause:
84 | prompt: "Make sure namenode service on {{ hdfs_namenodes[0] }} is FULLY up! Check the startup progress on the web GUI."
85 | run_once: true
86 | delegate_to: "{{ hdfs_namenodes[0] }}"
87 | when:
88 | - not hdfs_uptodate
89 |
90 | - name: Ask for confirmation if safe mode is off
91 | pause:
92 | prompt: "Make sure SAFE MODE is OFF. You might have to wait a couple of seconds (usually 30 seconds). Follow the progress on the web GUI."
93 | run_once: true
94 | delegate_to: "{{ hdfs_namenodes[0] }}"
95 | when:
96 | - not hdfs_uptodate
97 |
98 | - name: Check that the namenode on "{{ hdfs_namenodes[0] }}" is healthy
99 | command: "./hdfs haadmin -checkHealth {{ hdfs_namenodes[0] }}"
100 | args:
101 | chdir: "{{ hdfs_bin_dir }}"
102 | become: yes
103 | become_user: "{{ hdfs_user }}"
104 | any_errors_fatal: true
105 | when:
106 | - inventory_hostname == hdfs_namenodes[0]
107 | - not hdfs_uptodate
108 |
109 | - name: Failover namenode "{{ hdfs_namenodes[1] }}" to "{{ hdfs_namenodes[0] }}" if HA mode
110 | command: "./hdfs haadmin -failover {{ hdfs_namenodes[1] }} {{ hdfs_namenodes[0] }}"
111 | args:
112 | chdir: "{{ hdfs_bin_dir }}"
113 | become: yes
114 | become_user: "{{ hdfs_user }}"
115 | when:
116 | - inventory_hostname == hdfs_namenodes[1]
117 | - hdfs_ha_enabled
118 |
119 | - name: Upgrade namenode "{{ hdfs_namenodes[1] }}"
120 | include_tasks: upgrade_namenode.yml
121 | with_items:
122 | - "{{ hdfs_namenodes[1] }}"
123 | loop_control:
124 | loop_var: hdfs_namenode
125 | when:
126 | - not hdfs_uptodate
127 |
128 | - name: Ask for confirmation if namenode is up
129 | pause:
130 | prompt: "Make sure namenode service on {{ hdfs_namenodes[1] }} is FULLY up! Check the startup progress on the web GUI."
131 | run_once: true
132 | delegate_to: "{{ hdfs_namenodes[1] }}"
133 | when:
134 | - not hdfs_uptodate
135 |
136 | - name: Ask for confirmation if safe mode is off
137 | pause:
138 | prompt: "Make sure SAFE MODE is OFF. You might have to wait a couple of seconds (usually 30 seconds). Follow the progress on the web GUI."
139 | run_once: true
140 | delegate_to: "{{ hdfs_namenodes[1] }}"
141 | when:
142 | - not hdfs_uptodate
143 |
144 | - name: Upgrade datanodes
145 | include_tasks: upgrade_datanode.yml
146 | with_items:
147 | - "{{ hdfs_datanodes }}"
148 | loop_control:
149 | loop_var: hdfs_datanode
150 |
151 | - name: Upgrade journalnodes
152 | include_tasks: upgrade_journalnode.yml
153 | with_items:
154 | - "{{ hdfs_journalnodes }}"
155 | loop_control:
156 | loop_var: hdfs_journalnode
157 |
158 | - name: Finalize HDFS upgrade
159 | command: "./hdfs dfsadmin -rollingUpgrade finalize"
160 | args:
161 | chdir: "{{ hdfs_bin_dir }}"
162 | become: yes
163 | become_user: "{{ hdfs_user }}"
164 | when: inventory_hostname == hdfs_namenodes[0]
165 |
--------------------------------------------------------------------------------
/tasks/upgrade_datanode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - block:
3 | - name: Shutdown HDFS datanode
4 | command: "./hdfs dfsadmin -shutdownDatanode {{ hdfs_datanode }}:50020"
5 | args:
6 | chdir: "{{ hdfs_bin_dir }}"
7 | become: yes
8 | become_user: "{{ hdfs_user }}"
9 | register: datanode_shutdown
10 |
11 | - name: Output from shutdown
12 | debug:
13 | msg: "Output from shutdown: {{ datanode_shutdown.stdout }}"
14 | verbosity: 1
15 |
16 | - name: Check that the HDFS datanode is down (wait for connection refused)
17 | command: "./hdfs dfsadmin -getDatanodeInfo {{ hdfs_datanode }}:50020"
18 | args:
19 | chdir: "{{ hdfs_bin_dir }}"
20 | register: result
21 | become: yes
22 | become_user: "{{ hdfs_user }}"
23 | until: result.stderr.find("Connection refused") != -1
24 | retries: 2
25 | delay: 10
26 | failed_when: result.rc == 0
27 | when:
28 | - inventory_hostname == hdfs_datanode
29 | - not hdfs_uptodate
30 | tags: upgrade_datanode
31 |
32 | # The pause runs on the first (single one) node on the inventory list
33 | - name: Ask for confirmation if datanode is down
34 | pause:
35 | prompt: "Make sure hdfs-datanode service on {{ hdfs_datanode }} is down"
36 | when: not hdfs_uptodate
37 |
38 | - block:
39 | - import_tasks: base.yml
40 | - import_tasks: config.yml
41 | - import_tasks: datanode.yml
42 | - name: Restart datanode
43 | service: name=hdfs-datanode state=restarted
44 | when:
45 | - inventory_hostname == hdfs_datanode
46 | - not hdfs_uptodate
47 | tags: upgrade_datanode
48 |
49 | # The pause runs on the first (single one) node on the inventory list
50 | - name: Ask for confirmation if datanode is up
51 | pause:
52 | prompt: "Make sure hdfs-datanode service on {{ hdfs_datanode }} is up"
53 | when: not hdfs_uptodate
54 |
--------------------------------------------------------------------------------
/tasks/upgrade_journalnode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - block:
3 | - name: Restart journalnode
4 | service: name=hdfs-journalnode state=restarted
5 | - name: Wait for the journalnode port to open (first check delayed by 10 seconds)
6 | wait_for:
7 | port: 8485
8 | delay: 10
9 | - name: Pause for 20 seconds until the JournalNode is properly restarted
10 | pause:
11 | seconds: 20
12 | when:
13 | - inventory_hostname == hdfs_journalnode
14 | tags: upgrade_journalnode
15 |
--------------------------------------------------------------------------------
/tasks/upgrade_namenode.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - block:
3 | - import_tasks: base.yml
4 | - import_tasks: config.yml
5 | - import_tasks: namenode.yml
6 | - name: Restart namenode
7 | service: name=hdfs-namenode state=restarted
8 | when:
9 | - inventory_hostname == hdfs_namenode
10 | - not hdfs_uptodate
11 | tags: upgrade_namenode
12 |
--------------------------------------------------------------------------------
/tasks/user.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - name: Add hadoop group on all machines
3 | group: name={{hdfs_group}} state=present
4 |
5 | - name: Add hadoop user on first namenode only and generate an ssh key
6 | user: name={{hdfs_user}} comment="Hadoop superuser" uid=1040 group={{hdfs_group}} createhome=yes generate_ssh_key="{{ hdfs_ssh_fence }}"
7 | when: inventory_hostname == hdfs_namenodes[0]
8 |
9 | - name: Create user on all machines
10 | user: name={{hdfs_user}} comment="Hadoop superuser" uid=1040 group={{hdfs_group}} home={{ hdfs_user_home }} createhome=yes
11 | register: created_user
12 |
13 | - import_tasks: ssh_fence.yml
14 | when: hdfs_ssh_fence and inventory_hostname in hdfs_namenodes
15 |
--------------------------------------------------------------------------------
/templates/audit-compress-rotate.sh.j2:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | LOGDIR={{ hdfs_log_dir }}
3 | TODAY=$(date +'%Y%m%d')
4 |
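5 | # Audit logs are written by the RFAAUDIT daily rolling appender (see log4j.properties.j2),
6 | # so rotated files look like hdfs-audit.log.YYYY-MM-DD (e.g. hdfs-audit.log.2023-01-31).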
5 | cd "$LOGDIR" || exit 1
6 |
7 | # compress any hdfs-audit files that are not still being used (already rotated)
8 | for LOGFILE in $(ls | egrep '[[:digit:]]{4}\-[[:digit:]]{2}\-[[:digit:]]{2}$'); do
9 | LOGFILEDATE=$(echo $LOGFILE | cut -d'.' -f3 | sed -e 's/-//g');
10 | [ $LOGFILEDATE -lt $TODAY ] && gzip $LOGFILE
11 | done
12 |
13 | # delete rotated audit files older than {{ hdfs_audit_rotate_days }} days
14 | find . -iname "hdfs-audit*" -mtime +{{ hdfs_audit_rotate_days }} -delete
15 |
--------------------------------------------------------------------------------
/templates/core-site.xml.j2:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 |
4 | <configuration>
5 | <property>
6 | <name>fs.defaultFS</name>
7 | <value>{{ hdfs_default_fs }}</value>
8 | </property>
9 | <property>
10 | <name>hadoop.tmp.dir</name>
11 | <value>{{ hdfs_tmpdir_user }}</value>
12 | </property>
13 | <property>
14 | <name>io.serializations</name>
15 | <value>org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization</value>
16 | </property>
17 | <property>
18 | <name>fs.trash.interval</name>
19 | <value>{{ hdfs_fs_trash_interval }}</value>
20 | </property>
21 | <property>
22 | <name>fs.trash.checkpoint.interval</name>
23 | <value>{{ hdfs_fs_trash_checkpoint_interval }}</value>
24 | </property>
25 | {% if hdfs_ha_enabled %}
26 | <property>
27 | <name>ha.zookeeper.quorum</name>
28 | <value>{{ hdfs_zookeeper_quorum }}</value>
29 | </property>
30 | {% endif %}
31 | {% if hdfs_rack_script_awk is defined %}
32 | <property>
33 | <name>topology.node.switch.mapping.impl</name>
34 | <value>org.apache.hadoop.net.ScriptBasedMapping</value>
35 | </property>
36 | <property>
37 | <name>topology.script.file.name</name>
38 | <value>{{ hdfs_rack_script_path }}</value>
39 | </property>
40 | {% endif %}
41 | {% if core_site_additional_properties is defined %}
42 | {% for property in core_site_additional_properties %}
43 | <property>
44 | <name>{{ property.name }}</name>
45 | <value>{{ property.value }}</value>
46 | {% if property.final is defined %}
47 | <final>true</final>
48 | {% endif %}
49 | </property>
50 | {% endfor %}
51 | {% endif %}
52 | </configuration>
53 |
--------------------------------------------------------------------------------
/templates/dfs.hosts.exclude.j2:
--------------------------------------------------------------------------------
1 | # This file contains a list of hosts that will be excluded from the DFS
2 | {% if dfs_hosts_exclude is defined %}
3 | {% for host in dfs_hosts_exclude %}
4 | {{ host }}
5 | {% endfor %}
6 | {% endif %}
7 |
--------------------------------------------------------------------------------
/templates/hadoop-env.sh.j2:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Set Hadoop-specific environment variables here.
18 |
19 | # The only required environment variable is JAVA_HOME. All others are
20 | # optional. When running a distributed configuration it is best to
21 | # set JAVA_HOME in this file, so that it is correctly defined on
22 | # remote nodes.
23 |
24 | # The java implementation to use.
25 | export JAVA_HOME=${JAVA_HOME:-"{{ hdfs_java_home }}"}
26 |
27 | # The jsvc implementation to use. Jsvc is required to run secure datanodes
28 | # that bind to privileged ports to provide authentication of data transfer
29 | # protocol. Jsvc is not required if SASL is configured for authentication of
30 | # data transfer protocol using non-privileged ports.
31 | #export JSVC_HOME=${JSVC_HOME}
32 |
33 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
34 |
35 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
36 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar {{ hdfs_extra_classpath | join(" ") }} ; do
37 | if [ "$HADOOP_CLASSPATH" ]; then
38 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
39 | else
40 | export HADOOP_CLASSPATH=$f
41 | fi
42 | done
43 |
44 | # The maximum amount of heap to use, in MB. Default is 1000.
45 | #export HADOOP_HEAPSIZE=
46 | #export HADOOP_NAMENODE_INIT_HEAPSIZE=""
47 |
48 | # Extra Java runtime options. Empty by default.
49 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
50 |
51 | # Command specific options appended to HADOOP_OPTS when specified
52 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-{{hadoop_security_logger}}} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-{{hadoop_audit_logger}}} {{hdfs_namenode_javaOpts}} $HADOOP_NAMENODE_OPTS"
53 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS {{hdfs_datanode_javaOpts}} $HADOOP_DATANODE_OPTS"
54 |
55 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-{{hadoop_security_logger}}} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-{{hadoop_audit_logger}}} $HADOOP_SECONDARYNAMENODE_OPTS"
56 |
57 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
58 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
59 |
60 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
61 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
62 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
63 |
64 | # On secure datanodes, user to run the datanode as after dropping privileges.
65 | # This **MUST** be uncommented to enable secure HDFS if using privileged ports
66 | # to provide authentication of data transfer protocol. This **MUST NOT** be
67 | # defined if SASL is configured for authentication of data transfer protocol
68 | # using non-privileged ports.
69 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
70 |
71 | # Where log files are stored. $HADOOP_HOME/logs by default.
72 | export HADOOP_LOG_DIR={{ hdfs_log_dir }}
73 |
74 | # Where log files are stored in the secure data environment.
75 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
76 |
77 | ###
78 | # HDFS Mover specific parameters
79 | ###
80 | # Specify the JVM options to be used when starting the HDFS Mover.
81 | # These options will be appended to the options specified as HADOOP_OPTS
82 | # and therefore may override any similar flags set in HADOOP_OPTS
83 | #
84 | # export HADOOP_MOVER_OPTS=""
85 |
86 | ###
87 | # Advanced Users Only!
88 | ###
89 |
90 | # The directory where pid files are stored. /tmp by default.
91 | # NOTE: this should be set to a directory that can only be written to by
92 | # the user that will run the hadoop daemons. Otherwise there is the
93 | # potential for a symlink attack.
94 | export HADOOP_PID_DIR=${HADOOP_PID_DIR}
95 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
96 |
97 | # A string representing this instance of hadoop. $USER by default.
98 | export HADOOP_IDENT_STRING=$USER
99 |
--------------------------------------------------------------------------------
/templates/hadoop_sudoers.j2:
--------------------------------------------------------------------------------
1 | # Keep environment variables so sudo users can sudo to hdfs user without worrying about environment variables
2 | Defaults env_keep += "HADOOP_HOME"
3 | Defaults env_keep += "HADOOP_PREFIX"
4 | Defaults env_keep += "HADOOP_CONF_DIR"
5 | Defaults env_keep += "HADOOP_LIBEXEC_DIR"
6 | Defaults env_keep += "HADOOP_CLASSPATH"
7 |
--------------------------------------------------------------------------------
/templates/hdfs-service.j2:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ### BEGIN INIT INFO
4 | # Provides: hdfs-{{program}}
5 | # Required-Start: $remote_fs $network
6 | # Required-Stop: $remote_fs
7 | # Default-Start: 2 3 4 5
8 | # Default-Stop: 0 1 6
9 | # Short-Description: Start hdfs-{{program}} daemon
10 | # Description: Start hdfs-{{program}} daemon
11 | ### END INIT INFO
12 |
13 | HADOOP_HOME={{hdfs_hadoop_home}}
14 | HADOOP_CONF_DIR={{hdfs_conf_dir}}
15 |
16 | . $HADOOP_CONF_DIR/hadoop-env.sh
17 |
18 | HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/}
19 | NAME="hdfs-{{program}}"
20 | PIDFILE="$HADOOP_PID_DIR/hadoop-${NAME}.pid"
21 |
22 | . /lib/lsb/init-functions
23 |
24 | case "$1" in
25 | start)
26 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start {{program}}"
27 | ;;
28 | stop)
29 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop {{program}}"
30 | ;;
31 | status)
32 | status_of_proc -p $PIDFILE "$NAME" "$NAME" && exit 0 || exit $?
33 | ;;
34 | restart)
35 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop {{program}}"
36 | sudo su - {{hdfs_user}} -c "$HADOOP_HOME/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start {{program}}"
37 | ;;
38 | *)
39 | echo "Usage: $0 {start|stop|status|restart}"
40 | exit 1
41 | esac
42 |
43 | exit 0
44 |
--------------------------------------------------------------------------------
/templates/hdfs-site.xml.j2:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 |
4 | <configuration>
5 | {% if hdfs_ha_enabled %}
6 | <property>
7 | <name>dfs.nameservices</name>
8 | <value>{{ hdfs_nameservices }}</value>
9 | </property>
10 | <property>
11 | <name>dfs.nameservice.id</name>
12 | <value>{{ hdfs_nameservices }}</value>
13 | </property>
14 | <property>
15 | <name>dfs.ha.namenodes.{{ hdfs_nameservices }}</name>
16 | <value>{{ hdfs_namenodes | join(',') }}</value>
17 | </property>
18 | {% if hdfs_ha_enabled and inventory_hostname in hdfs_namenodes -%}
19 | <property>
20 | <name>dfs.ha.namenode.id</name>
21 | <value>{{ inventory_hostname }}</value>
22 | </property>
23 | {% endif -%}
24 | {% for host in hdfs_namenodes -%}
25 | <property>
26 | <name>dfs.namenode.rpc-address.{{ hdfs_nameservices }}.{{ host }}</name>
27 | <value>{{ host }}{%- if hdfs_host_domain_name is defined and hdfs_host_domain_name | length > 0 -%}.{{ hdfs_host_domain_name }}{%- endif -%}:8020</value>
28 | </property>
29 | <property>
30 | <name>dfs.namenode.rpc-bind-host.{{ hdfs_nameservices }}.{{ host }}</name>
31 | <value>0.0.0.0</value>
32 | </property>
33 | {% endfor -%}
34 | {% for host in hdfs_namenodes -%}
35 | <property>
36 | <name>dfs.namenode.http-address.{{ hdfs_nameservices }}.{{ host }}</name>
37 | <value>{{ host }}{%- if hdfs_host_domain_name is defined and hdfs_host_domain_name | length > 0 -%}.{{ hdfs_host_domain_name }}{%- endif -%}:50070</value>
38 | </property>
39 | <property>
40 | <name>dfs.namenode.http-bind-host.{{ hdfs_nameservices }}.{{ host }}</name>
41 | <value>0.0.0.0</value>
42 | </property>
43 | {% endfor -%}
44 | <property>
45 | <name>dfs.namenode.shared.edits.dir</name>
46 | <value>qjournal://{{ hdfs_journalnodes | join(':8485' + ';') }}:8485/{{ hdfs_nameservices }}</value>
47 | </property>
48 | <property>
49 | <name>dfs.journalnode.edits.dir</name>
50 | <value>{{ hdfs_dfs_journalnode_edits_dir }}</value>
51 | </property>
52 | <property>
53 | <name>dfs.client.failover.proxy.provider.{{ hdfs_nameservices }}</name>
54 | <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
55 | </property>
56 | {% if hdfs_ssh_fence -%}
57 | <property>
58 | <name>dfs.ha.fencing.methods</name>
59 | <value>sshfence</value>
60 | </property>
61 | <property>
62 | <name>dfs.ha.fencing.ssh.private-key-files</name>
63 | <value>{{ hdfs_user_home }}/.ssh/id_rsa</value>
64 | </property>
65 | {% else -%}
66 | <property>
67 | <name>dfs.ha.fencing.methods</name>
68 | <value>shell(/bin/true)</value>
69 | </property>
70 | {% endif -%}
71 | <property>
72 | <name>dfs.ha.automatic-failover.enabled</name>
73 | <value>true</value>
74 | </property>
75 | {% else %}
76 | <property>
77 | <name>dfs.namenode.secondary.http-address</name>
78 | <value>{{ hdfs_secondary_namenode_http_address }}</value>
79 | </property>
80 | <property>
81 | <name>dfs.namenode.checkpoint.dir</name>
82 | <value>{{ hdfs_namenode_checkpoint_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }}</value>
83 | </property>
84 | {% endif %}
85 | <property>
86 | <name>dfs.replication</name>
87 | <value>{{ hdfs_dfs_replication }}</value>
88 | </property>
89 | <property>
90 | <name>dfs.datanode.data.dir</name>
91 | <value>{{ hdfs_datanode_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }}</value>
92 | </property>
93 | {% if inventory_hostname in hdfs_namenodes %}
94 | <property>
95 | <name>dfs.namenode.name.dir</name>
96 | <value>{{ hdfs_namenode_dir_list | map('regex_replace', '^(.*)$', 'file://\\1' ) | join(',') }}</value>
97 | </property>
98 | {% endif %}
99 | <property>
100 | <name>dfs.permissions.superusergroup</name>
101 | <value>{{ hdfs_dfs_permissions_superusergroup }}</value>
102 | </property>
103 | <property>
104 | <name>fs.permissions.umask-mode</name>
105 | <value>{{ hdfs_fs_permissions_umask_mode }}</value>
106 | </property>
107 | <property>
108 | <name>dfs.hosts.exclude</name>
109 | <value>{{ hdfs_conf_dir }}/dfs.hosts.exclude</value>
110 | </property>
111 | <property>
112 | <name>dfs.blocksize</name>
113 | <value>{{ hdfs_dfs_blocksize }}</value>
114 | <final>true</final>
115 | </property>
116 | <property>
117 | <name>dfs.namenode.avoid.read.stale.datanode</name>
118 | <value>{{ hdfs_dfs_namenode_avoid_read_stale_datanode | lower }}</value>
119 | </property>
120 | <property>
121 | <name>dfs.namenode.avoid.write.stale.datanode</name>
122 | <value>{{ hdfs_dfs_namenode_avoid_write_stale_datanode | lower }}</value>
123 | </property>
124 | <property>
125 | <name>dfs.support.append</name>
126 | <value>true</value>
127 | </property>
128 | <property>
129 | <name>dfs.namenode.write.stale.datanode.ratio</name>
130 | <value>{{ hdfs_dfs_namenode_write_stale_datanode_ratio }}</value>
131 | </property>
132 | <property>
133 | <name>dfs.namenode.handler.count</name>
134 | <value>{{ hdfs_dfs_namenode_handler_count }}</value>
135 | </property>
136 | <property>
137 | <name>dfs.namenode.service.handler.count</name>
138 | <value>{{ hdfs_dfs_namenode_service_handler_count }}</value>
139 | </property>
140 | <property>
141 | <name>dfs.datanode.du.reserved</name>
142 | <value>{{ hdfs_dfs_datanode_du_reserved }}</value>
143 | </property>
144 | <property>
145 | <name>dfs.datanode.data.dir.perm</name>
146 | <value>{{ hdfs_dfs_datanode_data_dir_perm }}</value>
147 | </property>
148 | <property>
149 | <name>dfs.datanode.max.transfer.threads</name>
150 | <value>{{ hdfs_dfs_datanode_max_transfer_threads }}</value>
151 | </property>
152 | <property>
153 | <name>dfs.datanode.fsdataset.volume.choosing.policy</name>
154 | <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
155 | </property>
156 |
157 | <property>
158 | <name>dfs.replication.max</name>
159 | <value>{{ hdfs_dfs_replication_max }}</value>
160 | </property>
161 | <property>
162 | <name>dfs.namenode.replication.min</name>
163 | <value>{{ hdfs_dfs_namenode_replication_min }}</value>
164 | </property>
165 | <property>
166 | <name>dfs.namenode.checkpoint.period</name>
167 | <value>{{ hdfs_dfs_namenode_checkpoint_period }}</value>
168 | </property>
169 | <property>
170 | <name>dfs.namenode.audit.log.async</name>
171 | <value>{{ hdfs_dfs_namenode_audit_log_async | lower }}</value>
172 | </property>
173 | <property>
174 | <name>dfs.client.file-block-storage-locations.num-threads</name>
175 | <value>{{ hdfs_dfs_client_file_block_storage_locations_num_threads }}</value>
176 | </property>
177 | <property>
178 | <name>dfs.client.file-block-storage-locations.timeout.millis</name>
179 | <value>{{ hdfs_dfs_client_file_block_storage_locations_timeout_millis }}</value>
180 | </property>
181 | <property>
182 | <name>dfs.client.read.shortcircuit</name>
183 | <value>{{ hdfs_enable_short_circuit_reads | lower }}</value>
184 | </property>
185 | {% if hdfs_enable_short_circuit_reads is defined %}
186 | <property>
187 | <name>dfs.domain.socket.path</name>
188 | <value>{{ hdfs_dfs_domain_socket_path_folder }}/dn._PORT</value>
189 | </property>
190 | {% endif %}
191 | {% if hdfs_site_additional_properties is defined %}
192 | {% for property in hdfs_site_additional_properties -%}
193 | <property>
194 | <name>{{property.name}}</name>
195 | <value>{{property.value}}</value>
196 | {% if property.final is defined -%}
197 | <final>true</final>
198 | {% endif %}
199 | </property>
200 | {% endfor -%}
201 | {% endif %}
202 | </configuration>
203 |
--------------------------------------------------------------------------------
/templates/hdfs.service.j2:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=HDFS {{ program }}
3 |
4 | [Service]
5 | Type=forking
6 | User={{hdfs_user}}
7 | Group={{hdfs_group}}
8 | ExecStart={{ hdfs_hadoop_home }}/sbin/hadoop-daemon.sh --config {{ hdfs_conf_dir }} --script hdfs start {{ program }}
9 | ExecStop={{ hdfs_hadoop_home }}/sbin/hadoop-daemon.sh --config {{ hdfs_conf_dir }} --script hdfs stop {{ program }}
10 | RemainAfterExit=no
11 | Restart=on-failure
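12 | # hadoop-daemon.sh writes its pid file as hadoop-$HADOOP_IDENT_STRING-<program>.pid under
13 | # $HADOOP_PID_DIR; the PIDFile below assumes the /tmp default and an hdfs_user named "hdfs".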
12 | PIDFile=/tmp/hadoop-hdfs-{{program}}.pid
13 |
14 | TimeoutSec=300
15 |
16 | [Install]
17 | WantedBy=multi-user.target
18 |
--------------------------------------------------------------------------------
/templates/log4j.properties.j2:
--------------------------------------------------------------------------------
1 | ### original file taken from hadoop 2.7.2 tarball ##
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 | # Define some default values that can be overridden by system properties
20 | hadoop.root.logger=INFO,console
21 | hadoop.log.dir=.
22 | hadoop.log.file=hadoop.log
23 |
24 | # Define the root logger to the system property "hadoop.root.logger".
25 | log4j.rootLogger=${hadoop.root.logger}, EventCounter
26 |
27 | # Logging Threshold
28 | log4j.threshold=ALL
29 | #log4j.debug=true
30 |
31 | # Null Appender
32 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
33 |
34 | #
35 | # Rolling File Appender - cap space usage at 5gb.
36 | #
37 | hadoop.log.maxfilesize={{hadoop_log_maxfilesize}}
38 | hadoop.log.maxbackupindex={{hadoop_log_maxbackupindex}}
39 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender
40 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
41 |
42 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
43 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
44 |
45 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
46 |
47 | # Pattern format: Date LogLevel LoggerName LogMessage
48 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
49 | # Debugging Pattern format
50 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
51 |
52 |
53 | #
54 | # Daily Rolling File Appender
55 | #
56 |
57 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
58 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
59 |
60 | # Rollover at midnight
61 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
62 |
63 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
64 |
65 | # Pattern format: Date LogLevel LoggerName LogMessage
66 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
67 | # Debugging Pattern format
68 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
69 |
70 |
71 | #
72 | # console
73 | # Add "console" to rootlogger above if you want to use this
74 | #
75 |
76 | log4j.appender.console=org.apache.log4j.ConsoleAppender
77 | log4j.appender.console.target=System.err
78 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
79 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
80 |
81 | #
82 | # TaskLog Appender
83 | #
84 |
85 | #Default values
86 | hadoop.tasklog.taskid=null
87 | hadoop.tasklog.iscleanup=false
88 | hadoop.tasklog.noKeepSplits=4
89 | hadoop.tasklog.totalLogFileSize=100
90 | hadoop.tasklog.purgeLogSplits=true
91 | hadoop.tasklog.logsRetainHours=12
92 |
93 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
94 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
95 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
96 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
97 |
98 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
99 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
100 |
101 | #
102 | # HDFS block state change log from block manager
103 | #
104 | # Uncomment the following to suppress normal block state change
105 | # messages from BlockManager in NameNode.
106 | #log4j.logger.BlockStateChange=WARN
107 |
108 | #
109 | #Security appender
110 | #
111 | hadoop.security.logger=INFO,NullAppender
112 | hadoop.security.log.maxfilesize=100MB
113 | hadoop.security.log.maxbackupindex={{hadoop_log_maxbackupindex}}
114 | log4j.category.SecurityLogger=${hadoop.security.logger}
115 | hadoop.security.log.file=SecurityAuth-${user.name}.audit
116 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
117 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
118 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
119 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
120 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
121 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
122 |
123 | #
124 | # Daily Rolling Security appender
125 | #
126 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
127 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
128 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
129 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
130 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
131 |
132 | #
133 | # hadoop configuration logging
134 | #
135 |
136 | # Uncomment the following line to turn off configuration deprecation warnings.
137 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
138 |
139 | #
140 | # hdfs audit logging
141 | #
142 | hdfs.audit.logger=INFO,NullAppender
143 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
144 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
145 | log4j.appender.RFAAUDIT=org.apache.log4j.DailyRollingFileAppender
146 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
147 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
148 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
149 | log4j.appender.RFAAUDIT.DatePattern=.yyyy-MM-dd
150 | log4j.appender.RFAAUDIT.rollingPolicy.ActiveFileName=${hadoop.log.dir}/hdfs-audit.log
151 |
152 | #
153 | # mapred audit logging
154 | #
155 | mapred.audit.logger=INFO,NullAppender
156 | mapred.audit.log.maxfilesize=256MB
157 | mapred.audit.log.maxbackupindex=20
158 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
159 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
160 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
161 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
162 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
163 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
164 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
165 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
166 |
167 | # Custom Logging levels
168 |
169 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
170 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
171 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
172 |
173 | # Jets3t library
174 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
175 |
176 | # AWS SDK & S3A FileSystem
177 | log4j.logger.com.amazonaws=ERROR
178 | log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
179 | log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
180 |
181 | #
182 | # Event Counter Appender
183 | # Sends counts of logging messages at different severity levels to Hadoop Metrics.
184 | #
185 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
186 |
187 | #
188 | # Job Summary Appender
189 | #
190 | # Use following logger to send summary to separate file defined by
191 | # hadoop.mapreduce.jobsummary.log.file :
192 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA
193 | #
194 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
195 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
196 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
197 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20
198 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender
199 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
200 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
201 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
202 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
203 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
204 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
205 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
206 |
207 | #
208 | # Yarn ResourceManager Application Summary Log
209 | #
210 | # Set the ResourceManager summary log filename
211 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
212 | # Set the ResourceManager summary log level and appender
213 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
214 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
215 |
216 | # To enable AppSummaryLogging for the RM,
217 | # set yarn.server.resourcemanager.appsummary.logger to
218 | # ,RMSUMMARY in hadoop-env.sh
219 |
220 | # Appender for ResourceManager Application Summary Log
221 | # Requires the following properties to be set
222 | # - hadoop.log.dir (Hadoop Log directory)
223 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
224 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
225 |
226 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
227 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
228 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
229 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
230 | log4j.appender.RMSUMMARY.MaxFileSize=256MB
231 | log4j.appender.RMSUMMARY.MaxBackupIndex=20
232 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
233 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
234 |
235 | # HS audit log configs
236 | #mapreduce.hs.audit.logger=INFO,HSAUDIT
237 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
238 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
239 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
240 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
241 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
242 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
243 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
244 |
245 | # Http Server Request Logs
246 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog
247 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
248 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
249 | #log4j.appender.namenoderequestlog.RetainDays=3
250 |
251 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog
252 | #log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
253 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
254 | #log4j.appender.datanoderequestlog.RetainDays=3
255 |
256 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
257 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
258 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
259 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3
260 |
261 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
262 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
263 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
264 | #log4j.appender.jobhistoryrequestlog.RetainDays=3
265 |
266 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
267 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
268 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
269 | #log4j.appender.nodemanagerrequestlog.RetainDays=3
270 |
271 |
--------------------------------------------------------------------------------
/templates/mapred-site.xml.j2:
--------------------------------------------------------------------------------
1 | <configuration>
2 | <property>
3 | <name>mapred.job.tracker</name>
4 | <value>{{ hdfs_namenodes[0] }}:8021</value>
5 | </property>
6 | </configuration>
7 |
--------------------------------------------------------------------------------
/templates/rack-awareness.sh.j2:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
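2 | # Called by ScriptBasedMapping with one or more IPs/hostnames; must print one rack path
3 | # per input. The awk body is supplied by hdfs_rack_script_awk, e.g. (illustrative only)
4 | # '{print "/rack-"$3}' to derive the rack from the third dotted field of an IP address.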
2 | echo $@ | xargs -n 1 | awk -F '.' '{{ hdfs_rack_script_awk }}'
--------------------------------------------------------------------------------
/templates/slaves.j2:
--------------------------------------------------------------------------------
1 | {% for slave in hdfs_datanodes %}
2 | {{ slave }}
3 | {% endfor %}
4 |
--------------------------------------------------------------------------------