├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── charts ├── README.md ├── hdfs-client-k8s │ ├── Chart.yaml │ └── templates │ │ └── client-deployment.yaml ├── hdfs-config-k8s │ ├── .helmignore │ ├── Chart.yaml │ └── templates │ │ ├── _helpers.tpl │ │ └── configmap.yaml ├── hdfs-datanode-k8s │ ├── Chart.yaml │ └── templates │ │ └── datanode-daemonset.yaml ├── hdfs-journalnode-k8s │ ├── Chart.yaml │ └── templates │ │ └── journalnode-statefulset.yaml ├── hdfs-k8s │ ├── .gitignore │ ├── .helmignore │ ├── Chart.yaml │ ├── requirements.yaml │ ├── templates │ │ └── _helpers.tpl │ └── values.yaml ├── hdfs-krb5-k8s │ ├── .helmignore │ ├── Chart.yaml │ └── templates │ │ └── statefulset.yaml ├── hdfs-namenode-k8s │ ├── Chart.yaml │ └── templates │ │ └── namenode-statefulset.yaml └── hdfs-simple-namenode-k8s │ ├── Chart.yaml │ └── templates │ └── namenode-statefulset.yaml ├── designs ├── journal-approach.png ├── namenode-HA.md └── namenode-metadata.png ├── tests ├── README.md ├── cases │ ├── _basic-subcharts.sh │ ├── _basic.sh │ ├── _kerberos.sh │ └── _single-namenode.sh ├── cleanup.sh ├── gold │ ├── basic.gold │ ├── kerberos.gold │ ├── single-namenode.gold │ ├── subchart-client.gold │ ├── subchart-config.gold │ ├── subchart-datanode.gold │ ├── subchart-journalnode.gold │ ├── subchart-namenode.gold │ └── subchart-zookeeper.gold ├── lib │ └── _k8s.sh ├── run.sh ├── setup.sh ├── teardown.sh └── values │ ├── common.yaml │ ├── custom-hadoop-config.yaml │ └── kerberos.yaml └── topology ├── README.md └── pod-cidr ├── .gitignore ├── README.md ├── pom.xml └── src └── main └── java └── org └── apache └── hadoop └── net └── PodCIDRToNodeMapping.java /.gitignore: -------------------------------------------------------------------------------- 1 | tests/bin 2 | tests/tmp 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | env: 4 | - CASES=_basic.sh 5 | - CASES=_basic-subcharts.sh 6 | - CASES=_kerberos.sh 7 | - CASES=_single-namenode.sh 8 | 9 | before_script: 10 | # Required for K8s v1.10.x. See 11 | # https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783 12 | - sudo mount --make-shared / && sudo service docker restart 13 | - USE_MINIKUBE_DRIVER_NONE=true USE_SUDO_MINIKUBE=true tests/setup.sh 14 | 15 | script: 16 | - tests/run.sh 17 | 18 | after_script: 19 | - tests/cleanup.sh 20 | - tests/teardown.sh 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: global 3 | title: HDFS on Kubernetes 4 | --- 5 | # HDFS on Kubernetes 6 | Repository holding helm charts for running Hadoop Distributed File System (HDFS) 7 | on Kubernetes. 8 | 9 | See [charts/README.md](charts/README.md) for how to run the charts. 10 | 11 | See [tests/README.md](tests/README.md) for how to run integration tests for 12 | HDFS on Kubernetes. 13 | -------------------------------------------------------------------------------- /charts/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: global 3 | title: HDFS charts 4 | --- 5 | 6 | # HDFS charts 7 | 8 | Helm charts for launching HDFS daemons in a K8s cluster. The main entry-point 9 | chart is `hdfs-k8s`, which is an uber-chart that specifies other charts as 10 | dependency subcharts. This means you can launch all HDFS components using 11 | `hdfs-k8s`. 12 | 13 | Note that the HDFS charts are currently in pre-alpha quality. They are also 14 | being heavily revised and are subject to change. 15 | 16 | HDFS on K8s supports the following features: 17 | - namenode high availability (HA): HDFS namenode daemons are in charge of 18 | maintaining file system metadata concerning which directories contain which 19 | files and where the file data is stored. A namenode crash will cause a service outage. 20 | HDFS can run two namenodes in an active/standby setup. HDFS on K8s supports HA. 21 | - K8s persistent volumes (PV) for metadata: A namenode crash will cause a service 22 | outage. Losing namenode metadata can lead to loss of the file system. HDFS on 23 | K8s can store the metadata in remote K8s persistent volumes so that metadata 24 | can remain intact even if both namenode daemons are lost or restarted. 25 | - K8s HostPath volumes for file data: HDFS datanode daemons store the actual 26 | file data. File data should also survive datanode crashes or restarts. HDFS on 27 | K8s stores the file data on the local disks of the K8s cluster nodes using 28 | K8s HostPath volumes. (We plan to switch to a better mechanism: K8s 29 | persistent local volumes.) 30 | - Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom 31 | client code, put a fake user name in requests and steal data. Production 32 | HDFS deployments often secure themselves using Kerberos. HDFS on K8s supports Kerberos. 33 | 34 | Here is the list of all charts: 35 | 36 | - hdfs-k8s: main uber-chart. Launches other charts. 37 | - hdfs-namenode-k8s: a statefulset and other K8s components for launching HDFS 38 | namenode daemons, which maintain file system metadata. The chart supports 39 | namenode high availability (HA). 40 | - hdfs-datanode-k8s: a daemonset and other K8s components for launching HDFS 41 | datanode daemons, which are responsible for storing file data. 42 | - hdfs-config-k8s: a configmap containing Hadoop config files for HDFS. 43 | - zookeeper: This chart is NOT in this repo.
But hdfs-k8s pulls the zookeeper 44 | chart in the incubator remote repo 45 | (https://kubernetes-charts-incubator.storage.googleapis.com/) 46 | as a dependency and launches zookeeper daemons. Zookeeper makes sure 47 | only one namenode is active in the HA setup, while the other namenode 48 | becomes standby. By default, we will launch three zookeeper servers. 49 | - hdfs-journalnode-k8s: a statefulset and other K8s components for launching 50 | HDFS journalnode quorums, which ensure the file system metadata is 51 | properly shared between the two namenode daemons in the HA setup. 52 | By default, we will launch three journalnode servers. 53 | - hdfs-client-k8s: a pod that is configured to run Hadoop client commands 54 | for accessing HDFS. 55 | - hdfs-krb5-k8s: a size-1 statefulset and other K8s components for launching 56 | a Kerberos server, which can be used to secure HDFS. Disabled by default. 57 | - hdfs-simple-namenode-k8s: Disabled by default. A simpler setup of the 58 | namenode that launches only one namenode, i.e. it does not support HA. It 59 | does not support Kerberos or persistent volumes either. As it does not 60 | support HA, it also does not need zookeeper or journalnodes. You may prefer 61 | this if you want the simplest possible setup. 62 | 63 | # Prerequisite 64 | 65 | Requires Kubernetes 1.6+ as the `namenode` and `datanodes` are using 66 | `ClusterFirstWithHostNet`, which was introduced in Kubernetes 1.6. 67 | 68 | # Usage 69 | 70 | ## Basic 71 | 72 | The HDFS daemons can be launched using the main `hdfs-k8s` chart. First, build 73 | the main chart using: 74 | 75 | ``` 76 | $ helm repo add incubator \ 77 | https://kubernetes-charts-incubator.storage.googleapis.com/ 78 | $ helm dependency build charts/hdfs-k8s 79 | ``` 80 | 81 | Zookeeper, journalnodes and namenodes need persistent volumes for storing 82 | metadata. By default, the helm charts do not set the storage class name for 83 | dynamically provisioned volumes, nor do they use persistent volume selectors for 84 | static persistent volumes. 85 | 86 | This means dynamic volumes will rely on the provisioner of the default storage 87 | class. Or, if your cluster has statically provisioned volumes, the 88 | chart will match existing volumes entirely based on the size requirements. To 89 | override this default behavior, you can specify storage classes for 90 | dynamic volumes, or volume selectors for static volumes. See below for how to 91 | set these options. 92 | 93 | - namenodes: Each of the two namenodes needs at least a 100 GB volume. i.e. 94 | You need two 100 GB volumes. This can be overridden by the 95 | `hdfs-namenode-k8s.persistence.size` option. 96 | You can also override the storage class or the selector using 97 | `hdfs-namenode-k8s.persistence.storageClass`, or 98 | `hdfs-namenode-k8s.persistence.selector` respectively. For details, see the 99 | values.yaml file inside `hdfs-namenode-k8s` chart dir. 100 | - zookeeper: You need three > 5 GB volumes. i.e. Each of the three zookeeper 101 | servers will need at least 5 GB in the volume. This can be overridden by 102 | the `zookeeper.persistence.size` option. You can also override 103 | the storage class using `zookeeper.persistence.storageClass`. 104 | - journalnodes: Each of the three journalnodes will need at least 20 GB in 105 | the volume. The size can be overridden by the 106 | `hdfs-journalnode-k8s.persistence.size` option.
107 | You can also override the storage class or the selector using 108 | `hdfs-journalnode-k8s.persistence.storageClass`, or 109 | `hdfs-journalnode-k8s.persistence.selector` respectively. For details, see the 110 | values.yaml file inside `hdfs-journalnode-k8s` chart dir. 111 | - kerberos: The single Kerberos server will need at least 20 GB in the volume. 112 | The size can be overridden by the `hdfs-krb5-k8s.persistence.size` option. 113 | You can also override the storage class or the selector using 114 | `hdfs-krb5-k8s.persistence.storageClass`, or 115 | `hdfs-krb5-k8s.persistence.selector` respectively. For details, see the 116 | values.yaml file inside `hdfs-krb5-k8s` chart dir. 117 | 118 | Then launch the main chart. Specify the chart release name, say "my-hdfs", 119 | which will be the prefix of the K8s resource names for the HDFS components. 120 | 121 | ``` 122 | $ helm install -n my-hdfs charts/hdfs-k8s 123 | ``` 124 | 125 | Wait for all daemons to be ready. Note that some daemons may restart themselves 126 | a few times before they become ready. 127 | 128 | ``` 129 | $ kubectl get pod -l release=my-hdfs 130 | 131 | NAME READY STATUS RESTARTS AGE 132 | my-hdfs-client-c749d9f8f-d5pvk 1/1 Running 0 2m 133 | my-hdfs-datanode-o7jia 1/1 Running 3 2m 134 | my-hdfs-datanode-p5kch 1/1 Running 3 2m 135 | my-hdfs-datanode-r3kjo 1/1 Running 3 2m 136 | my-hdfs-journalnode-0 1/1 Running 0 2m 137 | my-hdfs-journalnode-1 1/1 Running 0 2m 138 | my-hdfs-journalnode-2 1/1 Running 0 1m 139 | my-hdfs-namenode-0 1/1 Running 3 2m 140 | my-hdfs-namenode-1 1/1 Running 3 2m 141 | my-hdfs-zookeeper-0 1/1 Running 0 2m 142 | my-hdfs-zookeeper-1 1/1 Running 0 2m 143 | my-hdfs-zookeeper-2 1/1 Running 0 2m 144 | ``` 145 | 146 | Namenodes and datanodes are currently using the K8s `hostNetwork` so they can 147 | see physical IPs of each other. If they are not using `hostNetwork`, 148 | overlay K8s network providers such as weave-net may mask the physical IPs, 149 | which will confuse data locality handling inside the namenodes later. 150 | 151 | Finally, test with the client pod: 152 | 153 | ``` 154 | $ _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ 155 | cut -d/ -f 2) 156 | $ kubectl exec $_CLIENT -- hdfs dfsadmin -report 157 | $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0 158 | $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1 159 | 160 | $ kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp 161 | $ kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp 162 | $ kubectl exec $_CLIENT -- sh -c \ 163 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 164 | $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 165 | ``` 166 | 167 | ## Kerberos 168 | 169 | Kerberos can be enabled by setting a few related options: 170 | 171 | ``` 172 | $ helm install -n my-hdfs charts/hdfs-k8s \ 173 | --set global.kerberosEnabled=true \ 174 | --set global.kerberosRealm=MYCOMPANY.COM \ 175 | --set tags.kerberos=true 176 | ``` 177 | 178 | This will launch all charts including the Kerberos server, which will become 179 | ready fairly quickly. However, HDFS daemon charts will be blocked as the daemons 180 | require Kerberos service principals to be available. So we need to unblock 181 | them by creating those principals.
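The steps below copy files from the Kerberos server pod and run `kadmin.local` inside it, so make sure that pod is ready first. One way to check, assuming the `my-hdfs` release name used above (and that your kubectl version provides `kubectl wait`):

```
$ kubectl get pod -l app=hdfs-krb5,release=my-hdfs
$ kubectl wait --for=condition=ready pod -l app=hdfs-krb5,release=my-hdfs \
    --timeout=300s
```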
182 | 183 | First, create a configmap containing the common Kerberos config file: 184 | 185 | ``` 186 | _MY_DIR=~/krb5 187 | mkdir -p $_MY_DIR/tmp 188 | _KDC=$(kubectl get pod -l app=hdfs-krb5,release=my-hdfs --no-headers \ 189 | -o name | cut -d/ -f2) 190 | kubectl cp $_KDC:/etc/krb5.conf $_MY_DIR/tmp/krb5.conf 191 | kubectl create configmap my-hdfs-krb5-config \ 192 | --from-file=$_MY_DIR/tmp/krb5.conf 193 | ``` 194 | 195 | Second, create the service principals and passwords. Kerberos requires service 196 | principals to be host specific. Some HDFS daemons are associated with your K8s 197 | cluster nodes' physical host names, say kube-n1.mycompany.com, while others are 198 | associated with Kubernetes virtual service names, for instance 199 | my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local. You can get 200 | the list of these host names like this: 201 | 202 | ``` 203 | $ _HOSTS=$(kubectl get nodes \ 204 | -o=jsonpath='{.items[*].status.addresses[?(@.type == "Hostname")].address}') 205 | 206 | $ _HOSTS+=$(kubectl describe configmap my-hdfs-config | \ 207 | grep -A 1 -e dfs.namenode.rpc-address.hdfs-k8s \ 208 | -e dfs.namenode.shared.edits.dir | 209 | grep "<value>" | 210 | sed -e "s/<value>//" \ 211 | -e "s/<\/value>//" \ 212 | -e "s/:8020//" \ 213 | -e "s/qjournal:\/\///" \ 214 | -e "s/:8485;/ /g" \ 215 | -e "s/:8485\/hdfs-k8s//") 216 | ``` 217 | 218 | Then generate per-host principal accounts and password keytab files. 219 | 220 | ``` 221 | $ _SECRET_CMD="kubectl create secret generic my-hdfs-krb5-keytabs" 222 | $ for _HOST in $_HOSTS; do 223 | kubectl exec $_KDC -- kadmin.local -q \ 224 | "addprinc -randkey hdfs/$_HOST@MYCOMPANY.COM" 225 | kubectl exec $_KDC -- kadmin.local -q \ 226 | "addprinc -randkey HTTP/$_HOST@MYCOMPANY.COM" 227 | kubectl exec $_KDC -- kadmin.local -q \ 228 | "ktadd -norandkey -k /tmp/$_HOST.keytab hdfs/$_HOST@MYCOMPANY.COM HTTP/$_HOST@MYCOMPANY.COM" 229 | kubectl cp $_KDC:/tmp/$_HOST.keytab $_MY_DIR/tmp/$_HOST.keytab 230 | _SECRET_CMD+=" --from-file=$_MY_DIR/tmp/$_HOST.keytab" 231 | done 232 | ``` 233 | 234 | The above builds a command in the shell variable `_SECRET_CMD` for 235 | creating a K8s secret that contains all the keytab files. Run the command to create 236 | the secret. 237 | 238 | ``` 239 | $ $_SECRET_CMD 240 | ``` 241 | 242 | This will unblock all HDFS daemon pods. Wait until they become ready.
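One way to do that, assuming the `my-hdfs` release and the `my-hdfs-krb5-keytabs` secret name used above, is to confirm the secret exists and then watch the pods until every daemon shows `1/1 Running`:

```
$ kubectl get secret my-hdfs-krb5-keytabs
$ kubectl get pod -l release=my-hdfs -w
```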
243 | 244 | Finally, test the setup using the following commands: 245 | 246 | ``` 247 | $ _NN0=$(kubectl get pods -l app=hdfs-namenode,release=my-hdfs -o name | \ 248 | head -1 | \ 249 | cut -d/ -f2) 250 | $ kubectl exec $_NN0 -- sh -c "(apt install -y krb5-user > /dev/null)" \ 251 | || true 252 | $ kubectl exec $_NN0 -- \ 253 | kinit -kt /etc/security/hdfs.keytab \ 254 | hdfs/my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local@MYCOMPANY.COM 255 | $ kubectl exec $_NN0 -- hdfs dfsadmin -report 256 | $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn0 257 | $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn1 258 | $ kubectl exec $_NN0 -- hadoop fs -rm -r -f /tmp 259 | $ kubectl exec $_NN0 -- hadoop fs -mkdir /tmp 260 | $ kubectl exec $_NN0 -- hadoop fs -chmod 0777 /tmp 261 | $ kubectl exec $_KDC -- kadmin.local -q \ 262 | "addprinc -randkey user1@MYCOMPANY.COM" 263 | $ kubectl exec $_KDC -- kadmin.local -q \ 264 | "ktadd -norandkey -k /tmp/user1.keytab user1@MYCOMPANY.COM" 265 | $ kubectl cp $_KDC:/tmp/user1.keytab $_MY_DIR/tmp/user1.keytab 266 | $ kubectl cp $_MY_DIR/tmp/user1.keytab $_CLIENT:/tmp/user1.keytab 267 | 268 | $ kubectl exec $_CLIENT -- sh -c "(apt install -y krb5-user > /dev/null)" \ 269 | || true 270 | 271 | $ kubectl exec $_CLIENT -- kinit -kt /tmp/user1.keytab user1@MYCOMPANY.COM 272 | $ kubectl exec $_CLIENT -- sh -c \ 273 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 274 | $ kubectl exec $_CLIENT -- hadoop fs -ls / 275 | $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 276 | ``` 277 | 278 | ## Advanced options 279 | 280 | ### Setting HostPath volume locations for datanodes 281 | 282 | HDFS on K8s stores the file data on the local disks of the K8s cluster nodes 283 | using K8s HostPath volumes. You may want to change the default locations. Set 284 | `global.dataNodeHostPath` to override the default value. Note that the option 285 | takes a list in case you want to use multiple disks. 286 | 287 | ``` 288 | $ helm install -n my-hdfs charts/hdfs-k8s \ 289 | --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data0,/mnt/sda1/hdfs-data1}" 290 | ``` 291 | 292 | ### Using an existing zookeeper quorum 293 | 294 | By default, HDFS on K8s pulls in the zookeeper chart in the incubator remote 295 | repo (https://kubernetes-charts-incubator.storage.googleapis.com/) as a 296 | dependency and launches zookeeper daemons. But your K8s cluster may already 297 | have a zookeeper quorum. 298 | 299 | It is possible to use the existing zookeeper quorum. We just need to set a few options 300 | in the helm install command line. It should be something like: 301 | 302 | ``` 303 | $ helm install -n my-hdfs charts/hdfs-k8s \ 304 | --set condition.subchart.zookeeper=false \ 305 | --set global.zookeeperQuorumOverride=zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 306 | ``` 307 | 308 | Setting `condition.subchart.zookeeper` to false prevents the uber-chart from 309 | bringing in zookeeper as a sub-chart. And the `global.zookeeperQuorumOverride` 310 | option specifies the custom address of the zookeeper quorum. Use your 311 | zookeeper address here. 312 | 313 | ### Pinning namenodes to specific K8s cluster nodes 314 | 315 | Optionally, you can attach labels to some of your K8s cluster nodes so that 316 | namenodes will always run on those cluster nodes. This can allow your HDFS 317 | clients outside the Kubernetes cluster to expect stable IP addresses.
When used 318 | by those outside clients, Kerberos expects the namenode addresses to be stable. 319 | 320 | ``` 321 | $ kubectl label nodes YOUR-HOST-1 hdfs-namenode-selector=hdfs-namenode 322 | $ kubectl label nodes YOUR-HOST-2 hdfs-namenode-selector=hdfs-namenode 323 | ``` 324 | 325 | You should then add the nodeSelector option to the helm install command: 326 | 327 | ``` 328 | $ helm install -n my-hdfs charts/hdfs-k8s \ 329 | --set hdfs-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode \ 330 | ... 331 | ``` 332 | 333 | ### Excluding datanodes from some K8s cluster nodes 334 | 335 | You may want to exclude some K8s cluster nodes from the datanode launch targets. 336 | For instance, some K8s clusters may let the K8s cluster master node launch 337 | a datanode. To prevent this, label the cluster nodes with 338 | `hdfs-datanode-exclude`. 339 | 340 | ``` 341 | $ kubectl label node YOUR-CLUSTER-NODE hdfs-datanode-exclude=yes 342 | ``` 343 | 344 | ### Launching with a non-HA namenode 345 | 346 | You may want a non-HA namenode since it is the simplest possible setup. 347 | Note this won't launch zookeeper or journalnodes. 348 | 349 | The single namenode is supposed to be pinned to a cluster host using a node 350 | label. Attach a label to one of your K8s cluster nodes. 351 | 352 | ``` 353 | $ kubectl label nodes YOUR-CLUSTER-NODE hdfs-namenode-selector=hdfs-namenode-0 354 | ``` 355 | 356 | The non-HA setup does not use persistent volumes, so you don't 357 | need to prepare persistent volumes. Instead, it uses a hostPath volume 358 | of the pinned cluster node. So, just launch the chart while 359 | setting options to turn off HA. You should add the nodeSelector option 360 | so that the single namenode will find the hostPath volume of the same cluster 361 | node when the pod restarts. 362 | 363 | ``` 364 | $ helm install -n my-hdfs charts/hdfs-k8s \ 365 | --set tags.ha=false \ 366 | --set tags.simple=true \ 367 | --set global.namenodeHAEnabled=false \ 368 | --set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0 369 | ``` 370 | 371 | # Security 372 | 373 | ## K8s secret containing Kerberos keytab files 374 | 375 | The Kerberos setup creates a K8s secret containing all the keytab files of HDFS 376 | daemon service principals. This will be mounted onto HDFS daemon pods. You may 377 | want to restrict access to this secret using K8s 378 | [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/), to minimize 379 | exposure of the keytab files. 380 | 381 | ## HostPath volumes 382 | `Datanode` daemons run on every cluster node. They also mount K8s `hostPath` 383 | local disk volumes. You may want to restrict access to `hostPath` volumes 384 | using a `pod security policy`. 385 | See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md) 386 | 387 | ## Credits 388 | 389 | Many charts use public Hadoop Docker images hosted by 390 | [uhopper](https://hub.docker.com/u/uhopper/). 391 | -------------------------------------------------------------------------------- /charts/hdfs-client-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-client-k8s 3 | version: 0.1.0 4 | description: A client for HDFS on Kubernetes.
5 | -------------------------------------------------------------------------------- /charts/hdfs-client-k8s/templates/client-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "hdfs-k8s.client.fullname" . }} 5 | labels: 6 | app: {{ template "hdfs-k8s.client.name" . }} 7 | chart: {{ template "hdfs-k8s.subchart" . }} 8 | release: {{ .Release.Name }} 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: {{ template "hdfs-k8s.client.name" . }} 14 | release: {{ .Release.Name }} 15 | template: 16 | metadata: 17 | labels: 18 | app: {{ template "hdfs-k8s.client.name" . }} 19 | release: {{ .Release.Name }} 20 | {{- if .Values.podAnnotations }} 21 | annotations: 22 | {{ toYaml .Values.podAnnotations | indent 8 }} 23 | {{- end }} 24 | spec: 25 | containers: 26 | - name: hdfs-client 27 | image: uhopper/hadoop:2.7.2 28 | env: 29 | - name: HADOOP_CUSTOM_CONF_DIR 30 | value: /etc/hadoop-custom-conf 31 | - name: MULTIHOMED_NETWORK 32 | value: "0" 33 | command: ['/bin/sh', '-c'] 34 | args: 35 | - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg 36 | volumeMounts: 37 | - name: hdfs-config 38 | mountPath: /etc/hadoop-custom-conf 39 | readOnly: true 40 | {{- if .Values.global.kerberosEnabled }} 41 | - name: kerberos-config 42 | mountPath: /etc/krb5.conf 43 | subPath: {{ .Values.global.kerberosConfigFileName }} 44 | readOnly: true 45 | {{- end }} 46 | restartPolicy: Always 47 | volumes: 48 | - name: hdfs-config 49 | configMap: 50 | name: {{ template "hdfs-k8s.config.fullname" . }} 51 | {{- if .Values.global.kerberosEnabled }} 52 | - name: kerberos-config 53 | configMap: 54 | name: {{ template "krb5-configmap" . }} 55 | {{- end }} 56 | -------------------------------------------------------------------------------- /charts/hdfs-config-k8s/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/hdfs-config-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for configuring HDFS on Kubernetes 4 | name: hdfs-config-k8s 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/hdfs-config-k8s/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "hdfs-config-k8s.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "hdfs-config-k8s.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "hdfs-config-k8s.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Create the kerberos principal suffix for core HDFS services 36 | */}} 37 | {{- define "hdfs-principal" -}} 38 | {{- printf "hdfs/_HOST@%s" .Values.kerberosRealm -}} 39 | {{- end -}} 40 | 41 | {{/* 42 | Create the kerberos principal for HTTP services 43 | */}} 44 | {{- define "http-principal" -}} 45 | {{- printf "HTTP/_HOST@%s" .Values.kerberosRealm -}} 46 | {{- end -}} 47 | 48 | {{/* 49 | Create the datanode data dir list. The below uses two loops to make sure the 50 | last item does not have comma. It uses index 0 for the last item since that is 51 | the only special index that helm template gives us. 52 | */}} 53 | {{- define "datanode-data-dirs" -}} 54 | {{- range $index, $path := .Values.global.dataNodeHostPath -}} 55 | {{- if ne $index 0 -}} 56 | /hadoop/dfs/data/{{ $index }}, 57 | {{- end -}} 58 | {{- end -}} 59 | {{- range $index, $path := .Values.global.dataNodeHostPath -}} 60 | {{- if eq $index 0 -}} 61 | /hadoop/dfs/data/{{ $index }} 62 | {{- end -}} 63 | {{- end -}} 64 | {{- end -}} 65 | -------------------------------------------------------------------------------- /charts/hdfs-config-k8s/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ template "hdfs-k8s.config.fullname" . }} 5 | labels: 6 | app: {{ template "hdfs-k8s.client.name" . }} 7 | chart: {{ template "hdfs-k8s.subchart" . }} 8 | release: {{ .Release.Name }} 9 | data: 10 | core-site.xml: | 11 | 12 | 13 | 14 | {{- if .Values.global.kerberosEnabled }} 15 | 16 | hadoop.security.authentication 17 | kerberos 18 | 19 | 31 | 32 | hadoop.security.authorization 33 | false 34 | 35 | 36 | hadoop.rpc.protection 37 | privacy 38 | 39 | 40 | hadoop.user.group.static.mapping.overrides 41 | hdfs=root; 42 | 43 | {{- end }} 44 | {{- range $key, $value := .Values.customHadoopConfig.coreSite }} 45 | 46 | {{ $key }} 47 | {{ $value }} 48 | 49 | {{- end }} 50 | {{- if .Values.global.namenodeHAEnabled }} 51 | 52 | fs.defaultFS 53 | hdfs://hdfs-k8s 54 | 55 | 56 | ha.zookeeper.quorum 57 | {{ template "zookeeper-quorum" . }} 58 | 59 | {{- else }} 60 | 61 | fs.defaultFS 62 | hdfs://{{ template "namenode-svc-0" . }}:8020 63 | 64 | {{- end }} 65 | 66 | hdfs-site.xml: | 67 | 68 | 69 | 70 | {{- if .Values.global.kerberosEnabled }} 71 | 72 | dfs.block.access.token.enable 73 | true 74 | 75 | 76 | dfs.encrypt.data.transfer 77 | true 78 | 79 | 80 | dfs.namenode.kerberos.principal 81 | {{ template "hdfs-principal" . }} 82 | 83 | {{/* 84 | TODO: Check if the https principal is no longer needed in newer Hadoop version. 85 | */}} 86 | 87 | dfs.namenode.kerberos.https.principal 88 | {{ template "http-principal" . 
}} 89 | 90 | 91 | dfs.web.authentication.kerberos.principal 92 | {{ template "http-principal" . }} 93 | 94 | 95 | dfs.namenode.keytab.file 96 | /etc/security/hdfs.keytab 97 | 98 | 99 | dfs.journalnode.kerberos.principal 100 | {{ template "hdfs-principal" . }} 101 | 102 | 103 | dfs.journalnode.kerberos.internal.spnego.principal 104 | {{ template "http-principal" . }} 105 | 106 | 107 | dfs.journalnode.keytab.file 108 | /etc/security/hdfs.keytab 109 | 110 | 111 | dfs.datanode.kerberos.principal 112 | {{ template "hdfs-principal" . }} 113 | 114 | 115 | dfs.datanode.kerberos.https.principal 116 | {{ template "http-principal" . }} 117 | 118 | 119 | dfs.datanode.keytab.file 120 | /etc/security/hdfs.keytab 121 | 122 | {{- if .Values.global.jsvcEnabled }} 123 | 124 | dfs.datanode.address 125 | 0.0.0.0:1004 126 | 127 | 128 | dfs.datanode.http.address 129 | 0.0.0.0:1006 130 | 131 | {{- end }} 132 | {{- end }} 133 | {{- range $key, $value := .Values.customHadoopConfig.hdfsSite }} 134 | 135 | {{ $key }} 136 | {{ $value }} 137 | 138 | {{- end }} 139 | {{- if .Values.global.namenodeHAEnabled }} 140 | 141 | dfs.nameservices 142 | hdfs-k8s 143 | 144 | 145 | dfs.ha.namenodes.hdfs-k8s 146 | nn0,nn1 147 | 148 | 149 | dfs.namenode.rpc-address.hdfs-k8s.nn0 150 | {{ template "namenode-svc-0" . }}:8020 151 | 152 | 153 | dfs.namenode.rpc-address.hdfs-k8s.nn1 154 | {{ template "namenode-svc-1" . }}:8020 155 | 156 | 157 | dfs.namenode.http-address.hdfs-k8s.nn0 158 | {{ template "namenode-svc-0" . }}:50070 159 | 160 | 161 | dfs.namenode.http-address.hdfs-k8s.nn1 162 | {{ template "namenode-svc-1" . }}:50070 163 | 164 | 165 | dfs.namenode.shared.edits.dir 166 | qjournal://{{ template "journalnode-quorum" . }}/hdfs-k8s 167 | 168 | 169 | dfs.ha.automatic-failover.enabled 170 | true 171 | 172 | 173 | dfs.ha.fencing.methods 174 | shell(/bin/true) 175 | 176 | 177 | dfs.journalnode.edits.dir 178 | /hadoop/dfs/journal 179 | 180 | 181 | dfs.client.failover.proxy.provider.hdfs-k8s 182 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 183 | 184 | {{- end }} 185 | 186 | dfs.namenode.name.dir 187 | file:///hadoop/dfs/name 188 | 189 | 190 | dfs.namenode.datanode.registration.ip-hostname-check 191 | false 192 | 193 | 194 | dfs.datanode.data.dir 195 | {{ template "datanode-data-dirs" . }} 196 | 197 | 198 | -------------------------------------------------------------------------------- /charts/hdfs-datanode-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-datanode-k8s 3 | version: 0.1.0 4 | description: Datanodes for HDFS on Kubernetes. 5 | -------------------------------------------------------------------------------- /charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml: -------------------------------------------------------------------------------- 1 | # Provides datanode helper scripts. 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts 6 | labels: 7 | app: {{ template "hdfs-k8s.datanode.name" . }} 8 | chart: {{ template "hdfs-k8s.subchart" . }} 9 | release: {{ .Release.Name }} 10 | data: 11 | check-status.sh: | 12 | #!/usr/bin/env bash 13 | # Exit on error. Append "|| true" if you expect an error. 14 | set -o errexit 15 | # Exit on error inside any functions or subshells. 16 | set -o errtrace 17 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 18 | set -o nounset 19 | # Catch an error in command pipes. e.g. 
mysqldump fails (but gzip succeeds) 20 | # in `mysqldump |gzip` 21 | set -o pipefail 22 | # Turn on traces, useful while debugging. 23 | set -o xtrace 24 | 25 | # Check if datanode registered with the namenode and got non-null cluster ID. 26 | _PORTS="50075 1006" 27 | _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo" 28 | _CLUSTER_ID="" 29 | for _PORT in $_PORTS; do 30 | _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \ 31 | grep ClusterId) || true 32 | done 33 | echo $_CLUSTER_ID | grep -q -v null 34 | --- 35 | # Deleting a daemonset may need some trick. See 36 | # https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 37 | apiVersion: extensions/v1beta1 38 | kind: DaemonSet 39 | metadata: 40 | name: {{ template "hdfs-k8s.datanode.fullname" . }} 41 | labels: 42 | app: {{ template "hdfs-k8s.datanode.name" . }} 43 | chart: {{ template "hdfs-k8s.subchart" . }} 44 | release: {{ .Release.Name }} 45 | spec: 46 | template: 47 | metadata: 48 | labels: 49 | app: {{ template "hdfs-k8s.datanode.name" . }} 50 | release: {{ .Release.Name }} 51 | {{- if .Values.podAnnotations }} 52 | annotations: 53 | {{ toYaml .Values.podAnnotations | indent 8 }} 54 | {{- end }} 55 | spec: 56 | {{- if .Values.affinity }} 57 | affinity: 58 | {{ toYaml .Values.affinity | indent 8 }} 59 | {{- else if .Values.global.defaultAffinityEnabled }} 60 | affinity: 61 | nodeAffinity: 62 | requiredDuringSchedulingIgnoredDuringExecution: 63 | nodeSelectorTerms: 64 | - matchExpressions: 65 | - key: {{ template "hdfs-k8s.datanode.fullname" . }}-exclude 66 | operator: DoesNotExist 67 | {{- end }} 68 | {{- if .Values.nodeSelector }} 69 | nodeSelector: 70 | {{ toYaml .Values.nodeSelector | indent 8 }} 71 | {{- end }} 72 | {{- if .Values.tolerations }} 73 | tolerations: 74 | {{ toYaml .Values.tolerations | indent 8 }} 75 | {{- end }} 76 | hostNetwork: true 77 | hostPID: true 78 | dnsPolicy: ClusterFirstWithHostNet 79 | containers: 80 | - name: datanode 81 | image: uhopper/hadoop-datanode:2.7.2 82 | env: 83 | - name: HADOOP_CUSTOM_CONF_DIR 84 | value: /etc/hadoop-custom-conf 85 | - name: MULTIHOMED_NETWORK 86 | value: "0" 87 | {{- if and .Values.global.kerberosEnabled .Values.global.jsvcEnabled }} 88 | - name: HADOOP_SECURE_DN_USER 89 | value: root 90 | - name: JSVC_OUTFILE 91 | value: /dev/stdout 92 | - name: JSVC_ERRFILE 93 | value: /dev/stderr 94 | - name: JSVC_HOME 95 | value: /jsvc-home 96 | {{- end }} 97 | livenessProbe: 98 | exec: 99 | command: 100 | - /dn-scripts/check-status.sh 101 | initialDelaySeconds: 60 102 | periodSeconds: 30 103 | readinessProbe: 104 | exec: 105 | command: 106 | - /dn-scripts/check-status.sh 107 | initialDelaySeconds: 60 108 | periodSeconds: 30 109 | securityContext: 110 | privileged: true 111 | volumeMounts: 112 | - name: dn-scripts 113 | mountPath: /dn-scripts 114 | readOnly: true 115 | - name: hdfs-config 116 | mountPath: /etc/hadoop-custom-conf 117 | readOnly: true 118 | {{- range $index, $path := .Values.global.dataNodeHostPath }} 119 | - name: hdfs-data-{{ $index }} 120 | mountPath: /hadoop/dfs/data/{{ $index }} 121 | {{- end }} 122 | {{- if .Values.global.kerberosEnabled }} 123 | - name: kerberos-config 124 | mountPath: /etc/krb5.conf 125 | subPath: {{ .Values.global.kerberosConfigFileName }} 126 | readOnly: true 127 | - name: kerberos-keytab-copy 128 | mountPath: /etc/security/ 129 | readOnly: true 130 | {{- if .Values.global.jsvcEnabled }} 131 | - name: jsvc-home 132 | mountPath: /jsvc-home 133 | {{- end }} 134 | {{- end }} 135 | {{- if 
.Values.global.kerberosEnabled }} 136 | initContainers: 137 | - name: copy-kerberos-keytab 138 | image: busybox:1.27.1 139 | command: ['sh', '-c'] 140 | args: 141 | - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab 142 | env: 143 | - name: MY_NODE_NAME 144 | valueFrom: 145 | fieldRef: 146 | fieldPath: spec.nodeName 147 | volumeMounts: 148 | - name: kerberos-keytabs 149 | mountPath: /kerberos-keytabs 150 | - name: kerberos-keytab-copy 151 | mountPath: /kerberos-keytab-copy 152 | {{- if .Values.global.jsvcEnabled }} 153 | - name: copy-jsvc 154 | # Pull by digest because the image doesn't have tags to pin. 155 | image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef 156 | command: ['sh', '-c'] 157 | args: 158 | - cp /usr/bin/jsvc /jsvc-home/jsvc 159 | volumeMounts: 160 | - name: jsvc-home 161 | mountPath: /jsvc-home 162 | {{- end }} 163 | {{- end }} 164 | restartPolicy: Always 165 | volumes: 166 | - name: dn-scripts 167 | configMap: 168 | name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts 169 | defaultMode: 0744 170 | {{- range $index, $path := .Values.global.dataNodeHostPath }} 171 | - name: hdfs-data-{{ $index }} 172 | hostPath: 173 | path: {{ $path }} 174 | {{- end }} 175 | - name: hdfs-config 176 | configMap: 177 | name: {{ template "hdfs-k8s.config.fullname" . }} 178 | {{- if .Values.global.kerberosEnabled }} 179 | - name: kerberos-config 180 | configMap: 181 | name: {{ template "krb5-configmap" . }} 182 | - name: kerberos-keytabs 183 | secret: 184 | secretName: {{ template "krb5-keytabs-secret" . }} 185 | - name: kerberos-keytab-copy 186 | emptyDir: {} 187 | {{- if .Values.global.jsvcEnabled }} 188 | - name: jsvc-home 189 | emptyDir: {} 190 | {{- end }} 191 | {{- end }} 192 | -------------------------------------------------------------------------------- /charts/hdfs-journalnode-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-journalnode-k8s 3 | version: 0.1.0 4 | description: Journalnode quorum used by HDFS on Kubernetes. 5 | -------------------------------------------------------------------------------- /charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml: -------------------------------------------------------------------------------- 1 | # A headless service to create DNS records. 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "hdfs-k8s.journalnode.fullname" . }} 6 | labels: 7 | app: {{ template "hdfs-k8s.journalnode.name" . }} 8 | chart: {{ template "hdfs-k8s.subchart" . }} 9 | release: {{ .Release.Name }} 10 | annotations: 11 | # TODO: Deprecated. Replace tolerate-unready-endpoints with 12 | # v1.Service.PublishNotReadyAddresses. 13 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 14 | spec: 15 | ports: 16 | - port: 8485 17 | name: jn 18 | - port: 8480 19 | name: http 20 | clusterIP: None 21 | selector: 22 | app: {{ template "hdfs-k8s.journalnode.name" . }} 23 | release: {{ .Release.Name }} 24 | --- 25 | apiVersion: policy/v1beta1 26 | kind: PodDisruptionBudget 27 | metadata: 28 | name: {{ template "hdfs-k8s.journalnode.fullname" . }} 29 | labels: 30 | app: {{ template "hdfs-k8s.journalnode.name" . }} 31 | chart: {{ template "hdfs-k8s.subchart" . }} 32 | release: {{ .Release.Name }} 33 | spec: 34 | selector: 35 | matchLabels: 36 | app: {{ template "hdfs-k8s.journalnode.name" . 
}} 37 | release: {{ .Release.Name }} 38 | minAvailable: {{ div .Values.global.journalnodeQuorumSize 2 | add1 }} 39 | --- 40 | apiVersion: apps/v1beta1 41 | kind: StatefulSet 42 | metadata: 43 | name: {{ template "hdfs-k8s.journalnode.fullname" . }} 44 | labels: 45 | app: {{ template "hdfs-k8s.journalnode.name" . }} 46 | chart: {{ template "hdfs-k8s.subchart" . }} 47 | release: {{ .Release.Name }} 48 | spec: 49 | serviceName: {{ template "hdfs-k8s.journalnode.fullname" . }} 50 | replicas: {{ .Values.global.journalnodeQuorumSize }} 51 | template: 52 | metadata: 53 | labels: 54 | app: {{ template "hdfs-k8s.journalnode.name" . }} 55 | release: {{ .Release.Name }} 56 | {{- if .Values.podAnnotations }} 57 | annotations: 58 | {{ toYaml .Values.podAnnotations | indent 8 }} 59 | {{- end }} 60 | spec: 61 | {{- if .Values.affinity }} 62 | affinity: 63 | {{ toYaml .Values.affinity | indent 8 }} 64 | {{- else if .Values.global.defaultAffinityEnabled }} 65 | affinity: 66 | podAntiAffinity: 67 | requiredDuringSchedulingIgnoredDuringExecution: 68 | - labelSelector: 69 | matchExpressions: 70 | - key: "app" 71 | operator: In 72 | values: 73 | - {{ template "hdfs-k8s.journalnode.name" . }} 74 | - key: "release" 75 | operator: In 76 | values: 77 | - {{ .Release.Name }} 78 | topologyKey: "kubernetes.io/hostname" 79 | {{- end }} 80 | {{- if .Values.nodeSelector }} 81 | nodeSelector: 82 | {{ toYaml .Values.nodeSelector | indent 8 }} 83 | {{- end }} 84 | {{- if .Values.tolerations }} 85 | tolerations: 86 | {{ toYaml .Values.tolerations | indent 8 }} 87 | {{- end }} 88 | containers: 89 | - name: hdfs-journalnode 90 | image: uhopper/hadoop-namenode:2.7.2 91 | env: 92 | - name: HADOOP_CUSTOM_CONF_DIR 93 | value: /etc/hadoop-custom-conf 94 | command: ["/entrypoint.sh"] 95 | args: ["/opt/hadoop-2.7.2/bin/hdfs", "--config", "/etc/hadoop", "journalnode"] 96 | ports: 97 | - containerPort: 8485 98 | name: jn 99 | - containerPort: 8480 100 | name: http 101 | volumeMounts: 102 | # Mount a subpath of the volume so that the journal subdir would be 103 | # a brand new empty dir. This way, we won't get affected by 104 | # existing files in the volume top dir. 105 | - name: editdir 106 | mountPath: /hadoop/dfs/journal 107 | subPath: journal 108 | - name: editdir 109 | mountPath: /hadoop/dfs/name 110 | subPath: name 111 | - name: hdfs-config 112 | mountPath: /etc/hadoop-custom-conf 113 | readOnly: true 114 | {{- if .Values.global.kerberosEnabled }} 115 | - name: kerberos-config 116 | mountPath: /etc/krb5.conf 117 | subPath: {{ .Values.global.kerberosConfigFileName }} 118 | readOnly: true 119 | - name: kerberos-keytab-copy 120 | mountPath: /etc/security/ 121 | readOnly: true 122 | {{- end }} 123 | {{- if .Values.global.kerberosEnabled }} 124 | initContainers: 125 | - name: copy-kerberos-keytab 126 | image: busybox:1.27.1 127 | command: ['sh', '-c'] 128 | args: 129 | - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab 130 | env: 131 | - name: MY_KERBEROS_NAME 132 | valueFrom: 133 | fieldRef: 134 | fieldPath: metadata.name 135 | volumeMounts: 136 | - name: kerberos-keytabs 137 | mountPath: /kerberos-keytabs 138 | - name: kerberos-keytab-copy 139 | mountPath: /kerberos-keytab-copy 140 | {{- end }} 141 | restartPolicy: Always 142 | volumes: 143 | - name: hdfs-config 144 | configMap: 145 | name: {{ template "hdfs-k8s.config.fullname" . }} 146 | {{- if .Values.global.kerberosEnabled }} 147 | - name: kerberos-config 148 | configMap: 149 | name: {{ template "krb5-configmap" . 
}} 150 | - name: kerberos-keytabs 151 | secret: 152 | secretName: {{ template "krb5-keytabs-secret" . }} 153 | - name: kerberos-keytab-copy 154 | emptyDir: {} 155 | {{- end }} 156 | {{- if .Values.global.podSecurityContext.enabled }} 157 | securityContext: 158 | runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} 159 | fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} 160 | {{- end }} 161 | volumeClaimTemplates: 162 | - metadata: 163 | name: editdir 164 | spec: 165 | accessModes: 166 | - {{ .Values.persistence.accessMode | quote }} 167 | resources: 168 | requests: 169 | storage: {{ .Values.persistence.size | quote }} 170 | {{- if .Values.persistence.storageClass }} 171 | {{- if (eq "-" .Values.persistence.storageClass) }} 172 | storageClassName: "" 173 | {{- else }} 174 | storageClassName: "{{ .Values.persistence.storageClass }}" 175 | {{- end }} 176 | {{- end }} 177 | {{- if .Values.persistence.selector }} 178 | selector: 179 | {{ toYaml .Values.persistence.selector | indent 10 }} 180 | {{- end }} 181 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/.gitignore: -------------------------------------------------------------------------------- 1 | charts 2 | requirements.lock 3 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: An entry-point Helm chart for launching HDFS on Kubernetes 4 | name: hdfs 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/requirements.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: zookeeper 3 | version: "1.3.1" 4 | repository: https://kubernetes-charts-incubator.storage.googleapis.com/ 5 | condition: condition.subchart.zookeeper 6 | tags: 7 | - ha 8 | - kerberos 9 | - name: hdfs-config-k8s 10 | version: "0.1.0" 11 | repository: "file://../hdfs-config-k8s" 12 | condition: condition.subchart.config 13 | tags: 14 | - ha 15 | - kerberos 16 | - simple 17 | - name: hdfs-krb5-k8s 18 | version: "0.1.0" 19 | repository: "file://../hdfs-krb5-k8s" 20 | condition: condition.subchart.kerberos 21 | tags: 22 | - kerberos 23 | - name: hdfs-journalnode-k8s 24 | version: "0.1.0" 25 | repository: "file://../hdfs-journalnode-k8s" 26 | condition: condition.subchart.journalnode 27 | tags: 28 | - ha 29 | - kerberos 30 | - name: hdfs-namenode-k8s 31 | version: "0.1.0" 32 | repository: "file://../hdfs-namenode-k8s" 33 | condition: condition.subchart.namenode 34 | tags: 35 | - ha 36 | - kerberos 37 | # Non-HA namenode. 
Disabled by default 38 | - name: hdfs-simple-namenode-k8s 39 | version: "0.1.0" 40 | repository: "file://../hdfs-simple-namenode-k8s" 41 | condition: condition.subchart.simple-namenode 42 | tags: 43 | - simple 44 | - name: hdfs-datanode-k8s 45 | version: "0.1.0" 46 | repository: "file://../hdfs-datanode-k8s" 47 | condition: condition.subchart.datanode 48 | tags: 49 | - ha 50 | - kerberos 51 | - simple 52 | - name: hdfs-client-k8s 53 | version: "0.1.0" 54 | repository: "file://../hdfs-client-k8s" 55 | condition: condition.subchart.client 56 | tags: 57 | - ha 58 | - kerberos 59 | - simple 60 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Create a short app name. 4 | */}} 5 | {{- define "hdfs-k8s.name" -}} 6 | hdfs 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "hdfs-k8s.fullname" -}} 15 | {{- if .Values.global.fullnameOverride -}} 16 | {{- .Values.global.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := include "hdfs-k8s.name" . -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the subchart label. 29 | */}} 30 | {{- define "hdfs-k8s.subchart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{- define "zookeeper-fullname" -}} 35 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 36 | {{- if contains "zookeeper" $fullname -}} 37 | {{- printf "%s" $fullname -}} 38 | {{- else -}} 39 | {{- printf "%s-zookeeper" $fullname | trunc 63 | trimSuffix "-" -}} 40 | {{- end -}} 41 | {{- end -}} 42 | 43 | {{- define "hdfs-k8s.config.name" -}} 44 | {{- template "hdfs-k8s.name" . -}}-config 45 | {{- end -}} 46 | 47 | {{- define "hdfs-k8s.config.fullname" -}} 48 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 49 | {{- if contains "config" $fullname -}} 50 | {{- printf "%s" $fullname -}} 51 | {{- else -}} 52 | {{- printf "%s-config" $fullname | trunc 63 | trimSuffix "-" -}} 53 | {{- end -}} 54 | {{- end -}} 55 | 56 | {{- define "hdfs-k8s.krb5.name" -}} 57 | {{- template "hdfs-k8s.name" . -}}-krb5 58 | {{- end -}} 59 | 60 | {{- define "hdfs-k8s.krb5.fullname" -}} 61 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 62 | {{- if contains "config" $fullname -}} 63 | {{- printf "%s" $fullname -}} 64 | {{- else -}} 65 | {{- printf "%s-krb5" $fullname | trunc 63 | trimSuffix "-" -}} 66 | {{- end -}} 67 | {{- end -}} 68 | 69 | {{- define "hdfs-k8s.journalnode.name" -}} 70 | {{- template "hdfs-k8s.name" . -}}-journalnode 71 | {{- end -}} 72 | 73 | {{- define "hdfs-k8s.journalnode.fullname" -}} 74 | {{- $fullname := include "hdfs-k8s.fullname" . 
-}} 75 | {{- if contains "journalnode" $fullname -}} 76 | {{- printf "%s" $fullname -}} 77 | {{- else -}} 78 | {{- printf "%s-journalnode" $fullname | trunc 63 | trimSuffix "-" -}} 79 | {{- end -}} 80 | {{- end -}} 81 | 82 | {{- define "hdfs-k8s.namenode.name" -}} 83 | {{- template "hdfs-k8s.name" . -}}-namenode 84 | {{- end -}} 85 | 86 | {{- define "hdfs-k8s.namenode.fullname" -}} 87 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 88 | {{- if contains "namenode" $fullname -}} 89 | {{- printf "%s" $fullname -}} 90 | {{- else -}} 91 | {{- printf "%s-namenode" $fullname | trunc 63 | trimSuffix "-" -}} 92 | {{- end -}} 93 | {{- end -}} 94 | 95 | {{- define "hdfs-k8s.datanode.name" -}} 96 | {{- template "hdfs-k8s.name" . -}}-datanode 97 | {{- end -}} 98 | 99 | {{- define "hdfs-k8s.datanode.fullname" -}} 100 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 101 | {{- if contains "datanode" $fullname -}} 102 | {{- printf "%s" $fullname -}} 103 | {{- else -}} 104 | {{- printf "%s-datanode" $fullname | trunc 63 | trimSuffix "-" -}} 105 | {{- end -}} 106 | {{- end -}} 107 | 108 | {{- define "hdfs-k8s.client.name" -}} 109 | {{- template "hdfs-k8s.name" . -}}-client 110 | {{- end -}} 111 | 112 | {{- define "hdfs-k8s.client.fullname" -}} 113 | {{- $fullname := include "hdfs-k8s.fullname" . -}} 114 | {{- if contains "client" $fullname -}} 115 | {{- printf "%s" $fullname -}} 116 | {{- else -}} 117 | {{- printf "%s-client" $fullname | trunc 63 | trimSuffix "-" -}} 118 | {{- end -}} 119 | {{- end -}} 120 | 121 | {{/* 122 | Create the kerberos principal suffix for core HDFS services 123 | */}} 124 | {{- define "hdfs-principal" -}} 125 | {{- printf "hdfs/_HOST@%s" .Values.global.kerberosRealm -}} 126 | {{- end -}} 127 | 128 | {{/* 129 | Create the kerberos principal for HTTP services 130 | */}} 131 | {{- define "http-principal" -}} 132 | {{- printf "HTTP/_HOST@%s" .Values.global.kerberosRealm -}} 133 | {{- end -}} 134 | 135 | {{/* 136 | Create the name for a Kubernetes Configmap containing a Kerberos config file. 137 | */}} 138 | {{- define "krb5-configmap" -}} 139 | {{- if .Values.global.kerberosConfigMapOverride -}} 140 | {{- .Values.global.kerberosConfigMapOverride | trunc 63 | trimSuffix "-" -}} 141 | {{- else -}} 142 | {{- $name := include "hdfs-k8s.krb5.fullname" . -}} 143 | {{- printf "%s-config" $name | trunc 63 | trimSuffix "-" -}} 144 | {{- end -}} 145 | {{- end -}} 146 | 147 | {{/* 148 | Create the name for a Kubernetes Secret containing Kerberos keytabs. 149 | */}} 150 | {{- define "krb5-keytabs-secret" -}} 151 | {{- if .Values.global.kerberosKeytabsSecretOverride -}} 152 | {{- .Values.global.kerberosKeytabsSecretOverride | trunc 63 | trimSuffix "-" -}} 153 | {{- else -}} 154 | {{- $name := include "hdfs-k8s.krb5.fullname" . -}} 155 | {{- printf "%s-keytabs" $name | trunc 63 | trimSuffix "-" -}} 156 | {{- end -}} 157 | {{- end -}} 158 | 159 | 160 | {{/* 161 | Create the domain name part of services. 162 | The HDFS config file should specify FQDN of services. Otherwise, Kerberos 163 | login may fail. 164 | */}} 165 | {{- define "svc-domain" -}} 166 | {{- printf "%s.svc.cluster.local" .Release.Namespace -}} 167 | {{- end -}} 168 | 169 | {{/* 170 | Create the zookeeper quorum server list. The below uses two loops to make 171 | sure the last item does not have comma. It uses index 0 for the last item 172 | since that is the only special index that helm template gives us. 
173 | */}} 174 | {{- define "zookeeper-quorum" -}} 175 | {{- if .Values.global.zookeeperQuorumOverride -}} 176 | {{- .Values.global.zookeeperQuorumOverride -}} 177 | {{- else -}} 178 | {{- $service := include "zookeeper-fullname" . -}} 179 | {{- $domain := include "svc-domain" . -}} 180 | {{- $replicas := .Values.global.zookeeperQuorumSize | int -}} 181 | {{- range $i, $e := until $replicas -}} 182 | {{- if ne $i 0 -}} 183 | {{- printf "%s-%d.%s-headless.%s:2181," $service $i $service $domain -}} 184 | {{- end -}} 185 | {{- end -}} 186 | {{- range $i, $e := until $replicas -}} 187 | {{- if eq $i 0 -}} 188 | {{- printf "%s-%d.%s-headless.%s:2181" $service $i $service $domain -}} 189 | {{- end -}} 190 | {{- end -}} 191 | {{- end -}} 192 | {{- end -}} 193 | 194 | {{/* 195 | Construct the name of the Kerberos KDC pod 0. 196 | */}} 197 | {{- define "krb5-pod-0" -}} 198 | {{- template "hdfs-k8s.krb5.fullname" . -}}-0 199 | {{- end -}} 200 | 201 | {{/* 202 | Construct the full name of the Kerberos KDC statefulset member 0. 203 | */}} 204 | {{- define "krb5-svc-0" -}} 205 | {{- $pod := include "krb5-pod-0" . -}} 206 | {{- $service := include "hdfs-k8s.krb5.fullname" . -}} 207 | {{- $domain := include "svc-domain" . -}} 208 | {{- printf "%s.%s.%s" $pod $service $domain -}} 209 | {{- end -}} 210 | 211 | {{/* 212 | Create the journalnode quorum server list. The below uses two loops to make 213 | sure the last item does not have the delimiter. It uses index 0 for the last 214 | item since that is the only special index that helm template gives us. 215 | */}} 216 | {{- define "journalnode-quorum" -}} 217 | {{- $service := include "hdfs-k8s.journalnode.fullname" . -}} 218 | {{- $domain := include "svc-domain" . -}} 219 | {{- $replicas := .Values.global.journalnodeQuorumSize | int -}} 220 | {{- range $i, $e := until $replicas -}} 221 | {{- if ne $i 0 -}} 222 | {{- printf "%s-%d.%s.%s:8485;" $service $i $service $domain -}} 223 | {{- end -}} 224 | {{- end -}} 225 | {{- range $i, $e := until $replicas -}} 226 | {{- if eq $i 0 -}} 227 | {{- printf "%s-%d.%s.%s:8485" $service $i $service $domain -}} 228 | {{- end -}} 229 | {{- end -}} 230 | {{- end -}} 231 | 232 | {{/* 233 | Construct the name of the namenode pod 0. 234 | */}} 235 | {{- define "namenode-pod-0" -}} 236 | {{- template "hdfs-k8s.namenode.fullname" . -}}-0 237 | {{- end -}} 238 | 239 | {{/* 240 | Construct the full name of the namenode statefulset member 0. 241 | */}} 242 | {{- define "namenode-svc-0" -}} 243 | {{- $pod := include "namenode-pod-0" . -}} 244 | {{- $service := include "hdfs-k8s.namenode.fullname" . -}} 245 | {{- $domain := include "svc-domain" . -}} 246 | {{- printf "%s.%s.%s" $pod $service $domain -}} 247 | {{- end -}} 248 | 249 | {{/* 250 | Construct the name of the namenode pod 1. 251 | */}} 252 | {{- define "namenode-pod-1" -}} 253 | {{- template "hdfs-k8s.namenode.fullname" . -}}-1 254 | {{- end -}} 255 | 256 | {{/* 257 | Construct the full name of the namenode statefulset member 1. 258 | */}} 259 | {{- define "namenode-svc-1" -}} 260 | {{- $pod := include "namenode-pod-1" . -}} 261 | {{- $service := include "hdfs-k8s.namenode.fullname" . -}} 262 | {{- $domain := include "svc-domain" . 
-}} 263 | {{- printf "%s.%s.%s" $pod $service $domain -}} 264 | {{- end -}} 265 | -------------------------------------------------------------------------------- /charts/hdfs-k8s/values.yaml: -------------------------------------------------------------------------------- 1 | ## ------------------------------------------------------------------------------ 2 | ## zookeeper: 3 | ## ------------------------------------------------------------------------------ 4 | zookeeper: 5 | ## Configure Zookeeper resource requests and limits 6 | ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ 7 | resources: ~ 8 | 9 | ## The JVM heap size to allocate to Zookeeper 10 | env: 11 | ZK_HEAP_SIZE: 1G 12 | 13 | ## The number of zookeeper server to have in the quorum. 14 | replicaCount: 3 15 | 16 | ## ------------------------------------------------------------------------------ 17 | ## hdfs-config-k8s: 18 | ## ------------------------------------------------------------------------------ 19 | hdfs-config-k8s: 20 | ## Custom hadoop config keys passed to the hdfs configmap as extra keys. 21 | customHadoopConfig: 22 | coreSite: {} 23 | ## Set config key and value pairs, e.g. 24 | # hadoop.http.authentication.type: kerberos 25 | 26 | hdfsSite: {} 27 | ## Set config key and value pairs, e.g. 28 | # dfs.datanode.use.datanode.hostname: "false" 29 | 30 | ## ------------------------------------------------------------------------------ 31 | ## hdfs-journalnode-k8s: 32 | ## ------------------------------------------------------------------------------ 33 | hdfs-journalnode-k8s: 34 | persistence: 35 | ## Persistent Volume Storage Class 36 | ## If defined, storageClassName: 37 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 38 | ## If undefined (the default) or set to null, no storageClassName spec is 39 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 40 | ## GKE, AWS & OpenStack) 41 | ## 42 | # storageClass: "-" 43 | ## To choose a suitable persistent volume from available static volumes, selectors 44 | ## are used. 45 | # selector: 46 | # matchLabels: 47 | # volume-type: hdfs-ssd 48 | accessMode: ReadWriteOnce 49 | size: 20Gi 50 | 51 | ## Node labels and tolerations for pod assignment 52 | nodeSelector: {} 53 | tolerations: [] 54 | affinity: {} 55 | 56 | ## ------------------------------------------------------------------------------ 57 | ## hdfs-namenode-k8s: 58 | ## ------------------------------------------------------------------------------ 59 | hdfs-namenode-k8s: 60 | ## Name of the namenode start script in the config map. 61 | namenodeStartScript: format-and-run.sh 62 | 63 | ## A namenode start script that can have user specified content. 64 | ## Can be used to conduct ad-hoc operation as specified by a user. 65 | ## To use this, also set the namenodeStartScript variable above 66 | ## to custom-run.sh. 67 | customRunScript: | 68 | #!/bin/bash -x 69 | echo Write your own script content! 70 | echo This message will disappear in 10 seconds. 71 | sleep 10 72 | 73 | persistence: 74 | ## Persistent Volume Storage Class 75 | ## If defined, storageClassName: 76 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 77 | ## If undefined (the default) or set to null, no storageClassName spec is 78 | ## set, choosing the default provisioner. 
(gp2 on AWS, standard on 79 | ## GKE, AWS & OpenStack) 80 | ## 81 | # storageClass: "-" 82 | 83 | ## To choose a suitable persistent volume from available static volumes, selectors 84 | ## are used. 85 | # selector: 86 | # matchLabels: 87 | # volume-type: hdfs-ssd 88 | 89 | accessMode: ReadWriteOnce 90 | 91 | size: 100Gi 92 | 93 | ## Whether or not to use hostNetwork in namenode pods. Disabling this will break 94 | ## data locality as namenode will see pod virtual IPs and fails to equate them with 95 | ## cluster node physical IPs associated with data nodes. 96 | ## We currently disable this only for CI on minikube. 97 | hostNetworkEnabled: true 98 | 99 | ## Node labels and tolerations for pod assignment 100 | nodeSelector: {} 101 | tolerations: [] 102 | affinity: {} 103 | 104 | ## ------------------------------------------------------------------------------ 105 | ## hdfs-simple-namenode-k8s: 106 | ## ------------------------------------------------------------------------------ 107 | hdfs-simple-namenode-k8s: 108 | ## Path of the local disk directory on a cluster node that will contain the namenode 109 | ## fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath 110 | ## volume. 111 | nameNodeHostPath: /hdfs-name 112 | 113 | ## Node labels and tolerations for pod assignment 114 | nodeSelector: {} 115 | tolerations: [] 116 | affinity: {} 117 | 118 | ## ------------------------------------------------------------------------------ 119 | ## hdfs-datanode-k8s: 120 | ## ------------------------------------------------------------------------------ 121 | hdfs-datanode-k8s: 122 | ## Node labels and tolerations for pod assignment 123 | nodeSelector: {} 124 | tolerations: [] 125 | affinity: {} 126 | 127 | ## ------------------------------------------------------------------------------ 128 | ## hdfs-krb5-k8s: 129 | ## ------------------------------------------------------------------------------ 130 | hdfs-krb5-k8s: 131 | persistence: 132 | ## Persistent Volume Storage Class 133 | ## If defined, storageClassName: 134 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 135 | ## If undefined (the default) or set to null, no storageClassName spec is 136 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 137 | ## GKE, AWS & OpenStack) 138 | ## 139 | # storageClass: "-" 140 | 141 | ## To choose a suitable persistent volume from available static volumes, selectors 142 | ## are used. 143 | # selector: 144 | # matchLabels: 145 | # volume-type: hdfs-ssd 146 | 147 | accessMode: ReadWriteOnce 148 | 149 | size: 20Gi 150 | 151 | ## We use a 3rd party image built from https://github.com/gcavalcante8808/docker-krb5-server. 152 | ## TODO: The pod currently prints out the admin account in plain text. 153 | ## Supply an admin account password using a k8s secret. 154 | ## TODO: The auto-generated passwords might be weak due to low entropy. 155 | ## Increase entropy by running rngd or haveged. 156 | ## TODO: Using latest tag is not desirable. The current image does not have specific tags. 157 | ## Find a way to fix it. 
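  ## Until the upstream image publishes versioned tags, a specific tag can be
  ## pinned at install time if you mirror the image yourself, for example:
  ##   helm install ... --set hdfs-krb5-k8s.image.tag=<your-tag>
  ## (illustrative only; <your-tag> is a placeholder, not a published tag)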
158 | image: 159 | repository: gcavalcante8808/krb5-server 160 | 161 | tag: latest 162 | 163 | pullPolicy: IfNotPresent 164 | 165 | service: 166 | type: ClusterIP 167 | 168 | port: 88 169 | ## ------------------------------------------------------------------------------ 170 | ## Global values affecting all sub-charts: 171 | ## ------------------------------------------------------------------------------ 172 | global: 173 | ## A list of the local disk directories on cluster nodes that will contain the datanode 174 | ## blocks. These paths will be mounted to the datanode as K8s HostPath volumes. 175 | ## In a command line, the list should be enclosed in '{' and '}'. 176 | ## e.g. --set "dataNodeHostPath={/hdfs-data,/hdfs-data1}" 177 | dataNodeHostPath: 178 | - /hdfs-data 179 | 180 | ## Parameters for determining which Unix user and group IDs to use in pods. 181 | ## Persistent volume permission may need to match these. 182 | podSecurityContext: 183 | enabled: false 184 | runAsUser: 0 185 | fsGroup: 1000 186 | 187 | ## Whether or not to expect namenodes in the HA setup. 188 | namenodeHAEnabled: true 189 | 190 | ## The number of zookeeper server to have in the quorum. 191 | ## This should match zookeeper.replicaCount above. Used only when 192 | ## namenodeHAEnabled is set. 193 | zookeeperQuorumSize: 3 194 | 195 | ## Override zookeeper quorum address. Zookeeper is used for determining which namenode 196 | ## instance is active. Separated by the comma character. Used only when 197 | ## namenodeHAEnabled is set. 198 | ## 199 | # zookeeperQuorumOverride: zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 200 | 201 | ## How many journal nodes to launch as a quorum. Used only when 202 | ## namenodeHAEnabled is set. 203 | journalnodeQuorumSize: 3 204 | 205 | ## Whether or not to enable default affinity setting. 206 | defaultAffinityEnabled: true 207 | 208 | ## Whether or not Kerberos support is enabled. 209 | kerberosEnabled: false 210 | 211 | ## Effective only if Kerberos is enabled. Override th name of the k8s 212 | ## ConfigMap containing the kerberos config file. 213 | ## 214 | # kerberosConfigMapOverride: kerberos-config 215 | 216 | ## Effective only if Kerberos is enabled. Name of the kerberos config file inside 217 | ## the config map. 218 | kerberosConfigFileName: krb5.conf 219 | 220 | ## Effective only if Kerberos is enabled. Override the name of the k8s Secret 221 | ## containing the kerberos keytab files of per-host HDFS principals. 222 | ## The secret should have multiple data items. Each data item name 223 | ## should be formatted as: 224 | ## `HOST-NAME.keytab` 225 | ## where HOST-NAME should match the cluster node 226 | ## host name that each per-host hdfs principal is associated with. 227 | ## 228 | # kerberosKeytabsSecretOverride: hdfs-kerberos-keytabs 229 | 230 | ## Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name. 231 | ## This should match the realm name in your Kerberos config file. 232 | kerberosRealm: MYCOMPANY.COM 233 | 234 | ## Effective only if Kerberos is enabled. Enable protection of datanodes using 235 | ## the jsvc utility. See the reference doc at 236 | ## https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode 237 | jsvcEnabled: true 238 | 239 | ## Tags and conditions for triggering a group of relevant subcharts. 240 | tags: 241 | ## Trigger all subcharts required for high availability. Enabled by default. 
242 | ha: true 243 | 244 | ## Trigger all subcharts required for using Kerberos. Disabled by default. 245 | kerberos: false 246 | 247 | ## Trigger all subcharts required for non-HA setup. Disabled by default. 248 | simple: false 249 | -------------------------------------------------------------------------------- /charts/hdfs-krb5-k8s/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/hdfs-krb5-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-krb5-k8s 3 | version: 0.1.0 4 | description: Kerberos server that can be used for HDFS on Kubernetes. 5 | -------------------------------------------------------------------------------- /charts/hdfs-krb5-k8s/templates/statefulset.yaml: -------------------------------------------------------------------------------- 1 | # A headless service to create DNS records. 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "hdfs-k8s.krb5.fullname" . }} 6 | labels: 7 | app: {{ template "hdfs-k8s.krb5.name" . }} 8 | chart: {{ template "hdfs-k8s.subchart" . }} 9 | release: {{ .Release.Name }} 10 | annotations: 11 | # TODO: Deprecated. Replace tolerate-unready-endpoints with 12 | # v1.Service.PublishNotReadyAddresses. 13 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 14 | spec: 15 | ports: 16 | - port: {{ .Values.service.port }} 17 | protocol: TCP 18 | name: kdc-tcp 19 | - port: {{ .Values.service.port }} 20 | protocol: UDP 21 | name: kdc-udp 22 | clusterIP: None 23 | selector: 24 | app: {{ template "hdfs-k8s.krb5.name" . }} 25 | release: {{ .Release.Name }} 26 | --- 27 | apiVersion: apps/v1beta1 28 | kind: StatefulSet 29 | metadata: 30 | name: {{ template "hdfs-k8s.krb5.fullname" . }} 31 | labels: 32 | app: {{ template "hdfs-k8s.krb5.name" . }} 33 | chart: {{ template "hdfs-k8s.subchart" . }} 34 | release: {{ .Release.Name }} 35 | spec: 36 | serviceName: {{ template "hdfs-k8s.krb5.fullname" . }} 37 | replicas: {{ .Values.replicaCount }} 38 | selector: 39 | matchLabels: 40 | app: {{ template "hdfs-k8s.krb5.name" . }} 41 | release: {{ .Release.Name }} 42 | template: 43 | metadata: 44 | labels: 45 | app: {{ template "hdfs-k8s.krb5.name" . }} 46 | release: {{ .Release.Name }} 47 | {{- if .Values.podAnnotations }} 48 | annotations: 49 | {{ toYaml .Values.podAnnotations | indent 8 }} 50 | {{- end }} 51 | spec: 52 | containers: 53 | - name: {{ .Chart.Name }} 54 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 55 | imagePullPolicy: {{ .Values.image.pullPolicy }} 56 | env: 57 | - name: KRB5_REALM 58 | value: {{ .Values.global.kerberosRealm }} 59 | - name: KRB5_KDC 60 | value: {{ template "krb5-svc-0" . 
}} 61 | ports: 62 | - name: kdc-tcp 63 | containerPort: 88 64 | protocol: TCP 65 | - name: kdc-udp 66 | containerPort: 88 67 | protocol: UDP 68 | livenessProbe: 69 | tcpSocket: 70 | port: kdc-tcp 71 | readinessProbe: 72 | tcpSocket: 73 | port: kdc-tcp 74 | restartPolicy: Always 75 | {{- if .Values.global.podSecurityContext.enabled }} 76 | securityContext: 77 | runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} 78 | fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} 79 | {{- end }} 80 | volumeClaimTemplates: 81 | - metadata: 82 | name: datadir 83 | spec: 84 | accessModes: 85 | - {{ .Values.persistence.accessMode | quote }} 86 | resources: 87 | requests: 88 | storage: {{ .Values.persistence.size | quote }} 89 | {{- if .Values.persistence.storageClass }} 90 | {{- if (eq "-" .Values.persistence.storageClass) }} 91 | storageClassName: "" 92 | {{- else }} 93 | storageClassName: "{{ .Values.persistence.storageClass }}" 94 | {{- end }} 95 | {{- end }} 96 | {{- if .Values.persistence.selector }} 97 | selector: 98 | {{ toYaml .Values.persistence.selector | indent 10 }} 99 | {{- end }} 100 | -------------------------------------------------------------------------------- /charts/hdfs-namenode-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-namenode-k8s 3 | version: 0.1.0 4 | description: namenodes in HDFS on Kubernetes. 5 | -------------------------------------------------------------------------------- /charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml: -------------------------------------------------------------------------------- 1 | # A headless service to create DNS records. 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "hdfs-k8s.namenode.fullname" . }} 6 | labels: 7 | app: {{ template "hdfs-k8s.namenode.name" . }} 8 | chart: {{ template "hdfs-k8s.subchart" . }} 9 | release: {{ .Release.Name }} 10 | annotations: 11 | # TODO: Deprecated. Replace tolerate-unready-endpoints with 12 | # v1.Service.PublishNotReadyAddresses. 13 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 14 | spec: 15 | ports: 16 | - port: 8020 17 | name: fs 18 | - port: 50070 19 | name: http 20 | clusterIP: None 21 | selector: 22 | app: {{ template "hdfs-k8s.namenode.name" . }} 23 | release: {{ .Release.Name }} 24 | --- 25 | apiVersion: policy/v1beta1 26 | kind: PodDisruptionBudget 27 | metadata: 28 | name: {{ template "hdfs-k8s.namenode.fullname" . }} 29 | labels: 30 | app: {{ template "hdfs-k8s.namenode.name" . }} 31 | chart: {{ template "hdfs-k8s.subchart" . }} 32 | release: {{ .Release.Name }} 33 | spec: 34 | selector: 35 | matchLabels: 36 | app: {{ template "hdfs-k8s.namenode.name" . }} 37 | release: {{ .Release.Name }} 38 | minAvailable: 1 39 | --- 40 | # Provides namenode helper scripts. Most of them are start scripts 41 | # that meet different needs. 42 | # TODO: Support upgrade of metadata in case a new Hadoop version requires it. 43 | apiVersion: v1 44 | kind: ConfigMap 45 | metadata: 46 | name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts 47 | labels: 48 | app: {{ template "hdfs-k8s.namenode.name" . }} 49 | chart: {{ template "hdfs-k8s.subchart" . }} 50 | release: {{ .Release.Name }} 51 | data: 52 | # A bootstrap script which will start namenode daemons after conducting 53 | # optional metadata initialization steps. 
The metadata initialization 54 | # steps will take place in case the metadata dir is empty, 55 | # which will be the case only for the very first run. The specific steps 56 | # will differ depending on whether the namenode is active or standby. 57 | # We also assume, for the very first run, namenode-0 will be active and 58 | # namenode-1 will be standby as StatefulSet will launch namenode-0 first 59 | # and zookeeper will determine the sole namenode to be the active one. 60 | # For active namenode, the initialization steps will format the metadata, 61 | # zookeeper dir and journal node data entries. 62 | # For standby namenode, the initialization steps will simply receieve 63 | # the first batch of metadata updates from the journal node. 64 | format-and-run.sh: | 65 | #!/usr/bin/env bash 66 | # Exit on error. Append "|| true" if you expect an error. 67 | set -o errexit 68 | # Exit on error inside any functions or subshells. 69 | set -o errtrace 70 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 71 | set -o nounset 72 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 73 | # in `mysqldump |gzip` 74 | set -o pipefail 75 | # Turn on traces, useful while debugging. 76 | set -o xtrace 77 | 78 | _HDFS_BIN=$HADOOP_PREFIX/bin/hdfs 79 | _METADATA_DIR=/hadoop/dfs/name/current 80 | if [[ "$MY_POD" = "$NAMENODE_POD_0" ]]; then 81 | if [[ ! -d $_METADATA_DIR ]]; then 82 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -format \ 83 | -nonInteractive hdfs-k8s || 84 | (rm -rf $_METADATA_DIR; exit 1) 85 | fi 86 | _ZKFC_FORMATTED=/hadoop/dfs/name/current/.hdfs-k8s-zkfc-formatted 87 | if [[ ! -f $_ZKFC_FORMATTED ]]; then 88 | _OUT=$($_HDFS_BIN --config $HADOOP_CONF_DIR zkfc -formatZK -nonInteractive 2>&1) 89 | # zkfc masks fatal exceptions and returns exit code 0 90 | (echo $_OUT | grep -q "FATAL") && exit 1 91 | touch $_ZKFC_FORMATTED 92 | fi 93 | elif [[ "$MY_POD" = "$NAMENODE_POD_1" ]]; then 94 | if [[ ! -d $_METADATA_DIR ]]; then 95 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -bootstrapStandby \ 96 | -nonInteractive || \ 97 | (rm -rf $_METADATA_DIR; exit 1) 98 | fi 99 | fi 100 | $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start zkfc 101 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode 102 | 103 | # A start script that will just hang indefinitely. A user can then get 104 | # inside the pod and debug. Or a user can conduct a custom manual operations. 105 | do-nothing.sh: | 106 | #!/usr/bin/env bash 107 | tail -f /var/log/dmesg 108 | 109 | # A start script that has user specified content. Can be used to conduct 110 | # ad-hoc operation as specified by a user. 111 | custom-run.sh: {{ .Values.customRunScript | quote }} 112 | --- 113 | apiVersion: apps/v1beta1 114 | kind: StatefulSet 115 | metadata: 116 | name: {{ template "hdfs-k8s.namenode.fullname" . }} 117 | labels: 118 | app: {{ template "hdfs-k8s.namenode.name" . }} 119 | chart: {{ template "hdfs-k8s.subchart" . }} 120 | release: {{ .Release.Name }} 121 | spec: 122 | serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} 123 | replicas: 2 124 | template: 125 | metadata: 126 | labels: 127 | app: {{ template "hdfs-k8s.namenode.name" . }} 128 | release: {{ .Release.Name }} 129 | {{- if .Values.podAnnotations }} 130 | annotations: 131 | {{ toYaml .Values.podAnnotations | indent 8 }} 132 | {{- end }} 133 | spec: 134 | {{- if .Values.hostNetworkEnabled }} 135 | # Use hostNetwork so datanodes connect to namenode without going through an overlay network 136 | # like weave. 
Otherwise, namenode fails to see physical IP address of datanodes. 137 | # Disabling this will break data locality as namenode will see pod virtual IPs and fails to 138 | # equate them with cluster node physical IPs associated with data nodes. 139 | # We currently disable this only for CI on minikube. 140 | hostNetwork: true 141 | hostPID: true 142 | dnsPolicy: ClusterFirstWithHostNet 143 | {{- else }} 144 | dnsPolicy: ClusterFirst 145 | {{- end }} 146 | {{- if .Values.affinity }} 147 | affinity: 148 | {{ toYaml .Values.affinity | indent 8 }} 149 | {{- else if .Values.global.defaultAffinityEnabled }} 150 | affinity: 151 | podAntiAffinity: 152 | requiredDuringSchedulingIgnoredDuringExecution: 153 | - labelSelector: 154 | matchExpressions: 155 | - key: "app" 156 | operator: In 157 | values: 158 | - {{ template "hdfs-k8s.namenode.name" . }} 159 | - key: "release" 160 | operator: In 161 | values: 162 | - {{ .Release.Name }} 163 | topologyKey: "kubernetes.io/hostname" 164 | {{- end }} 165 | {{- if .Values.nodeSelector }} 166 | nodeSelector: 167 | {{ toYaml .Values.nodeSelector | indent 8 }} 168 | {{- end }} 169 | {{- if .Values.tolerations }} 170 | tolerations: 171 | {{ toYaml .Values.tolerations | indent 8 }} 172 | {{- end }} 173 | containers: 174 | # TODO: Support hadoop version as option. 175 | - name: hdfs-namenode 176 | image: uhopper/hadoop-namenode:2.7.2 177 | env: 178 | - name: HADOOP_CUSTOM_CONF_DIR 179 | value: /etc/hadoop-custom-conf 180 | - name: MULTIHOMED_NETWORK 181 | value: "0" 182 | # Used by the start script below. 183 | - name: MY_POD 184 | valueFrom: 185 | fieldRef: 186 | fieldPath: metadata.name 187 | - name: NAMENODE_POD_0 188 | value: {{ template "namenode-pod-0" . }} 189 | - name: NAMENODE_POD_1 190 | value: {{ template "namenode-pod-1" . }} 191 | command: ['/bin/sh', '-c'] 192 | # The start script is provided by a config map. 193 | args: 194 | - /entrypoint.sh "/nn-scripts/{{ .Values.namenodeStartScript }}" 195 | ports: 196 | - containerPort: 8020 197 | name: fs 198 | - containerPort: 50070 199 | name: http 200 | volumeMounts: 201 | - name: nn-scripts 202 | mountPath: /nn-scripts 203 | readOnly: true 204 | # Mount a subpath of the volume so that the name subdir would be a 205 | # brand new empty dir. This way, we won't get affected by existing 206 | # files in the volume top dir. 
207 | - name: metadatadir 208 | mountPath: /hadoop/dfs/name 209 | subPath: name 210 | - name: hdfs-config 211 | mountPath: /etc/hadoop-custom-conf 212 | readOnly: true 213 | {{- if .Values.global.kerberosEnabled }} 214 | - name: kerberos-config 215 | mountPath: /etc/krb5.conf 216 | subPath: {{ .Values.global.kerberosConfigFileName }} 217 | readOnly: true 218 | - name: kerberos-keytab-copy 219 | mountPath: /etc/security/ 220 | readOnly: true 221 | {{- end }} 222 | {{- if .Values.global.kerberosEnabled }} 223 | initContainers: 224 | - name: copy-kerberos-keytab 225 | image: busybox:1.27.1 226 | command: ['sh', '-c'] 227 | args: 228 | - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab 229 | env: 230 | - name: MY_KERBEROS_NAME 231 | valueFrom: 232 | fieldRef: 233 | {{- if .Values.hostNetworkEnabled }} 234 | fieldPath: spec.nodeName 235 | {{- else }} 236 | fieldPath: metadata.name 237 | {{- end }} 238 | volumeMounts: 239 | - name: kerberos-keytabs 240 | mountPath: /kerberos-keytabs 241 | - name: kerberos-keytab-copy 242 | mountPath: /kerberos-keytab-copy 243 | {{- end }} 244 | restartPolicy: Always 245 | volumes: 246 | - name: nn-scripts 247 | configMap: 248 | name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts 249 | defaultMode: 0744 250 | - name: hdfs-config 251 | configMap: 252 | name: {{ template "hdfs-k8s.config.fullname" . }} 253 | {{- if .Values.global.kerberosEnabled }} 254 | - name: kerberos-config 255 | configMap: 256 | name: {{ template "krb5-configmap" . }} 257 | - name: kerberos-keytabs 258 | secret: 259 | secretName: {{ template "krb5-keytabs-secret" . }} 260 | - name: kerberos-keytab-copy 261 | emptyDir: {} 262 | {{- end }} 263 | {{- if .Values.global.podSecurityContext.enabled }} 264 | securityContext: 265 | runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} 266 | fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} 267 | {{- end }} 268 | volumeClaimTemplates: 269 | - metadata: 270 | name: metadatadir 271 | spec: 272 | accessModes: 273 | - {{ .Values.persistence.accessMode | quote }} 274 | resources: 275 | requests: 276 | storage: {{ .Values.persistence.size | quote }} 277 | {{- if .Values.persistence.storageClass }} 278 | {{- if (eq "-" .Values.persistence.storageClass) }} 279 | storageClassName: "" 280 | {{- else }} 281 | storageClassName: "{{ .Values.persistence.storageClass }}" 282 | {{- end }} 283 | {{- end }} 284 | {{- if .Values.persistence.selector }} 285 | selector: 286 | {{ toYaml .Values.persistence.selector | indent 10 }} 287 | {{- end }} 288 | -------------------------------------------------------------------------------- /charts/hdfs-simple-namenode-k8s/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: hdfs-simple-namenode-k8s 3 | version: 0.1.0 4 | description: Non-HA namenode for HDFS on Kubernetes. 5 | -------------------------------------------------------------------------------- /charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml: -------------------------------------------------------------------------------- 1 | # A headless service to create DNS records. 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ template "hdfs-k8s.namenode.fullname" . }} 6 | labels: 7 | app: {{ template "hdfs-k8s.namenode.name" . }} 8 | chart: {{ template "hdfs-k8s.subchart" . 
}} 9 | release: {{ .Release.Name }} 10 | spec: 11 | ports: 12 | - port: 8020 13 | name: fs 14 | clusterIP: None 15 | selector: 16 | app: {{ template "hdfs-k8s.namenode.name" . }} 17 | release: {{ .Release.Name }} 18 | --- 19 | apiVersion: apps/v1beta1 20 | kind: StatefulSet 21 | metadata: 22 | name: {{ template "hdfs-k8s.namenode.fullname" . }} 23 | labels: 24 | app: {{ template "hdfs-k8s.namenode.name" . }} 25 | chart: {{ template "hdfs-k8s.subchart" . }} 26 | release: {{ .Release.Name }} 27 | spec: 28 | serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} 29 | # Create a size-1 set. 30 | replicas: 1 31 | template: 32 | metadata: 33 | labels: 34 | app: {{ template "hdfs-k8s.namenode.name" . }} 35 | release: {{ .Release.Name }} 36 | {{- if .Values.podAnnotations }} 37 | annotations: 38 | {{ toYaml .Values.podAnnotations | indent 8 }} 39 | {{- end }} 40 | spec: 41 | {{- if .Values.affinity }} 42 | affinity: 43 | {{ toYaml .Values.affinity | indent 8 }} 44 | {{- end }} 45 | {{- if .Values.nodeSelector }} 46 | nodeSelector: 47 | {{ toYaml .Values.nodeSelector | indent 8 }} 48 | {{- end }} 49 | {{- if .Values.tolerations }} 50 | tolerations: 51 | {{ toYaml .Values.tolerations | indent 8 }} 52 | {{- end }} 53 | # Use hostNetwork so datanodes connect to namenode without going through an overlay network 54 | # like weave. Otherwise, namenode fails to see physical IP address of datanodes. 55 | hostNetwork: true 56 | hostPID: true 57 | dnsPolicy: ClusterFirstWithHostNet 58 | containers: 59 | - name: hdfs-namenode 60 | image: uhopper/hadoop-namenode:2.7.2 61 | env: 62 | - name: HADOOP_CUSTOM_CONF_DIR 63 | value: /etc/hadoop-custom-conf 64 | - name: CLUSTER_NAME 65 | value: hdfs-k8s 66 | ports: 67 | - containerPort: 8020 68 | name: fs 69 | volumeMounts: 70 | - name: hdfs-name 71 | mountPath: /hadoop/dfs/name 72 | - name: hdfs-config 73 | mountPath: /etc/hadoop-custom-conf 74 | readOnly: true 75 | restartPolicy: Always 76 | volumes: 77 | - name: hdfs-name 78 | hostPath: 79 | path: {{ .Values.nameNodeHostPath }} 80 | - name: hdfs-config 81 | configMap: 82 | name: {{ template "hdfs-k8s.config.fullname" . }} 83 | -------------------------------------------------------------------------------- /designs/journal-approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache-spark-on-k8s/kubernetes-HDFS/a28441b96e76e750193ddf020e2c86756af9e2c9/designs/journal-approach.png -------------------------------------------------------------------------------- /designs/namenode-HA.md: -------------------------------------------------------------------------------- 1 | # Namenode HA for HDFS on K8s 2 | 3 | ## Goals 4 | 5 | 1. Adopt one of existing namenode HA solutions and make it fit for HDFS on K8s: 6 | There are two HA solutions: an old NFS-based solution, and a new one based on 7 | the Quorum Journal Service. We are leaning toward the journal-based solution. 8 | We’ll discuss the details below. 9 | 2. Keep HDFS on K8s easy to use: The current HDFS on K8s is known to be easy to 10 | set up, thanks to automations allowed by Kubernetes and Helm. We’d like to 11 | keep it that way even for the HA setup. 12 | 13 | ## Existing Namenode HA solutions 14 | ### Terminology 15 | 16 | - Primary namenode: A central daemon used in a non-HA setup that maintains the 17 | file system metadata. 18 | - Secondary namenode: The other namenode daemon instance used in a non-HA setup 19 | that runs along with the primary namenode. 
The secondary namenode creates new 20 | snapshots of namenode metadata by merging incremental updates. 21 | - Active namenode: A namenode instance used in a HA setup that is in charge of 22 | maintaining the file system metadata. 23 | - Standby namenode: The other namenode instance used in a HA setup that runs 24 | along with the active namenode. The standby namenode listens to metadata 25 | updates made by the active namenode and gets ready to take over in case the 26 | active namenode crashes. 27 | 28 | ### Namenode metadata 29 | 30 | The namenode daemon maintains the file system metadata such as which directories 31 | have which files, file ownership, which datanode daemons have blocks of those 32 | files, etc. 33 | 34 | NN manipulates the metadata mostly in memory. But it has to persist them to 35 | disks for **crash safety**. I.e. Avoid losing metadata when the NN crashes or 36 | restarts. 37 | 38 | There are two disk files that NN writes: 39 | 1. Snapshot of the metadata dumped at a time point in the past. This is called 40 | **fsimage**. 41 | 2. Incremental updates since the snapshot time point. In non-HA setup, the 42 | updates are appended to a local file called **editlog**. (In journal-based 43 | HA, editlog is stored on shared network service) 44 | 45 | The editlog is later merged into a new fsimage snapshot, starting a new cycle. 46 | 47 | ![namenode metadata](namenode-metadata.png) 48 | 49 | Another important piece of metadata, the mapping of which datanodes have which 50 | file blocks, is *not* written to disk. After restart, NN rebuilds this mapping 51 | from datanode heartbeat messages. This takes a while and it is one of the 52 | reasons why restarting NN is slow. 53 | 54 | ### HA solution choices 55 | 56 | In the HA setup, there are two NN instances: an active NN and a standby NN. The 57 | active NN handles clients’ requests and modifies the filesystem metadata. The 58 | modification goes to the editlog file. This editlog should be shared with the 59 | standby NN so that it can also have up-to-date metadata and quickly become the 60 | active NN when the prior active NN crashes. 61 | 62 | Hadoop has two HA solutions, mainly based on how exactly the editlog is shared 63 | with the standby NN: 64 | 65 | 1. An old NFS-based solution described at 66 | https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html. 67 | The editlog is placed in a NFS volume that both the active and standby NN 68 | have access to. 69 | 1. a new one based on the Quorum Journal Service at 70 | https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html. 71 | The Journal Service is zookeeper-like service that has an odd number of 72 | backing servers. The active NN writes to the journal service, while the 73 | standby listens to the service. For each metadata update, the majority of the 74 | journal servers should agree on the change. 75 | 76 | The NFS approach has a flaw around the split-brain scenario. When both NNs think 77 | they are active, they will write to the editlog simultaneously, corrupting the 78 | file. So the NFS approach relies on forcibly shutting down one of the NNs. 79 | (Called fencing) And this requires special HWs at the BIOS or power switch 80 | level. Most people don’t like this extra requirement. 81 | 82 | The Quorum Journal Service solves the split-brain issue at the service level. 83 | The service only honors one writer at a given time point. 
So no need to have 84 | special hardware for fencing. (Some soft fencing is still recommended to prevent 85 | the rogue NN from continuing to serve lingering read clients) To use the journal 86 | service, each NN host needs to run a client for it called Quorum Journal 87 | Manager. The journal manager with an active NN registers with the journal 88 | servers using a unique epoch number. Write requests come with the epoch number 89 | and they will be rejected if their epoch number is smaller than the servers 90 | expect. This way, they can reject requests from a rogue, previously active, NN 91 | with old epoch number. More details can be found at 92 | http://johnjianfang.blogspot.com/2015/02/quorum-journal-manager-part-i-protocol.html. 93 | 94 | For HDFS on K8s, we are leaning toward the journal manager approach. 95 | 96 | ![journal-approach](journal-approach.png) 97 | 98 | 99 | ### Other HA aspects 100 | 101 | The standby NN does one more thing. It also merges the editlog into a new 102 | fsimage snapshot. And sends the new snapshot to the active NN via HTTP, so that 103 | they can drop earlier updates in the editlog. (For non-HA setup, this can be 104 | done by another special NN instance, called the **secondary** NN. But in HA, the 105 | standby NN will do that for us) 106 | 107 | We said earlier that the block-to-datanode mapping is not persisted. So 108 | datanodes actually send heartbeats with the block mapping to both NNs, so that 109 | the standby NN can become active right away. 110 | 111 | Clients also are aware of both NNs. There is a client-side library that will 112 | figure out who to talk to. 113 | 114 | Automatic failover to a new active NN requires a zookeeper service, which needs 115 | an odd number of instances. (This is in addition to the journal manager, which 116 | is similar to zookeeper but not same). For this, the NN hosts should run an 117 | extra zookeeper client called Zookeeper Failover Controller. The controller 118 | monitors the health of the local NN and communicate with the zookeeper service 119 | in the right way so that the failing active NN can release the zookeeper lock to 120 | the standby NN. 121 | 122 | ## Namenode HA design for HDFS on K8s 123 | 124 | So we need three K8s services for the HA setup. 125 | 126 | 1. Namenode service with two NNs 127 | 2. Journal service with an odd number of journal servers 128 | 3. Zookeeper with an odd number of servers. 129 | 130 | For each of these, we’ll use a stateful set of a corresponding size. For 131 | Zookeeper, we already have a helm chart in 132 | https://github.com/kubernetes/contrib/tree/master/statefulsets/zookeeper. So we 133 | can reuse it. Each Zookeeper server writes its data to a persistent volume. 134 | 135 | For journal servers, we need to write a new helm chart. This can be modeled 136 | after the zookeeper helm chart. This should be straightforward. 137 | 138 | For NN, we have a helm chart for non-HA setup at 139 | https://github.com/apache-spark-on-k8s/kubernetes-HDFS/tree/master/charts/hdfs-namenode-k8s, 140 | which uses a statefulset of size 1. We can extend this to support HA setup as an 141 | option. We’ll have to do the following work: 142 | 143 | 1. The statefulset size is currently one. Extend it to two. 144 | 2. Add all config options described at 145 | https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html. 146 | This includes the config key for using the Quorum Journal servers as editlog 147 | destination. 148 | 3. 
Add a container to each NN pod for running the Zookeeper Failover Controller. 149 | 4. Optionally, use persistent volumes for storing fsimage files. 150 | 151 | What is notably missing is support for fencing, which we discussed above. We will 152 | leave this as an open problem that we may address in a later version. 153 | 154 | Item (4) is significant because the NN pod in the non-HA setup stores the 155 | fsimage file on a HostPath volume. We also pin the NN to a particular K8s node 156 | using a K8s node label to make sure a restarted NN can find the right fsimage 157 | file. Hopefully, we can remove the HostPath and node pinning dependencies with 158 | (4). But we want to keep the old behavior as an option, in case people want to 159 | try HDFS on K8s on a very simple setup without persistent volumes and HA. 160 | 161 | People have to upgrade the HDFS software version occasionally, like HDFS 2.7 to 2.8. 162 | Sometimes the metadata format changes and NNs need to convert the metadata to a 163 | new format. Unfortunately, the format upgrade is done in a non-symmetric way. 164 | The active NN should do the format conversion and write the new metadata to the 165 | journal service. Then the standby NN should sync with it upon start. The NN helm 166 | chart for the HA setup should support this in an automated fashion. We think we can 167 | do that using an init container. We'll address this in a later PR. 168 | -------------------------------------------------------------------------------- /designs/namenode-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache-spark-on-k8s/kubernetes-HDFS/a28441b96e76e750193ddf020e2c86756af9e2c9/designs/namenode-metadata.png -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: global 3 | title: HDFS on Kubernetes Integration Tests 4 | --- 5 | 6 | # Running the integration tests 7 | 8 | Note that the integration test framework is currently being heavily revised and 9 | is subject to change. 10 | 11 | The integration tests consist of 4 scripts under the `tests` dir: 12 | 13 | - `setup.sh`: Downloads and starts minikube. Also downloads tools such as 14 | kubectl, helm, etc. 15 | - `run.sh`: Launches the HDFS helm charts on the started minikube instance 16 | and tests the resulting HDFS cluster using an HDFS client. 17 | - `cleanup.sh`: Shuts down the HDFS cluster so that run.sh can be executed 18 | again if necessary. 19 | - `teardown.sh`: Stops the minikube instance so that setup.sh can be executed 20 | again if necessary. 21 | 22 | You can execute these scripts in the listed order to run the integration tests. 23 | These scripts do not require any command line options for the basic 24 | functionality. So an example execution would look like: 25 | 26 | ``` 27 | $ tests/setup.sh 28 | $ tests/run.sh 29 | $ tests/cleanup.sh 30 | $ tests/teardown.sh 31 | ``` 32 | 33 | # Travis CI support 34 | 35 | The repo uses [Travis CI](https://travis-ci.org/) to run the integration tests. 36 | See `.travis.yml` under the top directory. Each new pull request will trigger 37 | a Travis build to test the PR. 38 | 39 | You may want to enable Travis in your own fork before sending pull requests. 40 | You can trigger Travis builds on branches in your fork. 41 | For details, see https://docs.travis-ci.com/.
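Before relying on CI, you can also exercise each test case locally, one at a
time, by combining the `CASES` variable described below with a small loop. The
sketch below is only an illustration; it assumes `tests/setup.sh` has already
been run, uses the case file names found under `tests/cases`, and stops at the
first failure:

```
$ for c in _basic.sh _basic-subcharts.sh _kerberos.sh _single-namenode.sh; do
    CASES=$c tests/run.sh && CASES=$c tests/cleanup.sh || break
  done
```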
42 | 43 | # Advanced usage 44 | 45 | ## Re-running tests 46 | 47 | As a contributor to this project, you may have to re-run the tests after 48 | modifying some helm chart code. Among the four steps, `setup.sh` takes the most 49 | time, so you may want to avoid re-running it unless necessary. 50 | 51 | Run `setup.sh` first, followed by `run.sh`: 52 | 53 | ``` 54 | $ tests/setup.sh 55 | $ tests/run.sh 56 | ``` 57 | 58 | Then execute only `cleanup.sh`, i.e. skip `teardown.sh`. The minikube instance 59 | will still be up and running. 60 | 61 | ``` 62 | $ tests/cleanup.sh 63 | ``` 64 | 65 | Then modify helm charts as you want and execute `run.sh` again. 66 | 67 | ``` 68 | $ tests/run.sh 69 | ``` 70 | 71 | Now repeat the `cleanup` and `run` cycle, while modifying helm charts as you 72 | want in between. 73 | 74 | ``` 75 | $ tests/cleanup.sh 76 | ... modify your code ... 77 | $ tests/run.sh 78 | ``` 79 | 80 | Some data are stored in the minikube instance, for example the downloaded 81 | docker images and the persistent volume data. In some cases, you may want to 82 | clean them up. You can run `teardown.sh` and `setup.sh` again to 83 | purge them. 84 | 85 | ## Running only particular test cases 86 | 87 | `run.sh` will enumerate all the test cases under the `tests/cases` dir. You may 88 | want to run only a particular test case, say `tests/cases/_basic.sh`. You 89 | can set the `CASES` env var to run that test case only: 90 | 91 | ``` 92 | $ CASES=_basic.sh tests/run.sh 93 | ``` 94 | 95 | `CASES` can also be set for `cleanup.sh`. 96 | 97 | ``` 98 | $ CASES=_basic.sh tests/cleanup.sh 99 | ``` 100 | 101 | ## Checking the helm chart diff from the dry-run 102 | 103 | Before running `helm install` commands, `run.sh` will also conduct a dry run 104 | and check the expanded K8s resource yaml content from the debug information. 105 | The repo has gold files checked in, and the expanded yaml content will be 106 | compared against the gold files. 107 | 108 | To ensure your change produces no diff, you can set the `CRASH_ON_DIFF` env 109 | var. 110 | 111 | ``` 112 | $ DRY_RUN_ONLY=true CRASH_ON_DIFF=true tests/run.sh 113 | ``` 114 | 115 | To promote your yaml output to new golds, you can set the `BLESS_DIFF` env 116 | var.
117 | 118 | ``` 119 | $ DRY_RUN_ONLY=true BLESS_DIFF=true tests/run.sh 120 | ``` 121 | -------------------------------------------------------------------------------- /tests/cases/_basic-subcharts.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function run_test_case () { 4 | _helm_diff_and_install \ 5 | ${_TEST_DIR}/gold/subchart-zookeeper.gold \ 6 | hdfs-k8s \ 7 | -n my-hdfs-zookeeper \ 8 | --values ${_TEST_DIR}/values/common.yaml \ 9 | --set tags.ha=false \ 10 | --set condition.subchart.zookeeper=true \ 11 | --set zookeeper.fullnameOverride=my-hdfs-zookeeper \ 12 | --set global.fullnameOverride=my-hdfs 13 | 14 | _helm_diff_and_install \ 15 | ${_TEST_DIR}/gold/subchart-config.gold \ 16 | hdfs-k8s \ 17 | -n my-hdfs-config \ 18 | --values ${_TEST_DIR}/values/common.yaml \ 19 | --set tags.ha=false \ 20 | --set condition.subchart.config=true \ 21 | --set global.fullnameOverride=my-hdfs 22 | 23 | _helm_diff_and_install \ 24 | ${_TEST_DIR}/gold/subchart-journalnode.gold \ 25 | hdfs-k8s \ 26 | -n my-hdfs-journalnode \ 27 | --values ${_TEST_DIR}/values/common.yaml \ 28 | --set tags.ha=false \ 29 | --set condition.subchart.journalnode=true \ 30 | --set global.fullnameOverride=my-hdfs 31 | 32 | _helm_diff_and_install \ 33 | ${_TEST_DIR}/gold/subchart-namenode.gold \ 34 | hdfs-k8s \ 35 | -n my-hdfs-namenode \ 36 | --values ${_TEST_DIR}/values/common.yaml \ 37 | --set tags.ha=false \ 38 | --set condition.subchart.namenode=true \ 39 | --set global.fullnameOverride=my-hdfs 40 | 41 | _helm_diff_and_install \ 42 | ${_TEST_DIR}/gold/subchart-datanode.gold \ 43 | hdfs-k8s \ 44 | -n my-hdfs-datanode \ 45 | --values ${_TEST_DIR}/values/common.yaml \ 46 | --set tags.ha=false \ 47 | --set condition.subchart.datanode=true \ 48 | --set global.fullnameOverride=my-hdfs 49 | 50 | _helm_diff_and_install \ 51 | ${_TEST_DIR}/gold/subchart-client.gold \ 52 | hdfs-k8s \ 53 | -n my-hdfs-client \ 54 | --values ${_TEST_DIR}/values/common.yaml \ 55 | --set tags.ha=false \ 56 | --set condition.subchart.client=true \ 57 | --set global.fullnameOverride=my-hdfs 58 | 59 | if [[ "${DRY_RUN_ONLY:-false}" = "true" ]]; then 60 | return 61 | fi 62 | 63 | k8s_single_pod_ready -l app=zookeeper,release=my-hdfs-zookeeper 64 | k8s_all_pods_ready 3 -l app=hdfs-journalnode,release=my-hdfs-journalnode 65 | k8s_all_pods_ready 2 -l app=hdfs-namenode,release=my-hdfs-namenode 66 | k8s_single_pod_ready -l app=hdfs-datanode,release=my-hdfs-datanode 67 | k8s_single_pod_ready -l app=hdfs-client,release=my-hdfs-client 68 | _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs-client -o name | \ 69 | cut -d/ -f 2) 70 | echo Found client pod: $_CLIENT 71 | 72 | echo All pods: 73 | kubectl get pods 74 | 75 | echo All persistent volumes: 76 | kubectl get pv 77 | 78 | _run kubectl exec $_CLIENT -- hdfs dfsadmin -report 79 | _run kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0 80 | _run kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1 81 | 82 | _run kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp 83 | _run kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp 84 | _run kubectl exec $_CLIENT -- sh -c \ 85 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 86 | _run kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 87 | } 88 | 89 | function cleanup_test_case() { 90 | local charts="my-hdfs-client \ 91 | my-hdfs-datanode \ 92 | my-hdfs-namenode \ 93 | my-hdfs-journalnode \ 94 | my-hdfs-config \ 95 | my-hdfs-zookeeper" 96 | for chart in 
$charts; do 97 | helm delete --purge $chart || true 98 | done 99 | } 100 | -------------------------------------------------------------------------------- /tests/cases/_basic.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function run_test_case () { 4 | _helm_diff_and_install ${_TEST_DIR}/gold/basic.gold \ 5 | hdfs-k8s \ 6 | -n my-hdfs \ 7 | --values ${_TEST_DIR}/values/common.yaml \ 8 | --values ${_TEST_DIR}/values/custom-hadoop-config.yaml \ 9 | --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data0,/mnt/sda1/hdfs-data1}" 10 | 11 | if [[ "${DRY_RUN_ONLY:-false}" = "true" ]]; then 12 | return 13 | fi 14 | 15 | k8s_single_pod_ready -l app=zookeeper,release=my-hdfs 16 | k8s_all_pods_ready 3 -l app=hdfs-journalnode,release=my-hdfs 17 | k8s_all_pods_ready 2 -l app=hdfs-namenode,release=my-hdfs 18 | k8s_single_pod_ready -l app=hdfs-datanode,release=my-hdfs 19 | k8s_single_pod_ready -l app=hdfs-client,release=my-hdfs 20 | _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ 21 | cut -d/ -f 2) 22 | echo Found client pod: $_CLIENT 23 | 24 | echo All pods: 25 | kubectl get pods 26 | 27 | echo All persistent volumes: 28 | kubectl get pv 29 | 30 | _run kubectl exec $_CLIENT -- hdfs dfsadmin -report 31 | _run kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0 32 | _run kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1 33 | 34 | _run kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp 35 | _run kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp 36 | _run kubectl exec $_CLIENT -- sh -c \ 37 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 38 | _run kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 39 | } 40 | 41 | function cleanup_test_case() { 42 | helm delete --purge my-hdfs || true 43 | } 44 | -------------------------------------------------------------------------------- /tests/cases/_kerberos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function run_test_case () { 4 | _helm_diff_and_install ${_TEST_DIR}/gold/kerberos.gold \ 5 | hdfs-k8s \ 6 | -n my-hdfs \ 7 | --values ${_TEST_DIR}/values/common.yaml \ 8 | --values ${_TEST_DIR}/values/kerberos.yaml \ 9 | --set tags.kerberos=true 10 | 11 | if [[ "${DRY_RUN_ONLY:-false}" = "true" ]]; then 12 | return 13 | fi 14 | 15 | # The above helm command launches all components. However, core HDFS 16 | # componensts such as namenodes and datanodes are blocked by a expected 17 | # Kerberos configmap and secret. So we create them here. 
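  # What follows: (1) wait for the KDC pod and copy its /etc/krb5.conf into
  # the my-hdfs-krb5-config configmap, (2) create per-host hdfs/ and HTTP/
  # principals and keytabs in the KDC for the cluster node hostnames and the
  # namenode/journalnode service hostnames taken from my-hdfs-config, and
  # (3) bundle the keytabs into the my-hdfs-krb5-keytabs secret.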
18 | k8s_single_pod_ready -l app=hdfs-krb5,release=my-hdfs 19 | _KDC=$(kubectl get pod -l app=hdfs-krb5,release=my-hdfs --no-headers \ 20 | -o name | cut -d/ -f2) 21 | _run kubectl cp $_KDC:/etc/krb5.conf $_TEST_DIR/tmp/krb5.conf 22 | _run kubectl create configmap my-hdfs-krb5-config \ 23 | --from-file=$_TEST_DIR/tmp/krb5.conf 24 | 25 | _HOSTS=$(kubectl get nodes \ 26 | -o=jsonpath='{.items[*].status.addresses[?(@.type == "Hostname")].address}') 27 | _HOSTS+=$(kubectl describe configmap my-hdfs-config | \ 28 | grep -A 1 -e dfs.namenode.rpc-address.hdfs-k8s \ 29 | -e dfs.namenode.shared.edits.dir | 30 | grep "" | 31 | sed -e "s///" \ 32 | -e "s/<\/value>//" \ 33 | -e "s/:8020//" \ 34 | -e "s/qjournal:\/\///" \ 35 | -e "s/:8485;/ /g" \ 36 | -e "s/:8485\/hdfs-k8s//") 37 | 38 | echo Adding service principals for hosts $_HOSTS 39 | _SECRET_CMD="kubectl create secret generic my-hdfs-krb5-keytabs" 40 | for _HOST in $_HOSTS; do 41 | _run kubectl exec $_KDC -- kadmin.local -q \ 42 | "addprinc -randkey hdfs/$_HOST@MYCOMPANY.COM" 43 | _run kubectl exec $_KDC -- kadmin.local -q \ 44 | "addprinc -randkey HTTP/$_HOST@MYCOMPANY.COM" 45 | _run kubectl exec $_KDC -- kadmin.local -q \ 46 | "ktadd -norandkey -k /tmp/$_HOST.keytab hdfs/$_HOST@MYCOMPANY.COM HTTP/$_HOST@MYCOMPANY.COM" 47 | _run kubectl cp $_KDC:/tmp/$_HOST.keytab $_TEST_DIR/tmp/$_HOST.keytab 48 | _SECRET_CMD+=" --from-file=$_TEST_DIR/tmp/$_HOST.keytab" 49 | done 50 | echo Adding a K8s secret containing Kerberos keytab files 51 | _run $_SECRET_CMD 52 | 53 | k8s_single_pod_ready -l app=zookeeper,release=my-hdfs 54 | k8s_all_pods_ready 3 -l app=hdfs-journalnode,release=my-hdfs 55 | k8s_all_pods_ready 2 -l app=hdfs-namenode,release=my-hdfs 56 | k8s_single_pod_ready -l app=hdfs-datanode,release=my-hdfs 57 | k8s_single_pod_ready -l app=hdfs-client,release=my-hdfs 58 | _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ 59 | cut -d/ -f 2) 60 | echo Found client pod: $_CLIENT 61 | 62 | echo All pods: 63 | kubectl get pods 64 | 65 | echo All persistent volumes: 66 | kubectl get pv 67 | 68 | _NN0=$(kubectl get pods -l app=hdfs-namenode,release=my-hdfs -o name | \ 69 | head -1 | \ 70 | cut -d/ -f2) 71 | kubectl exec $_NN0 -- sh -c "(apt update > /dev/null)" \ 72 | || true 73 | kubectl exec $_NN0 -- sh -c "(DEBIAN_FRONTEND=noninteractive apt install -y krb5-user > /dev/null)" \ 74 | || true 75 | _run kubectl exec $_NN0 -- \ 76 | kinit -kt /etc/security/hdfs.keytab \ 77 | hdfs/my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local@MYCOMPANY.COM 78 | _run kubectl exec $_NN0 -- hdfs dfsadmin -report 79 | _run kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn0 80 | _run kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn1 81 | _run kubectl exec $_NN0 -- hadoop fs -rm -r -f /tmp 82 | _run kubectl exec $_NN0 -- hadoop fs -mkdir /tmp 83 | _run kubectl exec $_NN0 -- hadoop fs -chmod 0777 /tmp 84 | 85 | _run kubectl exec $_KDC -- kadmin.local -q \ 86 | "addprinc -randkey user1@MYCOMPANY.COM" 87 | _run kubectl exec $_KDC -- kadmin.local -q \ 88 | "ktadd -norandkey -k /tmp/user1.keytab user1@MYCOMPANY.COM" 89 | _run kubectl cp $_KDC:/tmp/user1.keytab $_TEST_DIR/tmp/user1.keytab 90 | _run kubectl cp $_TEST_DIR/tmp/user1.keytab $_CLIENT:/tmp/user1.keytab 91 | 92 | kubectl exec $_CLIENT -- sh -c "(apt update > /dev/null)" \ 93 | || true 94 | kubectl exec $_CLIENT -- sh -c "(DEBIAN_FRONTEND=noninteractive apt install -y krb5-user > /dev/null)" \ 95 | || true 96 | 97 | _run kubectl exec $_CLIENT -- kinit -kt /tmp/user1.keytab 
user1@MYCOMPANY.COM 98 | _run kubectl exec $_CLIENT -- sh -c \ 99 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 100 | _run kubectl exec $_CLIENT -- hadoop fs -ls / 101 | _run kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 102 | } 103 | 104 | function cleanup_test_case() { 105 | kubectl delete configmap my-hdfs-krb5-config || true 106 | kubectl delete secret my-hdfs-krb5-keytabs || true 107 | helm delete --purge my-hdfs || true 108 | } 109 | -------------------------------------------------------------------------------- /tests/cases/_single-namenode.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function run_test_case () { 4 | 5 | _NODE=$(kubectl get node --no-headers -o name | cut -d/ -f2) 6 | kubectl label nodes $_NODE hdfs-namenode-selector=hdfs-namenode-0 7 | 8 | _helm_diff_and_install ${_TEST_DIR}/gold/single-namenode.gold \ 9 | hdfs-k8s \ 10 | -n my-hdfs \ 11 | --set tags.ha=false \ 12 | --set tags.simple=true \ 13 | --set global.namenodeHAEnabled=false \ 14 | --set "hdfs-simple-namenode-k8s.nameNodeHostPath=/mnt/sda1/hdfs-name" \ 15 | --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data}" 16 | 17 | if [[ "${DRY_RUN_ONLY:-false}" = "true" ]]; then 18 | return 19 | fi 20 | 21 | k8s_single_pod_ready -l app=hdfs-namenode,release=my-hdfs 22 | k8s_single_pod_ready -l app=hdfs-datanode,release=my-hdfs 23 | k8s_single_pod_ready -l app=hdfs-client,release=my-hdfs 24 | _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ 25 | cut -d/ -f 2) 26 | echo Found client pod: $_CLIENT 27 | 28 | echo All pods: 29 | kubectl get pods 30 | 31 | _run kubectl exec $_CLIENT -- hdfs dfsadmin -report 32 | 33 | _run kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp 34 | _run kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp 35 | _run kubectl exec $_CLIENT -- sh -c \ 36 | "(head -c 100M < /dev/urandom > /tmp/random-100M)" 37 | _run kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp 38 | } 39 | 40 | function cleanup_test_case() { 41 | helm delete --purge my-hdfs || true 42 | 43 | _NODE=$(kubectl get node --no-headers -o name | cut -d/ -f2) || true 44 | kubectl label nodes $_NODE hdfs-namenode-selector- || true 45 | } 46 | -------------------------------------------------------------------------------- /tests/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Exit on error. Append "|| true" if you expect an error. 3 | set -o errexit 4 | # Exit on error inside any functions or subshells. 5 | set -o errtrace 6 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 7 | set -o nounset 8 | # Catch an error in command pipes. e.g. 
mysqldump fails (but gzip succeeds) 9 | # in `mysqldump |gzip` 10 | set -o pipefail 11 | if [[ "${DEBUG:-}" = "true" ]]; then 12 | # Turn on traces, useful while debugging but commented out by default 13 | set -o xtrace 14 | fi 15 | 16 | _MY_SCRIPT="${BASH_SOURCE[0]}" 17 | _TEST_DIR=$(cd "$(dirname "$_MY_SCRIPT")" && pwd) 18 | 19 | cd $_TEST_DIR 20 | export PATH=${_TEST_DIR}/bin:$PATH 21 | 22 | _DEFAULT_CASES="*" 23 | : "${CASES:=$_DEFAULT_CASES}" 24 | _CASES=$(ls ${_TEST_DIR}/cases/${CASES}) 25 | for _CASE in $_CASES; do 26 | source $_CASE 27 | echo Cleaning up test case: $_CASE 28 | cleanup_test_case 29 | done 30 | -------------------------------------------------------------------------------- /tests/gold/single-namenode.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | global: 9 | dataNodeHostPath: 10 | - /mnt/sda1/hdfs-data 11 | namenodeHAEnabled: false 12 | hdfs-simple-namenode-k8s: 13 | nameNodeHostPath: /mnt/sda1/hdfs-name 14 | tags: 15 | ha: false 16 | simple: true 17 | 18 | COMPUTED VALUES: 19 | global: 20 | dataNodeHostPath: 21 | - /mnt/sda1/hdfs-data 22 | defaultAffinityEnabled: true 23 | journalnodeQuorumSize: 3 24 | jsvcEnabled: true 25 | kerberosConfigFileName: krb5.conf 26 | kerberosEnabled: false 27 | kerberosRealm: MYCOMPANY.COM 28 | namenodeHAEnabled: false 29 | podSecurityContext: 30 | enabled: false 31 | fsGroup: 1000 32 | runAsUser: 0 33 | zookeeperQuorumSize: 3 34 | hdfs-client-k8s: 35 | global: 36 | dataNodeHostPath: 37 | - /mnt/sda1/hdfs-data 38 | defaultAffinityEnabled: true 39 | journalnodeQuorumSize: 3 40 | jsvcEnabled: true 41 | kerberosConfigFileName: krb5.conf 42 | kerberosEnabled: false 43 | kerberosRealm: MYCOMPANY.COM 44 | namenodeHAEnabled: false 45 | podSecurityContext: 46 | enabled: false 47 | fsGroup: 1000 48 | runAsUser: 0 49 | zookeeperQuorumSize: 3 50 | hdfs-config-k8s: 51 | customHadoopConfig: 52 | coreSite: {} 53 | hdfsSite: {} 54 | global: 55 | dataNodeHostPath: 56 | - /mnt/sda1/hdfs-data 57 | defaultAffinityEnabled: true 58 | journalnodeQuorumSize: 3 59 | jsvcEnabled: true 60 | kerberosConfigFileName: krb5.conf 61 | kerberosEnabled: false 62 | kerberosRealm: MYCOMPANY.COM 63 | namenodeHAEnabled: false 64 | podSecurityContext: 65 | enabled: false 66 | fsGroup: 1000 67 | runAsUser: 0 68 | zookeeperQuorumSize: 3 69 | hdfs-datanode-k8s: 70 | affinity: {} 71 | global: 72 | dataNodeHostPath: 73 | - /mnt/sda1/hdfs-data 74 | defaultAffinityEnabled: true 75 | journalnodeQuorumSize: 3 76 | jsvcEnabled: true 77 | kerberosConfigFileName: krb5.conf 78 | kerberosEnabled: false 79 | kerberosRealm: MYCOMPANY.COM 80 | namenodeHAEnabled: false 81 | podSecurityContext: 82 | enabled: false 83 | fsGroup: 1000 84 | runAsUser: 0 85 | zookeeperQuorumSize: 3 86 | nodeSelector: {} 87 | tolerations: [] 88 | hdfs-journalnode-k8s: 89 | affinity: {} 90 | nodeSelector: {} 91 | persistence: 92 | accessMode: ReadWriteOnce 93 | size: 20Gi 94 | tolerations: [] 95 | hdfs-krb5-k8s: 96 | image: 97 | pullPolicy: IfNotPresent 98 | repository: gcavalcante8808/krb5-server 99 | tag: latest 100 | persistence: 101 | accessMode: ReadWriteOnce 102 | size: 20Gi 103 | service: 104 | port: 88 105 | type: ClusterIP 106 | hdfs-namenode-k8s: 107 | affinity: {} 108 | customRunScript: | 109 | #!/bin/bash -x 110 | echo Write your own script content! 111 | echo This message will disappear in 10 seconds. 
112 | sleep 10 113 | hostNetworkEnabled: true 114 | namenodeStartScript: format-and-run.sh 115 | nodeSelector: {} 116 | persistence: 117 | accessMode: ReadWriteOnce 118 | size: 100Gi 119 | tolerations: [] 120 | hdfs-simple-namenode-k8s: 121 | affinity: {} 122 | global: 123 | dataNodeHostPath: 124 | - /mnt/sda1/hdfs-data 125 | defaultAffinityEnabled: true 126 | journalnodeQuorumSize: 3 127 | jsvcEnabled: true 128 | kerberosConfigFileName: krb5.conf 129 | kerberosEnabled: false 130 | kerberosRealm: MYCOMPANY.COM 131 | namenodeHAEnabled: false 132 | podSecurityContext: 133 | enabled: false 134 | fsGroup: 1000 135 | runAsUser: 0 136 | zookeeperQuorumSize: 3 137 | nameNodeHostPath: /mnt/sda1/hdfs-name 138 | nodeSelector: {} 139 | tolerations: [] 140 | tags: 141 | ha: false 142 | kerberos: false 143 | simple: true 144 | zookeeper: 145 | env: 146 | ZK_HEAP_SIZE: 1G 147 | replicaCount: 3 148 | resources: null 149 | 150 | HOOKS: 151 | MANIFEST: 152 | 153 | --- 154 | # Source: hdfs/charts/hdfs-config-k8s/templates/configmap.yaml 155 | apiVersion: v1 156 | kind: ConfigMap 157 | metadata: 158 | name: my-hdfs-config 159 | labels: 160 | app: hdfs-client 161 | chart: hdfs-config-k8s-0.1.0 162 | release: my-hdfs 163 | data: 164 | core-site.xml: | 165 | 166 | 167 | 168 | 169 | fs.defaultFS 170 | hdfs://my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local:8020 171 | 172 | 173 | hdfs-site.xml: | 174 | 175 | 176 | 177 | 178 | dfs.namenode.name.dir 179 | file:///hadoop/dfs/name 180 | 181 | 182 | dfs.namenode.datanode.registration.ip-hostname-check 183 | false 184 | 185 | 186 | dfs.datanode.data.dir 187 | /mnt/sda1/hdfs-data 188 | 189 | 190 | --- 191 | # Source: hdfs/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml 192 | # Provides datanode helper scripts. 193 | apiVersion: v1 194 | kind: ConfigMap 195 | metadata: 196 | name: my-hdfs-datanode-scripts 197 | labels: 198 | app: hdfs-datanode 199 | chart: hdfs-datanode-k8s-0.1.0 200 | release: my-hdfs 201 | data: 202 | check-status.sh: | 203 | #!/usr/bin/env bash 204 | # Exit on error. Append "|| true" if you expect an error. 205 | set -o errexit 206 | # Exit on error inside any functions or subshells. 207 | set -o errtrace 208 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 209 | set -o nounset 210 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 211 | # in `mysqldump |gzip` 212 | set -o pipefail 213 | # Turn on traces, useful while debugging. 214 | set -o xtrace 215 | 216 | # Check if datanode registered with the namenode and got non-null cluster ID. 217 | _PORTS="50075 1006" 218 | _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo" 219 | _CLUSTER_ID="" 220 | for _PORT in $_PORTS; do 221 | _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \ 222 | grep ClusterId) || true 223 | done 224 | echo $_CLUSTER_ID | grep -q -v null 225 | --- 226 | # Source: hdfs/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml 227 | # A headless service to create DNS records. 228 | apiVersion: v1 229 | kind: Service 230 | metadata: 231 | name: my-hdfs-namenode 232 | labels: 233 | app: hdfs-namenode 234 | chart: hdfs-simple-namenode-k8s-0.1.0 235 | release: my-hdfs 236 | spec: 237 | ports: 238 | - port: 8020 239 | name: fs 240 | clusterIP: None 241 | selector: 242 | app: hdfs-namenode 243 | release: my-hdfs 244 | --- 245 | # Source: hdfs/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml 246 | # Deleting a daemonset may need some trick. 
See 247 | # https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 248 | apiVersion: extensions/v1beta1 249 | kind: DaemonSet 250 | metadata: 251 | name: my-hdfs-datanode 252 | labels: 253 | app: hdfs-datanode 254 | chart: hdfs-datanode-k8s-0.1.0 255 | release: my-hdfs 256 | spec: 257 | template: 258 | metadata: 259 | labels: 260 | app: hdfs-datanode 261 | release: my-hdfs 262 | spec: 263 | affinity: 264 | nodeAffinity: 265 | requiredDuringSchedulingIgnoredDuringExecution: 266 | nodeSelectorTerms: 267 | - matchExpressions: 268 | - key: my-hdfs-datanode-exclude 269 | operator: DoesNotExist 270 | hostNetwork: true 271 | hostPID: true 272 | dnsPolicy: ClusterFirstWithHostNet 273 | containers: 274 | - name: datanode 275 | image: uhopper/hadoop-datanode:2.7.2 276 | env: 277 | - name: HADOOP_CUSTOM_CONF_DIR 278 | value: /etc/hadoop-custom-conf 279 | - name: MULTIHOMED_NETWORK 280 | value: "0" 281 | livenessProbe: 282 | exec: 283 | command: 284 | - /dn-scripts/check-status.sh 285 | initialDelaySeconds: 60 286 | periodSeconds: 30 287 | readinessProbe: 288 | exec: 289 | command: 290 | - /dn-scripts/check-status.sh 291 | initialDelaySeconds: 60 292 | periodSeconds: 30 293 | securityContext: 294 | privileged: true 295 | volumeMounts: 296 | - name: dn-scripts 297 | mountPath: /dn-scripts 298 | readOnly: true 299 | - name: hdfs-config 300 | mountPath: /etc/hadoop-custom-conf 301 | readOnly: true 302 | - name: hdfs-data-0 303 | mountPath: /hadoop/dfs/data/0 304 | restartPolicy: Always 305 | volumes: 306 | - name: dn-scripts 307 | configMap: 308 | name: my-hdfs-datanode-scripts 309 | defaultMode: 0744 310 | - name: hdfs-data-0 311 | hostPath: 312 | path: /mnt/sda1/hdfs-data 313 | - name: hdfs-config 314 | configMap: 315 | name: my-hdfs-config 316 | --- 317 | # Source: hdfs/charts/hdfs-client-k8s/templates/client-deployment.yaml 318 | apiVersion: apps/v1 319 | apiVersion: extensions/v1beta1 320 | kind: Deployment 321 | metadata: 322 | name: my-hdfs-client 323 | labels: 324 | app: hdfs-client 325 | chart: hdfs-client-k8s-0.1.0 326 | release: my-hdfs 327 | spec: 328 | replicas: 1 329 | selector: 330 | matchLabels: 331 | app: hdfs-client 332 | release: my-hdfs 333 | template: 334 | metadata: 335 | labels: 336 | app: hdfs-client 337 | release: my-hdfs 338 | spec: 339 | containers: 340 | - name: hdfs-client 341 | image: uhopper/hadoop:2.7.2 342 | env: 343 | - name: HADOOP_CUSTOM_CONF_DIR 344 | value: /etc/hadoop-custom-conf 345 | - name: MULTIHOMED_NETWORK 346 | value: "0" 347 | command: ['/bin/sh', '-c'] 348 | args: 349 | - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg 350 | volumeMounts: 351 | - name: hdfs-config 352 | mountPath: /etc/hadoop-custom-conf 353 | readOnly: true 354 | restartPolicy: Always 355 | volumes: 356 | - name: hdfs-config 357 | configMap: 358 | name: my-hdfs-config 359 | --- 360 | # Source: hdfs/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml 361 | apiVersion: apps/v1beta1 362 | kind: StatefulSet 363 | metadata: 364 | name: my-hdfs-namenode 365 | labels: 366 | app: hdfs-namenode 367 | chart: hdfs-simple-namenode-k8s-0.1.0 368 | release: my-hdfs 369 | spec: 370 | serviceName: my-hdfs-namenode 371 | # Create a size-1 set. 372 | replicas: 1 373 | template: 374 | metadata: 375 | labels: 376 | app: hdfs-namenode 377 | release: my-hdfs 378 | spec: 379 | # Use hostNetwork so datanodes connect to namenode without going through an overlay network 380 | # like weave. Otherwise, namenode fails to see physical IP address of datanodes. 
381 | hostNetwork: true 382 | hostPID: true 383 | dnsPolicy: ClusterFirstWithHostNet 384 | containers: 385 | - name: hdfs-namenode 386 | image: uhopper/hadoop-namenode:2.7.2 387 | env: 388 | - name: HADOOP_CUSTOM_CONF_DIR 389 | value: /etc/hadoop-custom-conf 390 | - name: CLUSTER_NAME 391 | value: hdfs-k8s 392 | ports: 393 | - containerPort: 8020 394 | name: fs 395 | volumeMounts: 396 | - name: hdfs-name 397 | mountPath: /hadoop/dfs/name 398 | - name: hdfs-config 399 | mountPath: /etc/hadoop-custom-conf 400 | readOnly: true 401 | restartPolicy: Always 402 | volumes: 403 | - name: hdfs-name 404 | hostPath: 405 | path: /mnt/sda1/hdfs-name 406 | - name: hdfs-config 407 | configMap: 408 | name: my-hdfs-config 409 | -------------------------------------------------------------------------------- /tests/gold/subchart-client.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-client 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | client: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | replicaCount: 1 25 | resources: 26 | requests: 27 | memory: 100m 28 | 29 | COMPUTED VALUES: 30 | condition: 31 | subchart: 32 | client: true 33 | global: 34 | dataNodeHostPath: 35 | - /mnt/sda1/hdfs-data 36 | defaultAffinityEnabled: false 37 | fullnameOverride: my-hdfs 38 | journalnodeQuorumSize: 3 39 | jsvcEnabled: true 40 | kerberosConfigFileName: krb5.conf 41 | kerberosEnabled: false 42 | kerberosRealm: MYCOMPANY.COM 43 | namenodeHAEnabled: true 44 | podSecurityContext: 45 | enabled: false 46 | fsGroup: 1000 47 | runAsUser: 0 48 | zookeeperQuorumSize: 1 49 | hdfs-client-k8s: 50 | global: 51 | dataNodeHostPath: 52 | - /mnt/sda1/hdfs-data 53 | defaultAffinityEnabled: false 54 | fullnameOverride: my-hdfs 55 | journalnodeQuorumSize: 3 56 | jsvcEnabled: true 57 | kerberosConfigFileName: krb5.conf 58 | kerberosEnabled: false 59 | kerberosRealm: MYCOMPANY.COM 60 | namenodeHAEnabled: true 61 | podSecurityContext: 62 | enabled: false 63 | fsGroup: 1000 64 | runAsUser: 0 65 | zookeeperQuorumSize: 1 66 | hdfs-config-k8s: 67 | customHadoopConfig: 68 | coreSite: {} 69 | hdfsSite: {} 70 | hdfs-datanode-k8s: 71 | affinity: {} 72 | nodeSelector: {} 73 | tolerations: [] 74 | hdfs-journalnode-k8s: 75 | affinity: {} 76 | nodeSelector: {} 77 | persistence: 78 | accessMode: ReadWriteOnce 79 | size: 20Gi 80 | tolerations: [] 81 | hdfs-krb5-k8s: 82 | image: 83 | pullPolicy: IfNotPresent 84 | repository: gcavalcante8808/krb5-server 85 | tag: latest 86 | persistence: 87 | accessMode: ReadWriteOnce 88 | size: 20Gi 89 | service: 90 | port: 88 91 | type: ClusterIP 92 | hdfs-namenode-k8s: 93 | affinity: {} 94 | customRunScript: | 95 | #!/bin/bash -x 96 | echo Write your own script content! 97 | echo This message will disappear in 10 seconds. 
98 | sleep 10 99 | hostNetworkEnabled: false 100 | namenodeStartScript: format-and-run.sh 101 | nodeSelector: {} 102 | persistence: 103 | accessMode: ReadWriteOnce 104 | size: 100Gi 105 | tolerations: [] 106 | hdfs-simple-namenode-k8s: 107 | affinity: {} 108 | nameNodeHostPath: /hdfs-name 109 | nodeSelector: {} 110 | tolerations: [] 111 | tags: 112 | ha: false 113 | kerberos: false 114 | simple: false 115 | zookeeper: 116 | env: 117 | ZK_HEAP_SIZE: 100m 118 | replicaCount: 1 119 | resources: 120 | requests: 121 | memory: 100m 122 | 123 | HOOKS: 124 | MANIFEST: 125 | 126 | --- 127 | # Source: hdfs/charts/hdfs-client-k8s/templates/client-deployment.yaml 128 | apiVersion: apps/v1 129 | apiVersion: extensions/v1beta1 130 | kind: Deployment 131 | metadata: 132 | name: my-hdfs-client 133 | labels: 134 | app: hdfs-client 135 | chart: hdfs-client-k8s-0.1.0 136 | release: my-hdfs-client 137 | spec: 138 | replicas: 1 139 | selector: 140 | matchLabels: 141 | app: hdfs-client 142 | release: my-hdfs-client 143 | template: 144 | metadata: 145 | labels: 146 | app: hdfs-client 147 | release: my-hdfs-client 148 | spec: 149 | containers: 150 | - name: hdfs-client 151 | image: uhopper/hadoop:2.7.2 152 | env: 153 | - name: HADOOP_CUSTOM_CONF_DIR 154 | value: /etc/hadoop-custom-conf 155 | - name: MULTIHOMED_NETWORK 156 | value: "0" 157 | command: ['/bin/sh', '-c'] 158 | args: 159 | - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg 160 | volumeMounts: 161 | - name: hdfs-config 162 | mountPath: /etc/hadoop-custom-conf 163 | readOnly: true 164 | restartPolicy: Always 165 | volumes: 166 | - name: hdfs-config 167 | configMap: 168 | name: my-hdfs-config 169 | -------------------------------------------------------------------------------- /tests/gold/subchart-config.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-config 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | config: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | replicaCount: 1 25 | resources: 26 | requests: 27 | memory: 100m 28 | 29 | COMPUTED VALUES: 30 | condition: 31 | subchart: 32 | config: true 33 | global: 34 | dataNodeHostPath: 35 | - /mnt/sda1/hdfs-data 36 | defaultAffinityEnabled: false 37 | fullnameOverride: my-hdfs 38 | journalnodeQuorumSize: 3 39 | jsvcEnabled: true 40 | kerberosConfigFileName: krb5.conf 41 | kerberosEnabled: false 42 | kerberosRealm: MYCOMPANY.COM 43 | namenodeHAEnabled: true 44 | podSecurityContext: 45 | enabled: false 46 | fsGroup: 1000 47 | runAsUser: 0 48 | zookeeperQuorumSize: 1 49 | hdfs-config-k8s: 50 | customHadoopConfig: 51 | coreSite: {} 52 | hdfsSite: {} 53 | global: 54 | dataNodeHostPath: 55 | - /mnt/sda1/hdfs-data 56 | defaultAffinityEnabled: false 57 | fullnameOverride: my-hdfs 58 | journalnodeQuorumSize: 3 59 | jsvcEnabled: true 60 | kerberosConfigFileName: krb5.conf 61 | kerberosEnabled: false 62 | kerberosRealm: MYCOMPANY.COM 63 | namenodeHAEnabled: true 64 | podSecurityContext: 65 | enabled: false 66 | fsGroup: 1000 67 | runAsUser: 0 68 | zookeeperQuorumSize: 1 69 | hdfs-datanode-k8s: 70 | affinity: {} 71 | nodeSelector: {} 72 | tolerations: [] 73 | hdfs-journalnode-k8s: 74 | affinity: {} 75 | nodeSelector: {} 76 | persistence: 77 | accessMode: 
ReadWriteOnce 78 | size: 20Gi 79 | tolerations: [] 80 | hdfs-krb5-k8s: 81 | image: 82 | pullPolicy: IfNotPresent 83 | repository: gcavalcante8808/krb5-server 84 | tag: latest 85 | persistence: 86 | accessMode: ReadWriteOnce 87 | size: 20Gi 88 | service: 89 | port: 88 90 | type: ClusterIP 91 | hdfs-namenode-k8s: 92 | affinity: {} 93 | customRunScript: | 94 | #!/bin/bash -x 95 | echo Write your own script content! 96 | echo This message will disappear in 10 seconds. 97 | sleep 10 98 | hostNetworkEnabled: false 99 | namenodeStartScript: format-and-run.sh 100 | nodeSelector: {} 101 | persistence: 102 | accessMode: ReadWriteOnce 103 | size: 100Gi 104 | tolerations: [] 105 | hdfs-simple-namenode-k8s: 106 | affinity: {} 107 | nameNodeHostPath: /hdfs-name 108 | nodeSelector: {} 109 | tolerations: [] 110 | tags: 111 | ha: false 112 | kerberos: false 113 | simple: false 114 | zookeeper: 115 | env: 116 | ZK_HEAP_SIZE: 100m 117 | replicaCount: 1 118 | resources: 119 | requests: 120 | memory: 100m 121 | 122 | HOOKS: 123 | MANIFEST: 124 | 125 | --- 126 | # Source: hdfs/charts/hdfs-config-k8s/templates/configmap.yaml 127 | apiVersion: v1 128 | kind: ConfigMap 129 | metadata: 130 | name: my-hdfs-config 131 | labels: 132 | app: hdfs-client 133 | chart: hdfs-config-k8s-0.1.0 134 | release: my-hdfs-config 135 | data: 136 | core-site.xml: | 137 | 138 | 139 | 140 | 141 | fs.defaultFS 142 | hdfs://hdfs-k8s 143 | 144 | 145 | ha.zookeeper.quorum 146 | my-hdfs-zookeeper-0.my-hdfs-zookeeper-headless.default.svc.cluster.local:2181 147 | 148 | 149 | hdfs-site.xml: | 150 | 151 | 152 | 153 | 154 | dfs.nameservices 155 | hdfs-k8s 156 | 157 | 158 | dfs.ha.namenodes.hdfs-k8s 159 | nn0,nn1 160 | 161 | 162 | dfs.namenode.rpc-address.hdfs-k8s.nn0 163 | my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local:8020 164 | 165 | 166 | dfs.namenode.rpc-address.hdfs-k8s.nn1 167 | my-hdfs-namenode-1.my-hdfs-namenode.default.svc.cluster.local:8020 168 | 169 | 170 | dfs.namenode.http-address.hdfs-k8s.nn0 171 | my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local:50070 172 | 173 | 174 | dfs.namenode.http-address.hdfs-k8s.nn1 175 | my-hdfs-namenode-1.my-hdfs-namenode.default.svc.cluster.local:50070 176 | 177 | 178 | dfs.namenode.shared.edits.dir 179 | qjournal://my-hdfs-journalnode-1.my-hdfs-journalnode.default.svc.cluster.local:8485;my-hdfs-journalnode-2.my-hdfs-journalnode.default.svc.cluster.local:8485;my-hdfs-journalnode-0.my-hdfs-journalnode.default.svc.cluster.local:8485/hdfs-k8s 180 | 181 | 182 | dfs.ha.automatic-failover.enabled 183 | true 184 | 185 | 186 | dfs.ha.fencing.methods 187 | shell(/bin/true) 188 | 189 | 190 | dfs.journalnode.edits.dir 191 | /hadoop/dfs/journal 192 | 193 | 194 | dfs.client.failover.proxy.provider.hdfs-k8s 195 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 196 | 197 | 198 | dfs.namenode.name.dir 199 | file:///hadoop/dfs/name 200 | 201 | 202 | dfs.namenode.datanode.registration.ip-hostname-check 203 | false 204 | 205 | 206 | dfs.datanode.data.dir 207 | /mnt/sda1/hdfs-data 208 | 209 | 210 | -------------------------------------------------------------------------------- /tests/gold/subchart-datanode.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-datanode 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | datanode: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 
16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | replicaCount: 1 25 | resources: 26 | requests: 27 | memory: 100m 28 | 29 | COMPUTED VALUES: 30 | condition: 31 | subchart: 32 | datanode: true 33 | global: 34 | dataNodeHostPath: 35 | - /mnt/sda1/hdfs-data 36 | defaultAffinityEnabled: false 37 | fullnameOverride: my-hdfs 38 | journalnodeQuorumSize: 3 39 | jsvcEnabled: true 40 | kerberosConfigFileName: krb5.conf 41 | kerberosEnabled: false 42 | kerberosRealm: MYCOMPANY.COM 43 | namenodeHAEnabled: true 44 | podSecurityContext: 45 | enabled: false 46 | fsGroup: 1000 47 | runAsUser: 0 48 | zookeeperQuorumSize: 1 49 | hdfs-config-k8s: 50 | customHadoopConfig: 51 | coreSite: {} 52 | hdfsSite: {} 53 | hdfs-datanode-k8s: 54 | affinity: {} 55 | global: 56 | dataNodeHostPath: 57 | - /mnt/sda1/hdfs-data 58 | defaultAffinityEnabled: false 59 | fullnameOverride: my-hdfs 60 | journalnodeQuorumSize: 3 61 | jsvcEnabled: true 62 | kerberosConfigFileName: krb5.conf 63 | kerberosEnabled: false 64 | kerberosRealm: MYCOMPANY.COM 65 | namenodeHAEnabled: true 66 | podSecurityContext: 67 | enabled: false 68 | fsGroup: 1000 69 | runAsUser: 0 70 | zookeeperQuorumSize: 1 71 | nodeSelector: {} 72 | tolerations: [] 73 | hdfs-journalnode-k8s: 74 | affinity: {} 75 | nodeSelector: {} 76 | persistence: 77 | accessMode: ReadWriteOnce 78 | size: 20Gi 79 | tolerations: [] 80 | hdfs-krb5-k8s: 81 | image: 82 | pullPolicy: IfNotPresent 83 | repository: gcavalcante8808/krb5-server 84 | tag: latest 85 | persistence: 86 | accessMode: ReadWriteOnce 87 | size: 20Gi 88 | service: 89 | port: 88 90 | type: ClusterIP 91 | hdfs-namenode-k8s: 92 | affinity: {} 93 | customRunScript: | 94 | #!/bin/bash -x 95 | echo Write your own script content! 96 | echo This message will disappear in 10 seconds. 97 | sleep 10 98 | hostNetworkEnabled: false 99 | namenodeStartScript: format-and-run.sh 100 | nodeSelector: {} 101 | persistence: 102 | accessMode: ReadWriteOnce 103 | size: 100Gi 104 | tolerations: [] 105 | hdfs-simple-namenode-k8s: 106 | affinity: {} 107 | nameNodeHostPath: /hdfs-name 108 | nodeSelector: {} 109 | tolerations: [] 110 | tags: 111 | ha: false 112 | kerberos: false 113 | simple: false 114 | zookeeper: 115 | env: 116 | ZK_HEAP_SIZE: 100m 117 | replicaCount: 1 118 | resources: 119 | requests: 120 | memory: 100m 121 | 122 | HOOKS: 123 | MANIFEST: 124 | 125 | --- 126 | # Source: hdfs/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml 127 | # Provides datanode helper scripts. 128 | apiVersion: v1 129 | kind: ConfigMap 130 | metadata: 131 | name: my-hdfs-datanode-scripts 132 | labels: 133 | app: hdfs-datanode 134 | chart: hdfs-datanode-k8s-0.1.0 135 | release: my-hdfs-datanode 136 | data: 137 | check-status.sh: | 138 | #!/usr/bin/env bash 139 | # Exit on error. Append "|| true" if you expect an error. 140 | set -o errexit 141 | # Exit on error inside any functions or subshells. 142 | set -o errtrace 143 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 144 | set -o nounset 145 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 146 | # in `mysqldump |gzip` 147 | set -o pipefail 148 | # Turn on traces, useful while debugging. 149 | set -o xtrace 150 | 151 | # Check if datanode registered with the namenode and got non-null cluster ID. 
152 | _PORTS="50075 1006" 153 | _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo" 154 | _CLUSTER_ID="" 155 | for _PORT in $_PORTS; do 156 | _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \ 157 | grep ClusterId) || true 158 | done 159 | echo $_CLUSTER_ID | grep -q -v null 160 | --- 161 | # Source: hdfs/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml 162 | # Deleting a daemonset may need some trick. See 163 | # https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 164 | apiVersion: extensions/v1beta1 165 | kind: DaemonSet 166 | metadata: 167 | name: my-hdfs-datanode 168 | labels: 169 | app: hdfs-datanode 170 | chart: hdfs-datanode-k8s-0.1.0 171 | release: my-hdfs-datanode 172 | spec: 173 | template: 174 | metadata: 175 | labels: 176 | app: hdfs-datanode 177 | release: my-hdfs-datanode 178 | spec: 179 | hostNetwork: true 180 | hostPID: true 181 | dnsPolicy: ClusterFirstWithHostNet 182 | containers: 183 | - name: datanode 184 | image: uhopper/hadoop-datanode:2.7.2 185 | env: 186 | - name: HADOOP_CUSTOM_CONF_DIR 187 | value: /etc/hadoop-custom-conf 188 | - name: MULTIHOMED_NETWORK 189 | value: "0" 190 | livenessProbe: 191 | exec: 192 | command: 193 | - /dn-scripts/check-status.sh 194 | initialDelaySeconds: 60 195 | periodSeconds: 30 196 | readinessProbe: 197 | exec: 198 | command: 199 | - /dn-scripts/check-status.sh 200 | initialDelaySeconds: 60 201 | periodSeconds: 30 202 | securityContext: 203 | privileged: true 204 | volumeMounts: 205 | - name: dn-scripts 206 | mountPath: /dn-scripts 207 | readOnly: true 208 | - name: hdfs-config 209 | mountPath: /etc/hadoop-custom-conf 210 | readOnly: true 211 | - name: hdfs-data-0 212 | mountPath: /hadoop/dfs/data/0 213 | restartPolicy: Always 214 | volumes: 215 | - name: dn-scripts 216 | configMap: 217 | name: my-hdfs-datanode-scripts 218 | defaultMode: 0744 219 | - name: hdfs-data-0 220 | hostPath: 221 | path: /mnt/sda1/hdfs-data 222 | - name: hdfs-config 223 | configMap: 224 | name: my-hdfs-config 225 | -------------------------------------------------------------------------------- /tests/gold/subchart-journalnode.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-journalnode 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | journalnode: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | replicaCount: 1 25 | resources: 26 | requests: 27 | memory: 100m 28 | 29 | COMPUTED VALUES: 30 | condition: 31 | subchart: 32 | journalnode: true 33 | global: 34 | dataNodeHostPath: 35 | - /mnt/sda1/hdfs-data 36 | defaultAffinityEnabled: false 37 | fullnameOverride: my-hdfs 38 | journalnodeQuorumSize: 3 39 | jsvcEnabled: true 40 | kerberosConfigFileName: krb5.conf 41 | kerberosEnabled: false 42 | kerberosRealm: MYCOMPANY.COM 43 | namenodeHAEnabled: true 44 | podSecurityContext: 45 | enabled: false 46 | fsGroup: 1000 47 | runAsUser: 0 48 | zookeeperQuorumSize: 1 49 | hdfs-config-k8s: 50 | customHadoopConfig: 51 | coreSite: {} 52 | hdfsSite: {} 53 | hdfs-datanode-k8s: 54 | affinity: {} 55 | nodeSelector: {} 56 | tolerations: [] 57 | hdfs-journalnode-k8s: 58 | affinity: {} 59 | global: 60 | dataNodeHostPath: 61 | - /mnt/sda1/hdfs-data 62 | 
defaultAffinityEnabled: false 63 | fullnameOverride: my-hdfs 64 | journalnodeQuorumSize: 3 65 | jsvcEnabled: true 66 | kerberosConfigFileName: krb5.conf 67 | kerberosEnabled: false 68 | kerberosRealm: MYCOMPANY.COM 69 | namenodeHAEnabled: true 70 | podSecurityContext: 71 | enabled: false 72 | fsGroup: 1000 73 | runAsUser: 0 74 | zookeeperQuorumSize: 1 75 | nodeSelector: {} 76 | persistence: 77 | accessMode: ReadWriteOnce 78 | size: 20Gi 79 | tolerations: [] 80 | hdfs-krb5-k8s: 81 | image: 82 | pullPolicy: IfNotPresent 83 | repository: gcavalcante8808/krb5-server 84 | tag: latest 85 | persistence: 86 | accessMode: ReadWriteOnce 87 | size: 20Gi 88 | service: 89 | port: 88 90 | type: ClusterIP 91 | hdfs-namenode-k8s: 92 | affinity: {} 93 | customRunScript: | 94 | #!/bin/bash -x 95 | echo Write your own script content! 96 | echo This message will disappear in 10 seconds. 97 | sleep 10 98 | hostNetworkEnabled: false 99 | namenodeStartScript: format-and-run.sh 100 | nodeSelector: {} 101 | persistence: 102 | accessMode: ReadWriteOnce 103 | size: 100Gi 104 | tolerations: [] 105 | hdfs-simple-namenode-k8s: 106 | affinity: {} 107 | nameNodeHostPath: /hdfs-name 108 | nodeSelector: {} 109 | tolerations: [] 110 | tags: 111 | ha: false 112 | kerberos: false 113 | simple: false 114 | zookeeper: 115 | env: 116 | ZK_HEAP_SIZE: 100m 117 | replicaCount: 1 118 | resources: 119 | requests: 120 | memory: 100m 121 | 122 | HOOKS: 123 | MANIFEST: 124 | 125 | --- 126 | # Source: hdfs/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml 127 | # A headless service to create DNS records. 128 | apiVersion: v1 129 | kind: Service 130 | metadata: 131 | name: my-hdfs-journalnode 132 | labels: 133 | app: hdfs-journalnode 134 | chart: hdfs-journalnode-k8s-0.1.0 135 | release: my-hdfs-journalnode 136 | annotations: 137 | # TODO: Deprecated. Replace tolerate-unready-endpoints with 138 | # v1.Service.PublishNotReadyAddresses. 139 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 140 | spec: 141 | ports: 142 | - port: 8485 143 | name: jn 144 | - port: 8480 145 | name: http 146 | clusterIP: None 147 | selector: 148 | app: hdfs-journalnode 149 | release: my-hdfs-journalnode 150 | --- 151 | # Source: hdfs/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml 152 | apiVersion: apps/v1beta1 153 | kind: StatefulSet 154 | metadata: 155 | name: my-hdfs-journalnode 156 | labels: 157 | app: hdfs-journalnode 158 | chart: hdfs-journalnode-k8s-0.1.0 159 | release: my-hdfs-journalnode 160 | spec: 161 | serviceName: my-hdfs-journalnode 162 | replicas: 3 163 | template: 164 | metadata: 165 | labels: 166 | app: hdfs-journalnode 167 | release: my-hdfs-journalnode 168 | spec: 169 | containers: 170 | - name: hdfs-journalnode 171 | image: uhopper/hadoop-namenode:2.7.2 172 | env: 173 | - name: HADOOP_CUSTOM_CONF_DIR 174 | value: /etc/hadoop-custom-conf 175 | command: ["/entrypoint.sh"] 176 | args: ["/opt/hadoop-2.7.2/bin/hdfs", "--config", "/etc/hadoop", "journalnode"] 177 | ports: 178 | - containerPort: 8485 179 | name: jn 180 | - containerPort: 8480 181 | name: http 182 | volumeMounts: 183 | # Mount a subpath of the volume so that the journal subdir would be 184 | # a brand new empty dir. This way, we won't get affected by 185 | # existing files in the volume top dir. 
186 | - name: editdir 187 | mountPath: /hadoop/dfs/journal 188 | subPath: journal 189 | - name: editdir 190 | mountPath: /hadoop/dfs/name 191 | subPath: name 192 | - name: hdfs-config 193 | mountPath: /etc/hadoop-custom-conf 194 | readOnly: true 195 | restartPolicy: Always 196 | volumes: 197 | - name: hdfs-config 198 | configMap: 199 | name: my-hdfs-config 200 | volumeClaimTemplates: 201 | - metadata: 202 | name: editdir 203 | spec: 204 | accessModes: 205 | - "ReadWriteOnce" 206 | resources: 207 | requests: 208 | storage: "20Gi" 209 | --- 210 | # Source: hdfs/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml 211 | apiVersion: policy/v1beta1 212 | kind: PodDisruptionBudget 213 | metadata: 214 | name: my-hdfs-journalnode 215 | labels: 216 | app: hdfs-journalnode 217 | chart: hdfs-journalnode-k8s-0.1.0 218 | release: my-hdfs-journalnode 219 | spec: 220 | selector: 221 | matchLabels: 222 | app: hdfs-journalnode 223 | release: my-hdfs-journalnode 224 | minAvailable: 2 225 | -------------------------------------------------------------------------------- /tests/gold/subchart-namenode.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-namenode 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | namenode: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | replicaCount: 1 25 | resources: 26 | requests: 27 | memory: 100m 28 | 29 | COMPUTED VALUES: 30 | condition: 31 | subchart: 32 | namenode: true 33 | global: 34 | dataNodeHostPath: 35 | - /mnt/sda1/hdfs-data 36 | defaultAffinityEnabled: false 37 | fullnameOverride: my-hdfs 38 | journalnodeQuorumSize: 3 39 | jsvcEnabled: true 40 | kerberosConfigFileName: krb5.conf 41 | kerberosEnabled: false 42 | kerberosRealm: MYCOMPANY.COM 43 | namenodeHAEnabled: true 44 | podSecurityContext: 45 | enabled: false 46 | fsGroup: 1000 47 | runAsUser: 0 48 | zookeeperQuorumSize: 1 49 | hdfs-config-k8s: 50 | customHadoopConfig: 51 | coreSite: {} 52 | hdfsSite: {} 53 | hdfs-datanode-k8s: 54 | affinity: {} 55 | nodeSelector: {} 56 | tolerations: [] 57 | hdfs-journalnode-k8s: 58 | affinity: {} 59 | nodeSelector: {} 60 | persistence: 61 | accessMode: ReadWriteOnce 62 | size: 20Gi 63 | tolerations: [] 64 | hdfs-krb5-k8s: 65 | image: 66 | pullPolicy: IfNotPresent 67 | repository: gcavalcante8808/krb5-server 68 | tag: latest 69 | persistence: 70 | accessMode: ReadWriteOnce 71 | size: 20Gi 72 | service: 73 | port: 88 74 | type: ClusterIP 75 | hdfs-namenode-k8s: 76 | affinity: {} 77 | customRunScript: | 78 | #!/bin/bash -x 79 | echo Write your own script content! 80 | echo This message will disappear in 10 seconds. 
81 | sleep 10 82 | global: 83 | dataNodeHostPath: 84 | - /mnt/sda1/hdfs-data 85 | defaultAffinityEnabled: false 86 | fullnameOverride: my-hdfs 87 | journalnodeQuorumSize: 3 88 | jsvcEnabled: true 89 | kerberosConfigFileName: krb5.conf 90 | kerberosEnabled: false 91 | kerberosRealm: MYCOMPANY.COM 92 | namenodeHAEnabled: true 93 | podSecurityContext: 94 | enabled: false 95 | fsGroup: 1000 96 | runAsUser: 0 97 | zookeeperQuorumSize: 1 98 | hostNetworkEnabled: false 99 | namenodeStartScript: format-and-run.sh 100 | nodeSelector: {} 101 | persistence: 102 | accessMode: ReadWriteOnce 103 | size: 100Gi 104 | tolerations: [] 105 | hdfs-simple-namenode-k8s: 106 | affinity: {} 107 | nameNodeHostPath: /hdfs-name 108 | nodeSelector: {} 109 | tolerations: [] 110 | tags: 111 | ha: false 112 | kerberos: false 113 | simple: false 114 | zookeeper: 115 | env: 116 | ZK_HEAP_SIZE: 100m 117 | replicaCount: 1 118 | resources: 119 | requests: 120 | memory: 100m 121 | 122 | HOOKS: 123 | MANIFEST: 124 | 125 | --- 126 | # Source: hdfs/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml 127 | # Provides namenode helper scripts. Most of them are start scripts 128 | # that meet different needs. 129 | # TODO: Support upgrade of metadata in case a new Hadoop version requires it. 130 | apiVersion: v1 131 | kind: ConfigMap 132 | metadata: 133 | name: my-hdfs-namenode-scripts 134 | labels: 135 | app: hdfs-namenode 136 | chart: hdfs-namenode-k8s-0.1.0 137 | release: my-hdfs-namenode 138 | data: 139 | # A bootstrap script which will start namenode daemons after conducting 140 | # optional metadata initialization steps. The metadata initialization 141 | # steps will take place in case the metadata dir is empty, 142 | # which will be the case only for the very first run. The specific steps 143 | # will differ depending on whether the namenode is active or standby. 144 | # We also assume, for the very first run, namenode-0 will be active and 145 | # namenode-1 will be standby as StatefulSet will launch namenode-0 first 146 | # and zookeeper will determine the sole namenode to be the active one. 147 | # For active namenode, the initialization steps will format the metadata, 148 | # zookeeper dir and journal node data entries. 149 | # For standby namenode, the initialization steps will simply receieve 150 | # the first batch of metadata updates from the journal node. 151 | format-and-run.sh: | 152 | #!/usr/bin/env bash 153 | # Exit on error. Append "|| true" if you expect an error. 154 | set -o errexit 155 | # Exit on error inside any functions or subshells. 156 | set -o errtrace 157 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 158 | set -o nounset 159 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 160 | # in `mysqldump |gzip` 161 | set -o pipefail 162 | # Turn on traces, useful while debugging. 163 | set -o xtrace 164 | 165 | _HDFS_BIN=$HADOOP_PREFIX/bin/hdfs 166 | _METADATA_DIR=/hadoop/dfs/name/current 167 | if [[ "$MY_POD" = "$NAMENODE_POD_0" ]]; then 168 | if [[ ! -d $_METADATA_DIR ]]; then 169 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -format \ 170 | -nonInteractive hdfs-k8s || 171 | (rm -rf $_METADATA_DIR; exit 1) 172 | fi 173 | _ZKFC_FORMATTED=/hadoop/dfs/name/current/.hdfs-k8s-zkfc-formatted 174 | if [[ ! 
-f $_ZKFC_FORMATTED ]]; then 175 | _OUT=$($_HDFS_BIN --config $HADOOP_CONF_DIR zkfc -formatZK -nonInteractive 2>&1) 176 | # zkfc masks fatal exceptions and returns exit code 0 177 | (echo $_OUT | grep -q "FATAL") && exit 1 178 | touch $_ZKFC_FORMATTED 179 | fi 180 | elif [[ "$MY_POD" = "$NAMENODE_POD_1" ]]; then 181 | if [[ ! -d $_METADATA_DIR ]]; then 182 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -bootstrapStandby \ 183 | -nonInteractive || \ 184 | (rm -rf $_METADATA_DIR; exit 1) 185 | fi 186 | fi 187 | $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start zkfc 188 | $_HDFS_BIN --config $HADOOP_CONF_DIR namenode 189 | 190 | # A start script that will just hang indefinitely. A user can then get 191 | # inside the pod and debug. Or a user can conduct a custom manual operations. 192 | do-nothing.sh: | 193 | #!/usr/bin/env bash 194 | tail -f /var/log/dmesg 195 | 196 | # A start script that has user specified content. Can be used to conduct 197 | # ad-hoc operation as specified by a user. 198 | custom-run.sh: "#!/bin/bash -x\necho Write your own script content!\necho This message will disappear in 10 seconds.\nsleep 10\n" 199 | --- 200 | # Source: hdfs/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml 201 | # A headless service to create DNS records. 202 | apiVersion: v1 203 | kind: Service 204 | metadata: 205 | name: my-hdfs-namenode 206 | labels: 207 | app: hdfs-namenode 208 | chart: hdfs-namenode-k8s-0.1.0 209 | release: my-hdfs-namenode 210 | annotations: 211 | # TODO: Deprecated. Replace tolerate-unready-endpoints with 212 | # v1.Service.PublishNotReadyAddresses. 213 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 214 | spec: 215 | ports: 216 | - port: 8020 217 | name: fs 218 | - port: 50070 219 | name: http 220 | clusterIP: None 221 | selector: 222 | app: hdfs-namenode 223 | release: my-hdfs-namenode 224 | --- 225 | # Source: hdfs/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml 226 | apiVersion: apps/v1beta1 227 | kind: StatefulSet 228 | metadata: 229 | name: my-hdfs-namenode 230 | labels: 231 | app: hdfs-namenode 232 | chart: hdfs-namenode-k8s-0.1.0 233 | release: my-hdfs-namenode 234 | spec: 235 | serviceName: my-hdfs-namenode 236 | replicas: 2 237 | template: 238 | metadata: 239 | labels: 240 | app: hdfs-namenode 241 | release: my-hdfs-namenode 242 | spec: 243 | dnsPolicy: ClusterFirst 244 | containers: 245 | # TODO: Support hadoop version as option. 246 | - name: hdfs-namenode 247 | image: uhopper/hadoop-namenode:2.7.2 248 | env: 249 | - name: HADOOP_CUSTOM_CONF_DIR 250 | value: /etc/hadoop-custom-conf 251 | - name: MULTIHOMED_NETWORK 252 | value: "0" 253 | # Used by the start script below. 254 | - name: MY_POD 255 | valueFrom: 256 | fieldRef: 257 | fieldPath: metadata.name 258 | - name: NAMENODE_POD_0 259 | value: my-hdfs-namenode-0 260 | - name: NAMENODE_POD_1 261 | value: my-hdfs-namenode-1 262 | command: ['/bin/sh', '-c'] 263 | # The start script is provided by a config map. 264 | args: 265 | - /entrypoint.sh "/nn-scripts/format-and-run.sh" 266 | ports: 267 | - containerPort: 8020 268 | name: fs 269 | - containerPort: 50070 270 | name: http 271 | volumeMounts: 272 | - name: nn-scripts 273 | mountPath: /nn-scripts 274 | readOnly: true 275 | # Mount a subpath of the volume so that the name subdir would be a 276 | # brand new empty dir. This way, we won't get affected by existing 277 | # files in the volume top dir. 
278 | - name: metadatadir 279 | mountPath: /hadoop/dfs/name 280 | subPath: name 281 | - name: hdfs-config 282 | mountPath: /etc/hadoop-custom-conf 283 | readOnly: true 284 | restartPolicy: Always 285 | volumes: 286 | - name: nn-scripts 287 | configMap: 288 | name: my-hdfs-namenode-scripts 289 | defaultMode: 0744 290 | - name: hdfs-config 291 | configMap: 292 | name: my-hdfs-config 293 | volumeClaimTemplates: 294 | - metadata: 295 | name: metadatadir 296 | spec: 297 | accessModes: 298 | - "ReadWriteOnce" 299 | resources: 300 | requests: 301 | storage: "100Gi" 302 | --- 303 | # Source: hdfs/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml 304 | apiVersion: policy/v1beta1 305 | kind: PodDisruptionBudget 306 | metadata: 307 | name: my-hdfs-namenode 308 | labels: 309 | app: hdfs-namenode 310 | chart: hdfs-namenode-k8s-0.1.0 311 | release: my-hdfs-namenode 312 | spec: 313 | selector: 314 | matchLabels: 315 | app: hdfs-namenode 316 | release: my-hdfs-namenode 317 | minAvailable: 1 318 | -------------------------------------------------------------------------------- /tests/gold/subchart-zookeeper.gold: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME: my-hdfs-zookeeper 5 | REVISION: 1 6 | CHART: hdfs-0.1.0 7 | USER-SUPPLIED VALUES: 8 | condition: 9 | subchart: 10 | zookeeper: true 11 | global: 12 | dataNodeHostPath: 13 | - /mnt/sda1/hdfs-data 14 | defaultAffinityEnabled: false 15 | fullnameOverride: my-hdfs 16 | zookeeperQuorumSize: 1 17 | hdfs-namenode-k8s: 18 | hostNetworkEnabled: false 19 | tags: 20 | ha: false 21 | zookeeper: 22 | env: 23 | ZK_HEAP_SIZE: 100m 24 | fullnameOverride: my-hdfs-zookeeper 25 | replicaCount: 1 26 | resources: 27 | requests: 28 | memory: 100m 29 | 30 | COMPUTED VALUES: 31 | condition: 32 | subchart: 33 | zookeeper: true 34 | global: 35 | dataNodeHostPath: 36 | - /mnt/sda1/hdfs-data 37 | defaultAffinityEnabled: false 38 | fullnameOverride: my-hdfs 39 | journalnodeQuorumSize: 3 40 | jsvcEnabled: true 41 | kerberosConfigFileName: krb5.conf 42 | kerberosEnabled: false 43 | kerberosRealm: MYCOMPANY.COM 44 | namenodeHAEnabled: true 45 | podSecurityContext: 46 | enabled: false 47 | fsGroup: 1000 48 | runAsUser: 0 49 | zookeeperQuorumSize: 1 50 | hdfs-config-k8s: 51 | customHadoopConfig: 52 | coreSite: {} 53 | hdfsSite: {} 54 | hdfs-datanode-k8s: 55 | affinity: {} 56 | nodeSelector: {} 57 | tolerations: [] 58 | hdfs-journalnode-k8s: 59 | affinity: {} 60 | nodeSelector: {} 61 | persistence: 62 | accessMode: ReadWriteOnce 63 | size: 20Gi 64 | tolerations: [] 65 | hdfs-krb5-k8s: 66 | image: 67 | pullPolicy: IfNotPresent 68 | repository: gcavalcante8808/krb5-server 69 | tag: latest 70 | persistence: 71 | accessMode: ReadWriteOnce 72 | size: 20Gi 73 | service: 74 | port: 88 75 | type: ClusterIP 76 | hdfs-namenode-k8s: 77 | affinity: {} 78 | customRunScript: | 79 | #!/bin/bash -x 80 | echo Write your own script content! 81 | echo This message will disappear in 10 seconds. 
82 | sleep 10 83 | hostNetworkEnabled: false 84 | namenodeStartScript: format-and-run.sh 85 | nodeSelector: {} 86 | persistence: 87 | accessMode: ReadWriteOnce 88 | size: 100Gi 89 | tolerations: [] 90 | hdfs-simple-namenode-k8s: 91 | affinity: {} 92 | nameNodeHostPath: /hdfs-name 93 | nodeSelector: {} 94 | tolerations: [] 95 | tags: 96 | ha: false 97 | kerberos: false 98 | simple: false 99 | zookeeper: 100 | affinity: {} 101 | env: 102 | JMXAUTH: "false" 103 | JMXDISABLE: "false" 104 | JMXPORT: 1099 105 | JMXSSL: "false" 106 | ZK_CLIENT_PORT: 2181 107 | ZK_ELECTION_PORT: 3888 108 | ZK_HEAP_SIZE: 100m 109 | ZK_INIT_LIMIT: 5 110 | ZK_LOG_LEVEL: INFO 111 | ZK_MAX_CLIENT_CNXNS: 60 112 | ZK_MAX_SESSION_TIMEOUT: 40000 113 | ZK_MIN_SESSION_TIMEOUT: 4000 114 | ZK_PURGE_INTERVAL: 0 115 | ZK_SERVER_PORT: 2888 116 | ZK_SNAP_RETAIN_COUNT: 3 117 | ZK_SYNC_LIMIT: 10 118 | ZK_TICK_TIME: 2000 119 | exporters: 120 | jmx: 121 | config: 122 | lowercaseOutputName: false 123 | rules: 124 | - name: zookeeper_$2 125 | pattern: org.apache.ZooKeeperService<>(\w+) 126 | - labels: 127 | replicaId: $2 128 | name: zookeeper_$3 129 | pattern: org.apache.ZooKeeperService<>(\w+) 130 | - labels: 131 | memberType: $3 132 | replicaId: $2 133 | name: zookeeper_$4 134 | pattern: org.apache.ZooKeeperService<>(\w+) 136 | - labels: 137 | memberType: $3 138 | replicaId: $2 139 | name: zookeeper_$4_$5 140 | pattern: org.apache.ZooKeeperService<>(\w+) 142 | startDelaySeconds: 30 143 | enabled: false 144 | env: {} 145 | image: 146 | pullPolicy: IfNotPresent 147 | repository: sscaling/jmx-prometheus-exporter 148 | tag: 0.3.0 149 | livenessProbe: 150 | failureThreshold: 8 151 | httpGet: 152 | path: /metrics 153 | port: jmxxp 154 | initialDelaySeconds: 30 155 | periodSeconds: 15 156 | successThreshold: 1 157 | timeoutSeconds: 60 158 | path: /metrics 159 | ports: 160 | jmxxp: 161 | containerPort: 9404 162 | protocol: TCP 163 | readinessProbe: 164 | failureThreshold: 8 165 | httpGet: 166 | path: /metrics 167 | port: jmxxp 168 | initialDelaySeconds: 30 169 | periodSeconds: 15 170 | successThreshold: 1 171 | timeoutSeconds: 60 172 | resources: {} 173 | zookeeper: 174 | config: 175 | logLevel: info 176 | resetOnScrape: "true" 177 | enabled: false 178 | env: {} 179 | image: 180 | pullPolicy: IfNotPresent 181 | repository: josdotso/zookeeper-exporter 182 | tag: v1.1.2 183 | livenessProbe: 184 | failureThreshold: 8 185 | httpGet: 186 | path: /metrics 187 | port: zookeeperxp 188 | initialDelaySeconds: 30 189 | periodSeconds: 15 190 | successThreshold: 1 191 | timeoutSeconds: 60 192 | path: /metrics 193 | ports: 194 | zookeeperxp: 195 | containerPort: 9141 196 | protocol: TCP 197 | readinessProbe: 198 | failureThreshold: 8 199 | httpGet: 200 | path: /metrics 201 | port: zookeeperxp 202 | initialDelaySeconds: 30 203 | periodSeconds: 15 204 | successThreshold: 1 205 | timeoutSeconds: 60 206 | resources: {} 207 | fullnameOverride: my-hdfs-zookeeper 208 | global: 209 | dataNodeHostPath: 210 | - /mnt/sda1/hdfs-data 211 | defaultAffinityEnabled: false 212 | fullnameOverride: my-hdfs 213 | journalnodeQuorumSize: 3 214 | jsvcEnabled: true 215 | kerberosConfigFileName: krb5.conf 216 | kerberosEnabled: false 217 | kerberosRealm: MYCOMPANY.COM 218 | namenodeHAEnabled: true 219 | podSecurityContext: 220 | enabled: false 221 | fsGroup: 1000 222 | runAsUser: 0 223 | zookeeperQuorumSize: 1 224 | image: 225 | pullPolicy: IfNotPresent 226 | repository: gcr.io/google_samples/k8szk 227 | tag: v3 228 | jobs: 229 | chroots: 230 | activeDeadlineSeconds: 300 231 | 
backoffLimit: 5 232 | completions: 1 233 | config: 234 | create: [] 235 | enabled: false 236 | env: [] 237 | parallelism: 1 238 | resources: {} 239 | restartPolicy: Never 240 | livenessProbe: 241 | exec: 242 | command: 243 | - zkOk.sh 244 | initialDelaySeconds: 20 245 | nodeSelector: {} 246 | persistence: 247 | accessMode: ReadWriteOnce 248 | enabled: true 249 | size: 5Gi 250 | podAnnotations: {} 251 | podDisruptionBudget: 252 | maxUnavailable: 1 253 | podLabels: {} 254 | ports: 255 | client: 256 | containerPort: 2181 257 | protocol: TCP 258 | election: 259 | containerPort: 3888 260 | protocol: TCP 261 | server: 262 | containerPort: 2888 263 | protocol: TCP 264 | readinessProbe: 265 | exec: 266 | command: 267 | - zkOk.sh 268 | initialDelaySeconds: 20 269 | replicaCount: 1 270 | resources: 271 | requests: 272 | memory: 100m 273 | securityContext: 274 | fsGroup: 1000 275 | runAsUser: 1000 276 | service: 277 | annotations: {} 278 | ports: 279 | client: 280 | port: 2181 281 | protocol: TCP 282 | targetPort: client 283 | type: ClusterIP 284 | terminationGracePeriodSeconds: 1800 285 | tolerations: [] 286 | updateStrategy: 287 | type: OnDelete 288 | 289 | HOOKS: 290 | MANIFEST: 291 | 292 | --- 293 | # Source: hdfs/charts/zookeeper/templates/service-headless.yaml 294 | apiVersion: v1 295 | kind: Service 296 | metadata: 297 | name: my-hdfs-zookeeper-headless 298 | labels: 299 | app: zookeeper 300 | chart: zookeeper-1.0.0 301 | release: my-hdfs-zookeeper 302 | heritage: Tiller 303 | spec: 304 | clusterIP: None 305 | ports: 306 | - name: client 307 | port: 2181 308 | targetPort: 309 | protocol: TCP 310 | - name: election 311 | port: 3888 312 | targetPort: 313 | protocol: TCP 314 | - name: server 315 | port: 2888 316 | targetPort: 317 | protocol: TCP 318 | selector: 319 | app: zookeeper 320 | release: my-hdfs-zookeeper 321 | --- 322 | # Source: hdfs/charts/zookeeper/templates/service.yaml 323 | apiVersion: v1 324 | kind: Service 325 | metadata: 326 | name: my-hdfs-zookeeper 327 | labels: 328 | app: zookeeper 329 | chart: zookeeper-1.0.0 330 | release: my-hdfs-zookeeper 331 | heritage: Tiller 332 | annotations: 333 | spec: 334 | type: ClusterIP 335 | ports: 336 | - name: client 337 | port: 2181 338 | protocol: TCP 339 | targetPort: client 340 | 341 | selector: 342 | app: zookeeper 343 | release: my-hdfs-zookeeper 344 | --- 345 | # Source: hdfs/charts/zookeeper/templates/statefulset.yaml 346 | apiVersion: apps/v1beta1 347 | kind: StatefulSet 348 | metadata: 349 | name: my-hdfs-zookeeper 350 | labels: 351 | app: zookeeper 352 | chart: zookeeper-1.0.0 353 | release: my-hdfs-zookeeper 354 | heritage: Tiller 355 | component: server 356 | spec: 357 | serviceName: my-hdfs-zookeeper-headless 358 | replicas: 1 359 | terminationGracePeriodSeconds: 1800 360 | selector: 361 | matchLabels: 362 | app: zookeeper 363 | release: my-hdfs-zookeeper 364 | component: server 365 | updateStrategy: 366 | type: OnDelete 367 | 368 | template: 369 | metadata: 370 | labels: 371 | app: zookeeper 372 | release: my-hdfs-zookeeper 373 | component: server 374 | annotations: 375 | spec: 376 | securityContext: 377 | fsGroup: 1000 378 | runAsUser: 1000 379 | 380 | containers: 381 | 382 | - name: zookeeper 383 | image: "gcr.io/google_samples/k8szk:v3" 384 | imagePullPolicy: IfNotPresent 385 | command: 386 | - /bin/bash 387 | - -xec 388 | - zkGenConfig.sh && exec zkServer.sh start-foreground 389 | ports: 390 | - name: client 391 | containerPort: 2181 392 | protocol: TCP 393 | 394 | - name: election 395 | containerPort: 3888 396 | protocol: 
TCP 397 | 398 | - name: server 399 | containerPort: 2888 400 | protocol: TCP 401 | 402 | livenessProbe: 403 | exec: 404 | command: 405 | - zkOk.sh 406 | initialDelaySeconds: 20 407 | 408 | readinessProbe: 409 | exec: 410 | command: 411 | - zkOk.sh 412 | initialDelaySeconds: 20 413 | 414 | env: 415 | - name: ZK_REPLICAS 416 | value: "1" 417 | - name: JMXAUTH 418 | value: "false" 419 | - name: JMXDISABLE 420 | value: "false" 421 | - name: JMXPORT 422 | value: "1099" 423 | - name: JMXSSL 424 | value: "false" 425 | - name: ZK_CLIENT_PORT 426 | value: "2181" 427 | - name: ZK_ELECTION_PORT 428 | value: "3888" 429 | - name: ZK_HEAP_SIZE 430 | value: "100m" 431 | - name: ZK_INIT_LIMIT 432 | value: "5" 433 | - name: ZK_LOG_LEVEL 434 | value: "INFO" 435 | - name: ZK_MAX_CLIENT_CNXNS 436 | value: "60" 437 | - name: ZK_MAX_SESSION_TIMEOUT 438 | value: "40000" 439 | - name: ZK_MIN_SESSION_TIMEOUT 440 | value: "4000" 441 | - name: ZK_PURGE_INTERVAL 442 | value: "0" 443 | - name: ZK_SERVER_PORT 444 | value: "2888" 445 | - name: ZK_SNAP_RETAIN_COUNT 446 | value: "3" 447 | - name: ZK_SYNC_LIMIT 448 | value: "10" 449 | - name: ZK_TICK_TIME 450 | value: "2000" 451 | resources: 452 | requests: 453 | memory: 100m 454 | 455 | volumeMounts: 456 | - name: data 457 | mountPath: /var/lib/zookeeper 458 | 459 | volumeClaimTemplates: 460 | - metadata: 461 | name: data 462 | spec: 463 | accessModes: 464 | - "ReadWriteOnce" 465 | resources: 466 | requests: 467 | storage: "5Gi" 468 | --- 469 | # Source: hdfs/charts/zookeeper/templates/poddisruptionbudget.yaml 470 | apiVersion: policy/v1beta1 471 | kind: PodDisruptionBudget 472 | metadata: 473 | name: my-hdfs-zookeeper 474 | labels: 475 | app: zookeeper 476 | chart: zookeeper-1.0.0 477 | release: my-hdfs-zookeeper 478 | heritage: Tiller 479 | component: server 480 | spec: 481 | selector: 482 | matchLabels: 483 | app: zookeeper 484 | release: my-hdfs-zookeeper 485 | component: server 486 | maxUnavailable: 1 487 | -------------------------------------------------------------------------------- /tests/lib/_k8s.sh: -------------------------------------------------------------------------------- 1 | # Helper bash functions. 2 | 3 | # Wait for Kubernetes resources to be up and ready. 4 | function _wait_for_ready () { 5 | local count="$1" 6 | shift 7 | local evidence="$1" 8 | shift 9 | local attempts=60 10 | echo "Waiting till ready (count: $count): $@" 11 | while [[ "$count" != $("$@" 2>&1 | tail -n +2 | grep -c $evidence) ]]; 12 | do 13 | if [[ "$attempts" = "1" ]]; then 14 | echo "Last run: $@" 15 | "$@" || true 16 | local command="$@" 17 | command="${command/get/describe}" 18 | $command || true 19 | fi 20 | ((attempts--)) || return 1 21 | sleep 5 22 | done 23 | "$@" || true 24 | } 25 | 26 | # Wait for all expected number of nodes to be ready 27 | function k8s_all_nodes_ready () { 28 | local count="$1" 29 | shift 30 | _wait_for_ready "$count" "-v NotReady" kubectl get nodes 31 | _wait_for_ready "$count" Ready kubectl get nodes 32 | } 33 | 34 | function k8s_single_node_ready () { 35 | k8s_all_nodes_ready 1 36 | } 37 | 38 | # Wait for all expected number of pods to be ready. This works only for 39 | # pods with up to 4 containers, as we check "1/1" to "4/4" in 40 | # `kubectl get pods` output. 
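# As an illustrative sketch (the label selector below is an assumption for the
# example, not one defined in this repo), waiting for three datanode pods could
# look like:
#   k8s_all_pods_ready 3 -l app=hdfs-datanode
# This keeps polling `kubectl get pods -l app=hdfs-datanode` every 5 seconds
# until exactly three rows show a Ready count of 1/1 through 4/4.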
41 | function k8s_all_pods_ready () { 42 | local count="$1" 43 | shift 44 | local evidence="-e 1/1 -e 2/2 -e 3/3 -e 4/4" 45 | _wait_for_ready "$count" "$evidence" kubectl get pods "$@" 46 | } 47 | 48 | function k8s_single_pod_ready () { 49 | k8s_all_pods_ready 1 "$@" 50 | } 51 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Exit on error. Append "|| true" if you expect an error. 4 | set -o errexit 5 | # Exit on error inside any functions or subshells. 6 | set -o errtrace 7 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 8 | set -o nounset 9 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 10 | # in `mysqldump |gzip` 11 | set -o pipefail 12 | if [[ "${DEBUG:-}" = "true" ]]; then 13 | # Turn on traces, useful while debugging but commented out by default 14 | set -o xtrace 15 | fi 16 | 17 | _MY_SCRIPT="${BASH_SOURCE[0]}" 18 | _TEST_DIR=$(cd "$(dirname "$_MY_SCRIPT")" && pwd) 19 | export PATH=${_TEST_DIR}/bin:$PATH 20 | source ${_TEST_DIR}/lib/_k8s.sh 21 | 22 | _PROJECT_DIR=$(cd "$(dirname "$_TEST_DIR")" && pwd) 23 | _CHART_DIR=${_PROJECT_DIR}/charts 24 | 25 | function _run () { 26 | local attempts=2 27 | echo Running: "$@" 28 | until "$@"; do 29 | ((attempts--)) || return 1 30 | sleep 5 31 | done 32 | } 33 | 34 | function _helm_diff_and_install () { 35 | local gold=$1 36 | shift 37 | echo Running: helm install --dry-run --debug "$@" 38 | local tmpfile=$(mktemp ${_TEST_DIR}/tmp/helm-dry-run.XXXXXX) 39 | (helm install --dry-run --debug "$@" | \ 40 | grep -v -e "^RELEASED" -e "^\[debug\]") > $tmpfile 41 | if [[ "${BLESS_DIFF:-}" = "true" ]]; then 42 | echo Blessing $tmpfile 43 | cp -f $tmpfile $gold 44 | else 45 | echo Comparing $gold and $tmpfile 46 | if [[ "${CRASH_ON_DIFF:-false}" = "true" ]]; then 47 | diff $gold $tmpfile 48 | else 49 | diff $gold $tmpfile || true 50 | fi 51 | fi 52 | rm "$tmpfile" 53 | if [[ "${DRY_RUN_ONLY:-false}" = "true" ]]; then 54 | return 55 | fi 56 | echo Running: helm install "$@" 57 | helm install "$@" 58 | } 59 | 60 | kubectl cluster-info 61 | cd $_CHART_DIR 62 | rm -rf hdfs-k8s/charts hdfs-k8s/requirements.lock 63 | _run helm repo add incubator \ 64 | https://kubernetes-charts-incubator.storage.googleapis.com/ 65 | _run helm dependency build hdfs-k8s 66 | 67 | _DEFAULT_CASES="*" 68 | : "${CASES:=$_DEFAULT_CASES}" 69 | _CASES=$(ls ${_TEST_DIR}/cases/${CASES}) 70 | for _CASE in $_CASES; do 71 | echo Running test case: $_CASE 72 | source $_CASE 73 | run_test_case 74 | if [[ "${SKIP_CLEANUP:-false}" = "false" ]]; then 75 | echo Cleaning up test case: $_CASE 76 | cleanup_test_case 77 | fi 78 | done 79 | -------------------------------------------------------------------------------- /tests/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Exit on error. Append "|| true" if you expect an error. 4 | set -o errexit 5 | # Exit on error inside any functions or subshells. 6 | set -o errtrace 7 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 8 | set -o nounset 9 | # Catch an error in command pipes. e.g. 
mysqldump fails (but gzip succeeds) 10 | # in `mysqldump |gzip` 11 | set -o pipefail 12 | if [[ "${DEBUG:-}" = "true" ]]; then 13 | # Turn on traces, useful while debugging but commented out by default 14 | set -o xtrace 15 | fi 16 | 17 | _MY_SCRIPT="${BASH_SOURCE[0]}" 18 | _MY_DIR=$(cd "$(dirname "$_MY_SCRIPT")" && pwd) 19 | # Avoids 1.7.x because of https://github.com/kubernetes/minikube/issues/2240 20 | # Also avoids 1.9.4 because of 21 | # https://github.com/kubernetes/kubernetes/issues/61076#issuecomment-376660233 22 | # TODO: Try 1.9.x > 1.9.4 when a new minikube version supports that. 23 | _DEFAULT_K8S_VERSION=v1.10.0 24 | : "${K8S_VERSION:=$_DEFAULT_K8S_VERSION}" 25 | _DEFAULT_MINIKUBE_VERSION=v0.26.0 26 | : "${MINIKUBE_VERSION:=$_DEFAULT_MINIKUBE_VERSION}" 27 | _HELM_VERSION=v2.8.1 28 | 29 | _UNAME_OUT=$(uname -s) 30 | case "${_UNAME_OUT}" in 31 | Linux*) _MY_OS=linux;; 32 | Darwin*) _MY_OS=darwin;; 33 | *) _MY_OS="UNKNOWN:${unameOut}" 34 | esac 35 | echo "Local OS is ${_MY_OS}" 36 | 37 | export MINIKUBE_WANTUPDATENOTIFICATION=false 38 | export MINIKUBE_WANTREPORTERRORPROMPT=false 39 | export CHANGE_MINIKUBE_NONE_USER=true 40 | 41 | cd $_MY_DIR 42 | 43 | source lib/_k8s.sh 44 | 45 | rm -rf tmp 46 | mkdir -p bin tmp 47 | if [[ ! -x bin/kubectl ]]; then 48 | echo Downloading kubectl, which is a requirement for using minikube. 49 | curl -Lo bin/kubectl \ 50 | https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/${_MY_OS}/amd64/kubectl 51 | chmod +x bin/kubectl 52 | fi 53 | if [[ ! -x bin/minikube ]]; then 54 | echo Downloading minikube. 55 | curl -Lo bin/minikube \ 56 | https://storage.googleapis.com/minikube/releases/${MINIKUBE_VERSION}/minikube-${_MY_OS}-amd64 57 | chmod +x bin/minikube 58 | fi 59 | if [[ ! -x bin/helm ]]; then 60 | echo Downloading helm 61 | curl -Lo tmp/helm.tar.gz \ 62 | https://storage.googleapis.com/kubernetes-helm/helm-${_HELM_VERSION}-${_MY_OS}-amd64.tar.gz 63 | (cd tmp; tar xfz helm.tar.gz; mv ${_MY_OS}-amd64/helm ${_MY_DIR}/bin) 64 | fi 65 | 66 | export PATH="${_MY_DIR}/bin:$PATH" 67 | 68 | if [[ "${USE_MINIKUBE_DRIVER_NONE:-}" = "true" ]]; then 69 | # Run minikube with none driver. 70 | # See https://blog.travis-ci.com/2017-10-26-running-kubernetes-on-travis-ci-with-minikube 71 | _VM_DRIVER="--vm-driver=none" 72 | if [[ ! -x /usr/local/bin/nsenter ]]; then 73 | # From https://engineering.bitnami.com/articles/implementing-kubernetes-integration-tests-in-travis.html 74 | # Travis ubuntu trusty env doesn't have nsenter, needed for --vm-driver=none 75 | which nsenter >/dev/null && return 0 76 | echo "INFO: Building 'nsenter' ..." 77 | cat <<-EOF | docker run -i --rm -v "$(pwd):/build" ubuntu:14.04 >& nsenter.build.log 78 | apt-get update 79 | apt-get install -qy git bison build-essential autopoint libtool automake autoconf gettext pkg-config 80 | git clone --depth 1 git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git /tmp/util-linux 81 | cd /tmp/util-linux 82 | ./autogen.sh 83 | ./configure --without-python --disable-all-programs --enable-nsenter 84 | make nsenter 85 | cp -pfv nsenter /build 86 | EOF 87 | if [ ! -f ./nsenter ]; then 88 | echo "ERROR: nsenter build failed, log:" 89 | cat nsenter.build.log 90 | return 1 91 | fi 92 | echo "INFO: nsenter build OK" 93 | sudo mv ./nsenter /usr/local/bin 94 | fi 95 | fi 96 | 97 | _MINIKUBE="minikube" 98 | if [[ "${USE_SUDO_MINIKUBE:-}" = "true" ]]; then 99 | _MINIKUBE="sudo PATH=$PATH bin/minikube" 100 | fi 101 | 102 | # The default bootstrapper kubeadm assumes CentOS. 
Travis is Debian. 103 | $_MINIKUBE config set bootstrapper localkube 104 | $_MINIKUBE config set ShowBootstrapperDeprecationNotification false || true 105 | $_MINIKUBE start --kubernetes-version=${K8S_VERSION} \ 106 | ${_VM_DRIVER:-} 107 | # Fix the kubectl context, as it's often stale. 108 | $_MINIKUBE update-context 109 | echo Minikube disks: 110 | if [[ "${USE_MINIKUBE_DRIVER_NONE:-}" = "true" ]]; then 111 | # minikube does not support ssh for --vm-driver=none 112 | df 113 | else 114 | $_MINIKUBE ssh df 115 | fi 116 | 117 | # Wait for Kubernetes to be up and ready. 118 | k8s_single_node_ready 119 | 120 | echo Minikube addons: 121 | $_MINIKUBE addons list 122 | kubectl get storageclass 123 | echo Showing kube-system pods 124 | kubectl get -n kube-system pods 125 | 126 | (k8s_single_pod_ready -n kube-system -l component=kube-addon-manager) || 127 | (_ADDON=$(kubectl get pod -n kube-system -l component=kube-addon-manager 128 | --no-headers -o name| cut -d/ -f2); 129 | echo Addon-manager describe:; 130 | kubectl describe pod -n kube-system $_ADDON; 131 | echo Addon-manager log:; 132 | kubectl logs -n kube-system $_ADDON; 133 | exit 1) 134 | k8s_single_pod_ready -n kube-system -l k8s-app=kube-dns 135 | k8s_single_pod_ready -n kube-system storage-provisioner 136 | 137 | helm init 138 | k8s_single_pod_ready -n kube-system -l name=tiller 139 | -------------------------------------------------------------------------------- /tests/teardown.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Exit on error. Append "|| true" if you expect an error. 3 | set -o errexit 4 | # Exit on error inside any functions or subshells. 5 | set -o errtrace 6 | # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR 7 | set -o nounset 8 | # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) 9 | # in `mysqldump |gzip` 10 | set -o pipefail 11 | if [[ "${DEBUG:-}" = "true" ]]; then 12 | # Turn on traces, useful while debugging but commented out by default 13 | set -o xtrace 14 | fi 15 | 16 | _MY_SCRIPT="${BASH_SOURCE[0]}" 17 | _MY_DIR=$(cd "$(dirname "$_MY_SCRIPT")" && pwd) 18 | 19 | cd $_MY_DIR 20 | export PATH=${_MY_DIR}/bin:$PATH 21 | 22 | minikube status || true 23 | minikube stop || true 24 | minikube delete || true 25 | rm -rf tmp 26 | rm -rf bin 27 | -------------------------------------------------------------------------------- /tests/values/common.yaml: -------------------------------------------------------------------------------- 1 | # Common values. 2 | 3 | zookeeper: 4 | replicaCount: 1 5 | env: 6 | ZK_HEAP_SIZE: 100m 7 | resources: 8 | requests: 9 | memory: 100m 10 | 11 | # Disables hostNetwork so namenode pods on a single minikube node can avoid 12 | # port conflict. 13 | hdfs-namenode-k8s: 14 | hostNetworkEnabled: false 15 | 16 | global: 17 | zookeeperQuorumSize: 1 18 | # Disables affinity so multiple member pods can launch on a single minikube 19 | # node. 20 | defaultAffinityEnabled: false 21 | dataNodeHostPath: 22 | - /mnt/sda1/hdfs-data 23 | -------------------------------------------------------------------------------- /tests/values/custom-hadoop-config.yaml: -------------------------------------------------------------------------------- 1 | # Custom hadoop config keys passed to the hdfs configmap as extra keys. 
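# As a sketch of the intended effect (assumed behavior of the hdfs-config-k8s
# templates, not verified here): a key under `coreSite` such as
# `hadoop.http.authentication.type: kerberos` is expected to surface as a
# <property> with that name and value in the generated core-site.xml, and keys
# under `hdfsSite` likewise in the generated hdfs-site.xml.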
2 | hdfs-config-k8s.customHadoopConfig:
3 |   coreSite:
4 |     hadoop.http.authentication.type: kerberos
5 |     hadoop.http.authentication.simple.anonymous.allowed: "false"
6 |   hdfsSite:
7 |     dfs.replication: 2
8 | 
--------------------------------------------------------------------------------
/tests/values/kerberos.yaml:
--------------------------------------------------------------------------------
1 | # Values for enabling Kerberos.
2 | 
3 | global:
4 |   kerberosEnabled: true
5 |   kerberosRealm: MYCOMPANY.COM
6 | 
--------------------------------------------------------------------------------
/topology/README.md:
--------------------------------------------------------------------------------
 1 | HDFS namenode topology plugins for various Kubernetes network providers.
 2 | 
 3 | The HDFS namenode handles RPC requests from clients. The namenode often gets the IP
 4 | addresses of clients from the remote endpoints of RPC connections.
 5 | In Kubernetes, HDFS clients may run inside pods. The client IP addresses can
 6 | be virtual pod IP addresses. This can confuse the namenode when it runs
 7 | the data locality optimization code, which requires comparing client
 8 | IP addresses against the IP addresses associated with datanodes. The latter
 9 | are physical IP addresses of the cluster nodes that datanodes are running on.
10 | The client pod virtual IP addresses would not match any datanode IP addresses.
11 | 
12 | We can configure the namenode with the topology plugins in this directory to
13 | correct the namenode data locality code. So far, we have learned that only
14 | Google Container Engine (GKE) suffers from the data locality issue caused
15 | by the virtual pod IP addresses exposed to the namenode. (See below.)
16 | GKE uses the native `kubenet` network provider.
17 | 
18 | - TODO: Currently, there is no easy way to launch the namenode helm chart
19 |   with a topology plugin configured. Build a new Docker image with
20 |   topology plugins and support the configuration. See the plugin README
21 |   for installation/configuration instructions.
22 | 
23 | Many K8s network providers do not need any topology plugins. Most K8s network
24 | providers conduct IP masquerading or Network Address Translation (NAT) when pod
25 | packets head outside the pod IP subnet. They rewrite the headers of pod packets,
26 | putting in the physical IP addresses of the cluster nodes that the pods are running on.
27 | The namenode and datanodes use `hostNetwork`, so their IP addresses are outside
28 | the pod IP subnet. As a result, the namenode will see the physical cluster node
29 | IP address from client RPC connections originating from pods. Data locality
30 | will work fine with these providers.
31 | 
32 | Here is the list of network providers that conduct NAT:
33 | 
34 | - By design, overlay networks such as weave and flannel conduct NAT for any
35 |   pod packet heading outside the local pod network. This means packets heading to
36 |   a node IP are also NATed. (In an overlay network, pod packets heading to another
37 |   pod on a different node get their pod IPs restored once they arrive inside the
38 |   destination node.)
39 | - Calico is a popular non-overlay network provider. It turns out Calico can
40 |   also be configured to do NAT between the pod subnet and the node subnet, thanks
41 |   to the `nat-outgoing` option. The option can be easily turned on and is enabled
42 |   by default.
43 | - In EC2, the standard tool kops can provision k8s clusters using the same
44 |   native kubenet that GKE uses. Unlike GKE, it turns out kubenet in EC2 does
 45 |   NAT from the pod subnet to the host network. This is because kops sets the option
 46 |   --non-masquerade-cidr=100.64.0.0/10 to cover only the pod IP subnet. Traffic to
 47 |   IPs outside this range gets NATed. In EC2, cluster hosts like 172.20.47.241
 48 |   sit outside this CIDR. This means pod packets heading to node IPs will be
 49 |   masqueraded. (Note that GKE kubenet uses the default value of
 50 |   --non-masquerade-cidr, 10.0.0.0/8, which covers both the pod IP and node IP
 51 |   subnets. GKE does not expose any way to override this value.)
 52 | 
 53 | Over time, we will also check the behavior of other network providers and
 54 | document them here.
 55 | 
 56 | Here's how one can check whether data locality in the namenode works.
 57 | 1. Launch an HDFS client pod and get a shell inside the pod.
 58 | ```
 59 |   $ kubectl run -i --tty hadoop --image=uhopper/hadoop:2.7.2  \
 60 |       --generator="run-pod/v1" --command -- /bin/bash
 61 | ```
 62 | 2. Inside the pod, create a simple text file on HDFS.
 63 | ```
 64 |   $ hadoop fs  \
 65 |       -fs hdfs://hdfs-namenode-0.hdfs-namenode.default.svc.cluster.local  \
 66 |       -cp file:/etc/hosts /hosts
 67 | ```
 68 | 3. Set the number of replicas for the file to the number of your cluster
 69 | nodes. This ensures that there will be a copy of the file on the cluster node
 70 | that your client pod is running on. Wait some time until this happens.
 71 | ```
 72 |   $ hadoop fs -setrep NUM-REPLICAS /hosts
 73 | ```
 74 | 4. Run the following `hdfs cat` command. From the debug messages, see
 75 | which datanode is being used. Make sure it is your local datanode. (You can
 76 | get the local datanode's host IP from `$ kubectl get pods hadoop -o json | grep hostIP`.
 77 | Run this outside the pod.)
 78 | ```
 79 |   $ hadoop --loglevel DEBUG fs  \
 80 |       -fs hdfs://hdfs-namenode-0.hdfs-namenode.default.svc.cluster.local  \
 81 |       -cat /hosts
 82 |   ...
 83 |   17/04/24 20:51:28 DEBUG hdfs.DFSClient: Connecting to datanode 10.128.0.4:50010
 84 |   ...
 85 | ```
 86 | 
 87 | If it is not, check whether your local datanode even appears in the list from the
 88 | debug messages above. If it does not, that is because step (3) has not
 89 | finished yet; wait longer. (You can use a smaller cluster for this test if that
 90 | is possible.)
 91 | ```
 92 |   17/04/24 20:51:28 DEBUG hdfs.DFSClient: newInfo = LocatedBlocks{
 93 |     fileLength=199
 94 |     underConstruction=false
 95 |     blocks=[LocatedBlock{BP-347555225-10.128.0.2-1493066928989:blk_1073741825_1001;
 96 |       getBlockSize()=199; corrupt=false; offset=0;
 97 |       locs=[DatanodeInfoWithStorage[10.128.0.4:50010,DS-d2de9d29-6962-4435-a4b4-aadf4ea67e46,DISK],
 98 |         DatanodeInfoWithStorage[10.128.0.3:50010,DS-0728ffcf-f400-4919-86bf-af0f9af36685,DISK],
 99 |         DatanodeInfoWithStorage[10.128.0.2:50010,DS-3a881114-af08-47de-89cf-37dec051c5c2,DISK]]}]
100 |     lastLocatedBlock=LocatedBlock{BP-347555225-10.128.0.2-1493066928989:blk_1073741825_1001;
101 | ```
102 | 5. Repeat the `hdfs cat` command multiple times. Check whether the same datanode
103 | is consistently used.
104 | 
--------------------------------------------------------------------------------
/topology/pod-cidr/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.iml
3 | target
4 | 
--------------------------------------------------------------------------------
/topology/pod-cidr/README.md:
--------------------------------------------------------------------------------
1 | A namenode topology plugin that maps pods to the cluster nodes they run on, for a
2 | K8s cluster configured with pod CIDRs. Currently, this is known to work only with the `kubenet` network
 3 | provider. For more details, see README.md of the parent directory.
 4 | 
 5 | ## Installation
 6 | To use this plugin, add the following to hdfs-site.xml:
 7 | 
 8 | ```
 9 | <property>
10 |   <name>net.topology.node.switch.mapping.impl</name>
11 |   <value>org.apache.hadoop.net.PodCIDRToNodeMapping</value>
12 | </property>
13 | <property>
14 |   <name>net.topology.impl</name>
15 |   <value>org.apache.hadoop.net.NetworkTopologyWithNodeGroup</value>
16 | </property>
17 | <property>
18 |   <name>net.topology.nodegroup.aware</name>
19 |   <value>true</value>
20 | </property>
21 | <property>
22 |   <name>dfs.block.replicator.classname</name>
23 |   <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithNodeGroup</value>
24 | </property>
25 | ```
26 | 
--------------------------------------------------------------------------------
/topology/pod-cidr/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!-- license header comment not recoverable from this capture -->
13 | <project xmlns="http://maven.apache.org/POM/4.0.0"
14 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
15 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
16 |                              http://maven.apache.org/xsd/maven-4.0.0.xsd">
17 |   <modelVersion>4.0.0</modelVersion>
18 |   <artifactId>pod-cidr-namenode-topology-plugin</artifactId>
19 |   <groupId>hdfs-k8s</groupId>
20 |   <version>0.1-SNAPSHOT</version>
21 |   <description>HDFS topology plugin using pod CIDR</description>
22 |   <name>pod CIDR namenode topology plugin</name>
23 |   <packaging>jar</packaging>
24 |   <properties>
25 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
26 |   </properties>
27 |   <build>
28 |     <plugins>
29 |       <plugin>
30 |         <groupId>org.apache.maven.plugins</groupId>
31 |         <artifactId>maven-compiler-plugin</artifactId>
32 |         <version>3.5.1</version>
33 |         <configuration>
34 |           <source>1.7</source>
35 |           <target>1.7</target>
36 |         </configuration>
37 |       </plugin>
38 |     </plugins>
39 |   </build>
40 |   <dependencies>
41 |     <dependency>
42 |       <groupId>commons-cli</groupId>
43 |       <artifactId>commons-cli</artifactId>
44 |       <version>1.3.1</version>
45 |     </dependency>
46 |     <dependency>
47 |       <groupId>commons-logging</groupId>
48 |       <artifactId>commons-logging</artifactId>
49 |       <version>1.1</version>
50 |     </dependency>
51 |     <dependency>
52 |       <groupId>commons-net</groupId>
53 |       <artifactId>commons-net</artifactId>
54 |       <version>3.1</version>
55 |     </dependency>
56 |     <dependency>
57 |       <groupId>com.google.guava</groupId>
58 |       <artifactId>guava</artifactId>
59 |       <version>11.0.2</version>
60 |     </dependency>
61 |     <dependency>
62 |       <groupId>io.fabric8</groupId>
63 |       <artifactId>kubernetes-client</artifactId>
64 |       <version>2.2.1</version>
65 |     </dependency>
66 |     <dependency>
67 |       <groupId>log4j</groupId>
68 |       <artifactId>log4j</artifactId>
69 |       <version>1.2.17</version>
70 |     </dependency>
71 |     <dependency>
72 |       <groupId>org.apache.commons</groupId>
73 |       <artifactId>commons-lang3</artifactId>
74 |       <version>3.5</version>
75 |     </dependency>
76 |     <dependency>
77 |       <groupId>org.apache.hadoop</groupId>
78 |       <artifactId>hadoop-common</artifactId>
79 |       <version>2.7.3</version>
80 |       <scope>provided</scope>
81 |     </dependency>
82 |   </dependencies>
83 | </project>
84 | 
--------------------------------------------------------------------------------
/topology/pod-cidr/src/main/java/org/apache/hadoop/net/PodCIDRToNodeMapping.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements. See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership. The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License. You may obtain a copy of the License at
 9 |  *
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 | */ 18 | package org.apache.hadoop.net; 19 | 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Set; 23 | import java.util.concurrent.Executors; 24 | import javax.annotation.Nullable; 25 | import javax.annotation.concurrent.GuardedBy; 26 | 27 | import com.google.common.collect.ImmutableList; 28 | import com.google.common.collect.ImmutableMap; 29 | import com.google.common.collect.ImmutableSet; 30 | import com.google.common.collect.Lists; 31 | import com.google.common.collect.Maps; 32 | import com.google.common.collect.Sets; 33 | import com.google.common.net.InetAddresses; 34 | import com.google.common.util.concurrent.ThreadFactoryBuilder; 35 | import io.fabric8.kubernetes.api.model.Node; 36 | import io.fabric8.kubernetes.api.model.NodeList; 37 | import io.fabric8.kubernetes.client.Config; 38 | import io.fabric8.kubernetes.client.ConfigBuilder; 39 | import io.fabric8.kubernetes.client.DefaultKubernetesClient; 40 | import io.fabric8.kubernetes.client.KubernetesClient; 41 | import io.fabric8.kubernetes.client.utils.HttpClientUtils; 42 | import okhttp3.Dispatcher; 43 | import okhttp3.OkHttpClient; 44 | import org.apache.commons.cli.BasicParser; 45 | import org.apache.commons.cli.CommandLine; 46 | import org.apache.commons.cli.CommandLineParser; 47 | import org.apache.commons.cli.Option; 48 | import org.apache.commons.cli.Options; 49 | import org.apache.commons.cli.ParseException; 50 | import org.apache.commons.lang3.tuple.ImmutablePair; 51 | import org.apache.commons.logging.Log; 52 | import org.apache.commons.logging.LogFactory; 53 | import org.apache.commons.net.util.SubnetUtils; 54 | import org.apache.commons.net.util.SubnetUtils.SubnetInfo; 55 | import org.apache.hadoop.conf.Configuration; 56 | import org.apache.log4j.BasicConfigurator; 57 | import org.apache.log4j.Level; 58 | import org.apache.log4j.Logger; 59 | 60 | /** 61 | * A namenode topology plugin mapping pods to cluster nodes for a K8s configured with pod CIDR. 62 | * 63 | * For each k8s pod, determines a network path with three components. The full path would look like: 64 | * 65 | * RACK-NAME '/' NODE-NAME '/' POD-HOST 66 | * 67 | * , where NODE-NAME is the cluster node that the pod is running on. 68 | * 69 | * To comply with this, datanodes will be also put the node name into the same hierarchy. 70 | * 71 | * RACK-NAME '/' NODE-NAME '/' NODE-NAME 72 | * 73 | * This way, the namenode will see the datanode and pods in the same node are closer than otherwise. 74 | * 75 | * The resolve method below only returns the first parts for input entries. 76 | * 77 | * Note this three level hierarchy requires NetworkTopologyWithNodeGroup to be used in namenode. 78 | * For details on installation instruction, see README.md at the project directory. 79 | */ 80 | @SuppressWarnings("unused") 81 | public class PodCIDRToNodeMapping extends AbstractDNSToSwitchMapping { 82 | 83 | private static final String DEFAULT_NETWORK_LOCATION = NetworkTopology.DEFAULT_RACK + 84 | NetworkTopologyWithNodeGroup.DEFAULT_NODEGROUP; 85 | 86 | private static Log log = LogFactory.getLog(PodCIDRToNodeMapping.class); 87 | 88 | @GuardedBy("this") 89 | @Nullable private KubernetesClient kubernetesClient; 90 | @GuardedBy("this") 91 | @Nullable private PodCIDRLookup podCIDRLookup; 92 | 93 | @SuppressWarnings("unused") 94 | public PodCIDRToNodeMapping() { 95 | // Do nothing. 
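    // Kept as a public no-arg constructor: the namenode typically instantiates the
    // configured DNSToSwitchMapping implementation reflectively once it is named by
    // the net.topology.node.switch.mapping.impl setting (see the plugin README).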
 96 |   }
 97 | 
 98 |   @SuppressWarnings("unused")
 99 |   public PodCIDRToNodeMapping(Configuration conf) {
100 |     super(conf);
101 |   }
102 | 
103 |   @Override
104 |   public List<String> resolve(List<String> names) {
105 |     List<String> networkPathDirList = Lists.newArrayList();
106 |     for (String name : names) {
107 |       String networkPathDir = resolveName(name);
108 |       networkPathDirList.add(networkPathDir);
109 |     }
110 |     if (log.isDebugEnabled()) {
111 |       log.debug("Resolved " + names + " to " + networkPathDirList);
112 |     }
113 |     return ImmutableList.copyOf(networkPathDirList);
114 |   }
115 | 
116 |   @Override
117 |   public void reloadCachedMappings() {
118 |     // Do nothing.
119 |   }
120 | 
121 |   @Override
122 |   public void reloadCachedMappings(List<String> list) {
123 |     // Do nothing.
124 |   }
125 | 
126 |   private String resolveName(String name) {
127 |     String networkPathDir = resolveClusterNode(name);
128 |     if (!DEFAULT_NETWORK_LOCATION.equals(networkPathDir)) {
129 |       return networkPathDir;
130 |     }
131 |     return resolvePodIP(name);
132 |   }
133 | 
134 |   private String resolveClusterNode(String clusterNodeName) {
135 |     if (InetAddresses.isInetAddress(clusterNodeName)) {
136 |       return DEFAULT_NETWORK_LOCATION;
137 |     }
138 |     String hostName = clusterNodeName.split("\\.")[0];
139 |     PodCIDRLookup lookup = getOrFetchPodCIDR();
140 |     if (lookup.containsNode(clusterNodeName) || lookup.containsNode(hostName)) {
141 |       return getNetworkPathDir(hostName);
142 |     }
143 |     return DEFAULT_NETWORK_LOCATION;
144 |   }
145 | 
146 |   private String resolvePodIP(String podIP) {
147 |     if (!InetAddresses.isInetAddress(podIP)) {
148 |       return DEFAULT_NETWORK_LOCATION;
149 |     }
150 |     PodCIDRLookup lookup = getOrFetchPodCIDR();
151 |     String nodeName = lookup.findNodeByPodIP(podIP);
152 |     if (nodeName.length() > 0) {
153 |       return getNetworkPathDir(nodeName);
154 |     }
155 |     return DEFAULT_NETWORK_LOCATION;
156 |   }
157 | 
158 |   private static String getNetworkPathDir(String node) {
159 |     return NetworkTopology.DEFAULT_RACK + NodeBase.PATH_SEPARATOR_STR + node;
160 |   }
161 | 
162 |   private synchronized PodCIDRLookup getOrFetchPodCIDR() {
163 |     if (podCIDRLookup != null) {
164 |       // TODO. Support refresh.
165 |       return podCIDRLookup;
166 |     }
167 |     podCIDRLookup = PodCIDRLookup.fetchPodCIDR(getOrCreateKubernetesClient());
168 |     if (log.isDebugEnabled()) {
169 |       log.debug("Fetched pod CIDR per node and built a lookup" + podCIDRLookup);
170 |     }
171 |     return podCIDRLookup;
172 |   }
173 | 
174 |   private synchronized KubernetesClient getOrCreateKubernetesClient() {
175 |     if (kubernetesClient != null) {
176 |       return kubernetesClient;
177 |     }
178 |     // Disable the ping thread that is not daemon, in order to allow the main thread to shut down
179 |     // upon errors. Otherwise, the namenode will hang indefinitely.
180 |     Config config = new ConfigBuilder()
181 |         .withWebsocketPingInterval(0)
182 |         .build();
183 |     // Use a Dispatcher with a custom executor service that creates daemon threads. The default
184 |     // executor service used by Dispatcher creates non-daemon threads.
185 |     OkHttpClient httpClient = HttpClientUtils.createHttpClient(config).newBuilder()
186 |         .dispatcher(new Dispatcher(
187 |             Executors.newCachedThreadPool(
188 |                 new ThreadFactoryBuilder().setDaemon(true)
189 |                     .setNameFormat("k8s-topology-plugin-%d")
190 |                     .build())))
191 |         .build();
192 |     kubernetesClient = new DefaultKubernetesClient(httpClient, config);
193 |     return kubernetesClient;
194 |   }
195 | 
196 |   /**
197 |    * Looks up a node that runs the pod with a given pod IP address.
198 |    *
199 |    * Each K8s node runs a number of pods. K8s pods have unique virtual IP addresses. In kubenet,
200 |    * each node is assigned a pod IP subnet distinct from other nodes, which can be denoted by
201 |    * a pod CIDR. For instance, node A can be assigned 10.0.0.0/24 while node B gets 10.0.1.0/24.
202 |    * When a pod has an IP value, say 10.0.1.10, it should match node B.
203 |    *
204 |    * The key lookup data structure is the podSubnetToNode list below. The list contains 2-entry
205 |    * tuples.
206 |    * - The first entry is netmask values of pod subnets. e.g. ff.ff.ff.00 for /24.
207 |    *   (We expect only one netmask key for now, but the list can have multiple entries to support
208 |    *   general cases)
209 |    * - The second entry is a map of a pod network address, associated with the netmask, to the
210 |    *   cluster node. e.g. 10.0.0.0 -> node A and 10.0.1.0 -> node B.
211 |    */
212 |   private static class PodCIDRLookup {
213 | 
214 |     // See the class comment above.
215 |     private final ImmutableList<ImmutablePair<Netmask, ImmutableMap<NetworkAddress, String>>>
216 |         podSubnetToNode;
217 |     // K8s cluster node names.
218 |     private final ImmutableSet<String> nodeNames;
219 | 
220 |     PodCIDRLookup() {
221 |       this(ImmutableList.<ImmutablePair<Netmask, ImmutableMap<NetworkAddress, String>>>of(),
222 |           ImmutableSet.<String>of());
223 |     }
224 | 
225 |     private PodCIDRLookup(
226 |         ImmutableList<ImmutablePair<Netmask, ImmutableMap<NetworkAddress, String>>> podSubnetToNode,
227 |         ImmutableSet<String> nodeNames) {
228 |       this.nodeNames = nodeNames;
229 |       this.podSubnetToNode = podSubnetToNode;
230 |     }
231 | 
232 |     boolean containsNode(String nodeName) {
233 |       return nodeNames.contains(nodeName);
234 |     }
235 | 
236 |     String findNodeByPodIP(String podIP) {
237 |       for (ImmutablePair<Netmask, ImmutableMap<NetworkAddress, String>> entry : podSubnetToNode) {
238 |         Netmask netmask = entry.getLeft();
239 |         ImmutableMap<NetworkAddress, String> networkToNode = entry.getRight();
240 |         // Computes the subnet that results from the netmask applied to the pod IP.
241 |         SubnetInfo podSubnetToCheck;
242 |         try {
243 |           podSubnetToCheck = new SubnetUtils(podIP, netmask.getValue()).getInfo();
244 |         } catch (IllegalArgumentException e) {
245 |           log.warn(e);
246 |           continue;
247 |         }
248 |         String networkAddress = podSubnetToCheck.getNetworkAddress();
249 |         String nodeName = networkToNode.get(new NetworkAddress(networkAddress));
250 |         if (nodeName != null) {  // The cluster node is in charge of this pod IP subnet.
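          // Matches the example in the class comment above: pod IP 10.0.1.10 under
          // netmask 255.255.255.0 reduces to network address 10.0.1.0, which maps to
          // node B, the node that owns that pod CIDR.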
251 |           return nodeName;
252 |         }
253 |       }
254 |       return "";
255 |     }
256 | 
257 |     static PodCIDRLookup fetchPodCIDR(KubernetesClient kubernetesClient) {
258 |       Set<String> nodeNames = Sets.newHashSet();
259 |       Map<String, Map<String, String>> netmaskToNetworkToNode = Maps.newHashMap();
260 |       NodeList nodes = kubernetesClient.nodes().list();
261 |       for (Node node : nodes.getItems()) {
262 |         String nodeName = node.getMetadata().getName();
263 |         @Nullable String podCIDR = node.getSpec().getPodCIDR();
264 |         if (podCIDR == null || podCIDR.length() == 0) {
265 |           log.warn("Could not get pod CIDR for node " + nodeName);
266 |           continue;
267 |         }
268 |         if (log.isDebugEnabled()) {
269 |           log.debug("Found pod CIDR " + podCIDR + " for node " + nodeName);
270 |         }
271 |         nodeNames.add(nodeName);
272 |         SubnetInfo subnetInfo;
273 |         try {
274 |           subnetInfo = new SubnetUtils(podCIDR).getInfo();
275 |         } catch (IllegalArgumentException e) {
276 |           log.debug(e);
277 |           continue;
278 |         }
279 |         String netmask = subnetInfo.getNetmask();
280 |         String networkAddress = subnetInfo.getNetworkAddress();
281 |         Map<String, String> networkToNode = netmaskToNetworkToNode.get(netmask);
282 |         if (networkToNode == null) {
283 |           networkToNode = Maps.newHashMap();
284 |           netmaskToNetworkToNode.put(netmask, networkToNode);
285 |         }
286 |         networkToNode.put(networkAddress, nodeName);
287 |       }
288 |       return buildLookup(nodeNames, netmaskToNetworkToNode);
289 |     }
290 | 
291 |     private static PodCIDRLookup buildLookup(Set<String> nodeNames,
292 |         Map<String, Map<String, String>> netmaskToNetworkToNode) {
293 |       ImmutableList.Builder<ImmutablePair<Netmask, ImmutableMap<NetworkAddress, String>>> builder =
294 |           ImmutableList.builder();
295 |       for (Map.Entry<String, Map<String, String>> entry : netmaskToNetworkToNode.entrySet()) {
296 |         Netmask netmask = new Netmask(entry.getKey());
297 |         ImmutableMap.Builder<NetworkAddress, String> networkToNodeBuilder = ImmutableMap.builder();
298 |         for (Map.Entry<String, String> networkToNode : entry.getValue().entrySet()) {
299 |           networkToNodeBuilder.put(new NetworkAddress(networkToNode.getKey()),
300 |               networkToNode.getValue());
301 |         }
302 |         builder.add(ImmutablePair.of(netmask, networkToNodeBuilder.build()));
303 |       }
304 |       return new PodCIDRLookup(builder.build(), ImmutableSet.copyOf(nodeNames));
305 |     }
306 |   }
307 | 
308 |   private static class Netmask {
309 | 
310 |     private final String netmask;
311 | 
312 |     Netmask(String netmask) {
313 |       this.netmask = netmask;
314 |     }
315 | 
316 |     String getValue() {
317 |       return netmask;
318 |     }
319 | 
320 |     @Override
321 |     public boolean equals(Object o) {
322 |       if (this == o) {
323 |         return true;
324 |       }
325 |       if (o == null || getClass() != o.getClass()) {
326 |         return false;
327 |       }
328 |       Netmask netmask1 = (Netmask)o;
329 |       return netmask.equals(netmask1.netmask);
330 |     }
331 | 
332 |     @Override
333 |     public int hashCode() {
334 |       return netmask.hashCode();
335 |     }
336 |   }
337 | 
338 |   private static class NetworkAddress {
339 | 
340 |     private final String networkAddress;
341 | 
342 |     NetworkAddress(String networkAddress) {
343 |       this.networkAddress = networkAddress;
344 |     }
345 | 
346 |     String getValue() {
347 |       return networkAddress;
348 |     }
349 | 
350 |     @Override
351 |     public boolean equals(Object o) {
352 |       if (this == o) {
353 |         return true;
354 |       }
355 |       if (o == null || getClass() != o.getClass()) {
356 |         return false;
357 |       }
358 |       NetworkAddress that = (NetworkAddress)o;
359 |       return networkAddress.equals(that.networkAddress);
360 |     }
361 | 
362 |     @Override
363 |     public int hashCode() {
364 |       return networkAddress.hashCode();
365 |     }
366 |   }
367 | 
368 |   // For debugging purpose.
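  // A rough invocation sketch (the classpath placeholder is illustrative):
  //   java -cp <hadoop-classpath>:pod-cidr-namenode-topology-plugin-0.1-SNAPSHOT.jar \
  //       org.apache.hadoop.net.PodCIDRToNodeMapping -n 10.0.1.10
  // This resolves the given pod IP or node name and logs the resulting network path.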
369 |   public static void main(String[] args) throws ParseException {
370 |     Options options = new Options();
371 |     Option nameOption = new Option("n", true, "Name to resolve");
372 |     nameOption.setRequired(true);
373 |     options.addOption(nameOption);
374 |     CommandLineParser parser = new BasicParser();
375 |     CommandLine cmd = parser.parse(options, args);
376 | 
377 |     BasicConfigurator.configure();
378 |     Logger.getRootLogger().setLevel(Level.DEBUG);
379 |     PodCIDRToNodeMapping plugin = new PodCIDRToNodeMapping();
380 |     Configuration conf = new Configuration();
381 |     plugin.setConf(conf);
382 | 
383 |     String nameToResolve = cmd.getOptionValue(nameOption.getOpt());
384 |     List<String> networkPathDirs = plugin.resolve(Lists.newArrayList(nameToResolve));
385 |     log.info("Resolved " + nameToResolve + " to " + networkPathDirs);
386 |   }
387 | }
388 | 
--------------------------------------------------------------------------------