├── LICENSE ├── README.md ├── chart ├── Chart.yaml ├── README.md ├── conf-files │ ├── core-site.xml │ └── log4j.properties ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── spark-config.yaml │ ├── spark-master-deployment.yaml │ └── spark-worker-deployment.yaml └── values.yaml ├── images ├── Dockerfile.base ├── Dockerfile.master ├── Dockerfile.worker ├── Makefile ├── README.md ├── core-site.xml ├── log4j.properties ├── spark-defaults.conf ├── start-common.sh ├── start-master └── start-worker ├── operator └── README.md └── utilities ├── cronjob.yaml └── entryPoint.yaml /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kubernetes-spark 2 | 3 | This repo contains both the tools to build Spark images as well as the helm charts to deploy Spark onto your K8s cluster. 4 | 5 | Note: The helm chart has been tested against a CoreOS Tectonic cluster K8s version 1.6.4 6 | -------------------------------------------------------------------------------- /chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | name: spark 2 | version: 0.2.2.0 3 | description: Fast and general-purpose cluster computing system. 
4 | home: http://spark.apache.org 5 | icon: http://spark.apache.org/images/spark-logo-trademark.png 6 | sources: 7 | - https://github.com/eddieesquivel/kubernetes-spark 8 | maintainers: 9 | - name: Eddie Esquivel 10 | email: eddie.esquivel@coreos.com 11 | -------------------------------------------------------------------------------- /chart/README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark Helm Chart 2 | 3 | Apache Spark is a fast and general-purpose cluster computing system including Apache Zeppelin. 4 | 5 | * http://spark.apache.org/ 6 | * https://zeppelin.apache.org/ 7 | 8 | Inspired from Helm Classic chart https://github.com/helm/charts 9 | 10 | ## Chart Details 11 | This chart will do the following: 12 | 13 | * 1 x Spark Master with port 8080 exposed on an external LoadBalancer 14 | * 3 x Spark Workers with HorizontalPodAutoscaler to scale to max 10 pods when CPU hits 50% of 100m 15 | * All using Kubernetes Deployments 16 | 17 | ## Prerequisites 18 | 19 | * Assumes that serviceAccount tokens are available under hostname metadata. URL -- http://metadata/computeMetadata/v1/instance/service-accounts/default/token 20 | 21 | ## Installing the Chart 22 | 23 | To install the chart with the release name `my-release`: 24 | 25 | ```bash 26 | $ helm install -n my-spark-chart --debug ./kubernetes-spark/chart 27 | ``` 28 | 29 | ## Configuration 30 | 31 | The following tables lists the configurable parameters of the Spark chart and their default values. 
32 | 33 | ### Spark Master 34 | 35 | | Parameter | Description | Default | 36 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- | 37 | | `Master.Name` | Spark master name | `spark-master` | 38 | | `Master.Image` | Container image name | `quay.io/eddie_esquivel/spark-master` | 39 | | `Master.ImageTag` | Container image tag | `2.1.1-ha` | 40 | | `Master.Replicas` | k8s deployment replicas | `1` | 41 | | `Master.Component` | k8s selector key | `spark-master` | 42 | | `Master.Cpu` | container requested cpu | `100m` | 43 | | `Master.Memory` | container requested memory | `512Mi` | 44 | | `Master.ServicePort` | k8s service port | `7077` | 45 | | `Master.ContainerPort` | Container listening port | `7077` | 46 | | `Master.DaemonMemory` | Master JVM Xms and Xmx option | `1g` | 47 | 48 | ### Spark WebUi 49 | 50 | | Parameter | Description | Default | 51 | |-----------------------|----------------------------------|----------------------------------------------------------| 52 | | `WebUi.Name` | Spark webui name | `spark-webui` | 53 | | `WebUi.ServicePort` | k8s service port | `8080` | 54 | | `WebUi.ContainerPort` | Container listening port | `8080` | 55 | 56 | ### Spark Worker 57 | 58 | | Parameter | Description | Default | 59 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- | 60 | | `Worker.Name` | Spark worker name | `spark-worker` | 61 | | `Worker.Image` | Container image name | `quay.io/eddie_esquivel/spark-worker` | 62 | | `Worker.ImageTag` | Container image tag | `2.1.1-ha` | 63 | | `Worker.Replicas` | k8s hpa and deployment replicas | `3` | 64 | | `Worker.ReplicasMax` | k8s hpa max replicas | `10` | 65 | | `Worker.Component` | k8s selector key | `spark-worker` | 66 | | `Worker.Cpu` | container requested cpu | `100m` | 67 | | `Worker.Memory` | container requested memory | `512Mi` | 68 | | `Worker.ContainerPort` | 
Container listening port | `7077` | 69 | | `Worker.CpuTargetPercentage` | k8s hpa cpu targetPercentage | `50` | 70 | | `Worker.DaemonMemory` | Worker JVM Xms and Xmx setting | `1g` | 71 | | `Worker.ExecutorMemory` | Worker memory available for executor | `1g` | 72 | 73 | -------------------------------------------------------------------------------- /chart/conf-files/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | fs.defaultFShdfs://hdfs-namenode:8020 21 | 22 | 23 | -------------------------------------------------------------------------------- /chart/conf-files/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.spark-project.jetty=WARN 10 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 13 | -------------------------------------------------------------------------------- /chart/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the Spark URL to visit by running these commands in the same shell: 2 | 3 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 4 | You can watch the status of by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "webui-fullname" . 
}}' 5 | 6 | export SPARK_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "webui-fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 7 | echo http://$SPARK_SERVICE_IP:{{ .Values.WebUi.ServicePort }} 8 | 9 | 10 | -------------------------------------------------------------------------------- /chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 24 -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create fully qualified names. 11 | We truncate at 24 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | */}} 13 | {{- define "master-fullname" -}} 14 | {{- $name := default .Chart.Name .Values.Master.Name -}} 15 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}} 16 | {{- end -}} 17 | 18 | {{- define "webui-fullname" -}} 19 | {{- $name := default .Chart.Name .Values.WebUi.Name -}} 20 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}} 21 | {{- end -}} 22 | 23 | {{- define "worker-fullname" -}} 24 | {{- $name := default .Chart.Name .Values.Worker.Name -}} 25 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}} 26 | {{- end -}} 27 | 28 | -------------------------------------------------------------------------------- /chart/templates/spark-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }}-conf 5 | data: 6 | spark-defaults.conf: | 7 | spark.master spark://{{ template "master-fullname" . }}:7077 8 | spark.driver.extraLibraryPath /opt/hadoop/lib/native 9 | spark.app.id {{ template "master-fullname" . 
}} 10 | spark.ui.reverseProxy true 11 | {{ (.Files.Glob "conf-files/*").AsConfig | indent 2 }} 12 | -------------------------------------------------------------------------------- /chart/templates/spark-master-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "master-fullname" . }} 5 | labels: 6 | heritage: {{.Release.Service | quote }} 7 | release: {{.Release.Name | quote }} 8 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 9 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 10 | spec: 11 | ports: 12 | - name: {{.Values.Master.ServicePortName}} 13 | port: {{.Values.Master.ServicePort}} 14 | targetPort: {{.Values.Master.ServiceContainerPort}} 15 | - name: {{.Values.Master.RestPortName}} 16 | port: {{.Values.Master.RestPort}} 17 | targetPort: {{.Values.Master.RestContainerPort}} 18 | selector: 19 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 20 | --- 21 | apiVersion: extensions/v1beta1 22 | kind: Ingress 23 | metadata: 24 | name: {{ template "webui-fullname" . }} 25 | labels: 26 | heritage: {{.Release.Service | quote }} 27 | release: {{.Release.Name | quote }} 28 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 29 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 30 | annotations: 31 | kubernetes.io/ingress.class: {{.Values.WebUi.IngressClass | quote}} 32 | spec: 33 | rules: 34 | - host: {{ template "master-fullname" . }}.{{.Values.WebUi.Domain}} 35 | http: 36 | paths: 37 | - path: / 38 | backend: 39 | serviceName: {{ template "webui-fullname" . }} 40 | servicePort: {{.Values.WebUi.ServicePort}} 41 | --- 42 | apiVersion: v1 43 | kind: Service 44 | metadata: 45 | name: {{ template "webui-fullname" . 
}} 46 | labels: 47 | heritage: {{.Release.Service | quote }} 48 | release: {{.Release.Name | quote }} 49 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 50 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 51 | spec: 52 | ports: 53 | - port: {{.Values.WebUi.ServicePort}} 54 | targetPort: {{.Values.WebUi.ContainerPort}} 55 | selector: 56 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 57 | --- 58 | apiVersion: extensions/v1beta1 59 | kind: Deployment 60 | metadata: 61 | name: {{ template "master-fullname" . }} 62 | labels: 63 | heritage: {{.Release.Service | quote }} 64 | release: {{.Release.Name | quote }} 65 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 66 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 67 | spec: 68 | replicas: {{default 1 .Values.Master.Replicas}} 69 | strategy: 70 | type: RollingUpdate 71 | selector: 72 | matchLabels: 73 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 74 | template: 75 | metadata: 76 | labels: 77 | heritage: {{.Release.Service | quote }} 78 | release: {{.Release.Name | quote }} 79 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 80 | component: "{{.Release.Name}}-{{.Values.Master.Component}}" 81 | spec: 82 | containers: 83 | - name: {{ template "master-fullname" . }} 84 | image: "{{.Values.Master.Image}}:{{.Values.Master.ImageTag}}" 85 | command: ["/bin/sh","-c"] 86 | args: ["echo $(hostname -i) {{ template "master-fullname" . }} >> /etc/hosts; /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip {{ template "master-fullname" . 
}} --port {{.Values.Master.ServicePort}} --webui-port {{.Values.WebUi.ContainerPort}}"] 87 | ports: 88 | - containerPort: {{.Values.Master.ServiceContainerPort}} 89 | - containerPort: {{.Values.Master.RestContainerPort}} 90 | - containerPort: {{.Values.WebUi.ContainerPort}} 91 | resources: 92 | requests: 93 | cpu: "{{.Values.Master.Cpu}}" 94 | memory: "{{.Values.Master.Memory}}" 95 | env: 96 | - name: SPARK_DAEMON_MEMORY 97 | value: {{ default "1g" .Values.Master.DaemonMemory | quote }} 98 | volumeMounts: 99 | - mountPath: /opt/spark/conf 100 | name: conf 101 | volumes: 102 | - name: conf 103 | configMap: 104 | name: {{ .Release.Name }}-conf 105 | 106 | -------------------------------------------------------------------------------- /chart/templates/spark-worker-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "worker-fullname" . }} 5 | labels: 6 | heritage: {{.Release.Service | quote }} 7 | release: {{.Release.Name | quote }} 8 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 9 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}" 10 | spec: 11 | replicas: {{default 1 .Values.Worker.Replicas}} 12 | strategy: 13 | type: RollingUpdate 14 | selector: 15 | matchLabels: 16 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}" 17 | template: 18 | metadata: 19 | labels: 20 | heritage: {{.Release.Service | quote }} 21 | release: {{.Release.Name | quote }} 22 | chart: "{{.Chart.Name}}-{{.Chart.Version}}" 23 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}" 24 | spec: 25 | containers: 26 | - name: {{ template "worker-fullname" . }} 27 | image: "{{.Values.Worker.Image}}:{{.Values.Worker.ImageTag}}" 28 | command: ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.worker.Worker", "spark://{{ template "master-fullname" . 
}}:{{.Values.Master.ServicePort}}", "--webui-port", "{{.Values.Worker.ContainerPort}}"] 29 | ports: 30 | - containerPort: {{.Values.Worker.ContainerPort}} 31 | resources: 32 | requests: 33 | cpu: "{{.Values.Worker.Cpu}}" 34 | memory: "{{.Values.Worker.Memory}}" 35 | env: 36 | - name: SPARK_DAEMON_MEMORY 37 | value: {{ default "1g" .Values.Worker.DaemonMemory | quote }} 38 | - name: SPARK_WORKER_MEMORY 39 | value: {{ default "1g" .Values.Worker.ExecutorMemory | quote }} 40 | volumeMounts: 41 | - mountPath: /opt/spark/conf 42 | name: conf 43 | volumes: 44 | - name: conf 45 | configMap: 46 | name: {{ .Release.Name }}-conf 47 | -------------------------------------------------------------------------------- /chart/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for spark. 2 | # This is a YAML-formatted file. 3 | # Declare name/value pairs to be passed into your templates. 4 | # name: value 5 | 6 | Master: 7 | Name: master 8 | Image: "quay.io/eddie_esquivel/spark-master" 9 | ImageTag: "2.2.0" 10 | Replicas: 1 11 | Component: "spark-master" 12 | Cpu: "100m" 13 | Memory: "512Mi" 14 | ServicePortName: service 15 | ServicePort: 7077 16 | ServiceContainerPort: 7077 17 | RestPortName: rest 18 | RestPort: 6066 19 | RestContainerPort: 6066 20 | # Set Master JVM memory. Default 1g 21 | #DaemonMemory: 1g 22 | 23 | WebUi: 24 | Name: webui 25 | ServicePort: 8080 26 | ContainerPort: 8080 27 | IngressClass: tectonic 28 | Domain: se.k8s.work 29 | 30 | Worker: 31 | Name: worker 32 | Image: "quay.io/eddie_esquivel/spark-worker" 33 | ImageTag: "2.2.0" 34 | Replicas: 3 35 | Component: "spark-worker" 36 | Cpu: "100m" 37 | Memory: "512Mi" 38 | ContainerPort: 8081 39 | # Set Worker JVM memory. 
Default 1g 40 | #DaemonMemory: 1g 41 | # Set how much total memory workers have to give executors 42 | #ExecutorMemory: 1g 43 | 44 | -------------------------------------------------------------------------------- /images/Dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM java:openjdk-8-jdk 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | ENV hadoop_ver 2.7.3 5 | ENV spark_ver 2.2.0 6 | ENV spark_hadoop_ver 2.7 7 | 8 | RUN ln -sf /bin/bash /bin/sh 9 | 10 | RUN cd /tmp && \ 11 | curl -O http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \ 12 | curl -O https://archive.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop${spark_hadoop_ver}.tgz 13 | 14 | RUN mkdir -p /opt && \ 15 | cd /opt && \ 16 | tar -zxf /tmp/hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \ 17 | ln -s hadoop-${hadoop_ver} hadoop && \ 18 | echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native 19 | 20 | # Get Spark from US Apache mirror. 21 | RUN mkdir -p /opt && \ 22 | cd /opt && \ 23 | tar -zxf /tmp/hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \ 24 | ln -s hadoop-${hadoop_ver} hadoop && \ 25 | echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native 26 | 27 | # Get Spark from US Apache mirror. 
28 | RUN mkdir -p /opt && \ 29 | cd /opt && \ 30 | tar -zxf /tmp/spark-${spark_ver}-bin-hadoop${spark_hadoop_ver}.tgz && \ 31 | ln -s spark-${spark_ver}-bin-hadoop${spark_hadoop_ver} spark && \ 32 | echo Spark ${spark_ver} installed in /opt 33 | 34 | RUN apt-get update && apt-get install -y apt-utils apt-transport-https ca-certificates 35 | RUN rm -rf /tmp/* && \ 36 | apt-get update && \ 37 | apt-get -y upgrade && \ 38 | apt-get install -y python-numpy python-pip maven && \ 39 | curl -sL https://deb.nodesource.com/setup_7.x | bash && \ 40 | apt-get install -y nodejs build-essential && \ 41 | apt-get clean && \ 42 | rm -rf /var/lib/apt/lists/* 43 | 44 | RUN pip install boto && \ 45 | update-java-alternatives -s java-1.8.0-openjdk-amd64 46 | 47 | ADD log4j.properties /opt/spark/conf/log4j.properties 48 | ADD core-site.xml /opt/spark/conf/core-site.xml 49 | ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf 50 | ADD start-common.sh start-worker start-master / 51 | ENV PATH $PATH:/opt/spark/bin 52 | ENV DEBIAN_FRONTEND teletype 53 | ENV SPARK_HOME /opt/spark 54 | ENV TERM xterm 55 | -------------------------------------------------------------------------------- /images/Dockerfile.master: -------------------------------------------------------------------------------- 1 | FROM quay.io/eddie_esquivel/spark-base:2.2.0 2 | ENTRYPOINT ["/bin/bash", "-c", "./start-master"] 3 | -------------------------------------------------------------------------------- /images/Dockerfile.worker: -------------------------------------------------------------------------------- 1 | FROM quay.io/eddie_esquivel/spark-base:2.2.0 2 | ENTRYPOINT ["/bin/bash", "-c", "./start-worker"] 3 | -------------------------------------------------------------------------------- /images/Makefile: -------------------------------------------------------------------------------- 1 | all: spark 2 | push: push-spark 3 | .PHONY: push push-spark spark 4 | 5 | # To bump the Spark version, bump the 
spark_ver in Dockerfile, bump 6 | # this tag and reset to v1. You should also double check the native 7 | # Hadoop libs at that point (we grab the 2.6.1 libs, which are 8 | # appropriate for 1.5.2-with-2.6). 9 | SPARK_TAG = 2.2.0 10 | 11 | spark-base: 12 | docker build -t quay.io/eddie_esquivel/spark-base . -f Dockerfile.base 13 | docker tag quay.io/eddie_esquivel/spark-base quay.io/eddie_esquivel/spark-base:$(SPARK_TAG) 14 | 15 | spark-master: spark-base 16 | docker build -t quay.io/eddie_esquivel/spark-master . -f Dockerfile.master 17 | docker tag quay.io/eddie_esquivel/spark-master quay.io/eddie_esquivel/spark-master:$(SPARK_TAG) 18 | 19 | spark-worker: spark-base 20 | docker build -t quay.io/eddie_esquivel/spark-worker . -f Dockerfile.worker 21 | docker tag quay.io/eddie_esquivel/spark-worker quay.io/eddie_esquivel/spark-worker:$(SPARK_TAG) 22 | 23 | spark: spark-base spark-master spark-worker 24 | 25 | push-spark-base: spark-base 26 | docker push quay.io/eddie_esquivel/spark-base 27 | docker push quay.io/eddie_esquivel/spark-base:$(SPARK_TAG) 28 | 29 | push-spark-master: spark-master 30 | docker push quay.io/eddie_esquivel/spark-master 31 | docker push quay.io/eddie_esquivel/spark-master:$(SPARK_TAG) 32 | 33 | push-spark-worker: spark-worker 34 | docker push quay.io/eddie_esquivel/spark-worker 35 | docker push quay.io/eddie_esquivel/spark-worker:$(SPARK_TAG) 36 | 37 | push-spark: push-spark-base push-spark-master push-spark-worker 38 | 39 | clean: 40 | docker rm -f $$(docker ps -aq) > /dev/null 2>&1 ;true 41 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-master" | awk '{print $3}') > /dev/null 2>&1 ;true 42 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-worker" | awk '{print $3}') > /dev/null 2>&1 ;true 43 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-base" | awk '{print $3}') > /dev/null 2>&1 ;true 44 | docker rmi -f $$(docker images -a | grep "^" | awk '{print $3}') > /dev/null 
2>&1 ;true 45 | -------------------------------------------------------------------------------- /images/README.md: -------------------------------------------------------------------------------- 1 | # Spark 2 | 3 | This is a Docker image appropriate for running Spark on Kubernetes. It produces two main images: 4 | * `spark-master` - Runs a Spark master in Standalone mode and exposes a port for Spark and a port for the WebUI. 5 | * `spark-worker` - Runs a Spark worker in Standalone mode and connects to the Spark master via DNS name `spark-master`. 6 | 7 | 8 | You can find the built images in these repos: 9 | 10 | https://quay.io/repository/eddie_esquivel/spark-master 11 | 12 | https://quay.io/repository/eddie_esquivel/spark-worker 13 | -------------------------------------------------------------------------------- /images/core-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | 3 | 16 | 17 | 18 | <configuration> 19 | <property> 20 | <name>fs.defaultFS</name><value>hdfs://10.0.103.51:8020</value> 21 | </property> 22 | </configuration> 23 | -------------------------------------------------------------------------------- /images/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.spark-project.jetty=WARN 10 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 13 | --------------------------------------------------------------------------------
/images/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.master spark://spark-master:7077 2 | spark.driver.extraLibraryPath /opt/hadoop/lib/native 3 | spark.app.id KubernetesSpark 4 | spark.ui.reverseProxy true 5 | 6 | -------------------------------------------------------------------------------- /images/start-common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 The Kubernetes Authors All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id) # probe the GCE metadata server; curl -s yields empty output when not on GCE 18 | 19 | if [[ -n "${PROJECT_ID}" ]]; then 20 | sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml # NOTE(review): expects a NOT_RUNNING_INSIDE_GCE placeholder in core-site.xml — the shipped images/core-site.xml hardcodes an HDFS address instead; confirm 21 | fi 22 | 23 | # We don't want any of the incoming service variables, we'd rather use 24 | # DNS. But this one interferes directly with Spark. 25 | unset SPARK_MASTER_PORT 26 | 27 | # spark.{executor,driver}.extraLibraryPath don't actually seem to 28 | # work, this seems to be the only reliable way to get the native libs 29 | # picked up.
30 | export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native 31 | -------------------------------------------------------------------------------- /images/start-master: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 The Kubernetes Authors All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . /start-common.sh 18 | 19 | echo "$(hostname -i) spark-master" >> /etc/hosts # map the spark-master hostname to this pod's IP so the Master started with --ip spark-master can bind to it 20 | 21 | # Run spark-class directly so that when it exits (or crashes), the pod restarts. 22 | /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080 23 | -------------------------------------------------------------------------------- /images/start-worker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 The Kubernetes Authors All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . /start-common.sh 18 | 19 | if ! getent hosts spark-master; then 20 | echo "=== Cannot resolve the DNS entry for spark-master. Has the service been created yet, and is SkyDNS functional?" 21 | echo "=== See http://kubernetes.io/v1.1/docs/admin/dns.html for more details on DNS integration." 22 | echo "=== Sleeping 10s before pod exit." 23 | sleep 10 24 | exit 0 # NOTE(review): exits successfully on DNS failure; retrying relies on the pod's restart policy — confirm the deployment uses restartPolicy Always 25 | fi 26 | 27 | # Run spark-class directly so that when it exits (or crashes), the pod restarts. 28 | /opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077,spark-master-ha:7077 --webui-port 8081 # NOTE(review): also lists spark-master-ha:7077 — assumes an HA master service by that name exists; verify 29 | -------------------------------------------------------------------------------- /operator/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eddieesquivel/kubernetes-spark/affe2b8abe328884b6b19f0fc7f45b044b57a886/operator/README.md -------------------------------------------------------------------------------- /utilities/cronjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v2alpha1 # NOTE(review): batch/v2alpha1 was removed from Kubernetes long ago; use batch/v1 (1.21+) or batch/v1beta1 on older clusters 2 | kind: CronJob 3 | metadata: 4 | name: spark-job 5 | spec: 6 | schedule: "*/2 * * * *" 7 | jobTemplate: 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: spark-job 13 | image: quay.io/eddie_esquivel/spark-master:2.2.0 14 | command: 15 | - "/bin/bash" 16 | - "-c" 17 | - "spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark-cluster-master:6066 --deploy-mode cluster --supervise --executor-memory 1G --total-executor-cores 10 hdfs://hdfs-namenode:8020/demo/spark-examples_2.11-2.1.1.jar 100" 18 | volumeMounts: 19 | - mountPath: /opt/spark/conf 20 | name: conf 21 | restartPolicy: OnFailure 22 | volumes: 23 | - name: conf 24 | configMap: 25 | name: eddie-chart-conf 26 | 
-------------------------------------------------------------------------------- /utilities/entryPoint.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: edit-entry-point 5 | spec: 6 | containers: 7 | - name: edit-entry-point 8 | image: quay.io/eddie_esquivel/spark-master:2.2.0 9 | command: 10 | - "/bin/bash" 11 | - "-c" 12 | - "trap : TERM INT; sleep infinity & wait" # keep the container alive; the no-op trap lets TERM/INT interrupt the wait so the pod stops promptly 13 | volumeMounts: 14 | - mountPath: /opt/spark/conf 15 | name: conf 16 | volumes: 17 | - name: conf 18 | configMap: 19 | name: spark-cluster-conf # NOTE(review): cronjob.yaml mounts "eddie-chart-conf" instead — confirm which ConfigMap name the installed chart actually creates 20 | restartPolicy: "OnFailure" 21 | --------------------------------------------------------------------------------