├── LICENSE
├── README.md
├── chart
│   ├── Chart.yaml
│   ├── README.md
│   ├── conf-files
│   │   ├── core-site.xml
│   │   └── log4j.properties
│   ├── templates
│   │   ├── NOTES.txt
│   │   ├── _helpers.tpl
│   │   ├── spark-config.yaml
│   │   ├── spark-master-deployment.yaml
│   │   └── spark-worker-deployment.yaml
│   └── values.yaml
├── images
│   ├── Dockerfile.base
│   ├── Dockerfile.master
│   ├── Dockerfile.worker
│   ├── Makefile
│   ├── README.md
│   ├── core-site.xml
│   ├── log4j.properties
│   ├── spark-defaults.conf
│   ├── start-common.sh
│   ├── start-master
│   └── start-worker
├── operator
│   └── README.md
└── utilities
    ├── cronjob.yaml
    └── entryPoint.yaml
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # kubernetes-spark
2 |
3 | This repo contains both the tools to build the Spark images and the Helm chart to deploy Spark onto your K8s cluster.
4 |
5 | Note: The Helm chart has been tested against a CoreOS Tectonic cluster running K8s version 1.6.4.
6 |
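7 | As a quick start (the release name here is illustrative; see `chart/README.md` for the full options):
8 |
9 | ```bash
10 | $ git clone https://github.com/eddieesquivel/kubernetes-spark.git
11 | $ helm install -n my-spark-chart --debug ./kubernetes-spark/chart
12 | ```
13 |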
--------------------------------------------------------------------------------
/chart/Chart.yaml:
--------------------------------------------------------------------------------
1 | name: spark
2 | version: 0.2.2.0
3 | description: Fast and general-purpose cluster computing system.
4 | home: http://spark.apache.org
5 | icon: http://spark.apache.org/images/spark-logo-trademark.png
6 | sources:
7 | - https://github.com/eddieesquivel/kubernetes-spark
8 | maintainers:
9 | - name: Eddie Esquivel
10 | email: eddie.esquivel@coreos.com
11 |
--------------------------------------------------------------------------------
/chart/README.md:
--------------------------------------------------------------------------------
1 | # Apache Spark Helm Chart
2 |
3 | Apache Spark is a fast and general-purpose cluster computing system. (The Helm Classic chart this one is based on also bundles Apache Zeppelin, linked below.)
4 |
5 | * http://spark.apache.org/
6 | * https://zeppelin.apache.org/
7 |
8 | Inspired by the Helm Classic chart: https://github.com/helm/charts
9 |
10 | ## Chart Details
11 | This chart will do the following:
12 |
13 | * 1 x Spark Master with port 8080 exposed on an external LoadBalancer
14 | * 3 x Spark Workers with a HorizontalPodAutoscaler to scale to a maximum of 10 pods when CPU utilization hits 50% of the 100m request
15 | * All using Kubernetes Deployments
16 |
17 | ## Prerequisites
18 |
19 | * Assumes that serviceAccount tokens are available under the `metadata` hostname, i.e. at http://metadata/computeMetadata/v1/instance/service-accounts/default/token
20 |
21 | ## Installing the Chart
22 |
23 | To install the chart with the release name `my-spark-chart`:
24 |
25 | ```bash
26 | $ helm install -n my-spark-chart --debug ./kubernetes-spark/chart
27 | ```
28 |
29 | ## Configuration
30 |
31 | The following tables list the configurable parameters of the Spark chart and their default values.
32 |
33 | ### Spark Master
34 |
35 | | Parameter | Description | Default |
36 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- |
37 | | `Master.Name`            | Spark master name                   | `master`                                                    |
38 | | `Master.Image`           | Container image name                | `quay.io/eddie_esquivel/spark-master`                       |
39 | | `Master.ImageTag`        | Container image tag                 | `2.2.0`                                                     |
40 | | `Master.Replicas`        | k8s deployment replicas             | `1`                                                         |
41 | | `Master.Component`       | k8s selector key                    | `spark-master`                                              |
42 | | `Master.Cpu`             | container requested cpu             | `100m`                                                      |
43 | | `Master.Memory`          | container requested memory          | `512Mi`                                                     |
44 | | `Master.ServicePort`     | k8s service port                    | `7077`                                                      |
45 | | `Master.ServiceContainerPort` | Container listening port       | `7077`                                                      |
46 | | `Master.DaemonMemory`    | Master JVM Xms and Xmx option       | `1g`                                                        |
47 |
48 | ### Spark WebUi
49 |
50 | | Parameter | Description | Default |
51 | |-----------------------|----------------------------------|----------------------------------------------------------|
52 | | `WebUi.Name`          | Spark webui name                 | `webui`                                                    |
53 | | `WebUi.ServicePort` | k8s service port | `8080` |
54 | | `WebUi.ContainerPort` | Container listening port | `8080` |
55 |
56 | ### Spark Worker
57 |
58 | | Parameter | Description | Default |
59 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- |
60 | | `Worker.Name`            | Spark worker name                   | `worker`                                                    |
61 | | `Worker.Image`           | Container image name                | `quay.io/eddie_esquivel/spark-worker`                       |
62 | | `Worker.ImageTag`        | Container image tag                 | `2.2.0`                                                     |
63 | | `Worker.Replicas`        | k8s hpa and deployment replicas     | `3`                                                         |
64 | | `Worker.ReplicasMax`     | k8s hpa max replicas                | `10`                                                        |
65 | | `Worker.Component`       | k8s selector key                    | `spark-worker`                                              |
66 | | `Worker.Cpu`             | container requested cpu             | `100m`                                                      |
67 | | `Worker.Memory`          | container requested memory          | `512Mi`                                                     |
68 | | `Worker.ContainerPort`   | Container listening (WebUI) port    | `8081`                                                      |
69 | | `Worker.CpuTargetPercentage` | k8s hpa cpu targetPercentage    | `50`                                                        |
70 | | `Worker.DaemonMemory`    | Worker JVM Xms and Xmx setting      | `1g`                                                        |
71 | | `Worker.ExecutorMemory`  | Worker memory available for executor | `1g`                                                       |
72 |
73 |
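74 | ### Overriding values
75 |
76 | Parameters can be overridden at install time with `--set`, or supplied from a YAML file with `-f`. A sketch (the values shown are illustrative):
77 |
78 | ```bash
79 | $ helm install -n my-spark-chart \
80 |     --set Worker.Replicas=5 \
81 |     --set Worker.ExecutorMemory=2g \
82 |     ./kubernetes-spark/chart
83 | ```
84 |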
--------------------------------------------------------------------------------
/chart/conf-files/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <!--
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 |
17 | <!-- Put site-specific property overrides in this file. -->
18 |
19 | <configuration>
20 |   <property><name>fs.defaultFS</name><value>hdfs://hdfs-namenode:8020</value></property>
21 | </configuration>
22 |
23 |
--------------------------------------------------------------------------------
/chart/conf-files/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.target=System.err
5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
7 |
8 | # Settings to quiet third party logs that are too verbose
9 | log4j.logger.org.spark-project.jetty=WARN
10 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
13 |
--------------------------------------------------------------------------------
/chart/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | 1. Get the Spark URL to visit by running these commands in the same shell:
2 |
3 | NOTE: It may take a few minutes for the LoadBalancer IP to be available.
4 | You can watch the status of it by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "webui-fullname" . }}'
5 |
6 | export SPARK_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "webui-fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
7 | echo http://$SPARK_SERVICE_IP:{{ .Values.WebUi.ServicePort }}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/chart/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/* vim: set filetype=mustache: */}}
2 | {{/*
3 | Expand the name of the chart.
4 | */}}
5 | {{- define "name" -}}
6 | {{- default .Chart.Name .Values.nameOverride | trunc 24 -}}
7 | {{- end -}}
8 |
9 | {{/*
10 | Create fully qualified names.
11 | We truncate at 24 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | */}}
13 | {{- define "master-fullname" -}}
14 | {{- $name := default .Chart.Name .Values.Master.Name -}}
15 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}}
16 | {{- end -}}
17 |
18 | {{- define "webui-fullname" -}}
19 | {{- $name := default .Chart.Name .Values.WebUi.Name -}}
20 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}}
21 | {{- end -}}
22 |
23 | {{- define "worker-fullname" -}}
24 | {{- $name := default .Chart.Name .Values.Worker.Name -}}
25 | {{- printf "%s-%s" .Release.Name $name | trunc 24 -}}
26 | {{- end -}}
27 |
28 |
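29 | {{/*
30 | Example (illustrative): with release name "my-spark-chart" and the default
31 | values, "master-fullname" renders as "my-spark-chart-master".
32 | */}}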
--------------------------------------------------------------------------------
/chart/templates/spark-config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: {{ .Release.Name }}-conf
5 | data:
6 | spark-defaults.conf: |
7 | spark.master spark://{{ template "master-fullname" . }}:7077
8 | spark.driver.extraLibraryPath /opt/hadoop/lib/native
9 | spark.app.id {{ template "master-fullname" . }}
10 | spark.ui.reverseProxy true
11 | {{ (.Files.Glob "conf-files/*").AsConfig | indent 2 }}
12 |
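13 | # Note: the .Files.Glob line above inlines every file under chart/conf-files
14 | # (core-site.xml and log4j.properties) as additional keys of this ConfigMap.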
--------------------------------------------------------------------------------
/chart/templates/spark-master-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: {{ template "master-fullname" . }}
5 | labels:
6 | heritage: {{.Release.Service | quote }}
7 | release: {{.Release.Name | quote }}
8 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
9 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
10 | spec:
11 | ports:
12 | - name: {{.Values.Master.ServicePortName}}
13 | port: {{.Values.Master.ServicePort}}
14 | targetPort: {{.Values.Master.ServiceContainerPort}}
15 | - name: {{.Values.Master.RestPortName}}
16 | port: {{.Values.Master.RestPort}}
17 | targetPort: {{.Values.Master.RestContainerPort}}
18 | selector:
19 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
20 | ---
21 | apiVersion: extensions/v1beta1
22 | kind: Ingress
23 | metadata:
24 | name: {{ template "webui-fullname" . }}
25 | labels:
26 | heritage: {{.Release.Service | quote }}
27 | release: {{.Release.Name | quote }}
28 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
29 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
30 | annotations:
31 | kubernetes.io/ingress.class: {{.Values.WebUi.IngressClass | quote}}
32 | spec:
33 | rules:
34 | - host: {{ template "master-fullname" . }}.{{.Values.WebUi.Domain}}
35 | http:
36 | paths:
37 | - path: /
38 | backend:
39 | serviceName: {{ template "webui-fullname" . }}
40 | servicePort: {{.Values.WebUi.ServicePort}}
41 | ---
42 | apiVersion: v1
43 | kind: Service
44 | metadata:
45 | name: {{ template "webui-fullname" . }}
46 | labels:
47 | heritage: {{.Release.Service | quote }}
48 | release: {{.Release.Name | quote }}
49 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
50 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
51 | spec:
52 | ports:
53 | - port: {{.Values.WebUi.ServicePort}}
54 | targetPort: {{.Values.WebUi.ContainerPort}}
55 | selector:
56 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
57 | ---
58 | apiVersion: extensions/v1beta1
59 | kind: Deployment
60 | metadata:
61 | name: {{ template "master-fullname" . }}
62 | labels:
63 | heritage: {{.Release.Service | quote }}
64 | release: {{.Release.Name | quote }}
65 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
66 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
67 | spec:
68 | replicas: {{default 1 .Values.Master.Replicas}}
69 | strategy:
70 | type: RollingUpdate
71 | selector:
72 | matchLabels:
73 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
74 | template:
75 | metadata:
76 | labels:
77 | heritage: {{.Release.Service | quote }}
78 | release: {{.Release.Name | quote }}
79 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
80 | component: "{{.Release.Name}}-{{.Values.Master.Component}}"
81 | spec:
82 | containers:
83 | - name: {{ template "master-fullname" . }}
84 | image: "{{.Values.Master.Image}}:{{.Values.Master.ImageTag}}"
85 | command: ["/bin/sh","-c"]
86 | args: ["echo $(hostname -i) {{ template "master-fullname" . }} >> /etc/hosts; /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip {{ template "master-fullname" . }} --port {{.Values.Master.ServicePort}} --webui-port {{.Values.WebUi.ContainerPort}}"]
87 | ports:
88 | - containerPort: {{.Values.Master.ServiceContainerPort}}
89 | - containerPort: {{.Values.Master.RestContainerPort}}
90 | - containerPort: {{.Values.WebUi.ContainerPort}}
91 | resources:
92 | requests:
93 | cpu: "{{.Values.Master.Cpu}}"
94 | memory: "{{.Values.Master.Memory}}"
95 | env:
96 | - name: SPARK_DAEMON_MEMORY
97 | value: {{ default "1g" .Values.Master.DaemonMemory | quote }}
98 | volumeMounts:
99 | - mountPath: /opt/spark/conf
100 | name: conf
101 | volumes:
102 | - name: conf
103 | configMap:
104 | name: {{ .Release.Name }}-conf
105 |
106 |
--------------------------------------------------------------------------------
/chart/templates/spark-worker-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: extensions/v1beta1
2 | kind: Deployment
3 | metadata:
4 | name: {{ template "worker-fullname" . }}
5 | labels:
6 | heritage: {{.Release.Service | quote }}
7 | release: {{.Release.Name | quote }}
8 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
9 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}"
10 | spec:
11 | replicas: {{default 1 .Values.Worker.Replicas}}
12 | strategy:
13 | type: RollingUpdate
14 | selector:
15 | matchLabels:
16 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}"
17 | template:
18 | metadata:
19 | labels:
20 | heritage: {{.Release.Service | quote }}
21 | release: {{.Release.Name | quote }}
22 | chart: "{{.Chart.Name}}-{{.Chart.Version}}"
23 | component: "{{.Release.Name}}-{{.Values.Worker.Component}}"
24 | spec:
25 | containers:
26 | - name: {{ template "worker-fullname" . }}
27 | image: "{{.Values.Worker.Image}}:{{.Values.Worker.ImageTag}}"
28 | command: ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.worker.Worker", "spark://{{ template "master-fullname" . }}:{{.Values.Master.ServicePort}}", "--webui-port", "{{.Values.Worker.ContainerPort}}"]
29 | ports:
30 | - containerPort: {{.Values.Worker.ContainerPort}}
31 | resources:
32 | requests:
33 | cpu: "{{.Values.Worker.Cpu}}"
34 | memory: "{{.Values.Worker.Memory}}"
35 | env:
36 | - name: SPARK_DAEMON_MEMORY
37 | value: {{ default "1g" .Values.Worker.DaemonMemory | quote }}
38 | - name: SPARK_WORKER_MEMORY
39 | value: {{ default "1g" .Values.Worker.ExecutorMemory | quote }}
40 | volumeMounts:
41 | - mountPath: /opt/spark/conf
42 | name: conf
43 | volumes:
44 | - name: conf
45 | configMap:
46 | name: {{ .Release.Name }}-conf
47 |
--------------------------------------------------------------------------------
/chart/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for spark.
2 | # This is a YAML-formatted file.
3 | # Declare name/value pairs to be passed into your templates.
4 | # name: value
5 |
6 | Master:
7 | Name: master
8 | Image: "quay.io/eddie_esquivel/spark-master"
9 | ImageTag: "2.2.0"
10 | Replicas: 1
11 | Component: "spark-master"
12 | Cpu: "100m"
13 | Memory: "512Mi"
14 | ServicePortName: service
15 | ServicePort: 7077
16 | ServiceContainerPort: 7077
17 | RestPortName: rest
18 | RestPort: 6066
19 | RestContainerPort: 6066
20 | # Set Master JVM memory. Default 1g
21 | #DaemonMemory: 1g
22 |
23 | WebUi:
24 | Name: webui
25 | ServicePort: 8080
26 | ContainerPort: 8080
27 | IngressClass: tectonic
28 | Domain: se.k8s.work
29 |
30 | Worker:
31 | Name: worker
32 | Image: "quay.io/eddie_esquivel/spark-worker"
33 | ImageTag: "2.2.0"
34 | Replicas: 3
35 | Component: "spark-worker"
36 | Cpu: "100m"
37 | Memory: "512Mi"
38 | ContainerPort: 8081
39 | # Set Worker JVM memory. Default 1g
40 | #DaemonMemory: 1g
41 | # Set how much total memory workers have to give executors
42 | #ExecutorMemory: 1g
43 |
44 |
--------------------------------------------------------------------------------
/images/Dockerfile.base:
--------------------------------------------------------------------------------
1 | FROM java:openjdk-8-jdk
2 |
3 | ENV DEBIAN_FRONTEND noninteractive
4 | ENV hadoop_ver 2.7.3
5 | ENV spark_ver 2.2.0
6 | ENV spark_hadoop_ver 2.7
7 |
8 | RUN ln -sf /bin/bash /bin/sh
9 |
10 | RUN cd /tmp && \
11 | curl -O http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \
12 | curl -O https://archive.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop${spark_hadoop_ver}.tgz
13 |
14 | RUN mkdir -p /opt && \
15 | cd /opt && \
16 | tar -zxf /tmp/hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \
17 | ln -s hadoop-${hadoop_ver} hadoop && \
18 | echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native
19 |
27 | # Unpack Spark (downloaded above from the Apache archive).
28 | RUN mkdir -p /opt && \
29 | cd /opt && \
30 | tar -zxf /tmp/spark-${spark_ver}-bin-hadoop${spark_hadoop_ver}.tgz && \
31 | ln -s spark-${spark_ver}-bin-hadoop${spark_hadoop_ver} spark && \
32 | echo Spark ${spark_ver} installed in /opt
33 |
34 | RUN apt-get update && apt-get install -y apt-utils apt-transport-https ca-certificates
35 | RUN rm -rf /tmp/* && \
36 | apt-get update && \
37 | apt-get -y upgrade && \
38 | apt-get install -y python-numpy python-pip maven && \
39 | curl -sL https://deb.nodesource.com/setup_7.x | bash && \
40 | apt-get install -y nodejs build-essential && \
41 | apt-get clean && \
42 | rm -rf /var/lib/apt/lists/*
43 |
44 | RUN pip install boto && \
45 | update-java-alternatives -s java-1.8.0-openjdk-amd64
46 |
47 | ADD log4j.properties /opt/spark/conf/log4j.properties
48 | ADD core-site.xml /opt/spark/conf/core-site.xml
49 | ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
50 | ADD start-common.sh start-worker start-master /
51 | ENV PATH $PATH:/opt/spark/bin
52 | ENV DEBIAN_FRONTEND teletype
53 | ENV SPARK_HOME /opt/spark
54 | ENV TERM xterm
55 |
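56 | # Build sketch (normally driven by images/Makefile):
57 | #   docker build -t quay.io/eddie_esquivel/spark-base . -f Dockerfile.base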
--------------------------------------------------------------------------------
/images/Dockerfile.master:
--------------------------------------------------------------------------------
1 | FROM quay.io/eddie_esquivel/spark-base:2.2.0
2 | ENTRYPOINT ["/bin/bash", "-c", "./start-master"]
3 |
--------------------------------------------------------------------------------
/images/Dockerfile.worker:
--------------------------------------------------------------------------------
1 | FROM quay.io/eddie_esquivel/spark-base:2.2.0
2 | ENTRYPOINT ["/bin/bash", "-c", "./start-worker"]
3 |
--------------------------------------------------------------------------------
/images/Makefile:
--------------------------------------------------------------------------------
1 | all: spark
2 | push: push-spark
3 | .PHONY: push push-spark spark
4 |
5 | # To bump the Spark version, bump spark_ver in Dockerfile.base and bump
6 | # this tag. You should also double check the native Hadoop libs at that
7 | # point (we grab the 2.7.3 libs, which are appropriate for the
8 | # spark-2.2.0-bin-hadoop2.7 build).
9 | SPARK_TAG = 2.2.0
10 |
11 | spark-base:
12 | docker build -t quay.io/eddie_esquivel/spark-base . -f Dockerfile.base
13 | docker tag quay.io/eddie_esquivel/spark-base quay.io/eddie_esquivel/spark-base:$(SPARK_TAG)
14 |
15 | spark-master: spark-base
16 | docker build -t quay.io/eddie_esquivel/spark-master . -f Dockerfile.master
17 | docker tag quay.io/eddie_esquivel/spark-master quay.io/eddie_esquivel/spark-master:$(SPARK_TAG)
18 |
19 | spark-worker: spark-base
20 | docker build -t quay.io/eddie_esquivel/spark-worker . -f Dockerfile.worker
21 | docker tag quay.io/eddie_esquivel/spark-worker quay.io/eddie_esquivel/spark-worker:$(SPARK_TAG)
22 |
23 | spark: spark-base spark-master spark-worker
24 |
25 | push-spark-base: spark-base
26 | docker push quay.io/eddie_esquivel/spark-base
27 | docker push quay.io/eddie_esquivel/spark-base:$(SPARK_TAG)
28 |
29 | push-spark-master: spark-master
30 | docker push quay.io/eddie_esquivel/spark-master
31 | docker push quay.io/eddie_esquivel/spark-master:$(SPARK_TAG)
32 |
33 | push-spark-worker: spark-worker
34 | docker push quay.io/eddie_esquivel/spark-worker
35 | docker push quay.io/eddie_esquivel/spark-worker:$(SPARK_TAG)
36 |
37 | push-spark: push-spark-base push-spark-master push-spark-worker
38 |
39 | clean:
40 | docker rm -f $$(docker ps -aq) > /dev/null 2>&1 ;true
41 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-master" | awk '{print $$3}') > /dev/null 2>&1 ;true
42 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-worker" | awk '{print $$3}') > /dev/null 2>&1 ;true
43 | docker rmi -f $$(docker images -a | grep "^quay.io/eddie_esquivel/spark-base" | awk '{print $$3}') > /dev/null 2>&1 ;true
44 | docker rmi -f $$(docker images -a | grep "^" | awk '{print $$3}') > /dev/null 2>&1 ;true
45 |
--------------------------------------------------------------------------------
/images/README.md:
--------------------------------------------------------------------------------
1 | # Spark
2 |
3 | This directory builds Docker images for running Spark on Kubernetes. It produces two main images:
4 | * `spark-master` - Runs a Spark master in Standalone mode and exposes a port for Spark and a port for the WebUI.
5 | * `spark-worker` - Runs a Spark worker in Standalone mode and connects to the Spark master via the DNS name `spark-master`.
6 |
7 |
8 | You can find the built images in these repos:
9 |
10 | https://quay.io/repository/eddie_esquivel/spark-master
11 |
12 | https://quay.io/repository/eddie_esquivel/spark-worker
13 |
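14 | ## Building
15 |
16 | The `Makefile` in this directory builds and pushes everything (pushing assumes write access to the quay.io repositories above):
17 |
18 | ```bash
19 | $ make spark        # build spark-base, spark-master, and spark-worker
20 | $ make push-spark   # push all three images and their version tags
21 | ```
22 |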
--------------------------------------------------------------------------------
/images/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <!--
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 |
17 | <!-- Put site-specific property overrides in this file. -->
18 |
19 | <configuration>
20 |   <property><name>fs.defaultFS</name><value>hdfs://10.0.103.51:8020</value></property>
21 | </configuration>
22 |
23 |
--------------------------------------------------------------------------------
/images/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=INFO, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.target=System.err
5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
7 |
8 | # Settings to quiet third party logs that are too verbose
9 | log4j.logger.org.spark-project.jetty=WARN
10 | log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
13 |
--------------------------------------------------------------------------------
/images/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | spark.master spark://spark-master:7077
2 | spark.driver.extraLibraryPath /opt/hadoop/lib/native
3 | spark.app.id KubernetesSpark
4 | spark.ui.reverseProxy true
5 |
6 |
--------------------------------------------------------------------------------
/images/start-common.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015 The Kubernetes Authors All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
18 |
19 | if [[ -n "${PROJECT_ID}" ]]; then
20 | sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
21 | fi
22 |
23 | # We don't want any of the incoming service variables, we'd rather use
24 | # DNS. But this one interferes directly with Spark.
25 | unset SPARK_MASTER_PORT
26 |
27 | # spark.{executor,driver}.extraLibraryPath don't actually seem to
28 | # work, this seems to be the only reliable way to get the native libs
29 | # picked up.
30 | export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
31 |
--------------------------------------------------------------------------------
/images/start-master:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015 The Kubernetes Authors All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | . /start-common.sh
18 |
19 | echo "$(hostname -i) spark-master" >> /etc/hosts
20 |
21 | # Run spark-class directly so that when it exits (or crashes), the pod restarts.
22 | /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
23 |
--------------------------------------------------------------------------------
/images/start-worker:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015 The Kubernetes Authors All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | . /start-common.sh
18 |
19 | if ! getent hosts spark-master; then
20 | echo "=== Cannot resolve the DNS entry for spark-master. Has the service been created yet, and is SkyDNS functional?"
21 | echo "=== See http://kubernetes.io/v1.1/docs/admin/dns.html for more details on DNS integration."
22 | echo "=== Sleeping 10s before pod exit."
23 | sleep 10
24 | exit 0
25 | fi
26 |
27 | # Run spark-class directly so that when it exits (or crashes), the pod restarts.
28 | /opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077,spark-master-ha:7077 --webui-port 8081
29 |
--------------------------------------------------------------------------------
/operator/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eddieesquivel/kubernetes-spark/affe2b8abe328884b6b19f0fc7f45b044b57a886/operator/README.md
--------------------------------------------------------------------------------
/utilities/cronjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v2alpha1
2 | kind: CronJob
3 | metadata:
4 | name: spark-job
5 | spec:
6 | schedule: "*/2 * * * *"
7 | jobTemplate:
8 | spec:
9 | template:
10 | spec:
11 | containers:
12 | - name: spark-job
13 | image: quay.io/eddie_esquivel/spark-master:2.2.0
14 | command:
15 | - "/bin/bash"
16 | - "-c"
17 | - "spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark-cluster-master:6066 --deploy-mode cluster --supervise --executor-memory 1G --total-executor-cores 10 hdfs://hdfs-namenode:8020/demo/spark-examples_2.11-2.1.1.jar 100"
18 | volumeMounts:
19 | - mountPath: /opt/spark/conf
20 | name: conf
21 | restartPolicy: OnFailure
22 | volumes:
23 | - name: conf
24 | configMap:
25 | name: eddie-chart-conf
26 |
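27 | # Usage sketch: CronJob under batch/v2alpha1 requires the alpha API to be enabled
28 | # on the apiserver (e.g. --runtime-config=batch/v2alpha1=true); the ConfigMap name
29 | # "eddie-chart-conf" assumes a chart release named "eddie-chart".
30 | #   kubectl apply -f utilities/cronjob.yaml
31 | #   kubectl get cronjobs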
--------------------------------------------------------------------------------
/utilities/entryPoint.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: edit-entry-point
5 | spec:
6 | containers:
7 | - name: edit-entry-point
8 | image: quay.io/eddie_esquivel/spark-master:2.2.0
9 | command:
10 | - "/bin/bash"
11 | - "-c"
12 | - "trap : TERM INT; sleep infinity & wait"
13 | volumeMounts:
14 | - mountPath: /opt/spark/conf
15 | name: conf
16 | volumes:
17 | - name: conf
18 | configMap:
19 | name: spark-cluster-conf
20 | restartPolicy: "OnFailure"
21 |
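22 | # Usage sketch: the ConfigMap name "spark-cluster-conf" assumes a chart release
23 | # named "spark-cluster". Once the pod is Running, exec in and submit jobs with
24 | # the cluster config mounted at /opt/spark/conf:
25 | #   kubectl apply -f utilities/entryPoint.yaml
26 | #   kubectl exec -it edit-entry-point -- /bin/bash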
--------------------------------------------------------------------------------