├── .gitignore ├── Chart.yaml ├── README.md ├── templates ├── _helpers.tpl ├── deployment.yaml ├── ingress.yaml ├── service.yaml └── spark-configmap.yaml └── values.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | 8 | # Crash log files 9 | crash.log 10 | 11 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 12 | # .tfvars files are managed as part of configuration and so should be included in 13 | # version control. 14 | # 15 | # example.tfvars 16 | 17 | # Ignore override files as they are usually used to override resources locally and so 18 | # are not checked in 19 | override.tf 20 | override.tf.json 21 | *_override.tf 22 | *_override.tf.json 23 | 24 | # Include override files you do wish to add to version control using negated pattern 25 | # 26 | # !example_override.tf 27 | 28 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 29 | # example: *tfplan* 30 | -------------------------------------------------------------------------------- /Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: spark-history-server 3 | description: A Helm chart for Spark HS in Kubernetes 4 | version: 0.0.1 5 | appVersion: 3.0.1 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spark history server Helm Chart 2 | 3 | [SHS](https://apache-spark-on-k8s.github.io/userdocs/running-on-kubernetes.html) Spark History Server is the web UI for completed and running Spark applications. 
4 | 5 | ## Chart Details 6 | 7 | This chart uses AWS K8s [IRSA](https://aws.amazon.com/blogs/opensource/introducing-fine-grained-iam-roles-service-accounts/) technology to authenticate against AWS, so a proper `ServiceAccount` needs to be specified in order to have the right permissions to access the application logs S3 bucket. 8 | 9 | ## Installing the Chart 10 | 11 | To install the chart: 12 | 13 | ```sh 14 | helm install spark-history-server . --set S3logPath=yourBucketName/eventLogFolder 15 | ``` 16 | 17 | ## Required variables 18 | 19 | Minimum required variables are: 20 | 21 | | Parameter | Required | Description | Example | 22 | | --------------------- | -------- | ------------------------------------------------------------------------------------------------------------------- | --------------- | 23 | | `S3logPath` | `yes` | S3 bucket and key used as base directory for Spark Application logs | `mybucket/logs` | 24 | | `serviceAccount.name` | `yes` | Service account used to run the deployment. Needs to have the appropriate IRSA role associated in order to access S3 | `sparkSA` | 25 | -------------------------------------------------------------------------------- /templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "spark-hs.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "spark-hs.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "spark-hs.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | 35 | {{/* 36 | Create the name of the service account to use 37 | */}} 38 | {{- define "spark-hs.serviceAccountName" -}} 39 | {{- if .Values.serviceAccount.create -}} 40 | {{ default (include "spark-hs.fullname" .) .Values.serviceAccount.name }} 41 | {{- else -}} 42 | {{ default "default" .Values.serviceAccount.name }} 43 | {{- end -}} 44 | {{- end -}} 45 | 46 | {{/*Deployment selectors*/}} 47 | {{- define "spark-hs.selectorLabels" -}} 48 | app.kubernetes.io/name: {{ include "spark-hs.name" . }} 49 | app.kubernetes.io/instance: {{ .Release.Name }} 50 | {{- end -}} 51 | 52 | {{/* 53 | Common labels 54 | */}} 55 | {{- define "spark-hs.labels" -}} 56 | helm.sh/chart: {{ include "spark-hs.chart" . }} 57 | app.kubernetes.io/name: {{ include "spark-hs.name" . 
}} 58 | app.kubernetes.io/instance: {{ .Release.Name }} 59 | {{- if .Chart.AppVersion }} 60 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 61 | {{- end }} 62 | app.kubernetes.io/managed-by: {{ .Release.Service }} 63 | {{- end -}} 64 | -------------------------------------------------------------------------------- /templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "spark-hs.fullname" . }} 5 | labels: 6 | {{- include "spark-hs.labels" . | nindent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | strategy: 10 | type: RollingUpdate 11 | rollingUpdate: 12 | maxUnavailable: 50% 13 | maxSurge: 1 14 | selector: 15 | matchLabels: 16 | {{- include "spark-hs.selectorLabels" . | nindent 6 }} 17 | template: 18 | metadata: 19 | labels: 20 | {{- include "spark-hs.selectorLabels" . | nindent 8 }} 21 | spec: 22 | {{- with .Values.imagePullSecrets }} 23 | imagePullSecrets: 24 | {{- toYaml . | nindent 8 }} 25 | {{- end }} 26 | serviceAccountName: {{ include "spark-hs.serviceAccountName" . 
}} 27 | containers: 28 | - name: {{ .Chart.Name }} 29 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 30 | imagePullPolicy: {{ .Values.image.pullPolicy }} 31 | command: 32 | - '/opt/spark/sbin/start-history-server.sh' 33 | env: 34 | - name: SPARK_NO_DAEMONIZE 35 | value: "true" # spark-daemon.sh only checks that this is set; the history server must stay in the foreground or the container exits 36 | - name: SPARK_HISTORY_OPTS 37 | value: "-Dspark.history.fs.logDirectory=s3a://{{ .Values.S3logPath }}" 38 | - name: AWS_ROLE_SESSION_NAME 39 | value: "spark-hs" 40 | - name: SPARK_CONF_DIR 41 | value: /opt/spark/conf 42 | volumeMounts: 43 | - name: config-volume 44 | mountPath: /opt/spark/conf/spark-defaults.conf 45 | subPath: spark-defaults.conf 46 | ports: 47 | - name: http 48 | containerPort: {{ .Values.service.internalPort }} 49 | protocol: TCP 50 | terminationMessagePath: /dev/termination-log 51 | terminationMessagePolicy: File 52 | resources: 53 | {{ toYaml .Values.resources | indent 12 }} 54 | volumes: 55 | - name: config-volume 56 | configMap: 57 | name: {{ template "spark-hs.fullname" . }}-spark-hs-config 58 | -------------------------------------------------------------------------------- /templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $serviceName := include "spark-hs.fullname" . -}} 3 | {{- $servicePort := .Values.service.externalPort -}} 4 | apiVersion: networking.k8s.io/v1 5 | kind: Ingress 6 | metadata: 7 | name: {{ template "spark-hs.fullname" . }} 8 | labels: 9 | app: {{ template "spark-hs.name" . }} 10 | chart: {{ template "spark-hs.chart" . }} 11 | release: {{ .Release.Name }} 12 | heritage: {{ .Release.Service }} 13 | {{- with .Values.ingress.annotations }} 14 | annotations: 15 | {{ toYaml . 
| indent 4 }} 16 | {{- end }} 17 | spec: 18 | rules: 19 | - host: {{ .Values.ingress.host }} 20 | http: 21 | paths: 22 | - path: {{ .Values.ingress.path }} 23 | pathType: Prefix 24 | backend: 25 | service: 26 | name: {{ $serviceName }} 27 | port: 28 | number: {{ $servicePort }} 29 | {{- end }} -------------------------------------------------------------------------------- /templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "spark-hs.fullname" . }} 5 | labels: 6 | {{- include "spark-hs.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.externalPort }} 11 | targetPort: {{ .Values.service.internalPort }} 12 | protocol: TCP 13 | name: http # port names are limited to 15 chars (IANA_SVC_NAME); .Chart.Name ("spark-history-server") fails API validation 14 | selector: 15 | {{- include "spark-hs.selectorLabels" . | nindent 6 }} 16 | -------------------------------------------------------------------------------- /templates/spark-configmap.yaml: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: {{ template "spark-hs.fullname" . }}-spark-hs-config 5 | data: 6 | spark-defaults.conf: |- 7 | spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.WebIdentityTokenCredentialsProvider 8 | spark.history.fs.eventLog.rolling.maxFilesToRetain=5 9 | -------------------------------------------------------------------------------- /values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for spark-hs 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | replicaCount: 1 6 | 7 | image: 8 | repository: bitnami/spark 9 | tag: 3.0.1 10 | pullPolicy: IfNotPresent 11 | 12 | imagePullSecrets: [] 13 | # - name: my-secret 14 | 15 | serviceAccount: 16 | create: false 17 | name: '' 18 | 19 | # S3 path to read logs from 20 | # S3logPath: my-company-s3-bucket/spark-hs 21 | S3logPath: '' 22 | 23 | service: 24 | externalPort: 80 25 | internalPort: 18080 26 | type: ClusterIP 27 | 28 | 29 | resources: {} 30 | # resources: 31 | # limits: 32 | # cpu: 500m 33 | # memory: 1024Mi 34 | 35 | # requests: 36 | # cpu: 250m 37 | # memory: 512Mi 38 | 39 | ingress: 40 | enabled: false 41 | annotations: {} 42 | # kubernetes.io/ingress.class: nginx 43 | # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true' 44 | # host: 'mycompany.com' 45 | # path: '/' 46 | --------------------------------------------------------------------------------