├── .gitignore ├── README.md ├── charts ├── jupyter-with-spark │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── conf │ │ ├── jupyter │ │ │ └── jupyter_notebook_config.py.template │ │ └── spark │ │ │ └── spark-defaults.conf │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ ├── pvc.yaml │ │ └── service.yaml │ └── values.yaml ├── snappydata │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── conf │ │ ├── debug.conf.template │ │ ├── docker.properties.template │ │ ├── fairscheduler.xml │ │ ├── fairscheduler.xml.template │ │ ├── leads.template │ │ ├── locators.template │ │ ├── log4j.properties.template │ │ ├── metrics.properties.template │ │ ├── servers.template │ │ ├── slaves.template │ │ ├── snappy-env.sh.template │ │ ├── spark-defaults.conf.template │ │ └── spark-env.sh.template │ ├── plans.yaml │ ├── plans │ │ ├── large.yaml │ │ ├── medium.yaml │ │ └── small.yaml │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── leader_statefulset.yaml │ │ ├── locator_statefulset.yaml │ │ ├── role-binding.yaml │ │ ├── server_statefulset.yaml │ │ └── service.yaml │ └── values.yaml ├── spark-hs │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── conf │ │ └── secrets │ │ │ └── .gitignore │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ └── service.yaml │ └── values.yaml ├── spark-rss │ ├── Chart.yaml │ ├── README.md │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ └── service.yaml │ └── values.yaml ├── spark-shuffle │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ └── shuffle-daemonset.yaml │ └── values.yaml ├── spark-umbrella │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── charts │ │ └── .gitignore │ ├── conf │ │ ├── secrets │ │ │ └── .gitignore │ │ ├── spark │ │ │ ├── .gitignore │ │ │ ├── fairscheduler.xml.template │ │ │ ├── log4j.properties.template │ │ │ ├── metrics.properties.template │ │ │ ├── spark-defaults.conf │ │ │ ├── spark-defaults.conf.template │ │ │ └── spark-env.sh.template │ │ └── zeppelin │ │ │ ├── .gitignore │ │ │ ├── configuration.xsl │ │ │ ├── interpreter-list │ │ │ ├── log4j.properties │ │ │ ├── shiro.ini.template │ │ │ ├── zeppelin-env.sh.template │ │ │ └── zeppelin-site.xml.template │ ├── requirements.lock │ ├── requirements.yaml │ ├── templates │ │ └── configmap.yaml │ └── values.yaml └── zeppelin-with-spark │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── conf │ ├── secrets │ │ └── .gitignore │ ├── spark │ │ ├── fairscheduler.xml.template │ │ ├── log4j.properties.template │ │ ├── metrics.properties.template │ │ ├── spark-defaults.conf.template │ │ └── spark-env.sh.template │ └── zeppelin │ │ ├── configuration.xsl │ │ ├── interpreter-list │ │ ├── log4j.properties │ │ ├── shiro.ini.template │ │ ├── zeppelin-env.sh.template │ │ └── zeppelin-site.xml.template │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── configmap.yaml │ ├── deployment.yaml │ ├── pvc.yaml │ └── service.yaml │ └── values.yaml ├── dockerfiles ├── jupyter │ └── Dockerfile └── zeppelin │ ├── Dockerfile │ └── setSparkEnvVars.sh ├── docs └── building-images.md ├── k8s-helm-spark-architecture-draw.io.png ├── kubernetes-how-does-it-work.1.png ├── spark-on-kubernetes-how-does-it-work.2.png ├── templates └── snappydata-cluster.yml ├── tiles ├── README.md └── snappydata │ ├── icon.png │ ├── tile-history.yml │ └── tile.yml └── 
utils ├── debug-pod-override-template.json └── snappy-debug-pod.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | 4 | tiles/snappydata/product/ 5 | tiles/snappydata/release/ 6 | 7 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for launching Jupyter notebooks with Apache Spark in-cluster client mode. 4 | name: jupyter-with-spark 5 | version: 0.1.0 6 | home: https://github.com/apache-spark-on-k8s/spark 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png 8 | maintainers: 9 | - name: SnappyData, Inc. 10 | email: chomp@snappydata.io 11 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/conf/spark/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.submit.deployMode client 29 | spark.kubernetes.driver.docker.image snappydatainc/spark-driver-py:v2.2.0-kubernetes-0.5.1 30 | spark.kubernetes.executor.docker.image snappydatainc/spark-executor-py:v2.2.0-kubernetes-0.5.1 31 | spark.kubernetes.initcontainer.docker.image snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1 32 | spark.kubernetes.docker.image.pullPolicy Always 33 | # Replace sparkonk8s-test.json with the actual name of your keyfile 34 | # to enable access to Google Cloud Storage. 
35 | spark.hadoop.google.cloud.auth.service.account.json.keyfile /etc/secrets/sparkonk8s-test.json 36 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Get the application URL by running these commands: 2 | {{- if contains "NodePort" .Values.jupyterService.type }} 3 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-jupyter-spark) 4 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 5 | echo "Access Jupyter notebooks at http://$NODE_IP:$NODE_PORT" 6 | echo "Access Spark at http://$NODE_IP:{{ .Values.jupyterService.sparkUIPort }} after a Spark job is run." 7 | {{- else if contains "LoadBalancer" .Values.jupyterService.type }} 8 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 9 | You can watch the status of by running 'kubectl get svc -w {{ .Release.Name }}-jupyter-spark' 10 | export JUPYTER_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ .Release.Name }}-jupyter-spark -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 11 | echo "Access Jupyter notebooks at http://$JUPYTER_SERVICE_IP:{{ .Values.jupyterService.jupyterPort }}" 12 | echo "Access Spark at http://$SPARK_UI_SERVICE_IP:{{ .Values.jupyterService.sparkUIPort }} after a Spark job is run." 13 | {{- else if contains "ClusterIP" .Values.jupyterService.type }} 14 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "jupyter-with-spark.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 15 | echo "Visit http://127.0.0.1:8888 to access Jupyter notebooks" 16 | kubectl port-forward $POD_NAME 8888:80 17 | {{- end }} 18 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "jupyter-with-spark.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "jupyter-with-spark.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 
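The spark-defaults.conf above expects a Google Cloud Storage keyfile to be readable under /etc/secrets inside the notebook pod, and the chart packs anything under conf/secrets/ into an Opaque Secret when mountSecrets is enabled. A minimal sketch of wiring that up, assuming a local checkout of this repository, a keyfile named sparkonk8s-test.json, and the Helm 2 CLI (release name is illustrative):

# Place the GCS service-account keyfile where the chart's .Files.Glob picks it up.
mkdir -p charts/jupyter-with-spark/conf/secrets
cp ~/sparkonk8s-test.json charts/jupyter-with-spark/conf/secrets/

# Install with secret mounting enabled; the keyfile then appears under /etc/secrets/
# in the notebook pod and is referenced by spark-defaults.conf as shown above.
helm install --name jupyter --set mountSecrets=true charts/jupyter-with-spark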
29 | */}} 30 | {{- define "jupyter-with-spark.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if (not .Values.global.umbrellaChart) }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ .Release.Name }}-jupyter-configmap 6 | data: 7 | jupyter_notebook_config.py: |- 8 | {{- range .Files.Lines "conf/jupyter/jupyter_notebook_config.py" }} 9 | {{ . }}{{ end }} 10 | {{- end }} 11 | --- 12 | {{- if (not .Values.global.umbrellaChart) }} 13 | apiVersion: v1 14 | kind: ConfigMap 15 | metadata: 16 | name: {{ .Release.Name }}-jp-spark-configmap 17 | data: 18 | spark-defaults.conf: |- 19 | {{- range .Files.Lines "conf/spark/spark-defaults.conf" }} 20 | {{ . }}{{ end }} 21 | fairscheduler.xml: |- 22 | {{- range .Files.Lines "conf/spark/fairscheduler.xml" }} 23 | {{ . }}{{ end }} 24 | log4j.properties: |- 25 | {{- range .Files.Lines "conf/spark/log4j.properties" }} 26 | {{ . }}{{ end }} 27 | {{- end }} 28 | --- 29 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }} 30 | apiVersion: v1 31 | kind: Secret 32 | metadata: 33 | name: {{ .Release.Name }}-jp-secrets 34 | type: Opaque 35 | data: 36 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 37 | {{- end }} 38 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "jupyter-with-spark.fullname" . }} 5 | labels: 6 | app: {{ template "jupyter-with-spark.name" . }} 7 | chart: {{ template "jupyter-with-spark.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: {{ template "jupyter-with-spark.name" . }} 15 | release: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app: {{ template "jupyter-with-spark.name" . 
}} 20 | release: {{ .Release.Name }} 21 | spec: 22 | containers: 23 | - name: {{ .Chart.Name }} 24 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 25 | imagePullPolicy: {{ .Values.image.pullPolicy }} 26 | ports: 27 | - name: http 28 | containerPort: {{ .Values.jupyterService.jupyterPort }} 29 | protocol: TCP 30 | - name: web-ui 31 | containerPort: {{ .Values.jupyterService.sparkUIPort }} 32 | protocol: TCP 33 | command: 34 | - "/bin/sh" 35 | - "-c" 36 | - > 37 | jupyter notebook --generate-config; 38 | size=$(wc -c < /tmp/conf/jupyter/jupyter_notebook_config.py); 39 | if [ $size -gt 0 ]; then cp /tmp/conf/jupyter/jupyter_notebook_config.py /home/jovyan/.jupyter/jupyter_notebook_config.py; fi; 40 | cp /tmp/conf/spark/* /opt/spark/conf/; 41 | mkdir -p /home/jovyan/notebooks; 42 | prefix=`date +%s%N | cut -b1-13`; 43 | echo "" >> /opt/spark/conf/spark-defaults.conf; 44 | echo "spark.master k8s://https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT" >> /opt/spark/conf/spark-defaults.conf; 45 | echo "spark.kubernetes.driver.pod.name $HOSTNAME" >> /opt/spark/conf/spark-defaults.conf; 46 | echo "spark.kubernetes.executor.podNamePrefix spark-$prefix" >> /opt/spark/conf/spark-defaults.conf; 47 | echo "spark.kubernetes.authenticate.driver.serviceAccountName {{ .Values.global.serviceAccount | default .Values.serviceAccount }}" >> /opt/spark/conf/spark-defaults.conf; 48 | echo "spark.ui.port {{ .Values.jupyterService.sparkUIPort }}" >> /opt/spark/conf/spark-defaults.conf; 49 | echo "spark.kubernetes.namespace {{ .Release.Namespace }}" >> /opt/spark/conf/spark-defaults.conf; 50 | {{- if .Values.sparkEventLog.enableHistoryEvents }} 51 | echo "spark.eventLog.enabled true" >> /opt/spark/conf/spark-defaults.conf; 52 | echo "spark.eventLog.dir {{ .Values.sparkEventLog.eventLogDir }}" >> /opt/spark/conf/spark-defaults.conf; 53 | {{- end }} 54 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 55 | echo "spark.kubernetes.driver.secrets.{{ .Release.Name }}-secrets /etc/secrets" >> /opt/spark/conf/spark-defaults.conf; 56 | {{- end }} 57 | {{- if eq .Values.jupyterService.password "" }} 58 | . /usr/local/bin/start.sh jupyter notebook --NotebookApp.token='' --NotebookApp.port={{ .Values.jupyterService.jupyterPort }}; 59 | {{ else }} 60 | echo "{ \"NotebookApp\": { \"password\": \"REPLACE_ME\" } }" > ~/.jupyter/jupyter_notebook_config.json; 61 | printf "import os\nfrom notebook.auth import passwd\nprint(passwd('{{ .Values.jupyterService.password }}'))" > hash.py; 62 | export HASHED=`python hash.py`; 63 | rm hash.py; 64 | sed -i -e "s/REPLACE_ME/${HASHED}/g" ~/.jupyter/jupyter_notebook_config.json; 65 | . /usr/local/bin/start.sh jupyter notebook --NotebookApp.port={{ .Values.jupyterService.jupyterPort }}; 66 | {{- end }} 67 | livenessProbe: 68 | httpGet: 69 | path: / 70 | port: http 71 | readinessProbe: 72 | httpGet: 73 | path: / 74 | port: http 75 | resources: 76 | {{ toYaml .Values.resources | indent 12 }} 77 | volumeMounts: 78 | - name: data 79 | mountPath: /data/ 80 | - name: spark-config 81 | mountPath: /tmp/conf/spark/ 82 | - name: jupyter-config 83 | mountPath: /tmp/conf/jupyter/ 84 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 85 | - name: secrets-volume 86 | mountPath: /etc/secrets/ 87 | {{- end }} 88 | {{- with .Values.nodeSelector }} 89 | nodeSelector: 90 | {{ toYaml . | indent 8 }} 91 | {{- end }} 92 | serviceAccount: {{ .Values.serviceAccount }} 93 | {{- with .Values.affinity }} 94 | affinity: 95 | {{ toYaml . 
| indent 8 }} 96 | {{- end }} 97 | {{- with .Values.tolerations }} 98 | tolerations: 99 | {{ toYaml . | indent 8 }} 100 | {{- end }} 101 | volumes: 102 | - name: data 103 | {{- if .Values.persistence.enabled }} 104 | persistentVolumeClaim: 105 | claimName: {{ .Values.persistence.existingClaim | default (include "jupyter-with-spark.fullname" .) }} 106 | {{- else }} 107 | emptyDir: {} 108 | {{- end }} 109 | - name: jupyter-config 110 | configMap: 111 | name: {{ .Release.Name }}-jupyter-configmap 112 | - name: spark-config 113 | configMap: 114 | name: {{ .Release.Name }}-jp-spark-configmap 115 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 116 | - name: secrets-volume 117 | secret: 118 | secretName: {{ .Release.Name }}-jp-secrets 119 | {{- end }} -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/pvc.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: {{ template "jupyter-with-spark.fullname" . }} 6 | labels: 7 | app: {{ template "jupyter-with-spark.fullname" . }} 8 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 9 | release: "{{ .Release.Name }}" 10 | heritage: "{{ .Release.Service }}" 11 | {{- if .Values.persistence.keepResource }} 12 | annotations: 13 | "helm.sh/resource-policy": keep 14 | {{- end }} 15 | spec: 16 | accessModes: 17 | - {{ .Values.persistence.accessMode | quote }} 18 | resources: 19 | requests: 20 | storage: {{ .Values.persistence.size | quote }} 21 | {{- if .Values.persistence.storageClass }} 22 | {{- if (eq "-" .Values.persistence.storageClass) }} 23 | storageClassName: "" 24 | {{- else }} 25 | storageClassName: "{{ .Values.persistence.storageClass }}" 26 | {{- end }} 27 | {{- end }} 28 | {{- end }} 29 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Release.Name }}-jupyter-spark 5 | labels: 6 | app: {{ template "jupyter-with-spark.name" . }} 7 | chart: {{ template "jupyter-with-spark.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | type: {{ .Values.jupyterService.type | default "LoadBalancer" | quote }} 12 | ports: 13 | - port: {{ .Values.jupyterService.jupyterPort }} 14 | targetPort: http 15 | protocol: TCP 16 | name: http 17 | - port: {{ .Values.jupyterService.sparkUIPort }} 18 | targetPort: web-ui 19 | protocol: TCP 20 | name: web-ui 21 | selector: 22 | app: {{ template "jupyter-with-spark.name" . }} 23 | release: {{ .Release.Name }} 24 | --- 25 | -------------------------------------------------------------------------------- /charts/jupyter-with-spark/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for jupyter-with-spark helm chart. 2 | # Declare variables to be passed into your templates. 3 | 4 | image: 5 | repository: snappydatainc/jupyter-notebook 6 | tag: 5.2.2-spark-v2.2.0-kubernetes-0.5.1 7 | pullPolicy: IfNotPresent 8 | 9 | jupyterService: 10 | type: LoadBalancer 11 | jupyterPort: 8888 12 | sparkUIPort: 4040 13 | # Set your password to access the notebook server. A default ('abc123') has been set for you. 
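Tying the deployment above together: at startup the container appends the Kubernetes master URL, driver pod name and service account to spark-defaults.conf, and hashes any configured notebook password. A hedged example of a typical install, where the `spark` service account and its `edit` role binding are assumptions created outside the chart (Helm 2 CLI, default namespace):

# Service account the in-cluster Spark driver uses to create executor pods
# (not created by the chart itself).
kubectl create serviceaccount spark
kubectl create rolebinding spark-edit --clusterrole=edit --serviceaccount=default:spark

# Install with a non-default notebook password; the startup script hashes it
# with notebook.auth.passwd before writing jupyter_notebook_config.json.
helm install --name jupyter \
  --set serviceAccount=spark \
  --set jupyterService.password='my-strong-password' \
  charts/jupyter-with-spark

# If the LoadBalancer address is not reachable, a port-forward to the pod works too:
POD=$(kubectl get pods -l "app=jupyter-with-spark,release=jupyter" -o jsonpath="{.items[0].metadata.name}")
kubectl port-forward $POD 8888:8888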
14 | # Setting the password to empty string will disable the authentication (not recommended). 15 | password: 'abc123' 16 | 17 | sparkWebUI: 18 | type: LoadBalancer 19 | port: 4040 20 | 21 | serviceAccount: default 22 | mountSecrets: false 23 | 24 | sparkEventLog: 25 | enableHistoryEvents: false 26 | # eventsLogDir should point to a URI of GCS bucket where history events will be dumped 27 | eventLogDir: "gs://spark-history-server-store/" 28 | 29 | persistence: 30 | enabled: false 31 | # A manually managed Persistent Volume and Claim 32 | # Requires persistence.enabled: true 33 | # If defined, PVC must be created manually before the volume can be bound. 34 | # existingClaim: 35 | 36 | # If defined, storageClassName: 37 | # If set to "-", storageClassName: "", which disables dynamic provisioning 38 | # If undefined (the default) or set to null, no storageClassName spec is 39 | # set, choosing the default provisioner. (gp2 on AWS, standard on 40 | # GKE, Azure & OpenStack) 41 | # 42 | # storageClass: "-" 43 | accessMode: ReadWriteOnce 44 | size: 6Gi 45 | # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned 46 | keepResource: true 47 | 48 | resources: {} 49 | # We usually recommend not to specify default resources and to leave this as a conscious 50 | # choice for the user. This also increases chances charts run on environments with little 51 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 52 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 53 | # limits: 54 | # cpu: 100m 55 | # memory: 128Mi 56 | # requests: 57 | # cpu: 100m 58 | # memory: 128Mi 59 | 60 | nodeSelector: {} 61 | 62 | tolerations: [] 63 | 64 | affinity: {} 65 | 66 | #internal attribute, do not change 67 | global: 68 | umbrellaChart: false -------------------------------------------------------------------------------- /charts/snappydata/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | 23 | # PAS tile specific artifacts 24 | images/ 25 | plans/ 26 | plans.yaml -------------------------------------------------------------------------------- /charts/snappydata/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for launching SnappyData cluster on Kubernetes. 4 | name: snappydata 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/snappydata/conf/debug.conf.template: -------------------------------------------------------------------------------- 1 | MEMBERS_FILE=$SNAPPY_HOME/work/members.txt 2 | NO_OF_STACK_DUMPS=2 3 | INTERVAL_BETWEEN_DUMPS=10 4 | -------------------------------------------------------------------------------- /charts/snappydata/conf/docker.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
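The persistence block in the jupyter-with-spark values.yaml above drives the PVC template (pvc.yaml) and the /data volume in the deployment. A short sketch of enabling it (the `standard` storage class is an assumption; substitute whatever your cluster offers, or point at a pre-created claim):

# Dynamically provision a 10Gi volume, mounted at /data in the notebook pod.
helm install --name jupyter \
  --set persistence.enabled=true \
  --set persistence.size=10Gi \
  --set persistence.storageClass=standard \
  charts/jupyter-with-spark

# Alternatively, bind to an existing claim:
#   --set persistence.enabled=true --set persistence.existingClaim=my-notebooks-pvc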
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | spark.mesos.executor.docker.image: 19 | spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro 20 | spark.mesos.executor.home: /opt/spark 21 | -------------------------------------------------------------------------------- /charts/snappydata/conf/fairscheduler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | FAIR 23 | 1 24 | 25 | 26 | FAIR 27 | 2 28 | 2 29 | 30 | 31 | -------------------------------------------------------------------------------- /charts/snappydata/conf/fairscheduler.xml.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | FAIR 23 | 1 24 | 2 25 | 26 | 27 | FIFO 28 | 2 29 | 3 30 | 31 | 32 | -------------------------------------------------------------------------------- /charts/snappydata/conf/leads.template: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you 5 | # may not use this file except in compliance with the License. You 6 | # may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | # implied. See the License for the specific language governing 14 | # permissions and limitations under the License. See accompanying 15 | # LICENSE file. 16 | # 17 | # 18 | # Here are examples using common configuration properties 19 | # I) 20 | # Specify the host name on which a Snappy lead will be started. Also 21 | # specify the directory where the logs and metadata files 22 | # for that lead instance will be created. If the directory and properties 23 | # are not specified a default directory is created inside the SNAPPY_HOME directory. 24 | # 25 | # lead1 -dir=/tmp/data/lead (config args) 26 | # 27 | # II) 28 | # Below is an example of how you can specify multiple locators for a lead and also 29 | # set its heap size to 8 GB. 30 | # 31 | # lead1 -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=8g 32 | # 33 | # III) 34 | # Another example which shows how to specify Spark properties. 
35 | # 36 | # lead1 -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16 37 | # 38 | # IV) Start the SnappyData Zeppelin interpreter on the Lead node 39 | # 40 | # lead1 -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16 -zeppelin.interpreter.enable=true -classpath= 41 | # 42 | # For more options, see http://snappydatainc.github.io/snappydata/configuration/#configuration 43 | localhost 44 | -------------------------------------------------------------------------------- /charts/snappydata/conf/locators.template: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you 5 | # may not use this file except in compliance with the License. You 6 | # may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | # implied. See the License for the specific language governing 14 | # permissions and limitations under the License. See accompanying 15 | # LICENSE file. 16 | # 17 | # 18 | # By default, SnappyData launch scripts will start a single locator on localhost 19 | # and uses /work/localhost-locator-1/ as the directory for logs and 20 | # statistics. 21 | # Assuming your network is ssh enabled you can add hostnames (one line per host) to start 22 | # locator on multiple hosts. 23 | # 24 | # Example configurations: 25 | # I) Configuring the hostname/IP address for cluster members to find the locator: 26 | # 27 | # By default, locator binds to provided hostname on port 10334 for discovering other members of the cluster. 28 | # Optionally set peer-discovery-address to a hostname/IP (usually the internal LAN IP) where other members of 29 | # cluster can talk to locator (configured as their -locators option) which is the provided hostname by default, 30 | # and peer-discover-port if you want to change port from the default 10334. 31 | # The peer-discovery-address can be a wildcard like 0.0.0.0 to listen on all interfaces. 32 | # 33 | # locator1 -peer-discovery-port=9988 -locators=locator2:8899 34 | # 35 | # If there are multiple locators in the cluster, then specify hostname:port of other locators in the 36 | # -locators option. 37 | # 38 | # locator1 -peer-discovery-port=9988 -locators=locator2:8899 39 | # locator2 -peer-discovery-port=9988 -locators=locator1:8899 40 | # 41 | # II) Using client bind address: 42 | # 43 | # One can specify bind address for clients to allow clients from outside this machine to connect 44 | # using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost). 45 | # 46 | # In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments), 47 | # you should also configure the -hostname-for-clients else clients from outside the network 48 | # will not be able to connect to the locators/servers. It should be set to the public hostname 49 | # or public IP address that will be sent to clients to connect to. It can be skipped for cases 50 | # where private hostname is the same as public hostname (e.g. DNS translates appropriately). 51 | # Default is the `client-bind-address` of the locator. 
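On Kubernetes these per-member conf files are not edited by hand; as the leader statefulset further below shows, the chart appends `.Values.leaders.conf` to the lead's startup command. A sketch of passing the lead options from the examples above at install time (Helm 2 CLI; the analogous locators.conf and servers.conf keys are assumed to behave the same way):

# Override file with lead startup options, mirroring the leads.template examples.
cat > snappy-overrides.yaml <<'EOF'
leaders:
  conf: "-spark.ui.port=3333 -zeppelin.interpreter.enable=true"
EOF

helm install --name snappydata -f snappy-overrides.yaml charts/snappydata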
52 | # 53 | # -client-bind-address= -hostname-for-clients= 54 | # 55 | # III) Logging to different directory 56 | # Specify the startup directory where the logs and configuration files for that locator instance 57 | # are managed. 58 | # 59 | # locator1 -dir=/tmp/data/locator -client-bind-address=locator1 60 | # 61 | # For more configuration options, see 62 | # http://snappydatainc.github.io/snappydata/configuration/#configuration 63 | localhost 64 | -------------------------------------------------------------------------------- /charts/snappydata/conf/log4j.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootCategory=INFO, file 19 | 20 | # RollingFile appender 21 | log4j.appender.file=org.apache.log4j.RollingFileAppender 22 | log4j.appender.file.append=true 23 | log4j.appender.file.file=snappydata.log 24 | log4j.appender.file.MaxFileSize=1GB 25 | log4j.appender.file.MaxBackupIndex=10000 26 | log4j.appender.file.layout=io.snappydata.log4j.PatternLayout 27 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n 28 | 29 | # Console appender 30 | log4j.appender.console=org.apache.log4j.ConsoleAppender 31 | log4j.appender.console.target=System.out 32 | log4j.appender.console.layout=io.snappydata.log4j.PatternLayout 33 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n 34 | 35 | # Ignore messages below warning level from Jetty, because it's a bit verbose 36 | log4j.logger.org.spark-project.jetty=WARN 37 | org.spark-project.jetty.LEVEL=WARN 38 | log4j.logger.org.mortbay.jetty=WARN 39 | log4j.logger.org.eclipse.jetty=WARN 40 | 41 | # Some packages are noisy for no good reason. 
42 | log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false 43 | log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF 44 | 45 | log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false 46 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF 47 | 48 | log4j.additivity.hive.log=false 49 | log4j.logger.hive.log=OFF 50 | 51 | log4j.additivity.parquet.hadoop.ParquetRecordReader=false 52 | log4j.logger.parquet.hadoop.ParquetRecordReader=OFF 53 | 54 | log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false 55 | log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF 56 | 57 | log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false 58 | log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF 59 | 60 | log4j.additivity.hive.ql.metadata.Hive=false 61 | log4j.logger.hive.ql.metadata.Hive=OFF 62 | 63 | log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false 64 | log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR 65 | 66 | # Other Spark classes that generate unnecessary logs at INFO level 67 | log4j.logger.org.apache.spark.broadcast.TorrentBroadcast=WARN 68 | log4j.logger.org.apache.spark.ContextCleaner=WARN 69 | log4j.logger.org.apache.spark.MapOutputTracker=WARN 70 | log4j.logger.org.apache.spark.scheduler.TaskSchedulerImpl=WARN 71 | log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN 72 | log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN 73 | log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN 74 | log4j.logger.org.apache.spark.scheduler.FairSchedulableBuilder=WARN 75 | log4j.logger.org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint=WARN 76 | log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN 77 | log4j.logger.org.apache.hadoop.hive=WARN 78 | # for all Spark generated code (including ad-hoc UnsafeProjection calls etc) 79 | log4j.logger.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=WARN 80 | log4j.logger.org.apache.spark.sql.execution.datasources=WARN 81 | log4j.logger.org.apache.spark.scheduler.SnappyTaskSchedulerImpl=WARN 82 | log4j.logger.org.apache.spark.MapOutputTrackerMasterEndpoint=WARN 83 | log4j.logger.org.apache.spark.MapOutputTrackerMaster=WARN 84 | log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN 85 | log4j.logger.org.apache.spark.MapOutputTrackerWorker=WARN 86 | log4j.logger.org.apache.parquet=ERROR 87 | log4j.logger.parquet=ERROR 88 | log4j.logger.org.apache.hadoop.io.compress=WARN 89 | log4j.logger.spark.jobserver.LocalContextSupervisorActor=WARN 90 | log4j.logger.spark.jobserver.JarManager=WARN 91 | log4j.logger.org.apache.spark.sql.hive.HiveClientUtil=WARN 92 | log4j.logger.org.datanucleus=ERROR 93 | # Task logger created in SparkEnv 94 | log4j.logger.org.apache.spark.Task=WARN 95 | log4j.logger.org.apache.spark.sql.catalyst.parser.CatalystSqlParser=WARN 96 | 97 | # Keep log-level of some classes as INFO even if root level is higher 98 | log4j.logger.io.snappydata.impl.LeadImpl=INFO 99 | log4j.logger.io.snappydata.impl.ServerImpl=INFO 100 | log4j.logger.io.snappydata.impl.LocatorImpl=INFO 101 | log4j.logger.spray.can.server.HttpListener=INFO 102 | 103 | # for generated code of plans 104 | # log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenExec=DEBUG 105 | # for SnappyData generated code used on store (ComplexTypeSerializer, JDBC inserts ...) 
106 | # log4j.logger.org.apache.spark.sql.store.CodeGeneration=DEBUG 107 | -------------------------------------------------------------------------------- /charts/snappydata/conf/metrics.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # syntax: [instance].sink|source.[name].[options]=[value] 19 | 20 | # This file configures Spark's internal metrics system. The metrics system is 21 | # divided into instances which correspond to internal components. 22 | # Each instance can be configured to report its metrics to one or more sinks. 23 | # Accepted values for [instance] are "master", "worker", "executor", "driver", 24 | # and "applications". A wildcard "*" can be used as an instance name, in 25 | # which case all instances will inherit the supplied property. 26 | # 27 | # Within an instance, a "source" specifies a particular set of grouped metrics. 28 | # there are two kinds of sources: 29 | # 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will 30 | # collect a Spark component's internal state. Each instance is paired with a 31 | # Spark source that is added automatically. 32 | # 2. Common sources, like JvmSource, which will collect low level state. 33 | # These can be added through configuration options and are then loaded 34 | # using reflection. 35 | # 36 | # A "sink" specifies where metrics are delivered to. Each instance can be 37 | # assigned one or more sinks. 38 | # 39 | # The sink|source field specifies whether the property relates to a sink or 40 | # source. 41 | # 42 | # The [name] field specifies the name of source or sink. 43 | # 44 | # The [options] field is the specific property of this source or sink. The 45 | # source or sink is responsible for parsing this property. 46 | # 47 | # Notes: 48 | # 1. To add a new sink, set the "class" option to a fully qualified class 49 | # name (see examples below). 50 | # 2. Some sinks involve a polling period. The minimum allowed polling period 51 | # is 1 second. 52 | # 3. Wildcard properties can be overridden by more specific properties. 53 | # For example, master.sink.console.period takes precedence over 54 | # *.sink.console.period. 55 | # 4. A metrics specific configuration 56 | # "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be 57 | # added to Java properties using -Dspark.metrics.conf=xxx if you want to 58 | # customize metrics system. You can also put the file in ${SPARK_HOME}/conf 59 | # and it will be loaded automatically. 60 | # 5. 
The MetricsServlet sink is added by default as a sink in the master, 61 | # worker and driver, and you can send HTTP requests to the "/metrics/json" 62 | # endpoint to get a snapshot of all the registered metrics in JSON format. 63 | # For master, requests to the "/metrics/master/json" and 64 | # "/metrics/applications/json" endpoints can be sent separately to get 65 | # metrics snapshots of the master instance and applications. This 66 | # MetricsServlet does not have to be configured. 67 | 68 | ## List of available common sources and their properties. 69 | 70 | # org.apache.spark.metrics.source.JvmSource 71 | # Note: Currently, JvmSource is the only available common source. 72 | # It can be added to an instance by setting the "class" option to its 73 | # fully qualified class name (see examples below). 74 | 75 | ## List of available sinks and their properties. 76 | 77 | # org.apache.spark.metrics.sink.ConsoleSink 78 | # Name: Default: Description: 79 | # period 10 Poll period 80 | # unit seconds Unit of the poll period 81 | 82 | # org.apache.spark.metrics.sink.CSVSink 83 | # Name: Default: Description: 84 | # period 10 Poll period 85 | # unit seconds Unit of the poll period 86 | # directory /tmp Where to store CSV files 87 | 88 | # org.apache.spark.metrics.sink.GangliaSink 89 | # Name: Default: Description: 90 | # host NONE Hostname or multicast group of the Ganglia server, 91 | # must be set 92 | # port NONE Port of the Ganglia server(s), must be set 93 | # period 10 Poll period 94 | # unit seconds Unit of the poll period 95 | # ttl 1 TTL of messages sent by Ganglia 96 | # dmax 0 Lifetime in seconds of metrics (0 never expired) 97 | # mode multicast Ganglia network mode ('unicast' or 'multicast') 98 | 99 | # org.apache.spark.metrics.sink.JmxSink 100 | 101 | # org.apache.spark.metrics.sink.MetricsServlet 102 | # Name: Default: Description: 103 | # path VARIES* Path prefix from the web server root 104 | # sample false Whether to show entire set of samples for histograms 105 | # ('false' or 'true') 106 | # 107 | # * Default path is /metrics/json for all instances except the master. 
The 108 | # master has two paths: 109 | # /metrics/applications/json # App information 110 | # /metrics/master/json # Master information 111 | 112 | # org.apache.spark.metrics.sink.GraphiteSink 113 | # Name: Default: Description: 114 | # host NONE Hostname of the Graphite server, must be set 115 | # port NONE Port of the Graphite server, must be set 116 | # period 10 Poll period 117 | # unit seconds Unit of the poll period 118 | # prefix EMPTY STRING Prefix to prepend to every metric's name 119 | # protocol tcp Protocol ("tcp" or "udp") to use 120 | 121 | ## Examples 122 | # Enable JmxSink for all instances by class name 123 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 124 | 125 | # Enable ConsoleSink for all instances by class name 126 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink 127 | 128 | # Polling period for the ConsoleSink 129 | #*.sink.console.period=10 130 | # Unit of the polling period for the ConsoleSink 131 | #*.sink.console.unit=seconds 132 | 133 | # Polling period for the ConsoleSink specific for the master instance 134 | #master.sink.console.period=15 135 | # Unit of the polling period for the ConsoleSink specific for the master 136 | # instance 137 | #master.sink.console.unit=seconds 138 | 139 | # Enable CsvSink for all instances by class name 140 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink 141 | 142 | # Polling period for the CsvSink 143 | #*.sink.csv.period=1 144 | # Unit of the polling period for the CsvSink 145 | #*.sink.csv.unit=minutes 146 | 147 | # Polling directory for CsvSink 148 | #*.sink.csv.directory=/tmp/ 149 | 150 | # Polling period for the CsvSink specific for the worker instance 151 | #worker.sink.csv.period=10 152 | # Unit of the polling period for the CsvSink specific for the worker instance 153 | #worker.sink.csv.unit=minutes 154 | 155 | # Enable Slf4jSink for all instances by class name 156 | #*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink 157 | 158 | # Polling period for the Slf4JSink 159 | #*.sink.slf4j.period=1 160 | # Unit of the polling period for the Slf4jSink 161 | #*.sink.slf4j.unit=minutes 162 | 163 | # Enable JvmSource for instance master, worker, driver and executor 164 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 165 | 166 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 167 | 168 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 169 | 170 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 171 | -------------------------------------------------------------------------------- /charts/snappydata/conf/servers.template: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you 5 | # may not use this file except in compliance with the License. You 6 | # may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | # implied. See the License for the specific language governing 14 | # permissions and limitations under the License. See accompanying 15 | # LICENSE file. 16 | # 17 | # 18 | # Specify the host name on which a Snappy server will be started. 
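As the metrics.properties template above notes, the MetricsServlet sink is registered by default, so a JSON snapshot of driver metrics is available over HTTP once a Spark application is running. A small illustration, assuming the driver UI listens on the default port 4040 used elsewhere in these charts and <driver-pod> is the pod running the driver:

# Forward the driver UI port and pull all registered metrics as JSON.
kubectl port-forward <driver-pod> 4040:4040 &
curl -s http://localhost:4040/metrics/json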
Also 19 | # specify the startup directory where the logs and configuration files 20 | # for that server instance are kept. If the directory and properties 21 | # are not specified a default directory is created inside the SNAPPY_HOME directory. 22 | # 23 | # server1 -dir=/tmp/data/server [config args] 24 | # 25 | # An example of how you can specify multiple locators for a server and 26 | # set its heap size to 64 GB. 27 | # 28 | # server1 -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=64g 29 | # 30 | # One can specify bind address for clients to allow clients from outside this machine to connect 31 | # using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost). 32 | # 33 | # In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments), 34 | # you should also configure the -hostname-for-clients else clients from outside the network 35 | # will not be able to connect to the servers. It should be set to the public hostname 36 | # or public IP address that will be sent to clients to connect to. It can be skipped for cases 37 | # where private hostname is the same as public hostname (e.g. DNS translates appropriately). 38 | # Default is the `client-bind-address` of the server. 39 | # 40 | # -client-bind-address= -client-port=1555 -hostname-for-clients= 41 | # 42 | # For more configuration options, 43 | # see http://snappydatainc.github.io/snappydata/configuration/#configuration 44 | localhost 45 | -------------------------------------------------------------------------------- /charts/snappydata/conf/slaves.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # A Spark Worker will be started on each of the machines listed below. 19 | localhost -------------------------------------------------------------------------------- /charts/snappydata/conf/snappy-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # This file is sourced when running various Snappy programs. 21 | # Copy it as snappy-env.sh and edit that to configure Spark and Snappy for your site. 22 | 23 | # Options read when launching programs locally with 24 | # ./bin/run-example or ./bin/spark-submit 25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 28 | # - SPARK_CLASSPATH, default classpath entries to append 29 | 30 | # Options read by executors and drivers running inside the cluster 31 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 32 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 33 | # - SPARK_CLASSPATH, default classpath entries to append 34 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 35 | 36 | # Options read by Snappy servers, leads and locators 37 | # - LOCATOR_STARTUP_OPTIONS, options and properties that are passed to all the locators. 38 | # - SERVER_STARTUP_OPTIONS, options and properties that are passed to all the snappy servers 39 | # - LEAD_STARTUP_OPTIONS, options and properties that are passed to all the lead opts. -------------------------------------------------------------------------------- /charts/snappydata/conf/spark-defaults.conf.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | -------------------------------------------------------------------------------- /charts/snappydata/conf/spark-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # This file is sourced when running various Spark programs. 21 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 22 | 23 | # Options read when launching programs locally with 24 | # ./bin/run-example or ./bin/spark-submit 25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 28 | # - SPARK_CLASSPATH, default classpath entries to append 29 | 30 | # Options read by executors and drivers running inside the cluster 31 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 32 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 33 | # - SPARK_CLASSPATH, default classpath entries to append 34 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 35 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos 36 | 37 | # Options read in YARN client mode 38 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 39 | # - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2) 40 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). 41 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) 42 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) 43 | 44 | # Options for the daemons used in the standalone deploy mode 45 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname 46 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 47 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 48 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 49 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 
1000m, 2g) 50 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 51 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node 52 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 53 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 54 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). 55 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 56 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") 57 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 58 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 59 | 60 | # Generic options for the daemons used in the standalone deploy mode 61 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 62 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 63 | # - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) 64 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 65 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 66 | # - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. 67 | -------------------------------------------------------------------------------- /charts/snappydata/plans.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Specific to PCF tile 3 | ############################# 4 | - name: "small" 5 | description: "default (small) plan for snappydata cluster" 6 | file: "small.yaml" 7 | - name: "medium" 8 | description: "medium plan for snappydata cluster" 9 | file: "medium.yaml" 10 | - name: "large" 11 | description: "large plan for snappydata cluster" 12 | file: "large.yaml" 13 | -------------------------------------------------------------------------------- /charts/snappydata/plans/large.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Specific to PCF tile 3 | ############################# 4 | locators: 5 | resources: 6 | requests: 7 | memory: 1024Mi 8 | persistence: 9 | size: 10Gi 10 | 11 | servers: 12 | replicaCount: 2 13 | resources: 14 | requests: 15 | memory: 4096Mi 16 | persistence: 17 | size: 20Gi 18 | 19 | leaders: 20 | resources: 21 | requests: 22 | memory: 4096Mi 23 | persistence: 24 | size: 20Gi -------------------------------------------------------------------------------- /charts/snappydata/plans/medium.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Specific to PCF tile 3 | ############################# 4 | locators: 5 | resources: 6 | requests: 7 | memory: 1024Mi 8 | persistence: 9 | size: 10Gi 10 | 11 | servers: 12 | replicaCount: 2 13 | resources: 14 | requests: 15 | memory: 2048Mi 16 | persistence: 17 | size: 10Gi 18 | 19 | leaders: 20 | resources: 21 | requests: 22 | memory: 2048Mi 23 | persistence: 24 | size: 10Gi -------------------------------------------------------------------------------- /charts/snappydata/plans/small.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Specific to PCF tile 3 | ############################# 4 | locators: 5 | 
resources: 6 | requests: 7 | memory: 1024Mi 8 | persistence: 9 | size: 10Gi 10 | 11 | servers: 12 | replicaCount: 2 13 | resources: 14 | requests: 15 | memory: 1024Mi 16 | persistence: 17 | size: 10Gi 18 | 19 | leaders: 20 | resources: 21 | requests: 22 | memory: 1024Mi 23 | persistence: 24 | size: 10Gi -------------------------------------------------------------------------------- /charts/snappydata/templates/NOTES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/snappydata/templates/NOTES.txt -------------------------------------------------------------------------------- /charts/snappydata/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "snappydata.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "snappydata.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "snappydata.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/snappydata/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }}-configmap 5 | data: 6 | {{ (.Files.Glob "conf/*").AsConfig | indent 2 }} -------------------------------------------------------------------------------- /charts/snappydata/templates/leader_statefulset.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Statefulset for leaders 3 | ############################# 4 | apiVersion: apps/v1 5 | kind: StatefulSet 6 | metadata: 7 | name: "{{ .Release.Name }}-leader" 8 | # TODO: Do we need to change, for example like the one given below? 9 | # name: {{ template "snappydata.fullname" . }} 10 | labels: 11 | app: {{ template "snappydata.name" . }} 12 | chart: {{ template "snappydata.chart" . 
}} 13 | release: {{ .Release.Name }} 14 | heritage: {{ .Release.Service }} 15 | spec: 16 | serviceName: "{{ .Release.Name }}-leader" 17 | replicas: {{ .Values.leaders.replicaCount | default 1 }} 18 | selector: 19 | matchLabels: 20 | app: "{{ .Release.Name }}-leader" 21 | template: 22 | metadata: 23 | labels: 24 | app: "{{ .Release.Name }}-leader" 25 | release: {{ .Release.Name }} 26 | spec: 27 | terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }} 28 | {{- if .Values.imagePullSecrets }} 29 | imagePullSecrets: 30 | - name: {{ .Values.imagePullSecrets }} 31 | {{- end }} 32 | containers: 33 | - name: "{{ .Release.Name }}-leader" 34 | image: "{{ .Values.image }}:{{ .Values.imageTag }}" 35 | imagePullPolicy: {{ .Values.imagePullPolicy }} 36 | ports: 37 | - containerPort: 5050 38 | name: sparkui 39 | livenessProbe: 40 | exec: 41 | command: 42 | - /bin/sh 43 | - -c 44 | - /opt/snappydata/sbin/snappy-leads.sh status | grep -e running -e waiting 45 | # initial delay intentionally kept large, as lead waits(250 seconds) for servers to be available 46 | initialDelaySeconds: {{ .Values.leaders.initialDelaySeconds | default 360 }} 47 | command: 48 | - "/bin/bash" 49 | - "-c" 50 | - > 51 | cp /snappy_conf/* /opt/snappydata/conf; 52 | 53 | WAIT_FOR_SERVICE_ARG="--get-ip {{ .Release.Name }}-leader-public --wait-for {{ .Release.Name }}-server 1527"; 54 | USER_PROVIDED_STARTUP_CONF={{ .Values.leaders.conf | default "" | quote }}; 55 | SNAPPY_STARTUP_CONF="-locators={{ .Release.Name }}-locator:10334 $USER_PROVIDED_STARTUP_CONF"; 56 | echo "Executing command: start lead $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF"; 57 | 58 | start lead $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF; 59 | lifecycle: 60 | preStop: 61 | exec: 62 | command: ["/opt/snappydata/sbin/snappy-leads.sh", "stop"] 63 | resources: 64 | {{ toYaml .Values.leaders.resources | indent 12 }} 65 | {{- with .Values.nodeSelector }} 66 | nodeSelector: 67 | {{ toYaml . | indent 8 }} 68 | {{- end }} 69 | {{- with .Values.affinity }} 70 | affinity: 71 | {{ toYaml . | indent 8 }} 72 | {{- end }} 73 | {{- with .Values.tolerations }} 74 | tolerations: 75 | {{ toYaml . | indent 8 }} 76 | {{- end }} 77 | volumeMounts: 78 | - mountPath: "/opt/snappydata/work" 79 | name: snappy-disk-claim 80 | - mountPath: /snappy_conf 81 | name: snappy-config-properties 82 | volumes: 83 | - name: snappy-config-properties 84 | configMap: 85 | name: {{ .Release.Name }}-configmap 86 | 87 | volumeClaimTemplates: 88 | - metadata: 89 | name: snappy-disk-claim 90 | spec: 91 | accessModes: [ {{ .Values.leaders.persistence.accessMode | quote }} ] 92 | resources: 93 | requests: 94 | storage: {{ .Values.leaders.persistence.size | quote }} 95 | {{- if .Values.leaders.persistence.storageClass }} 96 | storageClassName: {{ .Values.leaders.persistence.storageClass | quote }} 97 | {{- end }} 98 | -------------------------------------------------------------------------------- /charts/snappydata/templates/locator_statefulset.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Statefulset for locators 3 | ############################# 4 | apiVersion: apps/v1 5 | kind: StatefulSet 6 | metadata: 7 | name: "{{ .Release.Name }}-locator" 8 | # TODO: Do we need to change, for example like the one given below? 9 | # name: {{ template "snappydata.fullname" . }} 10 | labels: 11 | app: {{ template "snappydata.name" . }} 12 | chart: {{ template "snappydata.chart" . 
}} 13 | release: {{ .Release.Name }} 14 | heritage: {{ .Release.Service }} 15 | spec: 16 | serviceName: "{{ .Release.Name }}-locator" 17 | replicas: {{ .Values.locators.replicaCount | default 1 }} 18 | selector: 19 | matchLabels: 20 | app: "{{ .Release.Name }}-locator" 21 | template: 22 | metadata: 23 | labels: 24 | app: "{{ .Release.Name }}-locator" 25 | release: {{ .Release.Name }} 26 | spec: 27 | terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }} 28 | {{- if .Values.imagePullSecrets }} 29 | imagePullSecrets: 30 | - name: {{ .Values.imagePullSecrets }} 31 | {{- end }} 32 | containers: 33 | - name: "{{ .Release.Name }}-locator" 34 | image: "{{ .Values.image }}:{{ .Values.imageTag }}" 35 | imagePullPolicy: {{ .Values.imagePullPolicy }} 36 | ports: 37 | - containerPort: 10334 38 | name: locator 39 | - containerPort: 1527 40 | name: jdbc 41 | livenessProbe: 42 | exec: 43 | command: 44 | - /bin/sh 45 | - -c 46 | - /opt/snappydata/sbin/snappy-locators.sh status | grep -e running -e waiting 47 | initialDelaySeconds: {{ .Values.locators.initialDelaySeconds | default 80 }} 48 | command: 49 | - "/bin/bash" 50 | - "-c" 51 | - > 52 | cp /snappy_conf/* /opt/snappydata/conf; 53 | 54 | SNAPPY_STARTUP_CONF={{ .Values.locators.conf | default "" | quote }}; 55 | echo "Executing command: start locator $SNAPPY_STARTUP_CONF"; 56 | 57 | start locator $SNAPPY_STARTUP_CONF; 58 | lifecycle: 59 | preStop: 60 | exec: 61 | command: ["/opt/snappydata/sbin/snappy-locators.sh", "stop"] 62 | resources: 63 | {{ toYaml .Values.locators.resources | indent 12 }} 64 | {{- with .Values.nodeSelector }} 65 | nodeSelector: 66 | {{ toYaml . | indent 8 }} 67 | {{- end }} 68 | {{- with .Values.affinity }} 69 | affinity: 70 | {{ toYaml . | indent 8 }} 71 | {{- end }} 72 | {{- with .Values.tolerations }} 73 | tolerations: 74 | {{ toYaml . | indent 8 }} 75 | {{- end }} 76 | volumeMounts: 77 | - mountPath: "/opt/snappydata/work" 78 | name: snappy-disk-claim 79 | - mountPath: /snappy_conf 80 | name: snappy-config-properties 81 | volumes: 82 | - name: snappy-config-properties 83 | configMap: 84 | name: {{ .Release.Name }}-configmap 85 | 86 | volumeClaimTemplates: 87 | - metadata: 88 | name: snappy-disk-claim 89 | spec: 90 | accessModes: [ {{ .Values.locators.persistence.accessMode | quote }} ] 91 | resources: 92 | requests: 93 | storage: {{ .Values.locators.persistence.size | quote }} 94 | {{- if .Values.locators.persistence.storageClass }} 95 | storageClassName: {{ .Values.locators.persistence.storageClass | quote }} 96 | {{- end }} 97 | -------------------------------------------------------------------------------- /charts/snappydata/templates/role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: service-view-binding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: view 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | -------------------------------------------------------------------------------- /charts/snappydata/templates/server_statefulset.yaml: -------------------------------------------------------------------------------- 1 | ############################# 2 | # Statefulset for servers 3 | ############################# 4 | apiVersion: apps/v1 5 | kind: StatefulSet 6 | metadata: 7 | name: "{{ .Release.Name }}-server" 8 | # TODO: Do we need to change, for example like the one given below? 
9 | # name: {{ template "snappydata.fullname" . }} 10 | labels: 11 | app: {{ template "snappydata.name" . }} 12 | chart: {{ template "snappydata.chart" . }} 13 | release: {{ .Release.Name }} 14 | heritage: {{ .Release.Service }} 15 | spec: 16 | serviceName: "{{ .Release.Name }}-server" 17 | replicas: {{ .Values.servers.replicaCount | default 2 }} 18 | selector: 19 | matchLabels: 20 | app: "{{ .Release.Name }}-server" 21 | template: 22 | metadata: 23 | labels: 24 | app: "{{ .Release.Name }}-server" 25 | release: {{ .Release.Name }} 26 | spec: 27 | terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }} 28 | {{- if .Values.imagePullSecrets }} 29 | imagePullSecrets: 30 | - name: {{ .Values.imagePullSecrets }} 31 | {{- end }} 32 | containers: 33 | - name: "{{ .Release.Name }}-server" 34 | image: "{{ .Values.image }}:{{ .Values.imageTag }}" 35 | imagePullPolicy: {{ .Values.imagePullPolicy }} 36 | # Even servers use the same port as locator ... all run on independent pods 37 | # ... and, the service will either roundrobin or loadbalance 38 | ports: 39 | - containerPort: 1527 40 | name: jdbc 41 | livenessProbe: 42 | exec: 43 | command: 44 | - /bin/sh 45 | - -c 46 | - /opt/snappydata/sbin/snappy-servers.sh status | grep -e running -e waiting 47 | # initial delay intentionally kept large, as server waits(250 seconds) for locator to be available 48 | initialDelaySeconds: {{ .Values.servers.initialDelaySeconds | default 360 }} 49 | command: 50 | - "/bin/bash" 51 | - "-c" 52 | - > 53 | cp /snappy_conf/* /opt/snappydata/conf; 54 | 55 | WAIT_FOR_SERVICE_ARG="--get-ip {{ .Release.Name }}-server-public --wait-for {{ .Release.Name }}-locator 10334"; 56 | USER_PROVIDED_STARTUP_CONF={{ .Values.servers.conf | default "" | quote }}; 57 | SNAPPY_STARTUP_CONF="-locators={{ .Release.Name }}-locator:10334 $USER_PROVIDED_STARTUP_CONF"; 58 | echo "Executing command: start server $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF"; 59 | 60 | start server $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF; 61 | lifecycle: 62 | preStop: 63 | exec: 64 | command: ["/opt/snappydata/sbin/snappy-servers.sh", "stop"] 65 | resources: 66 | {{ toYaml .Values.servers.resources | indent 12 }} 67 | {{- with .Values.nodeSelector }} 68 | nodeSelector: 69 | {{ toYaml . | indent 8 }} 70 | {{- end }} 71 | {{- with .Values.affinity }} 72 | affinity: 73 | {{ toYaml . | indent 8 }} 74 | {{- end }} 75 | {{- with .Values.tolerations }} 76 | tolerations: 77 | {{ toYaml . | indent 8 }} 78 | {{- end }} 79 | volumeMounts: 80 | - mountPath: "/opt/snappydata/work" 81 | name: snappy-disk-claim 82 | - mountPath: /snappy_conf 83 | name: snappy-config-properties 84 | volumes: 85 | - name: snappy-config-properties 86 | configMap: 87 | name: {{ .Release.Name }}-configmap 88 | 89 | volumeClaimTemplates: 90 | - metadata: 91 | name: snappy-disk-claim 92 | spec: 93 | accessModes: [ {{ .Values.servers.persistence.accessMode | quote }} ] 94 | resources: 95 | requests: 96 | storage: {{ .Values.servers.persistence.size | quote }} 97 | {{- if .Values.servers.persistence.storageClass }} 98 | storageClassName: {{ .Values.servers.persistence.storageClass | quote }} 99 | {{- end }} 100 | -------------------------------------------------------------------------------- /charts/snappydata/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: "{{ .Release.Name }}-locator-public" 5 | labels: 6 | app: {{ template "snappydata.name" . 
}} 7 | chart: {{ template "snappydata.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | ports: 12 | - port: 1527 13 | targetPort: 1527 14 | name: jdbc 15 | type: LoadBalancer 16 | selector: 17 | app: "{{ .Release.Name }}-locator" 18 | --- 19 | apiVersion: v1 20 | kind: Service 21 | metadata: 22 | name: "{{ .Release.Name }}-server-public" 23 | labels: 24 | app: {{ template "snappydata.name" . }} 25 | chart: {{ template "snappydata.chart" . }} 26 | release: {{ .Release.Name }} 27 | heritage: {{ .Release.Service }} 28 | spec: 29 | ports: 30 | - port: 1527 31 | targetPort: 1527 32 | name: jdbc 33 | type: LoadBalancer 34 | selector: 35 | app: "{{ .Release.Name }}-server" 36 | --- 37 | apiVersion: v1 38 | kind: Service 39 | metadata: 40 | name: "{{ .Release.Name }}-leader-public" 41 | labels: 42 | app: {{ template "snappydata.name" . }} 43 | chart: {{ template "snappydata.chart" . }} 44 | release: {{ .Release.Name }} 45 | heritage: {{ .Release.Service }} 46 | spec: 47 | ports: 48 | - port: 5050 49 | targetPort: 5050 50 | name: spark 51 | - port: 8090 52 | targetPort: 8090 53 | name: jobserver 54 | - port: 3768 55 | targetPort: 3768 56 | name: zeppelin-interpreter 57 | - port: 10000 58 | targetPort: 10000 59 | name: hive-thrift-server 60 | type: LoadBalancer 61 | selector: 62 | app: "{{ .Release.Name }}-leader" 63 | --- 64 | 65 | #################### 66 | # Headless services 67 | #################### 68 | apiVersion: v1 69 | kind: Service 70 | metadata: 71 | name: "{{ .Release.Name }}-locator" 72 | labels: 73 | app: {{ template "snappydata.name" . }} 74 | chart: {{ template "snappydata.chart" . }} 75 | release: {{ .Release.Name }} 76 | heritage: {{ .Release.Service }} 77 | spec: 78 | ports: 79 | - port: 10334 80 | targetPort: 10334 81 | name: locator 82 | - port: 1527 83 | targetPort: 1527 84 | name: jdbc 85 | clusterIP: None 86 | selector: 87 | app: "{{ .Release.Name }}-locator" 88 | --- 89 | apiVersion: v1 90 | kind: Service 91 | metadata: 92 | name: "{{ .Release.Name }}-server" 93 | labels: 94 | app: {{ template "snappydata.name" . }} 95 | chart: {{ template "snappydata.chart" . }} 96 | release: {{ .Release.Name }} 97 | heritage: {{ .Release.Service }} 98 | spec: 99 | ports: 100 | - port: 1527 101 | targetPort: 1527 102 | name: jdbc 103 | clusterIP: None 104 | selector: 105 | app: "{{ .Release.Name }}-server" 106 | --- 107 | apiVersion: v1 108 | kind: Service 109 | metadata: 110 | name: "{{ .Release.Name }}-leader" 111 | labels: 112 | app: {{ template "snappydata.name" . }} 113 | chart: {{ template "snappydata.chart" . }} 114 | release: {{ .Release.Name }} 115 | heritage: {{ .Release.Service }} 116 | spec: 117 | ports: 118 | - port: 5050 119 | targetPort: 5050 120 | name: spark 121 | clusterIP: None 122 | selector: 123 | app: "{{ .Release.Name }}-leader" 124 | --- -------------------------------------------------------------------------------- /charts/snappydata/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for snappydata. 2 | # This is a YAML-formatted file. 
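# Illustrative usage note (not part of the original chart; a sketch assuming Helm v2
# syntax, a release named "snappydata", and a namespace "snappydata"):
# the values below can be overridden at install time, e.g.
#   helm install --name snappydata --namespace snappydata --set servers.replicaCount=3 ./snappydata/
# or, assuming the bundled PCF plan files are used as ordinary values files:
#   helm install --name snappydata --namespace snappydata -f ./snappydata/plans/medium.yaml ./snappydata/
# Once the pods are up, the external JDBC endpoint exposed by the
# "<release>-locator-public" LoadBalancer service (port 1527, see templates/service.yaml)
# can be looked up with:
#   kubectl get svc --namespace snappydata snappydata-locator-public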
3 | 4 | image: snappydatainc/snappydata 5 | imageTag: 1.0.2.1 6 | imagePullPolicy: IfNotPresent 7 | # Use "imagePullSecrets" to specify the secret name to be used while pulling image from a private registry 8 | # Replace "secretname" with the actual name of the secret 9 | #imagePullSecrets: secretname 10 | 11 | locators: 12 | ## config options for locators 13 | conf: "" 14 | resources: 15 | # limits: 16 | # cpu: 100m 17 | # memory: 128Mi 18 | requests: 19 | # cpu: 100m 20 | memory: 1024Mi 21 | ## PV configuration 22 | persistence: 23 | ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes 24 | ## If undefined (the default) or set to null, no storageClassName spec is 25 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 26 | ## GKE, Azure & OpenStack) 27 | ## 28 | # storageClass: 29 | accessMode: ReadWriteOnce 30 | size: 10Gi 31 | 32 | servers: 33 | replicaCount: 2 34 | ## config options for servers 35 | conf: "" 36 | resources: 37 | # limits: 38 | # cpu: 100m 39 | # memory: 128Mi 40 | requests: 41 | # cpu: 100m 42 | memory: 4096Mi 43 | ## PV configuration 44 | persistence: 45 | ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes 46 | ## If undefined (the default) or set to null, no storageClassName spec is 47 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 48 | ## GKE, Azure & OpenStack) 49 | ## 50 | # storageClass: 51 | accessMode: ReadWriteOnce 52 | size: 10Gi 53 | 54 | leaders: 55 | ## config options for leaders 56 | conf: "-zeppelin.interpreter.enable=true" 57 | resources: 58 | # limits: 59 | # cpu: 100m 60 | # memory: 128Mi 61 | requests: 62 | # cpu: 100m 63 | memory: 4096Mi 64 | ## PV configuration 65 | persistence: 66 | ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes 67 | ## If undefined (the default) or set to null, no storageClassName spec is 68 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 69 | ## GKE, Azure & OpenStack) 70 | ## 71 | # storageClass: 72 | accessMode: ReadWriteOnce 73 | size: 10Gi 74 | 75 | nodeSelector: {} 76 | 77 | tolerations: [] 78 | 79 | affinity: {} 80 | -------------------------------------------------------------------------------- /charts/spark-hs/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/spark-hs/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart to deploy Spark history server for Kubernetes 4 | name: spark-hs 5 | version: 0.1.0 6 | home: https://github.com/apache-spark-on-k8s/spark 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png 8 | maintainers: 9 | - name: 10 | email: 11 | 12 | -------------------------------------------------------------------------------- /charts/spark-hs/README.md: -------------------------------------------------------------------------------- 1 | # A Helm chart for Spark History Server 2 | The [Spark History Server](https://spark.apache.org/docs/latest/monitoring.html#viewing-after-the-fact) Web UI 3 | allows users to view job execution details even after the application has finished execution. To use the History Server, 4 | Spark applications should be configured to log events to a directory from which the Spark History Server will read events 5 | to construct the job execution visualization. The events directory can be a local file path, an HDFS path, or any alternative 6 | file system supported by the Hadoop APIs. 7 | 8 | ## Chart Details 9 | This chart launches the Spark History Server on Kubernetes. The history server can read events from any 10 | HDFS-compatible system (GCS/S3/HDFS) or from a file system path mounted on the pod. Set the GCS bucket 11 | URI in the 'historyServerConf.eventsDir' attribute. 12 | 13 | *Note:* This README describes how to configure the history server to read history events from a GCS bucket. 14 | 15 | ## Steps to configure and install the chart 16 | 17 | 1. Set up gsutil and gcloud on your local machine and associate them with your GCP project. Create a bucket, 18 | create an IAM service account sparkonk8s-test, generate a JSON key file sparkonk8s-test.json, and grant sparkonk8s-test 19 | admin permission to the bucket gs://spark-history-server. 20 | 21 | ``` 22 | $ gsutil mb -c nearline gs://spark-history-server 23 | $ export ACCOUNT_NAME=sparkonk8s-test 24 | $ export GCP_PROJECT_ID=project-id 25 | $ gcloud iam service-accounts create ${ACCOUNT_NAME} --display-name "${ACCOUNT_NAME}" 26 | $ gcloud iam service-accounts keys create "${ACCOUNT_NAME}.json" --iam-account "${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com" 27 | $ gcloud projects add-iam-policy-binding ${GCP_PROJECT_ID} --member "serviceAccount:${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com" --role roles/storage.admin 28 | $ gsutil iam ch serviceAccount:${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com:objectAdmin gs://spark-history-server 29 | ``` 30 | 31 | 2. For the history server to be able to read from the GCS bucket, the JSON key file needs 32 | to be mounted on the history server pod. First, copy the JSON file into the 'conf/secrets' 33 | directory of the Spark history server chart: 34 | 35 | ``` 36 | $ cp sparkonk8s-test.json spark-hs/conf/secrets/ 37 | ``` 38 | 39 | 3. Modify the 'values.yaml' file and specify the GCS bucket path created above.
The history server 40 | will read Spark events from this path: 41 | 42 | ``` 43 | historyServerConf: 44 | eventsDir: "gs://spark-history-server/" 45 | ``` 46 | 47 | Also set the 'mountSecrets' field of the values.yaml file to true. When 'mountSecrets' 48 | is set to true, the JSON key file is mounted at path '/etc/secrets' in the pod. 49 | 50 | ``` 51 | mountSecrets: true 52 | ``` 53 | 54 | Lastly, set SPARK_HISTORY_OPTS so that the history server uses the JSON key file while 55 | accessing the GCS bucket: 56 | 57 | ``` 58 | environment: 59 | SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json 60 | ``` 61 | 62 | 4. Install the chart: 63 | ``` 64 | helm install --name history --namespace spark ./spark-hs/ 65 | ``` 66 | 67 | The Spark History UI URL can now be obtained as follows: 68 | ``` 69 | $ export SERVICE_IP=$(kubectl get svc --namespace spark history-spark-hs -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 70 | $ echo http://$SERVICE_IP:18080 71 | ``` 72 | Use the URL to access the History UI in a browser. 73 | 74 | ## Enable spark-submit to log Spark history events 75 | The spark-submit example below shows a Spark job that logs history events 76 | to the GCS bucket created in the steps above. Once the job finishes, use the 77 | Spark history server UI to view the job execution details. 78 | 79 | ``` 80 | bin/spark-submit \ 81 | --master k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port> \ 82 | --deploy-mode cluster \ 83 | --name spark-pi \ 84 | --conf spark.kubernetes.namespace=spark \ 85 | --class org.apache.spark.examples.SparkPi \ 86 | --conf spark.eventLog.enabled=true \ 87 | --conf spark.eventLog.dir=gs://spark-history-server/ \ 88 | --conf spark.executor.instances=2 \ 89 | --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json \ 90 | --conf spark.kubernetes.driver.secrets.history-secrets=/etc/secrets \ 91 | --conf spark.kubernetes.executor.secrets.history-secrets=/etc/secrets \ 92 | --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 \ 93 | --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 \ 94 | local:///opt/spark/examples/jars/spark-examples_2.11-2.2.0-k8s-0.5.0.jar 95 | ``` 96 | 97 | 98 | ## Deleting the chart 99 | Use the `helm delete` command to delete the chart: 100 | ``` 101 | $ helm delete --purge history 102 | ``` 103 | -------------------------------------------------------------------------------- /charts/spark-hs/conf/secrets/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-hs/conf/secrets/.gitignore -------------------------------------------------------------------------------- /charts/spark-hs/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | {{- if contains "NodePort" .Values.service.type }} 3 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "spark-hs.fullname" .
}}) 4 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 5 | echo http://$NODE_IP:$NODE_PORT 6 | {{- else if contains "LoadBalancer" .Values.service.type }} 7 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 8 | You can watch the status of by running 'kubectl get svc -w {{ template "spark-hs.fullname" . }}' 9 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "spark-hs.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 10 | echo http://$SERVICE_IP:{{ .Values.service.port }} 11 | {{- else if contains "ClusterIP" .Values.service.type }} 12 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "spark-hs.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 13 | echo "Visit http://127.0.0.1:8080 to use your application" 14 | kubectl port-forward $POD_NAME 8080:80 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/spark-hs/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "spark-hs.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "spark-hs.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "spark-hs.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/spark-hs/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }}-hsenv-configmap 5 | data: 6 | {{- range $key, $val := .Values.environment }} 7 | {{ $key }}: {{ $val | quote }} 8 | {{- end }} 9 | {{- range $key, $val := .Values.historyServerConf }} 10 | {{ $key }}: {{ $val | quote }} 11 | {{- end }} 12 | --- 13 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }} 14 | apiVersion: v1 15 | kind: Secret 16 | metadata: 17 | name: history-secrets 18 | type: Opaque 19 | data: 20 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 21 | {{- end }} -------------------------------------------------------------------------------- /charts/spark-hs/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "spark-hs.fullname" . 
}} 5 | labels: 6 | app: {{ template "spark-hs.name" . }} 7 | chart: {{ template "spark-hs.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: {{ template "spark-hs.name" . }} 15 | release: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app: {{ template "spark-hs.name" . }} 20 | release: {{ .Release.Name }} 21 | spec: 22 | containers: 23 | - name: {{ .Chart.Name }} 24 | image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}" 25 | imagePullPolicy: {{ .Values.image.pullPolicy }} 26 | ports: 27 | - name: historyport 28 | containerPort: 18080 29 | protocol: TCP 30 | command: 31 | - "/bin/sh" 32 | - "-c" 33 | - > 34 | if [ "$enablePVC" == "true" ]; then 35 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \ 36 | -Dspark.history.fs.logDirectory=file:/mnt/$eventsDir"; 37 | else 38 | export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \ 39 | -Dspark.history.fs.logDirectory=$eventsDir"; 40 | fi; 41 | /opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer; 42 | envFrom: 43 | - configMapRef: 44 | name: {{ .Release.Name }}-hsenv-configmap 45 | livenessProbe: 46 | httpGet: 47 | path: / 48 | port: historyport 49 | readinessProbe: 50 | httpGet: 51 | path: / 52 | port: historyport 53 | resources: 54 | {{ toYaml .Values.resources | indent 12 }} 55 | volumeMounts: 56 | - name: data 57 | mountPath: /mnt 58 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 59 | - name: secrets-volume 60 | mountPath: /etc/secrets 61 | {{- end }} 62 | {{- with .Values.nodeSelector }} 63 | nodeSelector: 64 | {{ toYaml . | indent 8 }} 65 | {{- end }} 66 | serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }} 67 | {{- with .Values.affinity }} 68 | affinity: 69 | {{ toYaml . | indent 8 }} 70 | {{- end }} 71 | {{- with .Values.tolerations }} 72 | tolerations: 73 | {{ toYaml . | indent 8 }} 74 | {{- end }} 75 | volumes: 76 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 77 | - name: secrets-volume 78 | secret: 79 | secretName: history-secrets 80 | {{- end }} 81 | - name: data 82 | {{- if .Values.historyServerConf.enablePVC }} 83 | persistentVolumeClaim: 84 | claimName: {{ .Values.historyServerConf.existingClaimName }} 85 | {{- else }} 86 | emptyDir: {} 87 | {{- end -}} 88 | -------------------------------------------------------------------------------- /charts/spark-hs/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "spark-hs.fullname" . }} 5 | labels: 6 | app: {{ template "spark-hs.name" . }} 7 | chart: {{ template "spark-hs.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | type: {{ .Values.service.type }} 12 | ports: 13 | - port: {{ .Values.service.port }} 14 | targetPort: historyport 15 | protocol: TCP 16 | name: historyport 17 | selector: 18 | app: {{ template "spark-hs.name" . }} 19 | release: {{ .Release.Name }} -------------------------------------------------------------------------------- /charts/spark-hs/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for spark-hs. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
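# Illustrative usage note (not part of the original chart): instead of editing this
# file, the same settings can be supplied on the command line at install time, e.g.
#   helm install --name history --namespace spark ./spark-hs/ \
#     --set historyServerConf.eventsDir="gs://my-history-bucket/" \
#     --set mountSecrets=true
# "my-history-bucket" is a placeholder bucket name; use the bucket created for your deployment.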
4 | 5 | image: 6 | repository: snappydatainc 7 | tag: spark-init:v2.2.0-kubernetes-0.5.1 8 | pullPolicy: IfNotPresent 9 | 10 | service: 11 | type: LoadBalancer 12 | port: 18080 13 | 14 | serviceAccount: default 15 | 16 | historyServerConf: 17 | # if using file system, this should be an absolute path in the mounted volume 18 | # if not using file system, mention HDFS compatible URI 19 | eventsDir: "gs://spark-history-server-store/" 20 | #eventsDir: "/" 21 | 22 | # to use a file system path for Spark events dir, set 'enablePVC' to true and mention the 23 | # name of an already created persistent volume claim in existingClaimName. 24 | # The volume will be mounted on /data in the pod 25 | enablePVC: false 26 | existingClaimName: "claim" 27 | 28 | # copy your key file in 'conf/secrets' directory and set mountSecrets to true 29 | # key file will be mounted on '/etc/secrets' 30 | mountSecrets: true 31 | 32 | # any environment variables that need to be made available to history server 33 | environment: 34 | # Note: do not configure Spark history events directory using SPARK_HISTORY_OPTS. It will be 35 | # configured by this chart based on the values in "historyServerConf" attributes in values.yaml 36 | # However other options can be specified. 37 | 38 | SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json 39 | #SPARK_DAEMON_MEMORY: 1g 40 | #SPARK_DAEMON_JAVA_OPTS: ... 41 | #SPARK_DAEMON_CLASSPATH: ... 42 | #SPARK_PUBLIC_DNS: ... 43 | 44 | resources: {} 45 | # We usually recommend not to specify default resources and to leave this as a conscious 46 | # choice for the user. This also increases chances charts run on environments with little 47 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 48 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 49 | # limits: 50 | # cpu: 100m 51 | # memory: 128Mi 52 | # requests: 53 | # cpu: 100m 54 | # memory: 128Mi 55 | 56 | nodeSelector: {} 57 | 58 | tolerations: [] 59 | 60 | affinity: {} 61 | 62 | #internal attribute, do not change 63 | global: 64 | umbrellaChart: false -------------------------------------------------------------------------------- /charts/spark-rss/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart to deploy Spark resource staging server on Kubernetes 4 | name: spark-rss 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/spark-rss/README.md: -------------------------------------------------------------------------------- 1 | # A Helm chart for Spark Resource Staging Server 2 | This chart is still work-in-progress 3 | 4 | ## Chart Details 5 | This chart launches [Spark Resource Staging Server](https://apache-spark-on-k8s.github.io/userdocs/running-on-kubernetes.html#dependency-management). 6 | Spark Resource Staging Server is used for dependency management when Spark is run in Kubernetes environment. 7 | 8 | ## Installing the Chart 9 | 10 | 1. By default this chart deploys Spark Resource Staging Server and configures a service of type 11 | LoadBalancer to access it. 
12 | 13 | For example: 14 | 15 | ``` 16 | helm install --name rss --namespace spark ./spark-rss/ 17 | ``` 18 | 19 | The above command will display output similar to the following: 20 | ``` 21 | NOTES: 22 | Get the resource staging server URI by running these commands: 23 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 24 | You can watch the status of by running 'kubectl get svc -w rss-spark-rss' 25 | export SERVICE_IP=$(kubectl get svc --namespace default rss-spark-rss -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 26 | echo http://$SERVICE_IP:10000 27 | ``` 28 | 29 | The URI displayed by the commands in the above output can be used to run 30 | the spark-submit command with the resource staging server. 31 | 32 | 2. Users also have the choice of using a NodePort service instead of LoadBalancer. To use a 33 | NodePort service, use a command such as the one given below, or alternatively modify 34 | values.yaml and set the service.type value to "NodePort": 35 | 36 | ``` 37 | helm install --name rss --namespace spark --set service.type=NodePort ./spark-rss/ 38 | ``` 39 | 40 | The above command will display output similar to the following: 41 | 42 | ``` 43 | NOTES: 44 | Get the resource staging server URI by running these commands: 45 | export NODE_PORT=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services rss-spark-rss) 46 | export NODE_IP=$(kubectl get nodes --namespace default -o jsonpath="{.items[0].status.addresses[1].address}") 47 | echo http://$NODE_IP:$NODE_PORT 48 | ``` 49 | NOTE: To access a NodePort service externally, create a firewall rule that allows TCP traffic on your node port. 50 | For example, if the Service has a NodePort value of 31000, create a firewall rule that allows TCP traffic on port 31000. 51 | Different cloud providers offer different ways of configuring firewall rules. Without the firewall rule you may not be 52 | able to use the resource staging server, as spark-submit will throw an error. 53 | 54 | #### spark-submit command example 55 | Given below is an example spark-submit command that uses the resource staging server.
56 | 57 | 58 | bin/spark-submit \ 59 | --deploy-mode cluster \ 60 | --class org.apache.spark.examples.SparkPi \ 61 | --master k8s://<k8s-apiserver-host>:<k8s-apiserver-port> \ 62 | --conf spark.kubernetes.namespace=spark \ 63 | --conf spark.executor.instances=5 \ 64 | --conf spark.app.name=spark-pi \ 65 | --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 \ 66 | --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 \ 67 | --conf spark.kubernetes.initcontainer.docker.image=snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1 \ 68 | --conf spark.kubernetes.resourceStagingServer.uri=<resource-staging-server-URI> \ 69 | ./examples/jars/spark-examples_2.11-2.2.0-k8s-0.5.0.jar 70 | 71 | 72 | 73 | ## Deleting the chart 74 | Use the `helm delete` command to delete the chart: 75 | ``` 76 | $ helm delete --purge rss 77 | ``` 78 | 79 | ## Configuration 80 | The following table lists the configuration parameters available for this chart: 81 | 82 | | Parameter | Description | Default | 83 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- | 84 | | `image.repository` | Docker repo for the image | `snappydatainc` | 85 | | `image.tag` | Tag for the Docker image | `spark-resource-staging-server:v2.2.0-kubernetes-0.5.1` | 86 | | `image.pullPolicy` | Pull policy for the image | `IfNotPresent` | 87 | | `service.type` | K8S service type for Resource Staging Server | `LoadBalancer` | 88 | | `service.externalPort` | Port exposed externally for Resource Staging Server service | `10000` | 89 | | `service.internalPort` | Port exposed only internally for the pod so that LoadBalancer can connect to it | `10000` | 90 | | `service.nodePort` | Used if the service is of NodePort type | `31000` | 91 | | `serviceAccount` | Service account used to deploy Resource Staging Server | `default` | 92 | | `properties` | Configuration properties for Resource Staging Server. These will be made available as a configmap | | 93 | | `resources` | CPU and Memory resources for the RSS pod | | 94 | | `global.umbrellaChart` | Internal attribute. Do not modify | `false` | 95 | 96 | These configuration attributes can be set in the `values.yaml` file or while using the helm install command. For example: 97 | 98 | ``` 99 | # set an attribute while using helm install command 100 | helm install --name rss --namespace spark --set serviceAccount=spark ./spark-rss 101 | ``` -------------------------------------------------------------------------------- /charts/spark-rss/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Get the resource staging server URI by running these commands: 2 | {{- if contains "NodePort" .Values.service.type }} 3 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "spark-rss.fullname" . }}) 4 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[1].address}") 5 | echo http://$NODE_IP:$NODE_PORT 6 | 7 | NOTE: To access a NodePort service externally, create a firewall rule that allows TCP traffic on your node port. 8 | For example, if the Service has a NodePort value of 31000, create a firewall rule that allows TCP traffic on port 31000. 9 | Different cloud providers offer different ways of configuring firewall rules. Without the firewall rule you may not be 10 | able to use the resource staging server, as spark-submit will throw an error.
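{{/*
Illustrative note (template comment, not rendered in the chart output): the firewall
rule mentioned above is cloud-specific. For example, on GCP a sketch of such a rule
for node port 31000 would be:
  gcloud compute firewall-rules create spark-rss-nodeport --allow tcp:31000
The rule name "spark-rss-nodeport" is arbitrary; adjust the port to the NodePort in use.
*/}}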
11 | 12 | {{- else if contains "LoadBalancer" .Values.service.type }} 13 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 14 | You can watch the status of by running 'kubectl get svc -w {{ template "spark-rss.fullname" . }}' 15 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "spark-rss.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 16 | echo http://$SERVICE_IP:{{ .Values.service.externalPort }} 17 | {{- end }} 18 | -------------------------------------------------------------------------------- /charts/spark-rss/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "spark-rss.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "spark-rss.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "spark-rss.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/spark-rss/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }}-rss-configmap 5 | data: 6 | resource-staging-server.properties: | 7 | spark.kubernetes.resourceStagingServer.port={{ .Values.service.internalPort }} 8 | {{- range $key, $val := .Values.properties }} 9 | {{ $key }}={{ $val }} 10 | {{- end}} -------------------------------------------------------------------------------- /charts/spark-rss/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "spark-rss.fullname" . }} 5 | labels: 6 | app: {{ template "spark-rss.name" . }} 7 | chart: {{ template "spark-rss.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: {{ template "spark-rss.name" . }} 15 | release: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app: {{ template "spark-rss.name" . 
}} 20 | release: {{ .Release.Name }} 21 | resource-staging-server-instance: default 22 | spec: 23 | volumes: 24 | - name: resource-staging-server-properties 25 | configMap: 26 | name: {{ .Release.Name }}-rss-configmap 27 | containers: 28 | - name: {{ .Chart.Name }} 29 | image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}" 30 | imagePullPolicy: {{ .Values.image.pullPolicy }} 31 | ports: 32 | - name: rssport 33 | containerPort: {{ .Values.service.internalPort }} 34 | protocol: TCP 35 | # TODO: add a proper liveness probe 36 | # livenessProbe: 37 | # tcpSocket: 38 | # port: rssport 39 | # initialDelaySeconds: 180 40 | # readinessProbe: 41 | # tcpSocket: 42 | # port: rssport 43 | # initialDelaySeconds: 20 44 | resources: 45 | {{ toYaml .Values.resources | indent 12 }} 46 | volumeMounts: 47 | - name: resource-staging-server-properties 48 | mountPath: '/etc/spark-resource-staging-server' 49 | args: 50 | - '/etc/spark-resource-staging-server/resource-staging-server.properties' 51 | serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }} -------------------------------------------------------------------------------- /charts/spark-rss/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "spark-rss.fullname" . }} 5 | labels: 6 | app: {{ template "spark-rss.name" . }} 7 | chart: {{ template "spark-rss.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | type: {{ .Values.service.type }} 12 | ports: 13 | - port: {{ .Values.service.externalPort }} 14 | targetPort: {{ .Values.service.internalPort }} 15 | protocol: TCP 16 | {{ if eq .Values.service.type "NodePort" -}} 17 | nodePort: {{ .Values.service.nodePort }} 18 | {{ end -}} 19 | name: rssport 20 | selector: 21 | app: {{ template "spark-rss.name" . }} 22 | release: {{ .Release.Name }} 23 | # TODO: Is this required 24 | resource-staging-server-instance: default -------------------------------------------------------------------------------- /charts/spark-rss/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for spark-rss. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | image: 6 | repository: snappydatainc 7 | tag: spark-resource-staging-server:v2.2.0-kubernetes-0.5.1 8 | pullPolicy: IfNotPresent 9 | 10 | service: 11 | type: LoadBalancer 12 | externalPort: 10000 13 | internalPort: 10000 14 | nodePort: 31000 15 | 16 | serviceAccount: default 17 | 18 | # properties that can be made available via configmap 19 | properties: 20 | spark.ssl.kubernetes.resourceStagingServer.enabled: false 21 | # Other possible properties are listed below, primarily for setting up TLS. The paths given by KeyStore, password, and PEM files here should correspond to 22 | # files that are securely mounted into the resource staging server container, via e.g. secret volumes. 
23 | # spark.ssl.kubernetes.resourceStagingServer.keyStore=/mnt/secrets/resource-staging-server/keyStore.jks 24 | # spark.ssl.kubernetes.resourceStagingServer.keyStorePassword=changeit 25 | # spark.ssl.kubernetes.resourceStagingServer.keyPassword=changeit 26 | # spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile=/mnt/secrets/resource-staging-server/keystore-password.txt 27 | # spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile=/mnt/secrets/resource-staging-server/keystore-key-password.txt 28 | # spark.ssl.kubernetes.resourceStagingServer.keyPem=/mnt/secrets/resource-staging-server/key.pem 29 | # spark.ssl.kubernetes.resourceStagingServer.serverCertPem=/mnt/secrets/resource-staging-server/cert.pem 30 | 31 | resources: 32 | limits: 33 | cpu: 100m 34 | memory: 1Gi 35 | requests: 36 | cpu: 100m 37 | memory: 256Mi 38 | 39 | #internal attribute, do not change 40 | global: 41 | umbrellaChart: false -------------------------------------------------------------------------------- /charts/spark-shuffle/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/spark-shuffle/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart to deploy the Spark shuffle service daemon set for Kubernetes 4 | name: spark-shuffle 5 | version: 0.1.0 6 | home: https://github.com/apache-spark-on-k8s/spark 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png 8 | maintainers: 9 | - name: 10 | email: 11 | -------------------------------------------------------------------------------- /charts/spark-shuffle/README.md: -------------------------------------------------------------------------------- 1 | # A Helm chart to launch Spark shuffle service on k8s 2 | 3 | This chart launches the Spark shuffle service as a daemonset on Kubernetes. The Spark shuffle service is 4 | required for dynamic executor scaling in Spark. 5 | 6 | 7 | ## Installing the Chart 8 | 9 | To install the chart, use the following command: 10 | 11 | ``` 12 | $ helm install --name example-shuffle --namespace spark ./spark-shuffle/ 13 | ``` 14 | 15 | The above command will deploy the chart and display the labels attached to the shuffle pods in 'NOTES'. 16 | 17 | Example output: 18 | 19 | ``` 20 | NOTES: 21 | Created a Spark shuffle daemonset with following Pod labels: 22 | app: spark-shuffle-service 23 | spark-version: 2.2.0 24 | 25 | ``` 26 | The labels mentioned above can be used in spark-submit configuration options to enable dynamic executor scaling. 27 | 28 | For example, in values.yaml of the [spark-k8s-zeppelin chart](https://github.com/SnappyDataInc/spark-on-k8s/tree/master/charts/zeppelin-with-spark), modify the SPARK_SUBMIT_OPTIONS 29 | as given below (note the options for dynamicAllocation and shuffle). This will enable dynamic executor scaling and use the 30 | shuffle service installed above.
31 | 32 | ``` 33 | SPARK_SUBMIT_OPTIONS: >- 34 | --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 35 | --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 36 | --conf spark.driver.cores="300m" 37 | --conf spark.local.dir=/tmp/spark-local 38 | --conf spark.dynamicAllocation.enabled=true 39 | --conf spark.shuffle.service.enabled=true 40 | --conf spark.kubernetes.shuffle.namespace=spark 41 | --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.2.0" 42 | --conf spark.dynamicAllocation.initialExecutors=0 43 | --conf spark.dynamicAllocation.minExecutors=1 44 | --conf spark.dynamicAllocation.maxExecutors=5 45 | ``` 46 | 47 | ## Configuration 48 | The following table lists the configuration parameters available for this chart: 49 | 50 | | Parameter | Description | Default | 51 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- | 52 | | `image.repository` | Docker repo for the shuffle service image | `snappydatainc` | 53 | | `image.tag` | Tag for the Docker image | `spark-shuffle:v2.2.0-kubernetes-0.5.1` | 54 | | `image.pullPolicy` | Pull policy for the image | `IfNotPresent` | 55 | | `serviceAccount` | Service account used to deploy shuffle service daemonset | `default` | 56 | | `shufflePodLabels` | Labels assigned to pods of the shuffle service. These can be used to target a particular service while running jobs. By default two labels are created `app: spark-shuffle-service` and `spark-version: 2.2.0`| | 57 | | `resources` | CPU and memory resources for the shuffle service pods | | 58 | | `global.umbrellaChart` | Internal attribute. Do not modify | `false` | 59 | 60 | These configuration attributes can be set in the `values.yaml` file or while using the helm install command. For example: 61 | 62 | ``` 63 | # set an attribute while using helm install command 64 | helm install --name shuffle --namespace spark --set serviceAccount=spark ./spark-shuffle 65 | ``` 66 | -------------------------------------------------------------------------------- /charts/spark-shuffle/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Created a Spark shuffle daemonset with following Pod labels: 2 | {{- range $key, $val := .Values.shufflePodLabels }} 3 | {{ $key }}: {{ $val }} 4 | {{- end }} 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /charts/spark-shuffle/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "spark-shuffle.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name.
13 | */}} 14 | {{- define "spark-shuffle.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "spark-shuffle.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/spark-shuffle/templates/shuffle-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: DaemonSet 3 | metadata: 4 | labels: 5 | {{- range $key, $val := .Values.shufflePodLabels }} 6 | {{ $key }}: {{ $val }} 7 | {{- end }} 8 | name: {{ template "spark-shuffle.fullname" . }} 9 | spec: 10 | template: 11 | metadata: 12 | labels: 13 | {{- range $key, $val := .Values.shufflePodLabels }} 14 | {{ $key }}: {{ $val }} 15 | {{- end }} 16 | spec: 17 | volumes: 18 | - name: temp-volume 19 | hostPath: 20 | path: '/tmp/spark-local' # change this path according to your cluster configuration. 21 | containers: 22 | - name: {{ .Chart.Name }} 23 | # This is an official image that is built 24 | # from the dockerfiles/shuffle directory 25 | # in the spark distribution. 26 | image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}" 27 | imagePullPolicy: {{ .Values.image.pullPolicy }} 28 | volumeMounts: 29 | - mountPath: '/tmp/spark-local' 30 | name: temp-volume 31 | # more volumes can be mounted here. 32 | # The spark job must be configured to use these 33 | # mounts using the configuration: 34 | # spark.kubernetes.shuffle.dir=,,... 35 | resources: 36 | {{ toYaml .Values.resources | indent 12 }} 37 | serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }} 38 | -------------------------------------------------------------------------------- /charts/spark-shuffle/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for spark-shuffle. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | image: 6 | repository: snappydatainc 7 | tag: spark-shuffle:v2.2.0-kubernetes-0.5.1 8 | pullPolicy: IfNotPresent 9 | 10 | shufflePodLabels: 11 | app: spark-shuffle-service 12 | spark-version: 2.2.0 13 | 14 | serviceAccount: default 15 | 16 | resources: 17 | limits: 18 | cpu: 200m 19 | # memory: 128Mi 20 | requests: 21 | cpu: 200m 22 | # memory: 128Mi 23 | 24 | #internal attribute, do not change 25 | global: 26 | umbrellaChart: false -------------------------------------------------------------------------------- /charts/spark-umbrella/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/spark-umbrella/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for running Spark applications on Kubernetes. 4 | name: spark-umbrella 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /charts/spark-umbrella/charts/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/charts/.gitignore -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/secrets/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/secrets/.gitignore -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/spark/.gitignore -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/fairscheduler.xml.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | FAIR 23 | 1 24 | 2 25 | 26 | 27 | FIFO 28 | 2 29 | 3 30 | 31 | 32 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/log4j.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. 
When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/metrics.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # syntax: [instance].sink|source.[name].[options]=[value] 19 | 20 | # This file configures Spark's internal metrics system. The metrics system is 21 | # divided into instances which correspond to internal components. 22 | # Each instance can be configured to report its metrics to one or more sinks. 23 | # Accepted values for [instance] are "master", "worker", "executor", "driver", 24 | # and "applications". A wildcard "*" can be used as an instance name, in 25 | # which case all instances will inherit the supplied property. 26 | # 27 | # Within an instance, a "source" specifies a particular set of grouped metrics. 28 | # there are two kinds of sources: 29 | # 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will 30 | # collect a Spark component's internal state. Each instance is paired with a 31 | # Spark source that is added automatically. 32 | # 2. Common sources, like JvmSource, which will collect low level state. 33 | # These can be added through configuration options and are then loaded 34 | # using reflection. 35 | # 36 | # A "sink" specifies where metrics are delivered to. Each instance can be 37 | # assigned one or more sinks. 38 | # 39 | # The sink|source field specifies whether the property relates to a sink or 40 | # source. 41 | # 42 | # The [name] field specifies the name of source or sink. 43 | # 44 | # The [options] field is the specific property of this source or sink. The 45 | # source or sink is responsible for parsing this property. 
46 | # 47 | # Notes: 48 | # 1. To add a new sink, set the "class" option to a fully qualified class 49 | # name (see examples below). 50 | # 2. Some sinks involve a polling period. The minimum allowed polling period 51 | # is 1 second. 52 | # 3. Wildcard properties can be overridden by more specific properties. 53 | # For example, master.sink.console.period takes precedence over 54 | # *.sink.console.period. 55 | # 4. A metrics specific configuration 56 | # "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be 57 | # added to Java properties using -Dspark.metrics.conf=xxx if you want to 58 | # customize metrics system. You can also put the file in ${SPARK_HOME}/conf 59 | # and it will be loaded automatically. 60 | # 5. The MetricsServlet sink is added by default as a sink in the master, 61 | # worker and driver, and you can send HTTP requests to the "/metrics/json" 62 | # endpoint to get a snapshot of all the registered metrics in JSON format. 63 | # For master, requests to the "/metrics/master/json" and 64 | # "/metrics/applications/json" endpoints can be sent separately to get 65 | # metrics snapshots of the master instance and applications. This 66 | # MetricsServlet does not have to be configured. 67 | 68 | ## List of available common sources and their properties. 69 | 70 | # org.apache.spark.metrics.source.JvmSource 71 | # Note: Currently, JvmSource is the only available common source. 72 | # It can be added to an instance by setting the "class" option to its 73 | # fully qualified class name (see examples below). 74 | 75 | ## List of available sinks and their properties. 76 | 77 | # org.apache.spark.metrics.sink.ConsoleSink 78 | # Name: Default: Description: 79 | # period 10 Poll period 80 | # unit seconds Unit of the poll period 81 | 82 | # org.apache.spark.metrics.sink.CSVSink 83 | # Name: Default: Description: 84 | # period 10 Poll period 85 | # unit seconds Unit of the poll period 86 | # directory /tmp Where to store CSV files 87 | 88 | # org.apache.spark.metrics.sink.GangliaSink 89 | # Name: Default: Description: 90 | # host NONE Hostname or multicast group of the Ganglia server, 91 | # must be set 92 | # port NONE Port of the Ganglia server(s), must be set 93 | # period 10 Poll period 94 | # unit seconds Unit of the poll period 95 | # ttl 1 TTL of messages sent by Ganglia 96 | # dmax 0 Lifetime in seconds of metrics (0 never expired) 97 | # mode multicast Ganglia network mode ('unicast' or 'multicast') 98 | 99 | # org.apache.spark.metrics.sink.JmxSink 100 | 101 | # org.apache.spark.metrics.sink.MetricsServlet 102 | # Name: Default: Description: 103 | # path VARIES* Path prefix from the web server root 104 | # sample false Whether to show entire set of samples for histograms 105 | # ('false' or 'true') 106 | # 107 | # * Default path is /metrics/json for all instances except the master. 
The 108 | # master has two paths: 109 | # /metrics/applications/json # App information 110 | # /metrics/master/json # Master information 111 | 112 | # org.apache.spark.metrics.sink.GraphiteSink 113 | # Name: Default: Description: 114 | # host NONE Hostname of the Graphite server, must be set 115 | # port NONE Port of the Graphite server, must be set 116 | # period 10 Poll period 117 | # unit seconds Unit of the poll period 118 | # prefix EMPTY STRING Prefix to prepend to every metric's name 119 | # protocol tcp Protocol ("tcp" or "udp") to use 120 | 121 | ## Examples 122 | # Enable JmxSink for all instances by class name 123 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink 124 | 125 | # Enable ConsoleSink for all instances by class name 126 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink 127 | 128 | # Polling period for the ConsoleSink 129 | #*.sink.console.period=10 130 | # Unit of the polling period for the ConsoleSink 131 | #*.sink.console.unit=seconds 132 | 133 | # Polling period for the ConsoleSink specific for the master instance 134 | #master.sink.console.period=15 135 | # Unit of the polling period for the ConsoleSink specific for the master 136 | # instance 137 | #master.sink.console.unit=seconds 138 | 139 | # Enable CsvSink for all instances by class name 140 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink 141 | 142 | # Polling period for the CsvSink 143 | #*.sink.csv.period=1 144 | # Unit of the polling period for the CsvSink 145 | #*.sink.csv.unit=minutes 146 | 147 | # Polling directory for CsvSink 148 | #*.sink.csv.directory=/tmp/ 149 | 150 | # Polling period for the CsvSink specific for the worker instance 151 | #worker.sink.csv.period=10 152 | # Unit of the polling period for the CsvSink specific for the worker instance 153 | #worker.sink.csv.unit=minutes 154 | 155 | # Enable Slf4jSink for all instances by class name 156 | #*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink 157 | 158 | # Polling period for the Slf4JSink 159 | #*.sink.slf4j.period=1 160 | # Unit of the polling period for the Slf4jSink 161 | #*.sink.slf4j.unit=minutes 162 | 163 | # Enable JvmSource for instance master, worker, driver and executor 164 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource 165 | 166 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource 167 | 168 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource 169 | 170 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource 171 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | spark.submit.deployMode client 29 | spark.kubernetes.driver.docker.image snappydatainc/spark-driver-py:v2.2.0-kubernetes-0.5.1 30 | spark.kubernetes.executor.docker.image snappydatainc/spark-executor-py:v2.2.0-kubernetes-0.5.1 31 | spark.kubernetes.initcontainer.docker.image snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1 32 | spark.kubernetes.docker.image.pullPolicy Always 33 | # Replace sparkonk8s-test.json with the actual name of your keyfile 34 | # to enable access to Google Cloud Storage. 35 | spark.hadoop.google.cloud.auth.service.account.json.keyfile /etc/secrets/sparkonk8s-test.json 36 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/spark-defaults.conf.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/spark/spark-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # This file is sourced when running various Spark programs. 21 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 22 | 23 | # Options read when launching programs locally with 24 | # ./bin/run-example or ./bin/spark-submit 25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 28 | 29 | # Options read by executors and drivers running inside the cluster 30 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 31 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 32 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 33 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos 34 | 35 | # Options read in YARN client mode 36 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 37 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). 38 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) 39 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) 40 | 41 | # Options for the daemons used in the standalone deploy mode 42 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname 43 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 44 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 45 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 46 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) 47 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 48 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 49 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 50 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). 51 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 52 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") 53 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 54 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 55 | 56 | # Generic options for the daemons used in the standalone deploy mode 57 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 58 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 59 | # - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) 60 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 61 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 62 | # - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. 
63 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/zeppelin/.gitignore -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
name value description
38 | 39 | 40 |
41 |
42 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/interpreter-list: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # 18 | # [name] [maven artifact] [description] 19 | 20 | alluxio org.apache.zeppelin:zeppelin-alluxio:0.7.0 Alluxio interpreter 21 | angular org.apache.zeppelin:zeppelin-angular:0.7.0 HTML and AngularJS view rendering 22 | beam org.apache.zeppelin:zeppelin-beam:0.7.0 Beam interpreter 23 | bigquery org.apache.zeppelin:zeppelin-bigquery:0.7.0 BigQuery interpreter 24 | cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.7.0 Cassandra interpreter built with Scala 2.11 25 | elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.7.0 Elasticsearch interpreter 26 | file org.apache.zeppelin:zeppelin-file:0.7.0 HDFS file interpreter 27 | flink org.apache.zeppelin:zeppelin-flink_2.11:0.7.0 Flink interpreter built with Scala 2.11 28 | hbase org.apache.zeppelin:zeppelin-hbase:0.7.0 Hbase interpreter 29 | ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.7.0 Ignite interpreter built with Scala 2.11 30 | jdbc org.apache.zeppelin:zeppelin-jdbc:0.7.0 Jdbc interpreter 31 | kylin org.apache.zeppelin:zeppelin-kylin:0.7.0 Kylin interpreter 32 | lens org.apache.zeppelin:zeppelin-lens:0.7.0 Lens interpreter 33 | livy org.apache.zeppelin:zeppelin-livy:0.7.0 Livy interpreter 34 | md org.apache.zeppelin:zeppelin-markdown:0.7.0 Markdown support 35 | pig org.apache.zeppelin:zeppelin-pig:0.7.0 Pig interpreter 36 | python org.apache.zeppelin:zeppelin-python:0.7.0 Python interpreter 37 | scio org.apache.zeppelin:zeppelin-scio_2.11:0.7.0 Scio interpreter 38 | shell org.apache.zeppelin:zeppelin-shell:0.7.0 Shell command 39 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, dailyfile 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n 23 | 24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd 25 | log4j.appender.dailyfile.Threshold = INFO 26 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender 27 | log4j.appender.dailyfile.File = ${zeppelin.log.file} 28 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout 29 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n 30 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/shiro.ini.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [users] 19 | # List of users with their password allowed to access Zeppelin. 20 | # To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections 21 | # To enable admin user, uncomment the following line and set an appropriate password. 
22 | #admin = password1, admin 23 | user1 = password2, role1, role2 24 | user2 = password3, role3 25 | user3 = password4, role2 26 | 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm 28 | [main] 29 | ### A sample for configuring Active Directory Realm 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm 31 | #activeDirectoryRealm.systemUsername = userNameA 32 | 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html 34 | #activeDirectoryRealm.systemPassword = passwordA 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr" 39 | #activeDirectoryRealm.authorizationCachingEnabled = false 40 | 41 | ### A sample for configuring LDAP Directory Realm 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm 43 | ## search base for ldap groups (only relevant for LdapGroupRealm): 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM 47 | #ldapRealm.contextFactory.authenticationMechanism = simple 48 | 49 | ### A sample PAM configuration 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm 51 | #pamRealm.service=sshd 52 | 53 | ### A sample for configuring ZeppelinHub Realm 54 | #zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm 55 | ## Url of ZeppelinHub 56 | #zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com 57 | #securityManager.realms = $zeppelinHubRealm 58 | 59 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager 60 | 61 | ### If caching of user is required then uncomment below lines 62 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager 63 | #securityManager.cacheManager = $cacheManager 64 | 65 | ### Enables 'HttpOnly' flag in Zeppelin cookies 66 | cookie = org.apache.shiro.web.servlet.SimpleCookie 67 | cookie.name = JSESSIONID 68 | cookie.httpOnly = true 69 | ### Uncomment the below line only when Zeppelin is running over HTTPS 70 | #cookie.secure = true 71 | sessionManager.sessionIdCookie = $cookie 72 | 73 | securityManager.sessionManager = $sessionManager 74 | # 86,400,000 milliseconds = 24 hour 75 | securityManager.sessionManager.globalSessionTimeout = 86400000 76 | shiro.loginUrl = /api/login 77 | 78 | [roles] 79 | role1 = * 80 | role2 = * 81 | role3 = * 82 | admin = * 83 | 84 | [urls] 85 | # This section is used for url-based security. For details see the shiro.ini documentation. 86 | # 87 | # You can secure interpreter, configuration and credential information by urls. 88 | # Comment or uncomment the below urls that you want to hide: 89 | # anon means the access is anonymous. 90 | # authc means form based auth Security. 91 | # 92 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request 93 | # in the order they are defined and the FIRST MATCH WINS. 
94 | # 95 | # To allow anonymous access to all but the stated urls, 96 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc) 97 | # 98 | /api/version = anon 99 | /api/interpreter/** = authc, roles[admin] 100 | /api/configurations/** = authc, roles[admin] 101 | /api/credential/** = authc, roles[admin] 102 | #/** = anon 103 | /** = authc 104 | -------------------------------------------------------------------------------- /charts/spark-umbrella/conf/zeppelin/zeppelin-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # export JAVA_HOME= 20 | # export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. 21 | # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" 22 | # export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m 23 | # export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m 24 | # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. 25 | # export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true) 26 | 27 | # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default. 28 | # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default. 29 | # export ZEPPELIN_WAR_TEMPDIR # The location of jetty temporary directory. 30 | # export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved 31 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z 32 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false" 33 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved 34 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket 35 | # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json 36 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID 37 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region 38 | # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks 39 | # export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost" 40 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin" 41 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. 
Default "notes" 42 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false" 43 | # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default. 44 | # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0. 45 | # export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading 46 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading 47 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL # Remote Node installer url for Helium dependency loader 48 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL # Remote Npm installer url for Helium dependency loader 49 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL # Remote Yarn package installer url for Helium dependency loader 50 | # export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote). 51 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth? 52 | # export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise 53 | 54 | #### Spark interpreter configuration #### 55 | 56 | ## Kerberos ticket refresh setting 57 | ## 58 | #export KINIT_FAIL_THRESHOLD # (optional) How many times should kinit retry. The default value is 5. 59 | #export KERBEROS_REFRESH_INTERVAL # (optional) The refresh interval for Kerberos ticket. The default value is 1d. 60 | 61 | ## Use provided spark installation ## 62 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit 63 | ## 64 | # export SPARK_HOME # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries 65 | # export SPARK_SUBMIT_OPTIONS # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". 66 | # export SPARK_APP_NAME # (optional) The name of spark application. 67 | 68 | ## Use embedded spark binaries ## 69 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries. 70 | ## however, it is not encouraged when you can define SPARK_HOME 71 | ## 72 | # Options read in YARN client mode 73 | # export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. 74 | # Pyspark (supported with Spark 1.2.1 and above) 75 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI 76 | # export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers. 77 | # export PYTHONPATH 78 | 79 | ## Spark interpreter options ## 80 | ## 81 | # export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default. 82 | # export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default. 83 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default. 84 | # export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL result to display. 1000 by default. 85 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. 
Defaults to 1024000 86 | 87 | 88 | #### HBase interpreter configuration #### 89 | 90 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set 91 | 92 | # export HBASE_HOME= # (require) Under which HBase scripts and configuration should be 93 | # export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml 94 | 95 | #### ZeppelinHub connection configuration #### 96 | # export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use 97 | # export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user 98 | # export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication. 99 | 100 | #### Zeppelin impersonation configuration 101 | # export ZEPPELIN_IMPERSONATE_CMD # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c ' 102 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled 103 | -------------------------------------------------------------------------------- /charts/spark-umbrella/requirements.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: spark-hs 3 | repository: file://../spark-hs 4 | version: 0.1.0 5 | - name: zeppelin-with-spark 6 | repository: file://../zeppelin-with-spark 7 | version: 0.1.0 8 | - name: jupyter-with-spark 9 | repository: file://../jupyter-with-spark 10 | version: 0.1.0 11 | - name: spark-rss 12 | repository: file://../spark-rss 13 | version: 0.1.0 14 | - name: spark-shuffle 15 | repository: file://../spark-shuffle 16 | version: 0.1.0 17 | digest: sha256:821eb2bc83ac2430ca9c0c259bcea7c5f7417ad58d31be672240cb04e652df62 18 | generated: 2018-05-11T11:47:45.512908935+05:30 19 | -------------------------------------------------------------------------------- /charts/spark-umbrella/requirements.yaml: -------------------------------------------------------------------------------- 1 | # parent's requirements.yaml file 2 | dependencies: 3 | - name: spark-hs 4 | repository: file://../spark-hs 5 | version: 0.1.0 6 | alias: historyserver 7 | condition: historyserver.enabled 8 | - name: zeppelin-with-spark 9 | repository: file://../zeppelin-with-spark 10 | version: 0.1.0 11 | alias: zeppelin 12 | condition: zeppelin.enabled 13 | - name: jupyter-with-spark 14 | repository: file://../jupyter-with-spark 15 | version: 0.1.0 16 | alias: jupyter 17 | condition: jupyter.enabled 18 | - name: spark-rss 19 | repository: file://../spark-rss 20 | version: 0.1.0 21 | alias: rss 22 | - name: spark-shuffle 23 | repository: file://../spark-shuffle 24 | version: 0.1.0 25 | alias: shuffle 26 | 27 | -------------------------------------------------------------------------------- /charts/spark-umbrella/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.global.mountSecrets }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: history-secrets 6 | type: Opaque 7 | data: 8 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 9 | {{- end }} 10 | --- 11 | {{- if .Values.global.mountZeppelinConf }} 12 | apiVersion: v1 13 | kind: ConfigMap 14 | metadata: 15 | name: {{ .Release.Name }}-zeppelin-configmap 16 | data: 17 | {{ (.Files.Glob "conf/zeppelin/*").AsConfig | indent 2 }} 18 | 
{{- end }} 19 | --- 20 | {{- if .Values.global.mountJupyterConf }} 21 | apiVersion: v1 22 | kind: ConfigMap 23 | metadata: 24 | name: {{ .Release.Name }}-jupyter-configmap 25 | data: 26 | {{ (.Files.Glob "conf/jupyter/*").AsConfig | indent 2 }} 27 | {{- end }} 28 | --- 29 | {{- if .Values.global.mountSparkConf }} 30 | apiVersion: v1 31 | kind: ConfigMap 32 | metadata: 33 | name: {{ .Release.Name }}-zpspark-configmap 34 | data: 35 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }} 36 | --- 37 | apiVersion: v1 38 | kind: ConfigMap 39 | metadata: 40 | name: {{ .Release.Name }}-jp-spark-configmap 41 | data: 42 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }} 43 | {{- end }} 44 | --- 45 | {{- if .Values.global.mountSecrets }} 46 | apiVersion: v1 47 | kind: Secret 48 | metadata: 49 | name: {{ .Release.Name }}-zp-secrets 50 | type: Opaque 51 | data: 52 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 53 | {{- end }} 54 | --- 55 | {{- if .Values.global.mountSecrets }} 56 | apiVersion: v1 57 | kind: Secret 58 | metadata: 59 | name: {{ .Release.Name }}-jp-secrets 60 | type: Opaque 61 | data: 62 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 63 | {{- end }} -------------------------------------------------------------------------------- /charts/spark-umbrella/values.yaml: -------------------------------------------------------------------------------- 1 | ################## 2 | # Configure Zeppelin, Jupyter, History server, Spark resource staging and Spark shuffle service here. 3 | # This umbrella chart configuration overrides the values.yaml in the sub-charts 4 | ################## 5 | 6 | ################## 7 | # GLOBAL ATTRIBUTES FOR UMBRELLA CHART 8 | ################## 9 | global: 10 | mountSecrets: true 11 | mountSparkConf: true 12 | mountZeppelinConf: true 13 | mountJupyterConf: true 14 | serviceAccount: default 15 | # internal attribute. Do not change 16 | umbrellaChart: true 17 | 18 | ################## 19 | # CONFIGURATION FOR ZEPPELIN ENVIRONMENT 20 | ################## 21 | zeppelin: 22 | # whether to enable Zeppelin 23 | enabled: true 24 | # Any environment variables that need to be made available to the container are defined here 25 | # This may include environment variables used by Spark, Zeppelin 26 | 27 | # sparkonk8s-test.json is name of the keyfile to access GCS bucket got history log. 28 | # Change it to reflect name of your key file. 
Key file will always be in path /etc/secrets 29 | environment: 30 | SPARK_SUBMIT_OPTIONS: >- 31 | --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 32 | --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 33 | --conf spark.executor.instances=2 34 | # --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json 35 | sparkEventLog: 36 | enableHistoryEvents: false 37 | # eventsLogDir should point to a URI of GCS bucket where history events will be dumped 38 | eventLogDir: "gs://spark-history-server-store/" 39 | noteBookStorage: 40 | usePVForNoteBooks: true 41 | # If using PV for notebook storage, 'notebookDir' will be an 42 | # absolute path in the mounted persistent volume 43 | notebookDir: "/notebooks" 44 | 45 | ## Enable persistence using Persistent Volume Claims 46 | ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 47 | ## 48 | persistence: 49 | enabled: true 50 | ## If 'existingClaim' is defined, PVC must be created manually before 51 | ## volume will be bound 52 | # existingClaim: 53 | 54 | ## If defined, storageClassName: 55 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 56 | ## If undefined (the default) or set to null, no storageClassName spec is 57 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 58 | ## GKE, Azure & OpenStack) 59 | ## 60 | # storageClass: "-" 61 | accessMode: ReadWriteOnce 62 | size: 8Gi 63 | # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned 64 | keepResource: true 65 | 66 | resources: {} 67 | # limits: 68 | # cpu: 100m 69 | # memory: 128Mi 70 | # requests: 71 | # cpu: 100m 72 | # memory: 128Mi 73 | 74 | ################## 75 | # CONFIGURATION FOR JUPYTER NOTEBOOK ENVIRONMENT 76 | ################## 77 | jupyter: 78 | # Set false to exclude launching Jupyter notebook server with this Helm chart. 79 | enabled: true 80 | 81 | # If using PV for notebook storage, provide absolute path starting with /data/ in jupyter conf file 82 | # using 'c.NotebookApp.notebook_dir'. See jupyter_notebook_config.py.template 83 | 84 | sparkEventLog: 85 | enableHistoryEvents: false 86 | # eventsLogDir should point to a URI of GCS bucket where history events will be dumped 87 | eventLogDir: "gs://spark-history-server-store/" 88 | # Also, edit conf/spark/spark-defaults.conf to specify the keyfile for Google Cloud service account. 89 | 90 | persistence: 91 | enabled: true 92 | ## If 'existingClaim' is defined, PVC must be created manually before 93 | ## volume will be bound 94 | # existingClaim: 95 | 96 | ## If defined, storageClassName: 97 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 98 | ## If undefined (the default) or set to null, no storageClassName spec is 99 | ## set, choosing the default provisioner. 
(gp2 on AWS, standard on 100 | ## GKE, Azure & OpenStack) 101 | ## 102 | # storageClass: "-" 103 | accessMode: ReadWriteOnce 104 | size: 6Gi 105 | # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned 106 | keepResource: true 107 | 108 | resources: {} 109 | # limits: 110 | # cpu: 100m 111 | # memory: 128Mi 112 | # requests: 113 | # cpu: 100m 114 | # memory: 128Mi 115 | 116 | ################## 117 | # CONFIGURATION FOR SPARK HISTORY SERVER 118 | ################## 119 | historyserver: 120 | # whether to enable history server 121 | enabled: false 122 | historyServerConf: 123 | # URI of the GCS bucket 124 | eventsDir: "gs://spark-history-server-store/" 125 | 126 | # sparkonk8s-test.json is name of the keyfile to access GCS bucket got history log. 127 | # Change it to reflect name of your key file. Key file will always be in path /etc/secret 128 | environment: 129 | SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json 130 | 131 | resources: {} 132 | # limits: 133 | # cpu: 100m 134 | # memory: 128Mi 135 | # requests: 136 | # cpu: 100m 137 | # memory: 128Mi 138 | 139 | ################## 140 | # configuration for Spark Resource Staging Server 141 | ################## 142 | rss: 143 | # properties that can be made available via configmap 144 | properties: 145 | spark.ssl.kubernetes.resourceStagingServer.enabled: false 146 | resources: {} 147 | # limits: 148 | # cpu: 100m 149 | # memory: 1Gi 150 | # requests: 151 | # cpu: 100m 152 | # memory: 256Mi 153 | 154 | ################## 155 | # configuration for Spark Shuffle Service 156 | ################## 157 | shuffle: 158 | shufflePodLabels: 159 | app: spark-shuffle-service 160 | spark-version: 2.2.0 161 | resources: {} 162 | # limits: 163 | # cpu: 200m 164 | # # memory: 128Mi 165 | # requests: 166 | # cpu: 200m 167 | # # memory: 128Mi 168 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
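Pulling the pieces of the umbrella chart above together (the sub-chart toggles from `requirements.yaml`, the global mount flags, and the `conf/secrets` files that `templates/configmap.yaml` packages into a Secret exposed at `/etc/secrets`), an end-to-end install might look like the sketch below. It uses the Helm 2 style `--name`/`--namespace` flags seen elsewhere in this repository; the release name, namespace, key-file path, and GCS bucket are illustrative placeholders.

```
# Hypothetical install of the spark-umbrella chart (Helm 2 syntax), run from the charts/ directory.
# The key-file path, release name, namespace and GCS bucket below are placeholders.

# Copy the GCS service-account key file into conf/secrets/ so that
# templates/configmap.yaml packages it into a Secret mounted at /etc/secrets.
cp /path/to/sparkonk8s-test.json ./spark-umbrella/conf/secrets/

# Pull the sub-charts listed in requirements.yaml into spark-umbrella/charts/.
helm dependency update ./spark-umbrella

# Install, flipping the per-sub-chart switches defined in values.yaml:
# enable the history server, keep Zeppelin, and skip Jupyter.
helm install --name spark --namespace spark ./spark-umbrella \
  --set historyserver.enabled=true \
  --set historyserver.historyServerConf.eventsDir="gs://my-history-bucket/" \
  --set jupyter.enabled=false
```

The same `--set` paths map directly onto the sections of the umbrella `values.yaml` shown above, for example `zeppelin.persistence.size` or `shuffle.shufflePodLabels`.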
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: Spark cluster managed by Kubernetes with Zeppelin Notebook environment 4 | name: zeppelin-with-spark 5 | version: 0.1.0 6 | home: https://github.com/apache-spark-on-k8s/spark 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png 8 | maintainers: 9 | - name: SnappyData 10 | email: chomp@snappydata.io 11 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/secrets/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/zeppelin-with-spark/conf/secrets/.gitignore -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/spark/fairscheduler.xml.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | FAIR 23 | 1 24 | 2 25 | 26 | 27 | FIFO 28 | 2 29 | 3 30 | 31 | 32 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/spark/log4j.properties.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps. 
28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/spark/spark-defaults.conf.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/spark/spark-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # This file is sourced when running various Spark programs. 
21 | # Copy it as spark-env.sh and edit that to configure Spark for your site. 22 | 23 | # Options read when launching programs locally with 24 | # ./bin/run-example or ./bin/spark-submit 25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program 28 | 29 | # Options read by executors and drivers running inside the cluster 30 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node 31 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program 32 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data 33 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos 34 | 35 | # Options read in YARN client mode 36 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files 37 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). 38 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) 39 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) 40 | 41 | # Options for the daemons used in the standalone deploy mode 42 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname 43 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master 44 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") 45 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine 46 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) 47 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker 48 | # - SPARK_WORKER_DIR, to set the working directory of worker processes 49 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") 50 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). 51 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") 52 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") 53 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") 54 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers 55 | 56 | # Generic options for the daemons used in the standalone deploy mode 57 | # - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) 58 | # - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) 59 | # - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) 60 | # - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) 61 | # - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) 62 | # - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. 63 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/zeppelin/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
[configuration.xsl: markup stripped in this dump; the stylesheet renders configuration properties as an HTML table with "name", "value", and "description" columns.]
42 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/zeppelin/interpreter-list: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # 18 | # [name] [maven artifact] [description] 19 | 20 | alluxio org.apache.zeppelin:zeppelin-alluxio:0.7.0 Alluxio interpreter 21 | angular org.apache.zeppelin:zeppelin-angular:0.7.0 HTML and AngularJS view rendering 22 | beam org.apache.zeppelin:zeppelin-beam:0.7.0 Beam interpreter 23 | bigquery org.apache.zeppelin:zeppelin-bigquery:0.7.0 BigQuery interpreter 24 | cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.7.0 Cassandra interpreter built with Scala 2.11 25 | elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.7.0 Elasticsearch interpreter 26 | file org.apache.zeppelin:zeppelin-file:0.7.0 HDFS file interpreter 27 | flink org.apache.zeppelin:zeppelin-flink_2.11:0.7.0 Flink interpreter built with Scala 2.11 28 | hbase org.apache.zeppelin:zeppelin-hbase:0.7.0 Hbase interpreter 29 | ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.7.0 Ignite interpreter built with Scala 2.11 30 | jdbc org.apache.zeppelin:zeppelin-jdbc:0.7.0 Jdbc interpreter 31 | kylin org.apache.zeppelin:zeppelin-kylin:0.7.0 Kylin interpreter 32 | lens org.apache.zeppelin:zeppelin-lens:0.7.0 Lens interpreter 33 | livy org.apache.zeppelin:zeppelin-livy:0.7.0 Livy interpreter 34 | md org.apache.zeppelin:zeppelin-markdown:0.7.0 Markdown support 35 | pig org.apache.zeppelin:zeppelin-pig:0.7.0 Pig interpreter 36 | python org.apache.zeppelin:zeppelin-python:0.7.0 Python interpreter 37 | scio org.apache.zeppelin:zeppelin-scio_2.11:0.7.0 Scio interpreter 38 | shell org.apache.zeppelin:zeppelin-shell:0.7.0 Shell command 39 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/zeppelin/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, dailyfile 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n 23 | 24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd 25 | log4j.appender.dailyfile.Threshold = INFO 26 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender 27 | log4j.appender.dailyfile.File = ${zeppelin.log.file} 28 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout 29 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n 30 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/zeppelin/shiro.ini.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [users] 19 | # List of users with their password allowed to access Zeppelin. 20 | # To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections 21 | # To enable admin user, uncomment the following line and set an appropriate password. 
22 | #admin = password1, admin 23 | user1 = password2, role1, role2 24 | user2 = password3, role3 25 | user3 = password4, role2 26 | 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm 28 | [main] 29 | ### A sample for configuring Active Directory Realm 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm 31 | #activeDirectoryRealm.systemUsername = userNameA 32 | 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html 34 | #activeDirectoryRealm.systemPassword = passwordA 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr" 39 | #activeDirectoryRealm.authorizationCachingEnabled = false 40 | 41 | ### A sample for configuring LDAP Directory Realm 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm 43 | ## search base for ldap groups (only relevant for LdapGroupRealm): 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM 47 | #ldapRealm.contextFactory.authenticationMechanism = simple 48 | 49 | ### A sample PAM configuration 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm 51 | #pamRealm.service=sshd 52 | 53 | ### A sample for configuring ZeppelinHub Realm 54 | #zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm 55 | ## Url of ZeppelinHub 56 | #zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com 57 | #securityManager.realms = $zeppelinHubRealm 58 | 59 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager 60 | 61 | ### If caching of user is required then uncomment below lines 62 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager 63 | #securityManager.cacheManager = $cacheManager 64 | 65 | ### Enables 'HttpOnly' flag in Zeppelin cookies 66 | cookie = org.apache.shiro.web.servlet.SimpleCookie 67 | cookie.name = JSESSIONID 68 | cookie.httpOnly = true 69 | ### Uncomment the below line only when Zeppelin is running over HTTPS 70 | #cookie.secure = true 71 | sessionManager.sessionIdCookie = $cookie 72 | 73 | securityManager.sessionManager = $sessionManager 74 | # 86,400,000 milliseconds = 24 hour 75 | securityManager.sessionManager.globalSessionTimeout = 86400000 76 | shiro.loginUrl = /api/login 77 | 78 | [roles] 79 | role1 = * 80 | role2 = * 81 | role3 = * 82 | admin = * 83 | 84 | [urls] 85 | # This section is used for url-based security. For details see the shiro.ini documentation. 86 | # 87 | # You can secure interpreter, configuration and credential information by urls. 88 | # Comment or uncomment the below urls that you want to hide: 89 | # anon means the access is anonymous. 90 | # authc means form based auth Security. 91 | # 92 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request 93 | # in the order they are defined and the FIRST MATCH WINS. 
94 | # 95 | # To allow anonymous access to all but the stated urls, 96 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc) 97 | # 98 | /api/version = anon 99 | /api/interpreter/** = authc, roles[admin] 100 | /api/configurations/** = authc, roles[admin] 101 | /api/credential/** = authc, roles[admin] 102 | #/** = anon 103 | /** = authc 104 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/conf/zeppelin/zeppelin-env.sh.template: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # export JAVA_HOME= 20 | # export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. 21 | # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" 22 | # export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m 23 | # export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m 24 | # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. 25 | # export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true) 26 | 27 | # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default. 28 | # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default. 29 | # export ZEPPELIN_WAR_TEMPDIR # The location of jetty temporary directory. 30 | # export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved 31 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z 32 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false" 33 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved 34 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket 35 | # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json 36 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID 37 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region 38 | # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks 39 | # export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost" 40 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin" 41 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. 
Default "notes" 42 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false" 43 | # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default. 44 | # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0. 45 | # export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading 46 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading 47 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL # Remote Node installer url for Helium dependency loader 48 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL # Remote Npm installer url for Helium dependency loader 49 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL # Remote Yarn package installer url for Helium dependency loader 50 | # export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote). 51 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth? 52 | # export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise 53 | 54 | #### Spark interpreter configuration #### 55 | 56 | ## Kerberos ticket refresh setting 57 | ## 58 | #export KINIT_FAIL_THRESHOLD # (optional) How many times should kinit retry. The default value is 5. 59 | #export KERBEROS_REFRESH_INTERVAL # (optional) The refresh interval for Kerberos ticket. The default value is 1d. 60 | 61 | ## Use provided spark installation ## 62 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit 63 | ## 64 | # export SPARK_HOME # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries 65 | # export SPARK_SUBMIT_OPTIONS # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". 66 | # export SPARK_APP_NAME # (optional) The name of spark application. 67 | 68 | ## Use embedded spark binaries ## 69 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries. 70 | ## however, it is not encouraged when you can define SPARK_HOME 71 | ## 72 | # Options read in YARN client mode 73 | # export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. 74 | # Pyspark (supported with Spark 1.2.1 and above) 75 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI 76 | # export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers. 77 | # export PYTHONPATH 78 | 79 | ## Spark interpreter options ## 80 | ## 81 | # export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default. 82 | # export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default. 83 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default. 84 | # export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL result to display. 1000 by default. 85 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. 
Defaults to 1024000 86 | 87 | 88 | #### HBase interpreter configuration #### 89 | 90 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set 91 | 92 | # export HBASE_HOME= # (require) Under which HBase scripts and configuration should be 93 | # export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml 94 | 95 | #### ZeppelinHub connection configuration #### 96 | # export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use 97 | # export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user 98 | # export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication. 99 | 100 | #### Zeppelin impersonation configuration 101 | # export ZEPPELIN_IMPERSONATE_CMD # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c ' 102 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled 103 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Get the application URL by running these commands: 2 | {{ if contains "NodePort" .Values.zeppelinService.type }} 3 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "zeppelin-with-spark.fullname" . }}) 4 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 5 | echo http://$NODE_IP:$NODE_PORT 6 | {{- else if contains "LoadBalancer" .Values.zeppelinService.type }} 7 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 8 | You can watch the status of by running 'kubectl get svc -w {{ template "zeppelin-with-spark.fullname" . }}' 9 | export ZEPPELIN_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "zeppelin-with-spark.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 10 | echo "Access Zeppelin at http://$ZEPPELIN_SERVICE_IP:{{ .Values.zeppelinService.zeppelinPort }}" 11 | echo "Access Spark at http://$ZEPPELIN_SERVICE_IP:{{ .Values.zeppelinService.sparkUIPort }} after a Spark job is run." 12 | {{- else if contains "ClusterIP" .Values.zeppelinService.type }} 13 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "zeppelin-with-spark.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 14 | echo "Visit http://127.0.0.1:8080 to access Zeppelin server" 15 | kubectl port-forward $POD_NAME 8080:80 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "zeppelin-with-spark.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "zeppelin-with-spark.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "zeppelin-with-spark.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }}-zpenv-configmap 5 | data: 6 | {{- range $key, $val := .Values.environment }} 7 | {{ $key }}: {{ $val | quote }} 8 | {{- end}} 9 | {{- range $key, $val := .Values.noteBookStorage }} 10 | {{ $key }}: {{ $val | quote }} 11 | {{- end}} 12 | {{- range $key, $val := .Values.sparkEventLog }} 13 | {{ $key }}: {{ $val | quote }} 14 | {{- end}} 15 | --- 16 | {{- if (not .Values.global.umbrellaChart) }} 17 | apiVersion: v1 18 | kind: ConfigMap 19 | metadata: 20 | name: {{ .Release.Name }}-zeppelin-configmap 21 | data: 22 | {{ (.Files.Glob "conf/zeppelin/*").AsConfig | indent 2 }} 23 | {{- end }} 24 | --- 25 | {{- if (not .Values.global.umbrellaChart) }} 26 | apiVersion: v1 27 | kind: ConfigMap 28 | metadata: 29 | name: {{ .Release.Name }}-zpspark-configmap 30 | data: 31 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }} 32 | {{- end }} 33 | --- 34 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }} 35 | apiVersion: v1 36 | kind: Secret 37 | metadata: 38 | name: {{ .Release.Name }}-zp-secrets 39 | type: Opaque 40 | data: 41 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }} 42 | {{- end }} -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "zeppelin-with-spark.fullname" . }} 5 | labels: 6 | app: {{ template "zeppelin-with-spark.name" . }} 7 | chart: {{ template "zeppelin-with-spark.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: {{ template "zeppelin-with-spark.name" . }} 15 | release: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app: {{ template "zeppelin-with-spark.name" . 
}} 20 | release: {{ .Release.Name }} 21 | spec: 22 | containers: 23 | - name: {{ .Chart.Name }} 24 | image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}" 25 | imagePullPolicy: {{ .Values.image.pullPolicy }} 26 | ports: 27 | - name: http 28 | containerPort: {{ .Values.zeppelinService.zeppelinPort }} 29 | protocol: TCP 30 | - name: web-ui 31 | containerPort: {{ .Values.zeppelinService.sparkUIPort }} 32 | protocol: TCP 33 | command: 34 | - "/bin/bash" 35 | - "-c" 36 | - > 37 | cp /spark_conf/* /opt/spark/conf; 38 | cp /zeppelin_conf/* /zeppelin/conf; 39 | export MASTER=k8s://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT; 40 | if [ "$usePVForNoteBooks" == "true" ]; then 41 | export ZEPPELIN_NOTEBOOK_DIR="/data/$notebookDir" 42 | mkdir -p "$ZEPPELIN_NOTEBOOK_DIR" 43 | cp -nR /zeppelin/notebook/* "$ZEPPELIN_NOTEBOOK_DIR" 44 | fi; 45 | if [ "$enableHistoryEvents" == "true" ]; then 46 | # currently not using PV for history events so commented out 47 | #if [ "$usePVForEventsLog" == "true" ]; then 48 | # SPARK_EVENTS_DIR="/data/$eventLogDir" 49 | # mkdir -p "$SPARK_EVENTS_DIR" 50 | #else 51 | # SPARK_EVENTS_DIR="$eventLogDir" 52 | #fi 53 | SPARK_EVENTS_DIR="$eventLogDir" 54 | SPARK_EVENT_LOG_CONFIG=" --conf spark.eventLog.enabled=true --conf spark.eventLog.dir=$SPARK_EVENTS_DIR" 55 | fi; 56 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 57 | SPARK_SECRETS_CONFIG=" --conf spark.kubernetes.driver.secrets.{{ .Release.Name }}-zp-secrets=/etc/secrets \ 58 | --conf spark.kubernetes.executor.secrets.{{ .Release.Name }}-zp-secrets=/etc/secrets" 59 | {{- end }} 60 | export SPARK_SUBMIT_OPTIONS="$SPARK_SUBMIT_OPTIONS $SPARK_EVENT_LOG_CONFIG $SPARK_SECRETS_CONFIG \ 61 | --conf spark.kubernetes.namespace={{ .Release.Namespace }} 62 | --conf spark.kubernetes.driver.pod.name=$HOSTNAME \ 63 | --conf spark.kubernetes.authenticate.driver.serviceAccountName={{ .Values.global.serviceAccount | default .Values.serviceAccount }} \ 64 | --conf spark.ui.port={{ .Values.zeppelinService.sparkUIPort }}"; 65 | echo "SPARK_SUBMIT_OPTIONS are: $SPARK_SUBMIT_OPTIONS"; 66 | rm -f /zeppelin/conf/interpreter.json; 67 | echo "Starting Zeppelin server ..."; 68 | /zeppelin/bin/zeppelin.sh 69 | envFrom: 70 | - configMapRef: 71 | name: {{ .Release.Name }}-zpenv-configmap 72 | livenessProbe: 73 | httpGet: 74 | path: / 75 | port: http 76 | initialDelaySeconds: 30 77 | readinessProbe: 78 | httpGet: 79 | path: / 80 | port: http 81 | initialDelaySeconds: 30 82 | resources: 83 | {{ toYaml .Values.resources | indent 12 }} 84 | volumeMounts: 85 | - name: data 86 | mountPath: /data/ 87 | - name: zeppelin-config-properties 88 | mountPath: /zeppelin_conf 89 | - name: spark-config-properties 90 | mountPath: /spark_conf 91 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 92 | - name: secrets-volume 93 | mountPath: /etc/secrets 94 | {{- end }} 95 | {{- with .Values.nodeSelector }} 96 | nodeSelector: 97 | {{ toYaml . | indent 8 }} 98 | {{- end }} 99 | serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }} 100 | {{- with .Values.affinity }} 101 | affinity: 102 | {{ toYaml . | indent 8 }} 103 | {{- end }} 104 | {{- with .Values.tolerations }} 105 | tolerations: 106 | {{ toYaml . 
| indent 8 }} 107 | {{- end }} 108 | volumes: 109 | {{- if or .Values.mountSecrets .Values.global.mountSecrets }} 110 | - name: secrets-volume 111 | secret: 112 | secretName: {{ .Release.Name }}-zp-secrets 113 | {{- end }} 114 | - name: spark-config-properties 115 | configMap: 116 | name: {{ .Release.Name }}-zpspark-configmap 117 | - name: zeppelin-config-properties 118 | configMap: 119 | name: {{ .Release.Name }}-zeppelin-configmap 120 | - name: data 121 | {{- if .Values.persistence.enabled }} 122 | persistentVolumeClaim: 123 | claimName: {{ .Values.persistence.existingClaim | default (include "zeppelin-with-spark.fullname" .) }} 124 | {{- else }} 125 | emptyDir: {} 126 | {{- end -}} -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/pvc.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} 2 | kind: PersistentVolumeClaim 3 | apiVersion: v1 4 | metadata: 5 | name: {{ template "zeppelin-with-spark.fullname" . }} 6 | labels: 7 | app: {{ template "zeppelin-with-spark.fullname" . }} 8 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 9 | release: "{{ .Release.Name }}" 10 | heritage: "{{ .Release.Service }}" 11 | {{- if .Values.persistence.keepResource }} 12 | annotations: 13 | "helm.sh/resource-policy": keep 14 | {{- end }} 15 | spec: 16 | accessModes: 17 | - {{ .Values.persistence.accessMode | quote }} 18 | resources: 19 | requests: 20 | storage: {{ .Values.persistence.size | quote }} 21 | {{- if .Values.persistence.storageClass }} 22 | {{- if (eq "-" .Values.persistence.storageClass) }} 23 | storageClassName: "" 24 | {{- else }} 25 | storageClassName: "{{ .Values.persistence.storageClass }}" 26 | {{- end }} 27 | {{- end }} 28 | {{- end }} 29 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "zeppelin-with-spark.fullname" . }} 5 | labels: 6 | app: {{ template "zeppelin-with-spark.name" . }} 7 | chart: {{ template "zeppelin-with-spark.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | type: {{ .Values.zeppelinService.type | default "LoadBalancer" | quote }} 12 | ports: 13 | - port: {{ .Values.zeppelinService.zeppelinPort }} 14 | targetPort: http 15 | protocol: TCP 16 | name: http 17 | - port: {{ .Values.zeppelinService.sparkUIPort }} 18 | targetPort: web-ui 19 | protocol: TCP 20 | name: web-ui 21 | selector: 22 | app: {{ template "zeppelin-with-spark.name" . }} 23 | release: {{ .Release.Name }} 24 | --- 25 | -------------------------------------------------------------------------------- /charts/zeppelin-with-spark/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for zeppelin-with-spark. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
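# For example, assuming a Helm 2 client configured for your cluster and a local checkout of
# this repository, the chart can typically be installed with any of the values below overridden
# on the command line (the release name and override values here are placeholders):
#   helm install --name my-zeppelin ./charts/zeppelin-with-spark \
#     --set zeppelinService.type=NodePort --set persistence.size=16Gi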
4 | 5 | image: 6 | repository: snappydatainc 7 | tag: zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1-test.1 8 | pullPolicy: IfNotPresent 9 | 10 | zeppelinService: 11 | type: LoadBalancer 12 | zeppelinPort: 8080 13 | sparkUIPort: 4040 14 | 15 | serviceAccount: default 16 | 17 | # Any environment variables that need to be made available to the container are defined here 18 | # This may include environment variables used by Spark, Zeppelin 19 | environment: 20 | # Provide configuration parameters, use syntax as expected by spark-submit 21 | SPARK_SUBMIT_OPTIONS: >- 22 | --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 23 | --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 24 | --conf spark.executor.instances=2 25 | # --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json 26 | 27 | sparkEventLog: 28 | enableHistoryEvents: false 29 | # eventsLogDir should point to a URI of GCS bucket where history events will be dumped 30 | eventLogDir: "gs://spark-history-server-store/" 31 | 32 | # if mountSecrets is set to true files in 'conf/secrets' directory will be mounted 33 | # on path '/etc/secrets' as secrets 34 | mountSecrets: false 35 | 36 | noteBookStorage: 37 | usePVForNoteBooks: true 38 | # If using PV for notebook storage, 'notebookDir' will be an 39 | # absolute path in the mounted persistent volume 40 | notebookDir: "/notebooks" 41 | 42 | ## Enable persistence using Persistent Volume Claims 43 | ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 44 | ## 45 | persistence: 46 | enabled: true 47 | ## If 'existingClaim' is defined, PVC must be created manually before 48 | ## volume will be bound 49 | # existingClaim: 50 | 51 | ## If defined, storageClassName: 52 | ## If set to "-", storageClassName: "", which disables dynamic provisioning 53 | ## If undefined (the default) or set to null, no storageClassName spec is 54 | ## set, choosing the default provisioner. (gp2 on AWS, standard on 55 | ## GKE, Azure & OpenStack) 56 | ## 57 | # storageClass: "-" 58 | accessMode: ReadWriteOnce 59 | size: 8Gi 60 | # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned 61 | keepResource: true 62 | 63 | resources: {} 64 | # limits: 65 | # cpu: 100m 66 | # memory: 128Mi 67 | # requests: 68 | # cpu: 100m 69 | # memory: 128Mi 70 | 71 | nodeSelector: {} 72 | 73 | tolerations: [] 74 | 75 | affinity: {} 76 | 77 | #internal attribute, do not change 78 | global: 79 | umbrellaChart: false -------------------------------------------------------------------------------- /dockerfiles/jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) Jupyter Development Team. 2 | # Distributed under the terms of the Modified BSD License. 3 | 4 | # Refer to https://github.com/SnappyDataInc/spark-on-k8s/tree/master/docs/building-images.md#jupyter-image 5 | # for instructions to build the Docker image. 6 | # This Dockerfile should be present in the same directory where spark-on-k8s distribution 7 | # directory (spark-2.2.0-k8s-0.5.0-bin-2.7.3) is kept. 
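# For example, assuming the image is published to your own Docker Hub repository
# (the repository name and tag below are placeholders; see docs/building-images.md for the full steps),
# the image is typically built and pushed from this directory with:
#   docker build -t <your-repo-name>/jupyter-notebook:<image-tag> -f Dockerfile .
#   docker push <your-repo-name>/jupyter-notebook:<image-tag>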
8 | 9 | FROM jupyter/scipy-notebook 10 | 11 | USER root 12 | 13 | # Copied from pyspark notebook- start 14 | # Spark dependencies 15 | ENV APACHE_SPARK_VERSION 2.2.0 16 | ENV HADOOP_VERSION 2.7 17 | 18 | RUN apt-get -y update && \ 19 | apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \ 20 | apt-get clean && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | # Copied from pyspark notebook- end 24 | 25 | ####### Begin changes for Spark-on-k8s ################# 26 | 27 | RUN mkdir -p /opt/spark && \ 28 | mkdir -p /opt/spark/work-dir \ 29 | touch /opt/spark/RELEASE && \ 30 | rm -f /bin/sh && \ 31 | ln -sv /bin/bash /bin/sh && \ 32 | chgrp root /etc/passwd && chmod ug+rw /etc/passwd 33 | 34 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/jars /opt/spark/jars 35 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/bin /opt/spark/bin 36 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/sbin /opt/spark/sbin 37 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/conf /opt/spark/conf 38 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/dockerfiles/spark-base/entrypoint.sh /opt/ 39 | 40 | ADD spark-2.2.0-k8s-0.5.0-bin-2.7.3/examples /opt/spark/examples 41 | ADD spark-2.2.0-k8s-0.5.0-bin-2.7.3/python /opt/spark/python 42 | 43 | # Copy aws and gcp jars 44 | # COPY aws_gcp_jars/hadoop-aws-2.7.3.jar /opt/spark/jars 45 | # COPY aws_gcp_jars/aws-java-sdk-1.7.4.jar /opt/spark/jars 46 | # COPY aws_gcp_jars/gcs-connector-latest-hadoop2.jar /opt/spark/jars 47 | 48 | ENV SPARK_HOME /opt/spark 49 | 50 | ENV PYTHON_VERSION 2.7.13 51 | ENV PYSPARK_PYTHON python 52 | ENV PYSPARK_DRIVER_PYTHON python 53 | ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH} 54 | 55 | CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ 56 | env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt && \ 57 | readarray -t SPARK_DRIVER_JAVA_OPTS < /tmp/java_opts.txt && \ 58 | if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ 59 | if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ 60 | if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ 61 | if ! [ -z ${SPARK_MOUNTED_FILES_DIR+x} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ 62 | if ! [ -z ${SPARK_MOUNTED_FILES_FROM_SECRET_DIR+x} ]; then cp -R "$SPARK_MOUNTED_FILES_FROM_SECRET_DIR/." 
. ; fi && \ 63 | ${JAVA_HOME}/bin/java "${SPARK_DRIVER_JAVA_OPTS[@]}" -cp "$SPARK_CLASSPATH" -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS $SPARK_DRIVER_CLASS $PYSPARK_PRIMARY $PYSPARK_FILES $SPARK_DRIVER_ARGS 64 | 65 | 66 | # Copied from pyspark notebook- start 67 | # Spark config 68 | ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip 69 | ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info 70 | # Copied from pyspark notebook- end 71 | 72 | RUN chown -R $NB_USER:users /opt/spark 73 | 74 | ####### End changes for Spark-on-k8s ########################## 75 | 76 | USER $NB_USER 77 | 78 | # Added to enable python2 notebooks 79 | RUN conda create --quiet --yes \ 80 | -n ipykernel_py2 python=2 ipykernel && \ 81 | source activate ipykernel_py2 && \ 82 | python -m ipykernel install --user 83 | 84 | 85 | RUN source activate ipykernel_py2 && \ 86 | conda install --yes \ 87 | matplotlib \ 88 | scipy \ 89 | numpy \ 90 | pandas \ 91 | nltk \ 92 | tensorflow && \ 93 | source activate ipykernel_py2 && \ 94 | pip install \ 95 | sklearn \ 96 | wordcloud \ 97 | treeinterpreter 98 | 99 | -------------------------------------------------------------------------------- /dockerfiles/zeppelin/setSparkEnvVars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export MASTER=k8s://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT 4 | export SPARK_SUBMIT_OPTIONS="--kubernetes-namespace default --conf spark.kubernetes.driver.pod.name=$HOSTNAME --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1" 5 | -------------------------------------------------------------------------------- /docs/building-images.md: -------------------------------------------------------------------------------- 1 | 2 | # Building and publishing Docker images 3 | 4 | ## Prerequisites 5 | 6 | You should have Docker installed on the machine from which you want to build and publish the Docker images. 7 | 8 | Refer to [this page](https://docs.docker.com/install) for information about installing Docker. 9 | 10 | ## Spark Images 11 | 12 | The binaries used to build the Spark images are based on the [spark-on-k8s](https://github.com/apache-spark-on-k8s/spark) project, with a few additional changes. 13 | These have been committed to a clone of the branch-2.2-kubernetes branch of the above repository and are available as a branch in SnappyData's fork of Apache Spark. 14 | 15 | Get the latest branch: 16 | 17 | ```bash 18 | $ git clone https://github.com/SnappyDataInc/spark.git -b snappy/branch-2.2-kubernetes 19 | ``` 20 | 21 | Go to the checkout directory and build the project using [maven](https://maven.apache.org/install.html). 22 | Also, package the build into a tarball, which will be needed when building the Docker images for Jupyter and Apache Zeppelin. 23 | 24 | ```bash 25 | $ ./build/mvn -Pkubernetes -DskipTests clean package 26 | $ ./dev/make-distribution.sh --name 2.7.3 --pip --tgz -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes 27 | ``` 28 | 29 | Now that the binaries are built, you also need to download and place the following jars into the directories 30 | assembly/target/scala-2.11/jars and dist/jars of your checkout.
31 | These are needed to enable access to Google Cloud Storage and AWS S3 buckets, which your Spark applications may need. 32 | 33 | 1. [aws-java-sdk-1.7.4.jar](http://central.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar) 34 | 2. [hadoop-aws-2.7.3.jar](http://central.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar) 35 | 3. [gcs-connector-latest-hadoop2.jar](https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar) 36 | 37 | Now build and publish the Docker images to your Docker Hub account. This may take several minutes depending upon your network speed. 38 | 39 | ```bash 40 | $ ./sbin/build-push-docker-images.sh -r <repo> -t <tag> build 41 | ``` 42 | 43 | Make sure you are logged in to your Docker Hub account before publishing the images: 44 | 45 | ```bash 46 | $ docker login 47 | Login with your Docker ID to push and pull images from Docker Hub. If you don't have a Docker ID, head over to https://hub.docker.com to create one. 48 | Username: 49 | Password: 50 | $ ./sbin/build-push-docker-images.sh -r <repo> -t <tag> push 51 | ``` 52 | 53 | ## Jupyter Image 54 | 55 | This image contains the Spark binaries you built above, along with the dependencies needed for the Jupyter Notebook server. 56 | 57 | Extract the Spark tarball generated above into the directory where you have copied the [Dockerfile for the Jupyter image](../dockerfiles/jupyter/Dockerfile). 58 | 59 | Make sure that the third-party jars needed to access GCS and AWS S3 are copied to the jars directory of the extracted tarball. 60 | 61 | Build and publish the Jupyter image: 62 | 63 | ```bash 64 | $ docker build -t <your-repo-name>/jupyter-notebook:<image-tag> -f Dockerfile . 65 | $ docker push <your-repo-name>/jupyter-notebook:<image-tag> 66 | ``` 67 | 68 | For example: 69 | ```bash 70 | $ docker build -t snappydatainc/jupyter-notebook:5.2.2-spark-v2.2.0-kubernetes-0.5.1 -f Dockerfile . 71 | $ docker push snappydatainc/jupyter-notebook:5.2.2-spark-v2.2.0-kubernetes-0.5.1 72 | ``` 73 | 74 | ## Zeppelin Image 75 | 76 | This image contains the Spark binaries built earlier, along with the dependencies needed to launch the Apache Zeppelin server. 77 | 78 | Extract the Spark tarball generated above into the directory where you have copied the [Dockerfile for the Zeppelin image](../dockerfiles/zeppelin/Dockerfile). 79 | Also, copy the script [setSparkEnvVars.sh](../dockerfiles/zeppelin/setSparkEnvVars.sh) to the same location. 80 | 81 | Make sure that the third-party jars needed to access GCS and AWS S3 are copied to the jars directory of the extracted tarball. 82 | 83 | Build and publish the Zeppelin image: 84 | 85 | ```bash 86 | $ docker build -t <your-repo-name>/zeppelin:<image-tag> -f Dockerfile . 87 | $ docker push <your-repo-name>/zeppelin:<image-tag> 88 | ``` 89 | 90 | For example: 91 | ```bash 92 | $ docker build -t snappydatainc/zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1 -f Dockerfile . 93 | $ docker push snappydatainc/zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1 94 | ``` 95 | 96 | ## SnappyData Image 97 | 98 | The SnappyData Docker image available on Docker Hub is built using the OSS version of the product. A Docker image with 99 | SnappyData Enterprise bits will be available soon. 100 | 101 | Currently, some manual steps are needed to build this image; these will be automated later. 102 | 103 | - Download the SnappyData OSS tarball of the required version from the 104 | [GitHub releases page](https://github.com/snappydatainc/snappydata/releases) and extract its contents into a directory.
105 | 106 | - Copy the Dockerfile and start script required for the SnappyData image 107 | [from this branch](https://github.com/SnappyDataInc/snappy-cloud-tools/blob/SNAP-2280/docker) into the extracted 108 | SnappyData directory. 109 | 110 | - Copy the [SnappyData interpreter jar](https://github.com/SnappyDataInc/zeppelin-interpreter/releases) for 111 | Apache Zeppelin into the jars directory. 112 | 113 | - Optionally, you can also add the third-party jar needed to access GCS to the jars directory. The libraries to access 114 | AWS S3 and HDFS are already included. 115 | 116 | - Switch to the extracted directory and build and publish the SnappyData image using the following commands. 117 | 118 | ```bash 119 | $ cd <extracted-snappydata-dir> 120 | $ docker build -t <your-repo-name>/snappydata:<image-tag> -f Dockerfile . 121 | $ docker push <your-repo-name>/snappydata:<image-tag> 122 | ``` 123 | 124 | For example: 125 | ```bash 126 | $ docker build -t snappydatainc/snappydata:1.0.1 -f Dockerfile . 127 | $ docker push snappydatainc/snappydata:1.0.1 128 | ``` 129 | -------------------------------------------------------------------------------- /k8s-helm-spark-architecture-draw.io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/k8s-helm-spark-architecture-draw.io.png -------------------------------------------------------------------------------- /kubernetes-how-does-it-work.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/kubernetes-how-does-it-work.1.png -------------------------------------------------------------------------------- /spark-on-kubernetes-how-does-it-work.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/spark-on-kubernetes-how-does-it-work.2.png -------------------------------------------------------------------------------- /tiles/README.md: -------------------------------------------------------------------------------- 1 | # Pivotal Cloud Foundry tile for SnappyData 2 | 3 | ## Building the tile 4 | 5 | - Install the [tile generator](https://docs.pivotal.io/tiledev/2-1/tile-generator.html) tool on your local machine. 6 | 7 | - You also need to install the [latest bosh CLI](https://bosh.io/docs/#downloads). 8 | 9 | - Move to the [tiles/snappydata](snappydata/) directory of the cloned repository, where tile.yml is present, and build the tile. 10 | - `$ tile build` 11 | 12 | - Optionally, you can specify a version for the tile. 13 | - `$ tile build 1.0` 14 | 15 | - Upload the generated .pivotal file from the product/ folder to Pivotal Ops Manager. 16 | 17 | ## Configuring the tile 18 | 19 | Once the .pivotal file is imported into Pivotal Ops Manager, add the tile to the dashboard by clicking the '+' sign. 20 | Click on the tile to configure it. 21 | 22 | Here, at a minimum, you need to 1. specify the credentials for the Kubernetes/PKS cluster on which you will launch the SnappyData chart and 2. select the appropriate network. 23 | The credentials to connect to the Kubernetes/PKS cluster include the CA cert, cluster token, and cluster URL. (Note that the CA cert needs to be base64-decoded after fetching it from your kubeconfig file; see the example below.) 24 | 25 | Save these configurations, return to the installation dashboard, and hit 'Apply changes'.
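For example, assuming `kubectl` and `base64` are available locally and the first cluster entry in your kubeconfig is the target PKS cluster (adjust the index or use the cluster name otherwise), the CA cert and cluster URL can be fetched as follows:

```bash
# Print the base64-decoded CA certificate of the first cluster entry in the kubeconfig
kubectl config view --raw -o jsonpath='{.clusters[0].cluster.certificate-authority-data}' | base64 --decode

# Print the API server URL of the same cluster entry
kubectl config view --raw -o jsonpath='{.clusters[0].cluster.server}'
```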
26 | 27 | 28 | ## Creating and consuming a service 29 | 30 | Once installed, users can create the service instance of SnappyData which essentially installs the Helm chart on the 31 | Kubernetes/PKS cluster provided during tile configuration. 32 | 33 | - Install the [CF CLI](https://docs.cloudfoundry.org/cf-cli/install-go-cli.html) and log in to your Cloud Foundry's API server. 34 | - `$ cf login -a https:// --skip-ssl-validation` 35 | - You can view that the SnappyData service broker is now visible. 36 | - `$ cf service-brokers` 37 | - Create a service instance using the broker. It'll launch the SnappyData cluster on your configured Kubernetes/PKS cluster. 38 | - Currently, we have three plans for the SnappyData service: 1. small (default), 2. medium and 3. large. All of these start the cluster with one locator, one lead and two servers but with different memory. 39 | - `$ cf create-service snappydata small snappydata_small` 40 | - We'll add to or change these plans in future. 41 | - Now that the service is available, you can bind it to any of your running apps. 42 | - `$ cf bs snappydata_small` 43 | - `$ cf restage ` 44 | - Now, your app has access to information about the service via environment variable VCAP_SERVICES. Typically, all the Kubernetes services created by the chart are included in this environment variable. 45 | - `$ cf env ` 46 | 47 | -------------------------------------------------------------------------------- /tiles/snappydata/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/tiles/snappydata/icon.png -------------------------------------------------------------------------------- /tiles/snappydata/tile-history.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 0.1.0 3 | -------------------------------------------------------------------------------- /tiles/snappydata/tile.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: snappydata 3 | icon_file: icon.png 4 | label: SnappyData On PKS 5 | description: Test tile for Kibosh with SnappyData Helm chart 6 | 7 | packages: 8 | - name: snappydata 9 | type: kibosh 10 | helm_chart_dir: ../../charts/snappydata 11 | -------------------------------------------------------------------------------- /utils/debug-pod-override-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "v1", 3 | "kind": "Pod", 4 | "metadata": { 5 | "labels": { 6 | "run": "snappy-debug-pod" 7 | }, 8 | "name": "snappy-debug-pod", 9 | "namespace": "spark" 10 | }, 11 | "spec": { 12 | "containers": [ 13 | { 14 | "args": [ 15 | "bash" 16 | ], 17 | IMAGE_MARKER 18 | "imagePullPolicy": "IfNotPresent", 19 | "name": "snappy-debug-pod", 20 | "stdin": true, 21 | "stdinOnce": true, 22 | "tty": true, 23 | "volumeMounts": [ 24 | VOLUME_MOUNTS_MARKER 25 | ] 26 | } 27 | ], 28 | "restartPolicy": "Never", 29 | "volumes": [ 30 | VOLUMES_MARKER 31 | ] 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /utils/snappy-debug-pod.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #copyright (c) 2018 SnappyData, Inc. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you 5 | # may not use this file except in compliance with the License. You 6 | # may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | # implied. See the License for the specific language governing 14 | # permissions and limitations under the License. See accompanying 15 | # LICENSE file. 16 | # 17 | 18 | usage() { 19 | echo "Usage: snappy-debug-pod.sh --pvc [--namespace ] [--image ]" 20 | echo "Options namespace and image are optional. Default namespace is 'spark'" 21 | echo "" 22 | echo "This script launches a pod in the K8S cluster with user specified persistent volumes mounted on it. User must provide list of persistent volume claims as an input for the volumes to be mounted. 23 | Volumes will be mounted on the path starting with /data (volume1 on /data0 and so on). This script can be used to inspect logs on volumes even when SnappyData system is not online" 24 | echo "" 25 | echo "Example usage: snappy-debug-pod.sh --pvc snappy-disk-claim-snappydata-leader-0,snappy-disk-claim-snappydata-server-0 --namespace default" 26 | } 27 | 28 | namespace=spark 29 | image="snappydatainc/snappydata:1.0.1.1-test.1" 30 | imageString='"image": '\""$image"\"',' 31 | mountString="" 32 | volumeString="" 33 | gotPVC=false 34 | 35 | while (( "$#" )); do 36 | case "$1" in 37 | --pvc) 38 | pvclist=$2 39 | gotPVC=true 40 | 41 | # read the comma separated PVC names into an array 42 | oIFS="$IFS"; IFS=, ; 43 | read -r -a array <<< "$pvclist"; 44 | IFS="$oIFS" 45 | 46 | # using PVCs provided by the user, 47 | # create valid JSON string for volumes and volumeMounts atrbutes of a pod 48 | for index in "${!array[@]}" 49 | do 50 | if [ $index -eq 0 ] 51 | then 52 | mountString="{\"mountPath\": \"/data$index/\", \"name\": \"snappy-disk-claim$index\"}" 53 | volumeString="{\"name\": \"snappy-disk-claim$index\", \"persistentVolumeClaim\": {\"claimName\": \"${array[index]}\"}}" 54 | else 55 | mountString="$mountString, {\"mountPath\": \"/data$index/\", \"name\": \"snappy-disk-claim$index\"}" 56 | volumeString="$volumeString, {\"name\": \"snappy-disk-claim$index\", \"persistentVolumeClaim\": {\"claimName\": \"${array[index]}\"}}" 57 | fi 58 | # echo "$index ${array[index]}" 59 | # echo $mountString 60 | # echo $volumeString 61 | echo "Volume for ${array[index]} will be mounted on /data$index" 62 | done 63 | shift 2 64 | ;; 65 | --namespace) 66 | namespace=$2 67 | shift 2 68 | ;; 69 | --image) 70 | image=$2 71 | imageString='"image": '\""$image"\"',' 72 | shift 2 73 | ;; 74 | --help) 75 | usage 76 | exit 0 77 | ;; 78 | *) 79 | break 80 | ;; 81 | esac 82 | done 83 | 84 | if [ "$gotPVC" = false ] ; then 85 | echo 'ERROR: PVC list not provided' 86 | usage 87 | exit 1 88 | fi 89 | 90 | 91 | # debug-pod-override-template contains, the template JSON in which 92 | # JSON string for volumes and volumeMounts is added 93 | # first create a copy to modify JSON 94 | cp debug-pod-override-template.json /tmp/debug-pod-override-actual.json 95 | sed -i 's|IMAGE_MARKER|'"$imageString"'|; s|VOLUME_MOUNTS_MARKER|'"$mountString"'|; s|VOLUMES_MARKER|'"$volumeString"'|' /tmp/debug-pod-override-actual.json 96 | 97 | #run the actual command that will launch a pod 98 | overrides=$( /tmp/kubecommand 107 | chmod 
+x /tmp/kubecommand 108 | /tmp/kubecommand 109 | rm -f /tmp/kubecommand 110 | #$cmd 111 | --------------------------------------------------------------------------------
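A typical debugging session with the script above might look like the following; the PVC names are taken from the script's own usage example and will differ for your deployment, while `kubectl exec` and `kubectl delete` are the standard commands for inspecting and cleaning up the pod:

```bash
# Launch the debug pod with two data volumes mounted at /data0 and /data1
./snappy-debug-pod.sh --pvc snappy-disk-claim-snappydata-leader-0,snappy-disk-claim-snappydata-server-0 --namespace spark

# From another shell, inspect the logs on the mounted volumes, then clean up
kubectl exec -it snappy-debug-pod -n spark -- ls -l /data0 /data1
kubectl delete pod snappy-debug-pod -n spark
```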