├── .gitignore
├── README.md
├── charts
    ├── jupyter-with-spark
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── conf
    │   │   ├── jupyter
    │   │   │   └── jupyter_notebook_config.py.template
    │   │   └── spark
    │   │   │   └── spark-defaults.conf
    │   ├── templates
    │   │   ├── NOTES.txt
    │   │   ├── _helpers.tpl
    │   │   ├── configmap.yaml
    │   │   ├── deployment.yaml
    │   │   ├── pvc.yaml
    │   │   └── service.yaml
    │   └── values.yaml
    ├── snappydata
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── conf
    │   │   ├── debug.conf.template
    │   │   ├── docker.properties.template
    │   │   ├── fairscheduler.xml
    │   │   ├── fairscheduler.xml.template
    │   │   ├── leads.template
    │   │   ├── locators.template
    │   │   ├── log4j.properties.template
    │   │   ├── metrics.properties.template
    │   │   ├── servers.template
    │   │   ├── slaves.template
    │   │   ├── snappy-env.sh.template
    │   │   ├── spark-defaults.conf.template
    │   │   └── spark-env.sh.template
    │   ├── plans.yaml
    │   ├── plans
    │   │   ├── large.yaml
    │   │   ├── medium.yaml
    │   │   └── small.yaml
    │   ├── templates
    │   │   ├── NOTES.txt
    │   │   ├── _helpers.tpl
    │   │   ├── configmap.yaml
    │   │   ├── leader_statefulset.yaml
    │   │   ├── locator_statefulset.yaml
    │   │   ├── role-binding.yaml
    │   │   ├── server_statefulset.yaml
    │   │   └── service.yaml
    │   └── values.yaml
    ├── spark-hs
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── conf
    │   │   └── secrets
    │   │   │   └── .gitignore
    │   ├── templates
    │   │   ├── NOTES.txt
    │   │   ├── _helpers.tpl
    │   │   ├── configmap.yaml
    │   │   ├── deployment.yaml
    │   │   └── service.yaml
    │   └── values.yaml
    ├── spark-rss
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── templates
    │   │   ├── NOTES.txt
    │   │   ├── _helpers.tpl
    │   │   ├── configmap.yaml
    │   │   ├── deployment.yaml
    │   │   └── service.yaml
    │   └── values.yaml
    ├── spark-shuffle
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── templates
    │   │   ├── NOTES.txt
    │   │   ├── _helpers.tpl
    │   │   └── shuffle-daemonset.yaml
    │   └── values.yaml
    ├── spark-umbrella
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── charts
    │   │   └── .gitignore
    │   ├── conf
    │   │   ├── secrets
    │   │   │   └── .gitignore
    │   │   ├── spark
    │   │   │   ├── .gitignore
    │   │   │   ├── fairscheduler.xml.template
    │   │   │   ├── log4j.properties.template
    │   │   │   ├── metrics.properties.template
    │   │   │   ├── spark-defaults.conf
    │   │   │   ├── spark-defaults.conf.template
    │   │   │   └── spark-env.sh.template
    │   │   └── zeppelin
    │   │   │   ├── .gitignore
    │   │   │   ├── configuration.xsl
    │   │   │   ├── interpreter-list
    │   │   │   ├── log4j.properties
    │   │   │   ├── shiro.ini.template
    │   │   │   ├── zeppelin-env.sh.template
    │   │   │   └── zeppelin-site.xml.template
    │   ├── requirements.lock
    │   ├── requirements.yaml
    │   ├── templates
    │   │   └── configmap.yaml
    │   └── values.yaml
    └── zeppelin-with-spark
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── conf
    │       ├── secrets
    │       │   └── .gitignore
    │       ├── spark
    │       │   ├── fairscheduler.xml.template
    │       │   ├── log4j.properties.template
    │       │   ├── metrics.properties.template
    │       │   ├── spark-defaults.conf.template
    │       │   └── spark-env.sh.template
    │       └── zeppelin
    │       │   ├── configuration.xsl
    │       │   ├── interpreter-list
    │       │   ├── log4j.properties
    │       │   ├── shiro.ini.template
    │       │   ├── zeppelin-env.sh.template
    │       │   └── zeppelin-site.xml.template
    │   ├── templates
    │       ├── NOTES.txt
    │       ├── _helpers.tpl
    │       ├── configmap.yaml
    │       ├── deployment.yaml
    │       ├── pvc.yaml
    │       └── service.yaml
    │   └── values.yaml
├── dockerfiles
    ├── jupyter
    │   └── Dockerfile
    └── zeppelin
    │   ├── Dockerfile
    │   └── setSparkEnvVars.sh
├── docs
    └── building-images.md
├── k8s-helm-spark-architecture-draw.io.png
├── kubernetes-how-does-it-work.1.png
├── spark-on-kubernetes-how-does-it-work.2.png
├── templates
    └── snappydata-cluster.yml
├── tiles
    ├── README.md
    └── snappydata
    │   ├── icon.png
    │   ├── tile-history.yml
    │   └── tile.yml
└── utils
    ├── debug-pod-override-template.json
    └── snappy-debug-pod.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.iml
3 | 
4 | tiles/snappydata/product/
5 | tiles/snappydata/release/
6 | 
7 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | appVersion: "1.0"
 3 | description: A Helm chart for launching Jupyter notebooks with Apache Spark in-cluster client mode.
 4 | name: jupyter-with-spark
 5 | version: 0.1.0
 6 | home: https://github.com/apache-spark-on-k8s/spark
 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png
 8 | maintainers:
 9 |   - name: SnappyData, Inc.
10 |     email: chomp@snappydata.io
11 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/conf/spark/spark-defaults.conf:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Default system properties included when running spark-submit.
19 | # This is useful for setting default environmental settings.
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | spark.submit.deployMode                        client
29 | spark.kubernetes.driver.docker.image           snappydatainc/spark-driver-py:v2.2.0-kubernetes-0.5.1
30 | spark.kubernetes.executor.docker.image         snappydatainc/spark-executor-py:v2.2.0-kubernetes-0.5.1
31 | spark.kubernetes.initcontainer.docker.image    snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1
32 | spark.kubernetes.docker.image.pullPolicy       Always
33 | # Replace sparkonk8s-test.json with the actual name of your keyfile
34 | # to enable access to Google Cloud Storage.
35 | spark.hadoop.google.cloud.auth.service.account.json.keyfile   /etc/secrets/sparkonk8s-test.json
36 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/NOTES.txt:
--------------------------------------------------------------------------------
 1 | Get the application URL by running these commands:
 2 | {{- if contains "NodePort" .Values.jupyterService.type }}
 3 |   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-jupyter-spark)
 4 |   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
 5 |   echo "Access Jupyter notebooks at http://$NODE_IP:$NODE_PORT"
 6 |   echo "Access Spark at http://$NODE_IP:{{ .Values.jupyterService.sparkUIPort }} after a Spark job is run."
 7 | {{- else if contains "LoadBalancer" .Values.jupyterService.type }}
 8 |      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
 9 |            You can watch the status of by running 'kubectl get svc -w {{ .Release.Name }}-jupyter-spark'
10 |   export JUPYTER_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ .Release.Name }}-jupyter-spark -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
11 |   echo "Access Jupyter notebooks at http://$JUPYTER_SERVICE_IP:{{ .Values.jupyterService.jupyterPort }}"
12 |   echo "Access Spark at http://$SPARK_UI_SERVICE_IP:{{ .Values.jupyterService.sparkUIPort }} after a Spark job is run."
13 | {{- else if contains "ClusterIP" .Values.jupyterService.type }}
14 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "jupyter-with-spark.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
15 |   echo "Visit http://127.0.0.1:8888 to access Jupyter notebooks"
16 |   kubectl port-forward $POD_NAME 8888:80
17 | {{- end }}
18 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "jupyter-with-spark.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "jupyter-with-spark.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "jupyter-with-spark.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | {{- if (not .Values.global.umbrellaChart) }}
 2 | apiVersion: v1
 3 | kind: ConfigMap
 4 | metadata:
 5 |   name: {{ .Release.Name }}-jupyter-configmap
 6 | data:
 7 |     jupyter_notebook_config.py: |-
 8 |       {{- range .Files.Lines "conf/jupyter/jupyter_notebook_config.py" }}
 9 |         {{ . }}{{ end }}
10 | {{- end }}
11 | ---
12 | {{- if (not .Values.global.umbrellaChart) }}
13 | apiVersion: v1
14 | kind: ConfigMap
15 | metadata:
16 |   name: {{ .Release.Name }}-jp-spark-configmap
17 | data:
18 |     spark-defaults.conf: |-
19 |       {{- range .Files.Lines "conf/spark/spark-defaults.conf" }}
20 |         {{ . }}{{ end }}
21 |     fairscheduler.xml: |-
22 |       {{- range .Files.Lines "conf/spark/fairscheduler.xml" }}
23 |         {{ . }}{{ end }}
24 |     log4j.properties: |-
25 |       {{- range .Files.Lines "conf/spark/log4j.properties" }}
26 |         {{ . }}{{ end }}
27 | {{- end }}
28 | ---
29 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }}
30 | apiVersion: v1
31 | kind: Secret
32 | metadata:
33 |   name: {{ .Release.Name }}-jp-secrets
34 | type: Opaque
35 | data:
36 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
37 | {{- end }}
38 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/deployment.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: apps/v1beta1
  2 | kind: Deployment
  3 | metadata:
  4 |   name: {{ template "jupyter-with-spark.fullname" . }}
  5 |   labels:
  6 |     app: {{ template "jupyter-with-spark.name" . }}
  7 |     chart: {{ template "jupyter-with-spark.chart" . }}
  8 |     release: {{ .Release.Name }}
  9 |     heritage: {{ .Release.Service }}
 10 | spec:
 11 |   replicas: 1
 12 |   selector:
 13 |     matchLabels:
 14 |       app: {{ template "jupyter-with-spark.name" . }}
 15 |       release: {{ .Release.Name }}
 16 |   template:
 17 |     metadata:
 18 |       labels:
 19 |         app: {{ template "jupyter-with-spark.name" . }}
 20 |         release: {{ .Release.Name }}
 21 |     spec:
 22 |       containers:
 23 |         - name: {{ .Chart.Name }}
 24 |           image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
 25 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
 26 |           ports:
 27 |             - name: http
 28 |               containerPort: {{ .Values.jupyterService.jupyterPort }}
 29 |               protocol: TCP
 30 |             - name: web-ui
 31 |               containerPort: {{ .Values.jupyterService.sparkUIPort }}
 32 |               protocol: TCP
 33 |           command:
 34 |             - "/bin/sh"
 35 |             - "-c"
 36 |             - >
 37 |               jupyter notebook --generate-config;
 38 |               size=$(wc -c < /tmp/conf/jupyter/jupyter_notebook_config.py);
 39 |               if [ $size -gt 0 ]; then cp /tmp/conf/jupyter/jupyter_notebook_config.py /home/jovyan/.jupyter/jupyter_notebook_config.py; fi;
 40 |               cp /tmp/conf/spark/* /opt/spark/conf/;
 41 |               mkdir -p /home/jovyan/notebooks;
 42 |               prefix=`date +%s%N | cut -b1-13`;
 43 |               echo "" >> /opt/spark/conf/spark-defaults.conf;
 44 |               echo "spark.master                                   k8s://https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT" >> /opt/spark/conf/spark-defaults.conf;
 45 |               echo "spark.kubernetes.driver.pod.name               $HOSTNAME" >> /opt/spark/conf/spark-defaults.conf;
 46 |               echo "spark.kubernetes.executor.podNamePrefix        spark-$prefix" >> /opt/spark/conf/spark-defaults.conf;
 47 |               echo "spark.kubernetes.authenticate.driver.serviceAccountName {{ .Values.global.serviceAccount | default .Values.serviceAccount }}" >> /opt/spark/conf/spark-defaults.conf;
 48 |               echo "spark.ui.port                                  {{ .Values.jupyterService.sparkUIPort }}" >> /opt/spark/conf/spark-defaults.conf;
 49 |               echo "spark.kubernetes.namespace                     {{ .Release.Namespace }}" >> /opt/spark/conf/spark-defaults.conf;
 50 |             {{- if .Values.sparkEventLog.enableHistoryEvents }}
 51 |               echo "spark.eventLog.enabled                         true" >> /opt/spark/conf/spark-defaults.conf;
 52 |               echo "spark.eventLog.dir                             {{ .Values.sparkEventLog.eventLogDir }}" >> /opt/spark/conf/spark-defaults.conf;
 53 |             {{- end }}
 54 |             {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
 55 |               echo "spark.kubernetes.driver.secrets.{{ .Release.Name }}-secrets   /etc/secrets" >> /opt/spark/conf/spark-defaults.conf;
 56 |             {{- end }}
 57 |             {{- if eq .Values.jupyterService.password "" }}
 58 |               . /usr/local/bin/start.sh jupyter notebook --NotebookApp.token='' --NotebookApp.port={{ .Values.jupyterService.jupyterPort }};
 59 |             {{ else }}
 60 |               echo "{ \"NotebookApp\": { \"password\": \"REPLACE_ME\" } }" > ~/.jupyter/jupyter_notebook_config.json;
 61 |               printf "import os\nfrom notebook.auth import passwd\nprint(passwd('{{ .Values.jupyterService.password }}'))" > hash.py;
 62 |               export HASHED=`python hash.py`;
 63 |               rm hash.py;
 64 |               sed -i -e "s/REPLACE_ME/${HASHED}/g" ~/.jupyter/jupyter_notebook_config.json;
 65 |               . /usr/local/bin/start.sh jupyter notebook --NotebookApp.port={{ .Values.jupyterService.jupyterPort }};
 66 |             {{- end }}
 67 |           livenessProbe:
 68 |             httpGet:
 69 |               path: /
 70 |               port: http
 71 |           readinessProbe:
 72 |             httpGet:
 73 |               path: /
 74 |               port: http
 75 |           resources:
 76 | {{ toYaml .Values.resources | indent 12 }}
 77 |           volumeMounts:
 78 |           - name: data
 79 |             mountPath: /data/
 80 |           - name: spark-config
 81 |             mountPath: /tmp/conf/spark/
 82 |           - name: jupyter-config
 83 |             mountPath: /tmp/conf/jupyter/
 84 |         {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
 85 |           - name: secrets-volume
 86 |             mountPath: /etc/secrets/
 87 |         {{- end }}
 88 |     {{- with .Values.nodeSelector }}
 89 |       nodeSelector:
 90 | {{ toYaml . | indent 8 }}
 91 |     {{- end }}
 92 |       serviceAccount: {{ .Values.serviceAccount }}
 93 |     {{- with .Values.affinity }}
 94 |       affinity:
 95 | {{ toYaml . | indent 8 }}
 96 |     {{- end }}
 97 |     {{- with .Values.tolerations }}
 98 |       tolerations:
 99 | {{ toYaml . | indent 8 }}
100 |     {{- end }}
101 |       volumes:
102 |         - name: data
103 |         {{- if .Values.persistence.enabled }}
104 |           persistentVolumeClaim:
105 |             claimName: {{ .Values.persistence.existingClaim | default (include "jupyter-with-spark.fullname" .) }}
106 |         {{- else }}
107 |           emptyDir: {}
108 |         {{- end }}
109 |         - name: jupyter-config
110 |           configMap:
111 |             name: {{ .Release.Name }}-jupyter-configmap
112 |         - name: spark-config
113 |           configMap:
114 |             name: {{ .Release.Name }}-jp-spark-configmap
115 |         {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
116 |         - name: secrets-volume
117 |           secret:
118 |             secretName: {{ .Release.Name }}-jp-secrets
119 |         {{- end }}


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/pvc.yaml:
--------------------------------------------------------------------------------
 1 | {{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }}
 2 | kind: PersistentVolumeClaim
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: {{ template "jupyter-with-spark.fullname" . }}
 6 |   labels:
 7 |     app: {{ template "jupyter-with-spark.fullname" . }}
 8 |     chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 9 |     release: "{{ .Release.Name }}"
10 |     heritage: "{{ .Release.Service }}"
11 | {{- if .Values.persistence.keepResource }}
12 |   annotations:
13 |     "helm.sh/resource-policy": keep
14 | {{- end }}
15 | spec:
16 |   accessModes:
17 |     - {{ .Values.persistence.accessMode | quote }}
18 |   resources:
19 |     requests:
20 |       storage: {{ .Values.persistence.size | quote }}
21 | {{- if .Values.persistence.storageClass }}
22 | {{- if (eq "-" .Values.persistence.storageClass) }}
23 |   storageClassName: ""
24 | {{- else }}
25 |   storageClassName: "{{ .Values.persistence.storageClass }}"
26 | {{- end }}
27 | {{- end }}
28 | {{- end }}
29 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ .Release.Name }}-jupyter-spark
 5 |   labels:
 6 |     app: {{ template "jupyter-with-spark.name" . }}
 7 |     chart: {{ template "jupyter-with-spark.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   type: {{ .Values.jupyterService.type | default "LoadBalancer" | quote }}
12 |   ports:
13 |     - port: {{ .Values.jupyterService.jupyterPort }}
14 |       targetPort: http
15 |       protocol: TCP
16 |       name: http
17 |     - port: {{ .Values.jupyterService.sparkUIPort }}
18 |       targetPort: web-ui
19 |       protocol: TCP
20 |       name: web-ui
21 |   selector:
22 |     app: {{ template "jupyter-with-spark.name" . }}
23 |     release: {{ .Release.Name }}
24 | ---
25 | 


--------------------------------------------------------------------------------
/charts/jupyter-with-spark/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for jupyter-with-spark helm chart.
 2 | # Declare variables to be passed into your templates.
 3 | 
 4 | image:
 5 |   repository: snappydatainc/jupyter-notebook
 6 |   tag: 5.2.2-spark-v2.2.0-kubernetes-0.5.1
 7 |   pullPolicy: IfNotPresent
 8 | 
 9 | jupyterService:
10 |   type: LoadBalancer
11 |   jupyterPort: 8888
12 |   sparkUIPort: 4040
13 |   # Set your password to access the notebook server. A default ('abc123') has been set for you.
14 |   # Setting the password to empty string will disable the authentication (not recommended).
15 |   password: 'abc123'
16 | 
17 | sparkWebUI:
18 |   type: LoadBalancer
19 |   port: 4040
20 | 
21 | serviceAccount: default
22 | mountSecrets: false
23 | 
24 | sparkEventLog:
25 |   enableHistoryEvents: false
26 |   # eventsLogDir should point to a URI of GCS bucket where history events will be dumped
27 |   eventLogDir: "gs://spark-history-server-store/"
28 | 
29 | persistence:
30 |   enabled: false
31 |   # A manually managed Persistent Volume and Claim
32 |   # Requires persistence.enabled: true
33 |   # If defined, PVC must be created manually before the volume can be bound.
34 |   # existingClaim:
35 | 
36 |   # If defined, storageClassName: <storageClass>
37 |   # If set to "-", storageClassName: "", which disables dynamic provisioning
38 |   # If undefined (the default) or set to null, no storageClassName spec is
39 |   #   set, choosing the default provisioner.  (gp2 on AWS, standard on
40 |   #   GKE, Azure & OpenStack)
41 |   #
42 |   # storageClass: "-"
43 |   accessMode: ReadWriteOnce
44 |   size: 6Gi
45 |   # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned
46 |   keepResource: true
47 | 
48 | resources: {}
49 |   # We usually recommend not to specify default resources and to leave this as a conscious
50 |   # choice for the user. This also increases chances charts run on environments with little
51 |   # resources, such as Minikube. If you do want to specify resources, uncomment the following
52 |   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
53 |   # limits:
54 |   #  cpu: 100m
55 |   #  memory: 128Mi
56 |   # requests:
57 |   #  cpu: 100m
58 |   #  memory: 128Mi
59 | 
60 | nodeSelector: {}
61 | 
62 | tolerations: []
63 | 
64 | affinity: {}
65 | 
66 | #internal attribute, do not change
67 | global:
68 |   umbrellaChart: false


--------------------------------------------------------------------------------
/charts/snappydata/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 
23 | # PAS tile specific artifacts
24 | images/
25 | plans/
26 | plans.yaml


--------------------------------------------------------------------------------
/charts/snappydata/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | appVersion: "1.0"
3 | description: A Helm chart for launching SnappyData cluster on Kubernetes.
4 | name: snappydata
5 | version: 0.1.0
6 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/debug.conf.template:
--------------------------------------------------------------------------------
1 | MEMBERS_FILE=$SNAPPY_HOME/work/members.txt
2 | NO_OF_STACK_DUMPS=2
3 | INTERVAL_BETWEEN_DUMPS=10
4 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/docker.properties.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | spark.mesos.executor.docker.image: <image built from `../docker/spark-mesos/Dockerfile`>
19 | spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro
20 | spark.mesos.executor.home: /opt/spark
21 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/fairscheduler.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <!--
 4 |    Licensed to the Apache Software Foundation (ASF) under one or more
 5 |    contributor license agreements.  See the NOTICE file distributed with
 6 |    this work for additional information regarding copyright ownership.
 7 |    The ASF licenses this file to You under the Apache License, Version 2.0
 8 |    (the "License"); you may not use this file except in compliance with
 9 |    the License.  You may obtain a copy of the License at
10 | 
11 |        http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |    Unless required by applicable law or agreed to in writing, software
14 |    distributed under the License is distributed on an "AS IS" BASIS,
15 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |    See the License for the specific language governing permissions and
17 |    limitations under the License.
18 | -->
19 | 
20 | <allocations>
21 |   <pool name="default">
22 |     <schedulingMode>FAIR</schedulingMode>
23 |     <weight>1</weight>
24 |   </pool>
25 |   <pool name="lowlatency">
26 |     <schedulingMode>FAIR</schedulingMode>
27 |     <weight>2</weight>
28 |     <minShare>2</minShare>
29 |   </pool>
30 | </allocations>
31 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/fairscheduler.xml.template:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <!--
 4 |    Licensed to the Apache Software Foundation (ASF) under one or more
 5 |    contributor license agreements.  See the NOTICE file distributed with
 6 |    this work for additional information regarding copyright ownership.
 7 |    The ASF licenses this file to You under the Apache License, Version 2.0
 8 |    (the "License"); you may not use this file except in compliance with
 9 |    the License.  You may obtain a copy of the License at
10 | 
11 |        http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |    Unless required by applicable law or agreed to in writing, software
14 |    distributed under the License is distributed on an "AS IS" BASIS,
15 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |    See the License for the specific language governing permissions and
17 |    limitations under the License.
18 | -->
19 | 
20 | <allocations>
21 |   <pool name="production">
22 |     <schedulingMode>FAIR</schedulingMode>
23 |     <weight>1</weight>
24 |     <minShare>2</minShare>
25 |   </pool>
26 |   <pool name="test">
27 |     <schedulingMode>FIFO</schedulingMode>
28 |     <weight>2</weight>
29 |     <minShare>3</minShare>
30 |   </pool>
31 | </allocations>
32 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/leads.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you
 5 | # may not use this file except in compliance with the License. You
 6 | # may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 | # implied. See the License for the specific language governing
14 | # permissions and limitations under the License. See accompanying
15 | # LICENSE file.
16 | #
17 | #
18 | # Here are examples using common configuration properties
19 | # I) 
20 | # Specify the host name on which a Snappy lead will be started. Also
21 | # specify the directory where the logs and metadata files
22 | # for that lead instance will be created. If the directory and properties
23 | # are not specified a default directory is created inside the SNAPPY_HOME directory.
24 | # 
25 | #     lead1   -dir=/tmp/data/lead (config args)
26 | #
27 | # II) 
28 | # Below is an example of how you can specify multiple locators for a lead and also
29 | # set its heap size to 8 GB.
30 | # 
31 | #     lead1   -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=8g
32 | # 
33 | # III)
34 | # Another example which shows how to specify Spark properties.
35 | # 
36 | #     lead1   -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16
37 | #
38 | # IV) Start the SnappyData Zeppelin interpreter on the Lead node
39 | #
40 | #     lead1   -dir=/tmp/data/server -spark.ui.port=3333 -spark.executor.cores=16 -zeppelin.interpreter.enable=true -classpath=<Path to jar of Zeppelin Interpreter for SnappyData>
41 | #
42 | # For more options, see http://snappydatainc.github.io/snappydata/configuration/#configuration
43 | localhost
44 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/locators.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you
 5 | # may not use this file except in compliance with the License. You
 6 | # may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 | # implied. See the License for the specific language governing
14 | # permissions and limitations under the License. See accompanying
15 | # LICENSE file.
16 | #
17 | #
18 | # By default, SnappyData launch scripts will start a single locator on localhost
19 | # and uses <snappyData_dir>/work/localhost-locator-1/ as the directory for logs and 
20 | # statistics. 
21 | # Assuming your network is ssh enabled you can add hostnames (one line per host) to start
22 | # locator on multiple hosts. 
23 | #
24 | # Example configurations:
25 | # I) Configuring the hostname/IP address for cluster members to find the locator:
26 | #
27 | # By default, locator binds to provided hostname on port 10334 for discovering other members of the cluster.
28 | # Optionally set peer-discovery-address to a hostname/IP (usually the internal LAN IP) where other members of
29 | # cluster can talk to locator (configured as their -locators option) which is the provided hostname by default,
30 | # and peer-discover-port if you want to change port from the default 10334.
31 | # The peer-discovery-address can be a wildcard like 0.0.0.0 to listen on all interfaces.
32 | #
33 | #     locator1 -peer-discovery-port=9988 -locators=locator2:8899
34 | #
35 | # If there are multiple locators in the cluster, then specify hostname:port of other locators in the
36 | # -locators option.
37 | # 
38 | #     locator1 -peer-discovery-port=9988 -locators=locator2:8899
39 | #     locator2 -peer-discovery-port=9988 -locators=locator1:8899
40 | # 
41 | # II) Using client bind address:
42 | #
43 | # One can specify bind address for clients to allow clients from outside this machine to connect
44 | # using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost).
45 | #
46 | # In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments),
47 | # you should also configure the -hostname-for-clients else clients from outside the network
48 | # will not be able to connect to the locators/servers. It should be set to the public hostname
49 | # or public IP address that will be sent to clients to connect to. It can be skipped for cases
50 | # where private hostname is the same as public hostname (e.g. DNS translates appropriately).
51 | # Default is the `client-bind-address` of the locator.
52 | #
53 | #     <private hostname/IP> -client-bind-address=<private hostname/IP> -hostname-for-clients=<public hostname/IP for clients>
54 | #
55 | # III) Logging to different directory 
56 | # Specify the startup directory where the logs and configuration files for that locator instance
57 | # are managed. 
58 | #
59 | #     locator1 -dir=/tmp/data/locator -client-bind-address=locator1
60 | # 
61 | # For more configuration options, see
62 | # http://snappydatainc.github.io/snappydata/configuration/#configuration
63 | localhost
64 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/log4j.properties.template:
--------------------------------------------------------------------------------
  1 | #
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | #
 17 | 
 18 | log4j.rootCategory=INFO, file
 19 | 
 20 | # RollingFile appender
 21 | log4j.appender.file=org.apache.log4j.RollingFileAppender
 22 | log4j.appender.file.append=true
 23 | log4j.appender.file.file=snappydata.log
 24 | log4j.appender.file.MaxFileSize=1GB
 25 | log4j.appender.file.MaxBackupIndex=10000
 26 | log4j.appender.file.layout=io.snappydata.log4j.PatternLayout
 27 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n
 28 | 
 29 | # Console appender
 30 | log4j.appender.console=org.apache.log4j.ConsoleAppender
 31 | log4j.appender.console.target=System.out
 32 | log4j.appender.console.layout=io.snappydata.log4j.PatternLayout
 33 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS zzz} %t %p %c{1}: %m%n
 34 | 
 35 | # Ignore messages below warning level from Jetty, because it's a bit verbose
 36 | log4j.logger.org.spark-project.jetty=WARN
 37 | org.spark-project.jetty.LEVEL=WARN
 38 | log4j.logger.org.mortbay.jetty=WARN
 39 | log4j.logger.org.eclipse.jetty=WARN
 40 | 
 41 | # Some packages are noisy for no good reason.
 42 | log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false
 43 | log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF
 44 | 
 45 | log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false
 46 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF
 47 | 
 48 | log4j.additivity.hive.log=false
 49 | log4j.logger.hive.log=OFF
 50 | 
 51 | log4j.additivity.parquet.hadoop.ParquetRecordReader=false
 52 | log4j.logger.parquet.hadoop.ParquetRecordReader=OFF
 53 | 
 54 | log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false
 55 | log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF
 56 | 
 57 | log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false
 58 | log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF
 59 | 
 60 | log4j.additivity.hive.ql.metadata.Hive=false
 61 | log4j.logger.hive.ql.metadata.Hive=OFF
 62 | 
 63 | log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false
 64 | log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR
 65 | 
 66 | # Other Spark classes that generate unnecessary logs at INFO level
 67 | log4j.logger.org.apache.spark.broadcast.TorrentBroadcast=WARN
 68 | log4j.logger.org.apache.spark.ContextCleaner=WARN
 69 | log4j.logger.org.apache.spark.MapOutputTracker=WARN
 70 | log4j.logger.org.apache.spark.scheduler.TaskSchedulerImpl=WARN
 71 | log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN
 72 | log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN
 73 | log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN
 74 | log4j.logger.org.apache.spark.scheduler.FairSchedulableBuilder=WARN
 75 | log4j.logger.org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint=WARN
 76 | log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN
 77 | log4j.logger.org.apache.hadoop.hive=WARN
 78 | # for all Spark generated code (including ad-hoc UnsafeProjection calls etc)
 79 | log4j.logger.org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator=WARN
 80 | log4j.logger.org.apache.spark.sql.execution.datasources=WARN
 81 | log4j.logger.org.apache.spark.scheduler.SnappyTaskSchedulerImpl=WARN
 82 | log4j.logger.org.apache.spark.MapOutputTrackerMasterEndpoint=WARN
 83 | log4j.logger.org.apache.spark.MapOutputTrackerMaster=WARN
 84 | log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN
 85 | log4j.logger.org.apache.spark.MapOutputTrackerWorker=WARN
 86 | log4j.logger.org.apache.parquet=ERROR
 87 | log4j.logger.parquet=ERROR
 88 | log4j.logger.org.apache.hadoop.io.compress=WARN
 89 | log4j.logger.spark.jobserver.LocalContextSupervisorActor=WARN
 90 | log4j.logger.spark.jobserver.JarManager=WARN
 91 | log4j.logger.org.apache.spark.sql.hive.HiveClientUtil=WARN
 92 | log4j.logger.org.datanucleus=ERROR
 93 | # Task logger created in SparkEnv
 94 | log4j.logger.org.apache.spark.Task=WARN
 95 | log4j.logger.org.apache.spark.sql.catalyst.parser.CatalystSqlParser=WARN
 96 | 
 97 | # Keep log-level of some classes as INFO even if root level is higher
 98 | log4j.logger.io.snappydata.impl.LeadImpl=INFO
 99 | log4j.logger.io.snappydata.impl.ServerImpl=INFO
100 | log4j.logger.io.snappydata.impl.LocatorImpl=INFO
101 | log4j.logger.spray.can.server.HttpListener=INFO
102 | 
103 | # for generated code of plans
104 | # log4j.logger.org.apache.spark.sql.execution.WholeStageCodegenExec=DEBUG
105 | # for SnappyData generated code used on store (ComplexTypeSerializer, JDBC inserts ...)
106 | # log4j.logger.org.apache.spark.sql.store.CodeGeneration=DEBUG
107 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/metrics.properties.template:
--------------------------------------------------------------------------------
  1 | #
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | #
 17 | 
 18 | #  syntax: [instance].sink|source.[name].[options]=[value]
 19 | 
 20 | #  This file configures Spark's internal metrics system. The metrics system is
 21 | #  divided into instances which correspond to internal components.
 22 | #  Each instance can be configured to report its metrics to one or more sinks.
 23 | #  Accepted values for [instance] are "master", "worker", "executor", "driver",
 24 | #  and "applications". A wildcard "*" can be used as an instance name, in
 25 | #  which case all instances will inherit the supplied property.
 26 | #
 27 | #  Within an instance, a "source" specifies a particular set of grouped metrics.
 28 | #  there are two kinds of sources:
 29 | #    1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
 30 | #    collect a Spark component's internal state. Each instance is paired with a
 31 | #    Spark source that is added automatically.
 32 | #    2. Common sources, like JvmSource, which will collect low level state.
 33 | #    These can be added through configuration options and are then loaded
 34 | #    using reflection.
 35 | #
 36 | #  A "sink" specifies where metrics are delivered to. Each instance can be
 37 | #  assigned one or more sinks.
 38 | #
 39 | #  The sink|source field specifies whether the property relates to a sink or
 40 | #  source.
 41 | #
 42 | #  The [name] field specifies the name of source or sink.
 43 | #
 44 | #  The [options] field is the specific property of this source or sink. The
 45 | #  source or sink is responsible for parsing this property.
 46 | #
 47 | #  Notes:
 48 | #    1. To add a new sink, set the "class" option to a fully qualified class
 49 | #    name (see examples below).
 50 | #    2. Some sinks involve a polling period. The minimum allowed polling period
 51 | #    is 1 second.
 52 | #    3. Wildcard properties can be overridden by more specific properties.
 53 | #    For example, master.sink.console.period takes precedence over
 54 | #    *.sink.console.period.
 55 | #    4. A metrics specific configuration
 56 | #    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
 57 | #    added to Java properties using -Dspark.metrics.conf=xxx if you want to
 58 | #    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
 59 | #    and it will be loaded automatically.
 60 | #    5. The MetricsServlet sink is added by default as a sink in the master,
 61 | #    worker and driver, and you can send HTTP requests to the "/metrics/json"
 62 | #    endpoint to get a snapshot of all the registered metrics in JSON format.
 63 | #    For master, requests to the "/metrics/master/json" and
 64 | #    "/metrics/applications/json" endpoints can be sent separately to get
 65 | #    metrics snapshots of the master instance and applications. This
 66 | #    MetricsServlet does not have to be configured.
 67 | 
 68 | ## List of available common sources and their properties.
 69 | 
 70 | # org.apache.spark.metrics.source.JvmSource
 71 | #   Note: Currently, JvmSource is the only available common source.
 72 | #         It can be added to an instance by setting the "class" option to its
 73 | #         fully qualified class name (see examples below).
 74 | 
 75 | ## List of available sinks and their properties.
 76 | 
 77 | # org.apache.spark.metrics.sink.ConsoleSink
 78 | #   Name:   Default:   Description:
 79 | #   period  10         Poll period
 80 | #   unit    seconds    Unit of the poll period
 81 | 
 82 | # org.apache.spark.metrics.sink.CSVSink
 83 | #   Name:     Default:   Description:
 84 | #   period    10         Poll period
 85 | #   unit      seconds    Unit of the poll period
 86 | #   directory /tmp       Where to store CSV files
 87 | 
 88 | # org.apache.spark.metrics.sink.GangliaSink
 89 | #   Name:     Default:   Description:
 90 | #   host      NONE       Hostname or multicast group of the Ganglia server,
 91 | #                        must be set
 92 | #   port      NONE       Port of the Ganglia server(s), must be set
 93 | #   period    10         Poll period
 94 | #   unit      seconds    Unit of the poll period
 95 | #   ttl       1          TTL of messages sent by Ganglia
 96 | #   dmax      0          Lifetime in seconds of metrics (0 never expired)
 97 | #   mode      multicast  Ganglia network mode ('unicast' or 'multicast')
 98 | 
 99 | # org.apache.spark.metrics.sink.JmxSink
100 | 
101 | # org.apache.spark.metrics.sink.MetricsServlet
102 | #   Name:     Default:   Description:
103 | #   path      VARIES*    Path prefix from the web server root
104 | #   sample    false      Whether to show entire set of samples for histograms
105 | #                        ('false' or 'true')
106 | #
107 | # * Default path is /metrics/json for all instances except the master. The
108 | #   master has two paths:
109 | #     /metrics/applications/json # App information
110 | #     /metrics/master/json       # Master information
111 | 
112 | # org.apache.spark.metrics.sink.GraphiteSink
113 | #   Name:     Default:      Description:
114 | #   host      NONE          Hostname of the Graphite server, must be set
115 | #   port      NONE          Port of the Graphite server, must be set
116 | #   period    10            Poll period
117 | #   unit      seconds       Unit of the poll period
118 | #   prefix    EMPTY STRING  Prefix to prepend to every metric's name
119 | #   protocol  tcp           Protocol ("tcp" or "udp") to use
120 | 
121 | ## Examples
122 | # Enable JmxSink for all instances by class name
123 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
124 | 
125 | # Enable ConsoleSink for all instances by class name
126 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
127 | 
128 | # Polling period for the ConsoleSink
129 | #*.sink.console.period=10
130 | # Unit of the polling period for the ConsoleSink
131 | #*.sink.console.unit=seconds
132 | 
133 | # Polling period for the ConsoleSink specific for the master instance
134 | #master.sink.console.period=15
135 | # Unit of the polling period for the ConsoleSink specific for the master
136 | # instance
137 | #master.sink.console.unit=seconds
138 | 
139 | # Enable CsvSink for all instances by class name
140 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
141 | 
142 | # Polling period for the CsvSink
143 | #*.sink.csv.period=1
144 | # Unit of the polling period for the CsvSink
145 | #*.sink.csv.unit=minutes
146 | 
147 | # Polling directory for CsvSink
148 | #*.sink.csv.directory=/tmp/
149 | 
150 | # Polling period for the CsvSink specific for the worker instance
151 | #worker.sink.csv.period=10
152 | # Unit of the polling period for the CsvSink specific for the worker instance
153 | #worker.sink.csv.unit=minutes
154 | 
155 | # Enable Slf4jSink for all instances by class name
156 | #*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
157 | 
158 | # Polling period for the Slf4JSink
159 | #*.sink.slf4j.period=1
160 | # Unit of the polling period for the Slf4jSink
161 | #*.sink.slf4j.unit=minutes
162 | 
163 | # Enable JvmSource for instance master, worker, driver and executor
164 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
165 | 
166 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
167 | 
168 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
169 | 
170 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
171 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/servers.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2017 SnappyData, Inc. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you
 5 | # may not use this file except in compliance with the License. You
 6 | # may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 | # implied. See the License for the specific language governing
14 | # permissions and limitations under the License. See accompanying
15 | # LICENSE file.
16 | #
17 | #
18 | # Specify the host name on which a Snappy server will be started. Also
19 | # specify the startup directory where the logs and configuration files
20 | # for that server instance are kept. If the directory and properties
21 | # are not specified a default directory is created inside the SNAPPY_HOME directory.
22 | #
23 | #     server1   -dir=/tmp/data/server [config args]
24 | #
25 | # An example of how you can specify multiple locators for a server and
26 | # set its heap size to 64 GB.
27 | # 
28 | #     server1   -dir=/tmp/data/server -locators=locator1:9988,locator2:8899 -heap-size=64g
29 | # 
30 | # One can specify bind address for clients to allow clients from outside this machine to connect
31 | # using JDBC/ODBC/Thrift protocols (default for `client-bind-address` is localhost).
32 | #
33 | # In environments with an internal hostname/IP and a different public hostname (e.g. cloud deployments),
34 | # you should also configure the -hostname-for-clients else clients from outside the network
35 | # will not be able to connect to the servers. It should be set to the public hostname
36 | # or public IP address that will be sent to clients to connect to. It can be skipped for cases
37 | # where private hostname is the same as public hostname (e.g. DNS translates appropriately).
38 | # Default is the `client-bind-address` of the server.
39 | #
40 | #     <private hostname/IP> -client-bind-address=<private hostname/IP> -client-port=1555 -hostname-for-clients=<public hostname/IP for clients>
41 | # 
42 | # For more configuration options,
43 | # see http://snappydatainc.github.io/snappydata/configuration/#configuration
44 | localhost
45 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/slaves.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # A Spark Worker will be started on each of the machines listed below.
19 | localhost


--------------------------------------------------------------------------------
/charts/snappydata/conf/snappy-env.sh.template:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #
 4 | # Licensed to the Apache Software Foundation (ASF) under one or more
 5 | # contributor license agreements.  See the NOTICE file distributed with
 6 | # this work for additional information regarding copyright ownership.
 7 | # The ASF licenses this file to You under the Apache License, Version 2.0
 8 | # (the "License"); you may not use this file except in compliance with
 9 | # the License.  You may obtain a copy of the License at
10 | #
11 | #    http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | # This file is sourced when running various Snappy programs.
21 | # Copy it as snappy-env.sh and edit that to configure Spark and Snappy for your site.
22 | 
23 | # Options read when launching programs locally with
24 | # ./bin/run-example or ./bin/spark-submit
25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
28 | # - SPARK_CLASSPATH, default classpath entries to append
29 | 
30 | # Options read by executors and drivers running inside the cluster
31 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
32 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
33 | # - SPARK_CLASSPATH, default classpath entries to append
34 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
35 | 
36 | # Options read by Snappy servers, leads and locators
37 | # - LOCATOR_STARTUP_OPTIONS, options and properties that are passed to all the locators.
38 | # - SERVER_STARTUP_OPTIONS, options and properties that are passed to all the snappy servers
39 | # - LEAD_STARTUP_OPTIONS, options and properties that are passed to all the lead opts.


--------------------------------------------------------------------------------
/charts/snappydata/conf/spark-defaults.conf.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Default system properties included when running spark-submit.
19 | # This is useful for setting default environmental settings.
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | 


--------------------------------------------------------------------------------
/charts/snappydata/conf/spark-env.sh.template:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #
 4 | # Licensed to the Apache Software Foundation (ASF) under one or more
 5 | # contributor license agreements.  See the NOTICE file distributed with
 6 | # this work for additional information regarding copyright ownership.
 7 | # The ASF licenses this file to You under the Apache License, Version 2.0
 8 | # (the "License"); you may not use this file except in compliance with
 9 | # the License.  You may obtain a copy of the License at
10 | #
11 | #    http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | # This file is sourced when running various Spark programs.
21 | # Copy it as spark-env.sh and edit that to configure Spark for your site.
22 | 
23 | # Options read when launching programs locally with
24 | # ./bin/run-example or ./bin/spark-submit
25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
28 | # - SPARK_CLASSPATH, default classpath entries to append
29 | 
30 | # Options read by executors and drivers running inside the cluster
31 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
32 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
33 | # - SPARK_CLASSPATH, default classpath entries to append
34 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
35 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
36 | 
37 | # Options read in YARN client mode
38 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
39 | # - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
40 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
41 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
42 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
43 | 
44 | # Options for the daemons used in the standalone deploy mode
45 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
46 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
47 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
48 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine
49 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
50 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
51 | # - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
52 | # - SPARK_WORKER_DIR, to set the working directory of worker processes
53 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
54 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
55 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
56 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
57 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
58 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
59 | 
60 | # Generic options for the daemons used in the standalone deploy mode
61 | # - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
62 | # - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
63 | # - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
64 | # - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
65 | # - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
66 | # - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
67 | 


--------------------------------------------------------------------------------
/charts/snappydata/plans.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Specific to PCF tile
 3 | #############################
 4 | - name: "small"
 5 |   description: "default (small) plan for snappydata cluster"
 6 |   file: "small.yaml"
 7 | - name: "medium"
 8 |   description: "medium plan for snappydata cluster"
 9 |   file: "medium.yaml"
10 | - name: "large"
11 |   description: "large plan for snappydata cluster"
12 |   file: "large.yaml"
13 | 


--------------------------------------------------------------------------------
/charts/snappydata/plans/large.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Specific to PCF tile
 3 | #############################
 4 | locators:
 5 |   resources:
 6 |     requests:
 7 |       memory: 1024Mi
 8 |   persistence:
 9 |     size: 10Gi
10 | 
11 | servers:
12 |   replicaCount: 2
13 |   resources:
14 |     requests:
15 |       memory: 4096Mi
16 |   persistence:
17 |     size: 20Gi
18 | 
19 | leaders:
20 |   resources:
21 |     requests:
22 |       memory: 4096Mi
23 |   persistence:
24 |     size: 20Gi


--------------------------------------------------------------------------------
/charts/snappydata/plans/medium.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Specific to PCF tile
 3 | #############################
 4 | locators:
 5 |   resources:
 6 |     requests:
 7 |       memory: 1024Mi
 8 |   persistence:
 9 |     size: 10Gi
10 | 
11 | servers:
12 |   replicaCount: 2
13 |   resources:
14 |     requests:
15 |       memory: 2048Mi
16 |   persistence:
17 |     size: 10Gi
18 | 
19 | leaders:
20 |   resources:
21 |     requests:
22 |       memory: 2048Mi
23 |   persistence:
24 |     size: 10Gi


--------------------------------------------------------------------------------
/charts/snappydata/plans/small.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Specific to PCF tile
 3 | #############################
 4 | locators:
 5 |   resources:
 6 |     requests:
 7 |       memory: 1024Mi
 8 |   persistence:
 9 |     size: 10Gi
10 | 
11 | servers:
12 |   replicaCount: 2
13 |   resources:
14 |     requests:
15 |       memory: 1024Mi
16 |   persistence:
17 |     size: 10Gi
18 | 
19 | leaders:
20 |   resources:
21 |     requests:
22 |       memory: 1024Mi
23 |   persistence:
24 |     size: 10Gi


--------------------------------------------------------------------------------
/charts/snappydata/templates/NOTES.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/snappydata/templates/NOTES.txt


--------------------------------------------------------------------------------
/charts/snappydata/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "snappydata.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "snappydata.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "snappydata.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/snappydata/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 |   name: {{ .Release.Name }}-configmap
5 | data:
6 | {{ (.Files.Glob "conf/*").AsConfig | indent 2 }}


--------------------------------------------------------------------------------
/charts/snappydata/templates/leader_statefulset.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Statefulset for leaders
 3 | #############################
 4 | apiVersion: apps/v1
 5 | kind: StatefulSet
 6 | metadata:
 7 |   name: "{{ .Release.Name }}-leader"
 8 | #  TODO: Do we need to change, for example like the one given below?
 9 | #  name: {{ template "snappydata.fullname" . }}
10 |   labels:
11 |     app: {{ template "snappydata.name" . }}
12 |     chart: {{ template "snappydata.chart" . }}
13 |     release: {{ .Release.Name }}
14 |     heritage: {{ .Release.Service }}
15 | spec:
16 |   serviceName: "{{ .Release.Name }}-leader"
17 |   replicas: {{ .Values.leaders.replicaCount | default 1 }}
18 |   selector:
19 |     matchLabels:
20 |       app: "{{ .Release.Name }}-leader"
21 |   template:
22 |     metadata:
23 |       labels:
24 |         app: "{{ .Release.Name }}-leader"
25 |         release: {{ .Release.Name }}
26 |     spec:
27 |       terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }}
28 |       {{- if .Values.imagePullSecrets }}
29 |       imagePullSecrets:
30 |         - name: {{ .Values.imagePullSecrets }}
31 |       {{- end }}
32 |       containers:
33 |       - name: "{{ .Release.Name }}-leader"
34 |         image: "{{ .Values.image }}:{{ .Values.imageTag }}"
35 |         imagePullPolicy: {{ .Values.imagePullPolicy }}
36 |         ports:
37 |         - containerPort: 5050
38 |           name: sparkui
39 |         livenessProbe:
40 |           exec:
41 |             command:
42 |              - /bin/sh
43 |              - -c
44 |              - /opt/snappydata/sbin/snappy-leads.sh status | grep -e running -e waiting
45 | #         initial delay intentionally kept large, as lead waits(250 seconds) for servers to be available
46 |           initialDelaySeconds: {{ .Values.leaders.initialDelaySeconds | default 360 }}
47 |         command:
48 |           - "/bin/bash"
49 |           - "-c"
50 |           - >
51 |             cp /snappy_conf/* /opt/snappydata/conf;
52 | 
53 |             WAIT_FOR_SERVICE_ARG="--get-ip {{ .Release.Name }}-leader-public --wait-for {{ .Release.Name }}-server 1527";
54 |             USER_PROVIDED_STARTUP_CONF={{ .Values.leaders.conf | default "" | quote }};
55 |             SNAPPY_STARTUP_CONF="-locators={{ .Release.Name }}-locator:10334 $USER_PROVIDED_STARTUP_CONF";
56 |             echo "Executing command: start lead $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF";
57 | 
58 |             start lead $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF;
59 |         lifecycle:
60 |           preStop:
61 |             exec:
62 |               command: ["/opt/snappydata/sbin/snappy-leads.sh", "stop"]
63 |         resources:
64 | {{ toYaml .Values.leaders.resources | indent 12 }}
65 |     {{- with .Values.nodeSelector }}
66 |       nodeSelector:
67 | {{ toYaml . | indent 8 }}
68 |     {{- end }}
69 |     {{- with .Values.affinity }}
70 |       affinity:
71 | {{ toYaml . | indent 8 }}
72 |     {{- end }}
73 |     {{- with .Values.tolerations }}
74 |       tolerations:
75 | {{ toYaml . | indent 8 }}
76 |     {{- end }}
77 |         volumeMounts:
78 |         - mountPath: "/opt/snappydata/work"
79 |           name: snappy-disk-claim
80 |         - mountPath: /snappy_conf
81 |           name: snappy-config-properties
82 |       volumes:
83 |         - name: snappy-config-properties
84 |           configMap:
85 |             name: {{ .Release.Name }}-configmap
86 | 
87 |   volumeClaimTemplates:
88 |   - metadata:
89 |       name: snappy-disk-claim
90 |     spec:
91 |       accessModes: [ {{ .Values.leaders.persistence.accessMode | quote }} ]
92 |       resources:
93 |         requests:
94 |           storage: {{ .Values.leaders.persistence.size | quote }}
95 | {{- if .Values.leaders.persistence.storageClass }}
96 |       storageClassName: {{ .Values.leaders.persistence.storageClass | quote }}
97 | {{- end }}
98 | 


--------------------------------------------------------------------------------
/charts/snappydata/templates/locator_statefulset.yaml:
--------------------------------------------------------------------------------
 1 | #############################
 2 | # Statefulset for locators
 3 | #############################
 4 | apiVersion: apps/v1
 5 | kind: StatefulSet
 6 | metadata:
 7 |   name: "{{ .Release.Name }}-locator"
 8 | #  TODO: Do we need to change, for example like the one given below?
 9 | #  name: {{ template "snappydata.fullname" . }}
10 |   labels:
11 |     app: {{ template "snappydata.name" . }}
12 |     chart: {{ template "snappydata.chart" . }}
13 |     release: {{ .Release.Name }}
14 |     heritage: {{ .Release.Service }}
15 | spec:
16 |   serviceName: "{{ .Release.Name }}-locator"
17 |   replicas: {{ .Values.locators.replicaCount | default 1 }}
18 |   selector:
19 |     matchLabels:
20 |       app: "{{ .Release.Name }}-locator"
21 |   template:
22 |     metadata:
23 |       labels:
24 |         app: "{{ .Release.Name }}-locator"
25 |         release: {{ .Release.Name }}
26 |     spec:
27 |       terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }}
28 |       {{- if .Values.imagePullSecrets }}
29 |       imagePullSecrets:
30 |         - name: {{ .Values.imagePullSecrets }}
31 |       {{- end }}
32 |       containers:
33 |       - name: "{{ .Release.Name }}-locator"
34 |         image: "{{ .Values.image }}:{{ .Values.imageTag }}"
35 |         imagePullPolicy: {{ .Values.imagePullPolicy }}
36 |         ports:
37 |         - containerPort: 10334
38 |           name: locator
39 |         - containerPort: 1527
40 |           name: jdbc
41 |         livenessProbe:
42 |           exec:
43 |             command:
44 |              - /bin/sh
45 |              - -c
46 |              - /opt/snappydata/sbin/snappy-locators.sh status | grep -e running -e waiting
47 |           initialDelaySeconds: {{ .Values.locators.initialDelaySeconds | default 80 }}
48 |         command:
49 |           - "/bin/bash"
50 |           - "-c"
51 |           - >
52 |             cp /snappy_conf/* /opt/snappydata/conf;
53 | 
54 |             SNAPPY_STARTUP_CONF={{ .Values.locators.conf | default "" | quote }};
55 |             echo "Executing command: start locator $SNAPPY_STARTUP_CONF";
56 | 
57 |             start locator $SNAPPY_STARTUP_CONF;
58 |         lifecycle:
59 |           preStop:
60 |             exec:
61 |               command: ["/opt/snappydata/sbin/snappy-locators.sh", "stop"]
62 |         resources:
63 | {{ toYaml .Values.locators.resources | indent 12 }}
64 |     {{- with .Values.nodeSelector }}
65 |       nodeSelector:
66 | {{ toYaml . | indent 8 }}
67 |     {{- end }}
68 |     {{- with .Values.affinity }}
69 |       affinity:
70 | {{ toYaml . | indent 8 }}
71 |     {{- end }}
72 |     {{- with .Values.tolerations }}
73 |       tolerations:
74 | {{ toYaml . | indent 8 }}
75 |     {{- end }}
76 |         volumeMounts:
77 |         - mountPath: "/opt/snappydata/work"
78 |           name: snappy-disk-claim
79 |         - mountPath: /snappy_conf
80 |           name: snappy-config-properties
81 |       volumes:
82 |         - name: snappy-config-properties
83 |           configMap:
84 |             name: {{ .Release.Name }}-configmap
85 | 
86 |   volumeClaimTemplates:
87 |   - metadata:
88 |       name: snappy-disk-claim
89 |     spec:
90 |       accessModes: [ {{ .Values.locators.persistence.accessMode | quote }} ]
91 |       resources:
92 |         requests:
93 |           storage: {{ .Values.locators.persistence.size | quote }}
94 | {{- if .Values.locators.persistence.storageClass }}
95 |       storageClassName: {{ .Values.locators.persistence.storageClass | quote }}
96 | {{- end }}
97 | 


--------------------------------------------------------------------------------
/charts/snappydata/templates/role-binding.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: rbac.authorization.k8s.io/v1
 2 | kind: RoleBinding
 3 | metadata:
 4 |   name: service-view-binding
 5 | roleRef:
 6 |   apiGroup: rbac.authorization.k8s.io
 7 |   kind: ClusterRole
 8 |   name: view
 9 | subjects:
10 |   - kind: ServiceAccount
11 |     name: default
12 | 


--------------------------------------------------------------------------------
/charts/snappydata/templates/server_statefulset.yaml:
--------------------------------------------------------------------------------
  1 | #############################
  2 | # Statefulset for servers
  3 | #############################
  4 | apiVersion: apps/v1
  5 | kind: StatefulSet
  6 | metadata:
  7 |   name: "{{ .Release.Name }}-server"
  8 | #  TODO: Do we need to change, for example like the one given below?
  9 | #  name: {{ template "snappydata.fullname" . }}
 10 |   labels:
 11 |     app: {{ template "snappydata.name" . }}
 12 |     chart: {{ template "snappydata.chart" . }}
 13 |     release: {{ .Release.Name }}
 14 |     heritage: {{ .Release.Service }}
 15 | spec:
 16 |   serviceName: "{{ .Release.Name }}-server"
 17 |   replicas: {{ .Values.servers.replicaCount | default 2 }}
 18 |   selector:
 19 |     matchLabels:
 20 |       app: "{{ .Release.Name }}-server"
 21 |   template:
 22 |     metadata:
 23 |       labels:
 24 |         app: "{{ .Release.Name }}-server"
 25 |         release: {{ .Release.Name }}
 26 |     spec:
 27 |       terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 10 }}
 28 |       {{- if .Values.imagePullSecrets }}
 29 |       imagePullSecrets:
 30 |         - name: {{ .Values.imagePullSecrets }}
 31 |       {{- end }}
 32 |       containers:
 33 |       - name: "{{ .Release.Name }}-server"
 34 |         image: "{{ .Values.image }}:{{ .Values.imageTag }}"
 35 |         imagePullPolicy: {{ .Values.imagePullPolicy }}
 36 |         # Even servers use the same port as locator ... all run on independent pods
 37 |         # ... and, the service will either roundrobin or loadbalance
 38 |         ports:
 39 |         - containerPort: 1527
 40 |           name: jdbc
 41 |         livenessProbe:
 42 |           exec:
 43 |            command:
 44 |            - /bin/sh
 45 |            - -c
 46 |            - /opt/snappydata/sbin/snappy-servers.sh status | grep -e running -e waiting
 47 | #         initial delay intentionally kept large, as server waits(250 seconds) for locator to be available
 48 |           initialDelaySeconds: {{ .Values.servers.initialDelaySeconds | default 360 }}
 49 |         command:
 50 |           - "/bin/bash"
 51 |           - "-c"
 52 |           - >
 53 |             cp /snappy_conf/* /opt/snappydata/conf;
 54 | 
 55 |             WAIT_FOR_SERVICE_ARG="--get-ip {{ .Release.Name }}-server-public --wait-for {{ .Release.Name }}-locator 10334";
 56 |             USER_PROVIDED_STARTUP_CONF={{ .Values.servers.conf | default "" | quote }};
 57 |             SNAPPY_STARTUP_CONF="-locators={{ .Release.Name }}-locator:10334 $USER_PROVIDED_STARTUP_CONF";
 58 |             echo "Executing command: start server $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF";
 59 | 
 60 |             start server $WAIT_FOR_SERVICE_ARG $SNAPPY_STARTUP_CONF;
 61 |         lifecycle:
 62 |           preStop:
 63 |             exec:
 64 |               command: ["/opt/snappydata/sbin/snappy-servers.sh", "stop"]
 65 |         resources:
 66 | {{ toYaml .Values.servers.resources | indent 12 }}
 67 |     {{- with .Values.nodeSelector }}
 68 |       nodeSelector:
 69 | {{ toYaml . | indent 8 }}
 70 |     {{- end }}
 71 |     {{- with .Values.affinity }}
 72 |       affinity:
 73 | {{ toYaml . | indent 8 }}
 74 |     {{- end }}
 75 |     {{- with .Values.tolerations }}
 76 |       tolerations:
 77 | {{ toYaml . | indent 8 }}
 78 |     {{- end }}
 79 |         volumeMounts:
 80 |         - mountPath: "/opt/snappydata/work"
 81 |           name: snappy-disk-claim
 82 |         - mountPath: /snappy_conf
 83 |           name: snappy-config-properties
 84 |       volumes:
 85 |         - name: snappy-config-properties
 86 |           configMap:
 87 |             name: {{ .Release.Name }}-configmap
 88 | 
 89 |   volumeClaimTemplates:
 90 |   - metadata:
 91 |       name: snappy-disk-claim
 92 |     spec:
 93 |       accessModes: [ {{ .Values.servers.persistence.accessMode | quote }} ]
 94 |       resources:
 95 |         requests:
 96 |           storage: {{ .Values.servers.persistence.size | quote }}
 97 | {{- if .Values.servers.persistence.storageClass }}
 98 |       storageClassName: {{ .Values.servers.persistence.storageClass | quote }}
 99 | {{- end }}
100 | 


--------------------------------------------------------------------------------
/charts/snappydata/templates/service.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1
  2 | kind: Service
  3 | metadata:
  4 |   name: "{{ .Release.Name }}-locator-public"
  5 |   labels:
  6 |     app: {{ template "snappydata.name" . }}
  7 |     chart: {{ template "snappydata.chart" . }}
  8 |     release: {{ .Release.Name }}
  9 |     heritage: {{ .Release.Service }}
 10 | spec:
 11 |   ports:
 12 |   - port: 1527
 13 |     targetPort: 1527
 14 |     name: jdbc
 15 |   type: LoadBalancer
 16 |   selector:
 17 |     app: "{{ .Release.Name }}-locator"
 18 | ---
 19 | apiVersion: v1
 20 | kind: Service
 21 | metadata:
 22 |   name: "{{ .Release.Name }}-server-public"
 23 |   labels:
 24 |     app: {{ template "snappydata.name" . }}
 25 |     chart: {{ template "snappydata.chart" . }}
 26 |     release: {{ .Release.Name }}
 27 |     heritage: {{ .Release.Service }}
 28 | spec:
 29 |   ports:
 30 |   - port: 1527
 31 |     targetPort: 1527
 32 |     name: jdbc
 33 |   type: LoadBalancer
 34 |   selector:
 35 |     app: "{{ .Release.Name }}-server"
 36 | ---
 37 | apiVersion: v1
 38 | kind: Service
 39 | metadata:
 40 |   name: "{{ .Release.Name }}-leader-public"
 41 |   labels:
 42 |     app: {{ template "snappydata.name" . }}
 43 |     chart: {{ template "snappydata.chart" . }}
 44 |     release: {{ .Release.Name }}
 45 |     heritage: {{ .Release.Service }}
 46 | spec:
 47 |   ports:
 48 |   - port: 5050
 49 |     targetPort: 5050
 50 |     name: spark
 51 |   - port: 8090
 52 |     targetPort: 8090
 53 |     name: jobserver
 54 |   - port: 3768
 55 |     targetPort: 3768
 56 |     name: zeppelin-interpreter
 57 |   - port: 10000
 58 |     targetPort: 10000
 59 |     name: hive-thrift-server
 60 |   type: LoadBalancer
 61 |   selector:
 62 |     app: "{{ .Release.Name }}-leader"
 63 | ---
 64 | 
 65 | ####################
 66 | # Headless services
 67 | ####################
 68 | apiVersion: v1
 69 | kind: Service
 70 | metadata:
 71 |   name: "{{ .Release.Name }}-locator"
 72 |   labels:
 73 |     app: {{ template "snappydata.name" . }}
 74 |     chart: {{ template "snappydata.chart" . }}
 75 |     release: {{ .Release.Name }}
 76 |     heritage: {{ .Release.Service }}
 77 | spec:
 78 |   ports:
 79 |   - port: 10334
 80 |     targetPort: 10334
 81 |     name: locator
 82 |   - port: 1527
 83 |     targetPort: 1527
 84 |     name: jdbc
 85 |   clusterIP: None
 86 |   selector:
 87 |     app: "{{ .Release.Name }}-locator"
 88 | ---
 89 | apiVersion: v1
 90 | kind: Service
 91 | metadata:
 92 |   name: "{{ .Release.Name }}-server"
 93 |   labels:
 94 |     app: {{ template "snappydata.name" . }}
 95 |     chart: {{ template "snappydata.chart" . }}
 96 |     release: {{ .Release.Name }}
 97 |     heritage: {{ .Release.Service }}
 98 | spec:
 99 |   ports:
100 |   - port: 1527
101 |     targetPort: 1527
102 |     name: jdbc
103 |   clusterIP: None
104 |   selector:
105 |     app: "{{ .Release.Name }}-server"
106 | ---
107 | apiVersion: v1
108 | kind: Service
109 | metadata:
110 |   name: "{{ .Release.Name }}-leader"
111 |   labels:
112 |     app: {{ template "snappydata.name" . }}
113 |     chart: {{ template "snappydata.chart" . }}
114 |     release: {{ .Release.Name }}
115 |     heritage: {{ .Release.Service }}
116 | spec:
117 |   ports:
118 |   - port: 5050
119 |     targetPort: 5050
120 |     name: spark
121 |   clusterIP: None
122 |   selector:
123 |     app: "{{ .Release.Name }}-leader"
124 | ---


--------------------------------------------------------------------------------
/charts/snappydata/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for snappydata.
 2 | # This is a YAML-formatted file.
 3 | 
 4 | image: snappydatainc/snappydata
 5 | imageTag: 1.0.2.1
 6 | imagePullPolicy: IfNotPresent
 7 | # Use "imagePullSecrets" to specify the secret name to be used while pulling image from a private registry
 8 | # Replace "secretname" with the actual name of the secret
 9 | #imagePullSecrets: secretname
10 | 
11 | locators:
12 |   ## config options for locators
13 |   conf: ""
14 |   resources:
15 |     # limits:
16 |     #  cpu: 100m
17 |     #  memory: 128Mi
18 |     requests:
19 |     #  cpu: 100m
20 |       memory: 1024Mi
21 |   ## PV configuration
22 |   persistence:
23 |     ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes
24 |     ## If undefined (the default) or set to null, no storageClassName spec is
25 |     ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
26 |     ##   GKE, Azure & OpenStack)
27 |     ##
28 |     # storageClass:
29 |     accessMode: ReadWriteOnce
30 |     size: 10Gi
31 | 
32 | servers:
33 |   replicaCount: 2
34 |   ## config options for servers
35 |   conf: ""
36 |   resources:
37 |     # limits:
38 |     #  cpu: 100m
39 |     #  memory: 128Mi
40 |     requests:
41 |     #  cpu: 100m
42 |       memory: 4096Mi
43 |   ## PV configuration
44 |   persistence:
45 |     ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes
46 |     ## If undefined (the default) or set to null, no storageClassName spec is
47 |     ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
48 |     ##   GKE, Azure & OpenStack)
49 |     ##
50 |     # storageClass:
51 |     accessMode: ReadWriteOnce
52 |     size: 10Gi
53 | 
54 | leaders:
55 |   ## config options for leaders
56 |   conf: "-zeppelin.interpreter.enable=true"
57 |   resources:
58 |     # limits:
59 |     #  cpu: 100m
60 |     #  memory: 128Mi
61 |     requests:
62 |     #  cpu: 100m
63 |       memory: 4096Mi
64 |   ## PV configuration
65 |   persistence:
66 |     ## Use 'storageClass' to specify the storageClassName to be used while dynamically provisioning volumes
67 |     ## If undefined (the default) or set to null, no storageClassName spec is
68 |     ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
69 |     ##   GKE, Azure & OpenStack)
70 |     ##
71 |     # storageClass:
72 |     accessMode: ReadWriteOnce
73 |     size: 10Gi
74 | 
75 | nodeSelector: {}
76 | 
77 | tolerations: []
78 | 
79 | affinity: {}
80 | 


--------------------------------------------------------------------------------
/charts/spark-hs/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 


--------------------------------------------------------------------------------
/charts/spark-hs/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | appVersion: "1.0"
 3 | description: A Helm chart to deploy Spark history server for Kubernetes
 4 | name: spark-hs
 5 | version: 0.1.0
 6 | home: https://github.com/apache-spark-on-k8s/spark
 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png
 8 | maintainers:
 9 |   - name:
10 |     email:
11 | 
12 | 


--------------------------------------------------------------------------------
/charts/spark-hs/README.md:
--------------------------------------------------------------------------------
  1 | # A Helm chart for Spark History Server
  2 | [Spark History Server](https://spark.apache.org/docs/latest/monitoring.html#viewing-after-the-fact) Web UI 
  3 | allows users to view job execution details even after the application has finished execution. To use History Server, 
  4 | Spark applications should be configured to log events to a directory from which Spark History Server will read events
  5 | to construct the job execution visualization. The events directory can be a local file path, an HDFS path, or any alternative 
  6 | file system supported by Hadoop APIs. 
  7 | 
  8 | ## Chart Details
  9 | This chart launches Spark History Server on Kubernetes. History server can read events from any 
 10 | HDFS compatible system (GCS/S3/HDFS) or a file system path mounted on the pod. A user can set GCS bucket 
 11 | URI in 'historyServerConf.eventsDir' attribute. 
 12 | 
 13 | *Note:* This README file describes history server configuration to read history events from GCS bucket
 14 |  
 15 | ## Steps to configure and install chart
 16 | 
 17 | 1. Setup gsutil and gcloud on your local laptop and associate them with your GCP project, create a bucket, 
 18 | create an IAM service account sparkonk8s-test, generate a json key file sparkonk8s-test.json, to grant sparkonk8s-test 
 19 | admin permission to bucket gs://spark-history-server.
 20 | 
 21 |     ```
 22 |     $ gsutil mb -c nearline gs://spark-history-server
 23 |     $ export ACCOUNT_NAME=sparkonk8s-test
 24 |     $ export GCP_PROJECT_ID=project-id
 25 |     $ gcloud iam service-accounts create ${ACCOUNT_NAME} --display-name "${ACCOUNT_NAME}"
 26 |     $ gcloud iam service-accounts keys create "${ACCOUNT_NAME}.json" --iam-account "${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com"
 27 |     $ gcloud projects add-iam-policy-binding ${GCP_PROJECT_ID} --member "serviceAccount:${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com" --role roles/storage.admin
 28 |     $ gsutil iam ch serviceAccount:${ACCOUNT_NAME}@${GCP_PROJECT_ID}.iam.gserviceaccount.com:objectAdmin gs://spark-history-server
 29 |     ```
 30 |     
 31 | 2.  In order for history server to be able read from the GCS bucket, we need 
 32 |     to mount the json key file on the history server pod. First copy the json file into 'conf/secrets' 
 33 |     directory for spark history server chart
 34 |     
 35 |     ```
 36 |     $ cp sparkonk8s-test.json spark-hs/conf/secrets/
 37 |     ```
 38 |     
 39 | 3.  Modify the 'values.yaml' file and specify the GCS bucket path created above. History server 
 40 |     will read spark events from this path 
 41 |     
 42 |     ```
 43 |     historyServerConf:
 44 |       eventsDir: "gs://spark-history-server/"
 45 |     ```
 46 |         
 47 |     Also set 'mountSecrets' field of values.yaml file to true. When 'mountSecrets' 
 48 |     is set to true json key file will be mounted on path '/etc/secrets' of the pod.  
 49 |     
 50 |     ```
 51 |     mountSecrets: true
 52 |     ```
 53 |     
 54 |     Lastly set the SPARK_HISTORY_OPTS so that history server uses json key file while 
 55 |     accessing the GCS bucket  
 56 |     
 57 |     ```
 58 |     environment:
 59 |       SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json
 60 |     ```
 61 | 
 62 | 4.  Install the chart
 63 |     ```
 64 |     helm install --name history --namespace spark ./spark-hs/
 65 |     ```
 66 |     
 67 |     Spark History UI URL can now be accessed as follows:
 68 |     ```
 69 |     $ export SERVICE_IP=$(kubectl get svc --namespace default example-history-spark-hs -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
 70 |     $ echo http://$SERVICE_IP:18080
 71 |     ```
 72 |     Use the URL to access History UI in a browser.
 73 |         
 74 | ## Enable spark-submit to log spark history events
 75 | The spark-submit example below shows Spark job that logs historical events 
 76 | to the GCS bucket created in above steps. Once job finishes, use the 
 77 | Spark history server UI to view the job execution details.
 78 | 
 79 |   ```
 80 |   bin/spark-submit \
 81 |       --master k8s://https://<k8s-master-IP> \
 82 |       --deploy-mode cluster \
 83 |       --name spark-pi \
 84 |       --conf spark.kubernetes.namespace=spark \
 85 |       --class org.apache.spark.examples.SparkPi \
 86 |       --conf spark.eventLog.enabled=true \
 87 |       --conf spark.eventLog.dir=gs://spark-history-server/ \
 88 |       --conf spark.executor.instances=2 \
 89 |       --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json \
 90 |       --conf spark.kubernetes.driver.secrets.history-secrets=/etc/secrets \
 91 |       --conf spark.kubernetes.executor.secrets.history-secrets=/etc/secrets \
 92 |       --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 \
 93 |       --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 \
 94 |       local:///opt/spark/examples/jars/spark-examples_2.11-2.2.0-k8s-0.5.0.jar  
 95 |   ```
 96 | 
 97 |      
 98 | ## Deleting the chart
 99 | Use `helm delete` command to delete the chart
100 |    ```
101 |    $ helm delete --purge example-history
102 |    ```
103 | 


--------------------------------------------------------------------------------
/charts/spark-hs/conf/secrets/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-hs/conf/secrets/.gitignore


--------------------------------------------------------------------------------
/charts/spark-hs/templates/NOTES.txt:
--------------------------------------------------------------------------------
 1 | 1. Get the application URL by running these commands:
 2 | {{- if contains "NodePort" .Values.service.type }}
 3 |   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "spark-hs.fullname" . }})
 4 |   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
 5 |   echo http://$NODE_IP:$NODE_PORT
 6 | {{- else if contains "LoadBalancer" .Values.service.type }}
 7 |      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
 8 |            You can watch the status of by running 'kubectl get svc -w {{ template "spark-hs.fullname" . }}'
 9 |   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "spark-hs.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
10 |   echo http://$SERVICE_IP:{{ .Values.service.port }}
11 | {{- else if contains "ClusterIP" .Values.service.type }}
12 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "spark-hs.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
13 |   echo "Visit http://127.0.0.1:8080 to use your application"
14 |   kubectl port-forward $POD_NAME 8080:80
15 | {{- end }}
16 | 


--------------------------------------------------------------------------------
/charts/spark-hs/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "spark-hs.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "spark-hs.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "spark-hs.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/spark-hs/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: {{ .Release.Name }}-hsenv-configmap
 5 | data:
 6 |   {{- range $key, $val := .Values.environment }}
 7 |   {{ $key }}: {{ $val | quote }}
 8 |   {{- end }}
 9 |   {{- range $key, $val := .Values.historyServerConf }}
10 |   {{ $key }}: {{ $val | quote }}
11 |   {{- end }}
12 | ---
13 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }}
14 | apiVersion: v1
15 | kind: Secret
16 | metadata:
17 |   name: history-secrets
18 | type: Opaque
19 | data:
20 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
21 | {{- end }}


--------------------------------------------------------------------------------
/charts/spark-hs/templates/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1beta1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: {{ template "spark-hs.fullname" . }}
 5 |   labels:
 6 |     app: {{ template "spark-hs.name" . }}
 7 |     chart: {{ template "spark-hs.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   replicas: 1
12 |   selector:
13 |     matchLabels:
14 |       app: {{ template "spark-hs.name" . }}
15 |       release: {{ .Release.Name }}
16 |   template:
17 |     metadata:
18 |       labels:
19 |         app: {{ template "spark-hs.name" . }}
20 |         release: {{ .Release.Name }}
21 |     spec:
22 |       containers:
23 |         - name: {{ .Chart.Name }}
24 |           image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}"
25 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
26 |           ports:
27 |             - name: historyport
28 |               containerPort: 18080
29 |               protocol: TCP
30 |           command:
31 |             - "/bin/sh"
32 |             - "-c"
33 |             - >
34 |               if [ "$enablePVC" == "true" ]; then
35 |                 export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \
36 |                 -Dspark.history.fs.logDirectory=file:/mnt/$eventsDir";
37 |               else
38 |                 export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \
39 |                 -Dspark.history.fs.logDirectory=$eventsDir";
40 |               fi;
41 |               /opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer;
42 |           envFrom:
43 |           - configMapRef:
44 |               name: {{ .Release.Name }}-hsenv-configmap
45 |           livenessProbe:
46 |             httpGet:
47 |               path: /
48 |               port: historyport
49 |           readinessProbe:
50 |             httpGet:
51 |               path: /
52 |               port: historyport
53 |           resources:
54 | {{ toYaml .Values.resources | indent 12 }}
55 |           volumeMounts:
56 |           - name: data
57 |             mountPath: /mnt
58 |           {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
59 |           - name: secrets-volume
60 |             mountPath: /etc/secrets
61 |           {{- end }}
62 |     {{- with .Values.nodeSelector }}
63 |       nodeSelector:
64 | {{ toYaml . | indent 8 }}
65 |     {{- end }}
66 |       serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }}
67 |     {{- with .Values.affinity }}
68 |       affinity:
69 | {{ toYaml . | indent 8 }}
70 |     {{- end }}
71 |     {{- with .Values.tolerations }}
72 |       tolerations:
73 | {{ toYaml . | indent 8 }}
74 |     {{- end }}
75 |       volumes:
76 |         {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
77 |         - name: secrets-volume
78 |           secret:
79 |             secretName: history-secrets
80 |         {{- end }}
81 |         - name: data
82 |         {{- if .Values.historyServerConf.enablePVC }}
83 |           persistentVolumeClaim:
84 |             claimName: {{ .Values.historyServerConf.existingClaimName }}
85 |         {{- else }}
86 |           emptyDir: {}
87 |         {{- end -}}
88 | 


--------------------------------------------------------------------------------
/charts/spark-hs/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ template "spark-hs.fullname" . }}
 5 |   labels:
 6 |     app: {{ template "spark-hs.name" . }}
 7 |     chart: {{ template "spark-hs.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   type: {{ .Values.service.type }}
12 |   ports:
13 |     - port: {{ .Values.service.port }}
14 |       targetPort: historyport
15 |       protocol: TCP
16 |       name: historyport
17 |   selector:
18 |     app: {{ template "spark-hs.name" . }}
19 |     release: {{ .Release.Name }}


--------------------------------------------------------------------------------
/charts/spark-hs/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for spark-hs.
 2 | # This is a YAML-formatted file.
 3 | # Declare variables to be passed into your templates.
 4 | 
 5 | image:
 6 |   repository: snappydatainc
 7 |   tag: spark-init:v2.2.0-kubernetes-0.5.1
 8 |   pullPolicy: IfNotPresent
 9 | 
10 | service:
11 |   type: LoadBalancer
12 |   port: 18080
13 | 
14 | serviceAccount: default
15 | 
16 | historyServerConf:
17 |   # if using file system, this should be an absolute path in the mounted volume
18 |   # if not using file system, mention HDFS compatible URI
19 |   eventsDir: "gs://spark-history-server-store/"
20 |   #eventsDir: "/"
21 | 
22 |   # to use a file system path for Spark events dir, set 'enablePVC' to true and mention the
23 |   # name of an already created persistent volume claim in existingClaimName.
24 |   # The volume will be mounted on /data in the pod
25 |   enablePVC: false
26 |   existingClaimName: "claim"
27 | 
28 | # copy your key file in 'conf/secrets' directory and set mountSecrets to true
29 | # key file will be mounted on '/etc/secrets'
30 | mountSecrets: true
31 | 
32 | # any environment variables that need to be made available to history server
33 | environment:
34 |   # Note: do not configure Spark history events directory using SPARK_HISTORY_OPTS. It will be
35 |   # configured by this chart based on the values in "historyServerConf" attributes in values.yaml
36 |   # However other options can be specified.
37 | 
38 |   SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json
39 |   #SPARK_DAEMON_MEMORY: 1g
40 |   #SPARK_DAEMON_JAVA_OPTS: ...
41 |   #SPARK_DAEMON_CLASSPATH: ...
42 |   #SPARK_PUBLIC_DNS: ...
43 | 
44 | resources: {}
45 |   # We usually recommend not to specify default resources and to leave this as a conscious
46 |   # choice for the user. This also increases chances charts run on environments with little
47 |   # resources, such as Minikube. If you do want to specify resources, uncomment the following
48 |   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
49 |   # limits:
50 |   #  cpu: 100m
51 |   #  memory: 128Mi
52 |   # requests:
53 |   #  cpu: 100m
54 |   #  memory: 128Mi
55 | 
56 | nodeSelector: {}
57 | 
58 | tolerations: []
59 | 
60 | affinity: {}
61 | 
62 | #internal attribute, do not change
63 | global:
64 |   umbrellaChart: false


--------------------------------------------------------------------------------
/charts/spark-rss/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | appVersion: "1.0"
3 | description: A Helm chart to deploy Spark resource staging server on Kubernetes
4 | name: spark-rss
5 | version: 0.1.0
6 | 


--------------------------------------------------------------------------------
/charts/spark-rss/README.md:
--------------------------------------------------------------------------------
  1 | # A Helm chart for Spark Resource Staging Server
  2 | This chart is still work-in-progress
  3 | 
  4 | ## Chart Details
  5 | This chart launches [Spark Resource Staging Server](https://apache-spark-on-k8s.github.io/userdocs/running-on-kubernetes.html#dependency-management).
  6 | Spark Resource Staging Server is used for dependency management when Spark is run in Kubernetes environment.
  7 |  
  8 | ## Installing the Chart
  9 | 
 10 | 1. By default this chart deploys Spark Resource Staging Server and configures a service of type 
 11 | LoadBalancer to access it.
 12 | 
 13 |     For example:
 14 | 
 15 |     ```
 16 |     helm install --name rss --namespace spark ./spark-rss/
 17 |     ```
 18 |     
 19 |     The above command will display output such as given below:
 20 |     ```
 21 |     NOTES:
 22 |     Get the resource staging server URI by running these commands:
 23 |          NOTE: It may take a few minutes for the LoadBalancer IP to be available.
 24 |                You can watch the status of by running 'kubectl get svc -w rss-spark-rss'
 25 |       export SERVICE_IP=$(kubectl get svc --namespace default rss-spark-rss -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
 26 |       echo http://$SERVICE_IP:10000
 27 |     ```
 28 |     
 29 |     The URI displayed by using the commands given in the above output can be used to run 
 30 |     spark-submit command with resource staging server. 
 31 |     
 32 | 2. Users also have a choice of using NodePort service instead of LoadBalancer. To use a  
 33 |    NodePort service use a command such as given below or alternatively modify 
 34 |    values.yaml and specify the service.type value as "NodePort"
 35 |      
 36 |      ```
 37 |      helm install --name rss --namespace spark --set service.type=NodePort ./spark-rss/
 38 |      ```
 39 |      
 40 |      The above command will display output such as given below:
 41 |      
 42 |      ```
 43 |      NOTES:
 44 |      Get the resource staging server URI by running these commands:
 45 |        export NODE_PORT=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services rss-spark-rss)
 46 |        export NODE_IP=$(kubectl get nodes --namespace default -o jsonpath="{.items[0].status.addresses[1].address}")
 47 |        echo http://$NODE_IP:$NODE_PORT
 48 |      ```
 49 |     NOTE: To access NodePort service externally, create a firewall rule that allows TCP traffic on your node port.
 50 |     For example, if Service has a NodePort value of 31000, create a firewall rule that allows TCP traffic on port 31000.
 51 |     Different cloud providers offer different ways of configuring firewall rules. Without the firewall you may not be
 52 |     able to use resource staging server as spark-submit will throw error.    
 53 |  
 54 | #### spark-submit command example
 55 | Given below is an example spark-submit command that uses resource staging server. 
 56 | 
 57 |     
 58 |     bin/spark-submit \
 59 |       --deploy-mode cluster \
 60 |       --class org.apache.spark.examples.SparkPi \
 61 |       --master k8s://<k8s-apiserver-host>:<k8s-apiserver-port> \
 62 |       --conf spark.kubernetes.namespace=spark \
 63 |       --conf spark.executor.instances=5 \
 64 |       --conf spark.app.name=spark-pi \
 65 |       --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 \
 66 |       --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1 \
 67 |       --conf spark.kubernetes.initcontainer.docker.image=snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1 \
 68 |       --conf spark.kubernetes.resourceStagingServer.uri=<URI of resource staging server as displayed on console while deploying it> \
 69 |       ./examples/jars/spark-examples_2.11-2.2.0-k8s-0.5.0.jar
 70 |     
 71 |     
 72 |      
 73 | ## Deleting the chart
 74 | Use `helm delete` command to delete the chart
 75 |    ```
 76 |    $ helm delete --purge rss
 77 |    ```
 78 |    
 79 | ## Configuration
 80 | The following table lists the configuration parameters available for this chart
 81 | 
 82 | | Parameter               | Description                        | Default                                                    |
 83 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- |
 84 | | `image.repository`      |  Docker repo for the image         |     `SnappyDataInc`                                        |
 85 | | `image.tag`             |  Tag for the Docker image          |     `spark-resource-staging-server:v2.2.0-kubernetes-0.5.1`        | 
 86 | | `image.pullPolicy`      |  Pull policy for the image         |     `IfNotPresent`                                         |
 87 | | `service.type`  |  K8S service type for Resource Staging Server     |     `LoadBalancer`                                  |
 88 | | `service.externalPort`  |  Port exposed externally for Resource Staging Server service         |      `10000`              |
 89 | | `service.internalPort`  |  Port exposed only internally for the pod so that LoadBalancer can connect to it  |      `10000` |
 90 | | `service.nodePort`  |  Used if the service is of NodePort type  |      `310000` |
 91 | | `serviceAccount`        |  Service account used to deploy Resource Staging Server |     `default`                                    |
 92 | | `properties`    | Configuration properties for Resource Staging Server. These will be made available as configmaps |  |
 93 | | `resources`           | CPU and Memory resources for the RSS pod  | |
 94 | | `global.umbrellaChart` | Internal attribute. Do not modify | `false` | 
 95 | 
 96 | These configuration attributes can be set in the `values.yaml` file or while using the helm install command, for example,
 97 | 
 98 | ```
 99 | # set an attribute while using helm install command
100 | helm install --name rss --namespace spark --set serviceAccount=spark ./spark-rss
101 | ```


--------------------------------------------------------------------------------
/charts/spark-rss/templates/NOTES.txt:
--------------------------------------------------------------------------------
 1 | Get the resource staging server URI by running these commands:
 2 | {{- if contains "NodePort" .Values.service.type }}
 3 |   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "spark-rss.fullname" . }})
 4 |   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[1].address}")
 5 |   echo http://$NODE_IP:$NODE_PORT
 6 | 
 7 |   NOTE: To access NodePort service externally, create a firewall rule that allows TCP traffic on your node port.
 8 |   For example, if Service has a NodePort value of 31000, create a firewall rule that allows TCP traffic on port 31000.
 9 |   Different cloud providers offer different ways of configuring firewall rules. Without the firewall you may not be
10 |   able to use resource staging server as spark-submit will throw error.
11 | 
12 | {{- else if contains "LoadBalancer" .Values.service.type }}
13 |      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14 |            You can watch the status of by running 'kubectl get svc -w {{ template "spark-rss.fullname" . }}'
15 |   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "spark-rss.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
16 |   echo http://$SERVICE_IP:{{ .Values.service.externalPort }}
17 | {{- end }}
18 | 


--------------------------------------------------------------------------------
/charts/spark-rss/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "spark-rss.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "spark-rss.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "spark-rss.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/spark-rss/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: {{ .Release.Name }}-rss-configmap
 5 | data:
 6 |   resource-staging-server.properties: |
 7 |     spark.kubernetes.resourceStagingServer.port={{ .Values.service.internalPort }}
 8 |     {{- range $key, $val := .Values.properties }}
 9 |     {{ $key }}={{ $val }}
10 |     {{- end}}


--------------------------------------------------------------------------------
/charts/spark-rss/templates/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1beta1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: {{ template "spark-rss.fullname" . }}
 5 |   labels:
 6 |     app: {{ template "spark-rss.name" . }}
 7 |     chart: {{ template "spark-rss.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   replicas: 1
12 |   selector:
13 |     matchLabels:
14 |       app: {{ template "spark-rss.name" . }}
15 |       release: {{ .Release.Name }}
16 |   template:
17 |     metadata:
18 |       labels:
19 |         app: {{ template "spark-rss.name" . }}
20 |         release: {{ .Release.Name }}
21 |         resource-staging-server-instance: default
22 |     spec:
23 |       volumes:
24 |         - name: resource-staging-server-properties
25 |           configMap:
26 |             name: {{ .Release.Name }}-rss-configmap
27 |       containers:
28 |         - name: {{ .Chart.Name }}
29 |           image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}"
30 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
31 |           ports:
32 |             - name: rssport
33 |               containerPort: {{ .Values.service.internalPort }}
34 |               protocol: TCP
35 | # TODO: add a proper liveness probe
36 | #          livenessProbe:
37 | #            tcpSocket:
38 | #              port: rssport
39 | #              initialDelaySeconds: 180
40 | #          readinessProbe:
41 | #            tcpSocket:
42 | #              port: rssport
43 | #              initialDelaySeconds: 20
44 |           resources:
45 | {{ toYaml .Values.resources | indent 12 }}
46 |           volumeMounts:
47 |             - name: resource-staging-server-properties
48 |               mountPath: '/etc/spark-resource-staging-server'
49 |           args:
50 |             - '/etc/spark-resource-staging-server/resource-staging-server.properties'
51 |       serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }}


--------------------------------------------------------------------------------
/charts/spark-rss/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ template "spark-rss.fullname" . }}
 5 |   labels:
 6 |     app: {{ template "spark-rss.name" . }}
 7 |     chart: {{ template "spark-rss.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   type: {{ .Values.service.type }}
12 |   ports:
13 |     - port: {{ .Values.service.externalPort }}
14 |       targetPort: {{ .Values.service.internalPort }}
15 |       protocol: TCP
16 |       {{ if eq .Values.service.type "NodePort" -}}
17 |       nodePort: {{ .Values.service.nodePort }}
18 |       {{ end -}}
19 |       name: rssport
20 |   selector:
21 |     app: {{ template "spark-rss.name" . }}
22 |     release: {{ .Release.Name }}
23 | #   TODO: Is this required
24 |     resource-staging-server-instance: default


--------------------------------------------------------------------------------
/charts/spark-rss/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for spark-rss.
 2 | # This is a YAML-formatted file.
 3 | # Declare variables to be passed into your templates.
 4 | 
 5 | image:
 6 |   repository: snappydatainc
 7 |   tag: spark-resource-staging-server:v2.2.0-kubernetes-0.5.1
 8 |   pullPolicy: IfNotPresent
 9 | 
10 | service:
11 |   type: LoadBalancer
12 |   externalPort: 10000
13 |   internalPort: 10000
14 |   nodePort: 31000
15 | 
16 | serviceAccount: default
17 | 
18 | # properties that can be made available via configmap
19 | properties:
20 |     spark.ssl.kubernetes.resourceStagingServer.enabled: false
21 | # Other possible properties are listed below, primarily for setting up TLS. The paths given by KeyStore, password, and PEM files here should correspond to
22 | # files that are securely mounted into the resource staging server container, via e.g. secret volumes.
23 | #   spark.ssl.kubernetes.resourceStagingServer.keyStore=/mnt/secrets/resource-staging-server/keyStore.jks
24 | #   spark.ssl.kubernetes.resourceStagingServer.keyStorePassword=changeit
25 | #   spark.ssl.kubernetes.resourceStagingServer.keyPassword=changeit
26 | #   spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile=/mnt/secrets/resource-staging-server/keystore-password.txt
27 | #   spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile=/mnt/secrets/resource-staging-server/keystore-key-password.txt
28 | #   spark.ssl.kubernetes.resourceStagingServer.keyPem=/mnt/secrets/resource-staging-server/key.pem
29 | #   spark.ssl.kubernetes.resourceStagingServer.serverCertPem=/mnt/secrets/resource-staging-server/cert.pem
30 | 
31 | resources:
32 |   limits:
33 |     cpu: 100m
34 |     memory: 1Gi
35 |   requests:
36 |     cpu: 100m
37 |     memory: 256Mi
38 | 
39 | #internal attribute, do not change
40 | global:
41 |   umbrellaChart: false


--------------------------------------------------------------------------------
/charts/spark-shuffle/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 


--------------------------------------------------------------------------------
/charts/spark-shuffle/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | appVersion: "1.0"
 3 | description: A Helm chart to deploy Spark shuffle service deamon set for Kubernetes
 4 | name: spark-shuffle
 5 | version: 0.1.0
 6 | home: https://github.com/apache-spark-on-k8s/spark
 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png
 8 | maintainers:
 9 |   - name: 
10 |     email: 
11 | 


--------------------------------------------------------------------------------
/charts/spark-shuffle/README.md:
--------------------------------------------------------------------------------
 1 | # A Helm chart to launch Spark shuffle service on k8s
 2 | 
 3 | This chart launches Spark shuffle service as a daemonset on Kubernetes. Spark shuffle service is 
 4 |  required for dynamic executor scaling in Spark
 5 | 
 6 | 
 7 | ## Installing the Chart
 8 | 
 9 | To install the chart use following command:
10 | 
11 |   ```
12 |   $ helm install --name example-shuffle --namespace spark ./spark-shuffle/
13 |   ```
14 | 
15 | Above command will deploy the chart and display labels attached to the Shuffle pods in 'NOTES'.
16 | 
17 | Example output:
18 | 
19 |   ```
20 |   NOTES:
21 |   Created a Spark shuffle daemonset with following Pod labels:
22 |    app: spark-shuffle-service
23 |    spark-version: 2.2.0
24 | 
25 |   ```
26 | Above mentioned labels can be used in spark-submit configuration options to enable dynamic executor scaling
27 |   
28 | For example in values.yaml of [spark-k8s-zeppelin chart](https://github.com/SnappyDataInc/spark-on-k8s/tree/master/charts/zeppelin-with-spark), modify the SPARK_SUBMIT_OPTIONS
29 |  as given below (note the options for dynamicAllocation and shuffle). This will enable dynamic executor scaling and use the 
30 |  shuffle service installed above.
31 | 
32 | ```
33 |   SPARK_SUBMIT_OPTIONS: >-
34 |      --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1
35 |      --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1
36 |      --conf spark.driver.cores="300m"
37 |      --conf spark.local.dir=/tmp/spark-local
38 |      --conf spark.dynamicAllocation.enabled=true
39 |      --conf spark.shuffle.service.enabled=true
40 |      --conf spark.kubernetes.shuffle.namespace=spark
41 |      --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.2.0"
42 |      --conf spark.dynamicAllocation.initialExecutors=0
43 |      --conf spark.dynamicAllocation.minExecutors=1
44 |      --conf spark.dynamicAllocation.maxExecutors=5
45 | ```
46 | 
47 | ## Configuration
48 | The following table lists the configuration parameters available for this chart
49 | 
50 | | Parameter               | Description                        | Default                                                    |
51 | | ----------------------- | ---------------------------------- | ---------------------------------------------------------- |
52 | | `image.repository`      |  Docker repo for the shuffle service image         |     `SnappyDataInc`                        |
53 | | `image.tag`             |  Tag for the Docker image          |     `spark-shuffle:v2.2.0-kubernetes-0.5.1`        | 
54 | | `image.pullPolicy`      |  Pull policy for the image         |     `IfNotPresent`                                 |
55 | | `serviceAccount`        |  Service account used to deploy shuffle service daemonset |     `default`               |
56 | | `shufflePodLabels` | Labels assigned to pods of the shuffle service. These can be used to target a particular service while running jobs. By default two labels are created `app: spark-shuffle-service` and `spark-version: 2.2.0`| |
57 | | `resources`           | CPU and Memory resources for the RSS pod  | |
58 | | `global.umbrellaChart` | Internal attribute. Do not modify | `false` | 
59 | 
60 | These configuration attributes can be set in the `values.yaml` file or while using the helm install command, for example, 
61 | 
62 | ```
63 | # set an attribute while using helm install command
64 | helm install --name shuffle --namespace spark --set serviceAccount=spark ./spark-shuffle
65 | ```
66 |   


--------------------------------------------------------------------------------
/charts/spark-shuffle/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | Created a Spark shuffle daemonset with following Pod labels:
2 |  {{- range $key, $val := .Values.shufflePodLabels }}
3 |  {{ $key }}: {{ $val }}
4 |  {{- end }}
5 | 
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/charts/spark-shuffle/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "spark-shuffle.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "spark-shuffle.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "spark-shuffle.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/spark-shuffle/templates/shuffle-daemonset.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: extensions/v1beta1
 2 | kind: DaemonSet
 3 | metadata:
 4 |   labels:
 5 |     {{- range $key, $val := .Values.shufflePodLabels }}
 6 |     {{ $key }}: {{ $val }}
 7 |     {{- end }}
 8 |   name: {{ template "spark-shuffle.fullname" . }}
 9 | spec:
10 |   template:
11 |     metadata:
12 |       labels:
13 |         {{- range $key, $val := .Values.shufflePodLabels }}
14 |         {{ $key }}: {{ $val }}
15 |         {{- end }}
16 |     spec:
17 |       volumes:
18 |         - name: temp-volume
19 |           hostPath:
20 |             path: '/tmp/spark-local' # change this path according to your cluster configuration.
21 |       containers:
22 |         - name: {{ .Chart.Name }}
23 |           # This is an official image that is built
24 |           # from the dockerfiles/shuffle directory
25 |           # in the spark distribution.
26 |           image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}"
27 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
28 |           volumeMounts:
29 |             - mountPath: '/tmp/spark-local'
30 |               name: temp-volume
31 |               # more volumes can be mounted here.
32 |               # The spark job must be configured to use these
33 |               # mounts using the configuration:
34 |               #   spark.kubernetes.shuffle.dir=<mount-1>,<mount-2>,...
35 |           resources:
36 | {{ toYaml .Values.resources | indent 12 }}
37 |       serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }}
38 | 


--------------------------------------------------------------------------------
/charts/spark-shuffle/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for spark-shuffle.
 2 | # This is a YAML-formatted file.
 3 | # Declare variables to be passed into your templates.
 4 | 
 5 | image:
 6 |   repository: snappydatainc
 7 |   tag: spark-shuffle:v2.2.0-kubernetes-0.5.1
 8 |   pullPolicy: IfNotPresent
 9 | 
10 | shufflePodLabels:
11 |   app: spark-shuffle-service
12 |   spark-version: 2.2.0
13 | 
14 | serviceAccount: default
15 | 
16 | resources:
17 |   limits:
18 |     cpu: 200m
19 |   #  memory: 128Mi
20 |   requests:
21 |     cpu: 200m
22 |   #  memory: 128Mi
23 | 
24 | #internal attribute, do not change
25 | global:
26 |   umbrellaChart: false


--------------------------------------------------------------------------------
/charts/spark-umbrella/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | appVersion: "1.0"
3 | description: A Helm chart for running Spark applications on Kubernetes.
4 | name: spark-umbrella
5 | version: 0.1.0
6 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/charts/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/charts/.gitignore


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/secrets/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/secrets/.gitignore


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/spark/.gitignore


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/fairscheduler.xml.template:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <!--
 4 |    Licensed to the Apache Software Foundation (ASF) under one or more
 5 |    contributor license agreements.  See the NOTICE file distributed with
 6 |    this work for additional information regarding copyright ownership.
 7 |    The ASF licenses this file to You under the Apache License, Version 2.0
 8 |    (the "License"); you may not use this file except in compliance with
 9 |    the License.  You may obtain a copy of the License at
10 | 
11 |        http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |    Unless required by applicable law or agreed to in writing, software
14 |    distributed under the License is distributed on an "AS IS" BASIS,
15 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |    See the License for the specific language governing permissions and
17 |    limitations under the License.
18 | -->
19 | 
20 | <allocations>
21 |   <pool name="production">
22 |     <schedulingMode>FAIR</schedulingMode>
23 |     <weight>1</weight>
24 |     <minShare>2</minShare>
25 |   </pool>
26 |   <pool name="test">
27 |     <schedulingMode>FIFO</schedulingMode>
28 |     <weight>2</weight>
29 |     <minShare>3</minShare>
30 |   </pool>
31 | </allocations>
32 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/log4j.properties.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Set everything to be logged to the console
19 | log4j.rootCategory=INFO, console
20 | log4j.appender.console=org.apache.log4j.ConsoleAppender
21 | log4j.appender.console.target=System.err
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
24 | 
25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the
26 | # log level for this class is used to overwrite the root logger's log level, so that
27 | # the user can have different defaults for the shell and regular Spark apps.
28 | log4j.logger.org.apache.spark.repl.Main=WARN
29 | 
30 | # Settings to quiet third party logs that are too verbose
31 | log4j.logger.org.spark_project.jetty=WARN
32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
35 | log4j.logger.org.apache.parquet=ERROR
36 | log4j.logger.parquet=ERROR
37 | 
38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
41 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/metrics.properties.template:
--------------------------------------------------------------------------------
  1 | #
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | #
 17 | 
 18 | #  syntax: [instance].sink|source.[name].[options]=[value]
 19 | 
 20 | #  This file configures Spark's internal metrics system. The metrics system is
 21 | #  divided into instances which correspond to internal components.
 22 | #  Each instance can be configured to report its metrics to one or more sinks.
 23 | #  Accepted values for [instance] are "master", "worker", "executor", "driver",
 24 | #  and "applications". A wildcard "*" can be used as an instance name, in
 25 | #  which case all instances will inherit the supplied property.
 26 | #
 27 | #  Within an instance, a "source" specifies a particular set of grouped metrics.
 28 | #  there are two kinds of sources:
 29 | #    1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
 30 | #    collect a Spark component's internal state. Each instance is paired with a
 31 | #    Spark source that is added automatically.
 32 | #    2. Common sources, like JvmSource, which will collect low level state.
 33 | #    These can be added through configuration options and are then loaded
 34 | #    using reflection.
 35 | #
 36 | #  A "sink" specifies where metrics are delivered to. Each instance can be
 37 | #  assigned one or more sinks.
 38 | #
 39 | #  The sink|source field specifies whether the property relates to a sink or
 40 | #  source.
 41 | #
 42 | #  The [name] field specifies the name of source or sink.
 43 | #
 44 | #  The [options] field is the specific property of this source or sink. The
 45 | #  source or sink is responsible for parsing this property.
 46 | #
 47 | #  Notes:
 48 | #    1. To add a new sink, set the "class" option to a fully qualified class
 49 | #    name (see examples below).
 50 | #    2. Some sinks involve a polling period. The minimum allowed polling period
 51 | #    is 1 second.
 52 | #    3. Wildcard properties can be overridden by more specific properties.
 53 | #    For example, master.sink.console.period takes precedence over
 54 | #    *.sink.console.period.
 55 | #    4. A metrics specific configuration
 56 | #    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
 57 | #    added to Java properties using -Dspark.metrics.conf=xxx if you want to
 58 | #    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
 59 | #    and it will be loaded automatically.
 60 | #    5. The MetricsServlet sink is added by default as a sink in the master,
 61 | #    worker and driver, and you can send HTTP requests to the "/metrics/json"
 62 | #    endpoint to get a snapshot of all the registered metrics in JSON format.
 63 | #    For master, requests to the "/metrics/master/json" and
 64 | #    "/metrics/applications/json" endpoints can be sent separately to get
 65 | #    metrics snapshots of the master instance and applications. This
 66 | #    MetricsServlet does not have to be configured.
 67 | 
 68 | ## List of available common sources and their properties.
 69 | 
 70 | # org.apache.spark.metrics.source.JvmSource
 71 | #   Note: Currently, JvmSource is the only available common source.
 72 | #         It can be added to an instance by setting the "class" option to its
 73 | #         fully qualified class name (see examples below).
 74 | 
 75 | ## List of available sinks and their properties.
 76 | 
 77 | # org.apache.spark.metrics.sink.ConsoleSink
 78 | #   Name:   Default:   Description:
 79 | #   period  10         Poll period
 80 | #   unit    seconds    Unit of the poll period
 81 | 
 82 | # org.apache.spark.metrics.sink.CSVSink
 83 | #   Name:     Default:   Description:
 84 | #   period    10         Poll period
 85 | #   unit      seconds    Unit of the poll period
 86 | #   directory /tmp       Where to store CSV files
 87 | 
 88 | # org.apache.spark.metrics.sink.GangliaSink
 89 | #   Name:     Default:   Description:
 90 | #   host      NONE       Hostname or multicast group of the Ganglia server,
 91 | #                        must be set
 92 | #   port      NONE       Port of the Ganglia server(s), must be set
 93 | #   period    10         Poll period
 94 | #   unit      seconds    Unit of the poll period
 95 | #   ttl       1          TTL of messages sent by Ganglia
 96 | #   dmax      0          Lifetime in seconds of metrics (0 never expired)
 97 | #   mode      multicast  Ganglia network mode ('unicast' or 'multicast')
 98 | 
 99 | # org.apache.spark.metrics.sink.JmxSink
100 | 
101 | # org.apache.spark.metrics.sink.MetricsServlet
102 | #   Name:     Default:   Description:
103 | #   path      VARIES*    Path prefix from the web server root
104 | #   sample    false      Whether to show entire set of samples for histograms
105 | #                        ('false' or 'true')
106 | #
107 | # * Default path is /metrics/json for all instances except the master. The
108 | #   master has two paths:
109 | #     /metrics/applications/json # App information
110 | #     /metrics/master/json       # Master information
111 | 
112 | # org.apache.spark.metrics.sink.GraphiteSink
113 | #   Name:     Default:      Description:
114 | #   host      NONE          Hostname of the Graphite server, must be set
115 | #   port      NONE          Port of the Graphite server, must be set
116 | #   period    10            Poll period
117 | #   unit      seconds       Unit of the poll period
118 | #   prefix    EMPTY STRING  Prefix to prepend to every metric's name
119 | #   protocol  tcp           Protocol ("tcp" or "udp") to use
120 | 
121 | ## Examples
122 | # Enable JmxSink for all instances by class name
123 | #*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
124 | 
125 | # Enable ConsoleSink for all instances by class name
126 | #*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
127 | 
128 | # Polling period for the ConsoleSink
129 | #*.sink.console.period=10
130 | # Unit of the polling period for the ConsoleSink
131 | #*.sink.console.unit=seconds
132 | 
133 | # Polling period for the ConsoleSink specific for the master instance
134 | #master.sink.console.period=15
135 | # Unit of the polling period for the ConsoleSink specific for the master
136 | # instance
137 | #master.sink.console.unit=seconds
138 | 
139 | # Enable CsvSink for all instances by class name
140 | #*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
141 | 
142 | # Polling period for the CsvSink
143 | #*.sink.csv.period=1
144 | # Unit of the polling period for the CsvSink
145 | #*.sink.csv.unit=minutes
146 | 
147 | # Polling directory for CsvSink
148 | #*.sink.csv.directory=/tmp/
149 | 
150 | # Polling period for the CsvSink specific for the worker instance
151 | #worker.sink.csv.period=10
152 | # Unit of the polling period for the CsvSink specific for the worker instance
153 | #worker.sink.csv.unit=minutes
154 | 
155 | # Enable Slf4jSink for all instances by class name
156 | #*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
157 | 
158 | # Polling period for the Slf4JSink
159 | #*.sink.slf4j.period=1
160 | # Unit of the polling period for the Slf4jSink
161 | #*.sink.slf4j.unit=minutes
162 | 
163 | # Enable JvmSource for instance master, worker, driver and executor
164 | #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
165 | 
166 | #worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
167 | 
168 | #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
169 | 
170 | #executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
171 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/spark-defaults.conf:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Default system properties included when running spark-submit.
19 | # This is useful for setting default environmental settings.
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | spark.submit.deployMode                        client
29 | spark.kubernetes.driver.docker.image           snappydatainc/spark-driver-py:v2.2.0-kubernetes-0.5.1
30 | spark.kubernetes.executor.docker.image         snappydatainc/spark-executor-py:v2.2.0-kubernetes-0.5.1
31 | spark.kubernetes.initcontainer.docker.image    snappydatainc/spark-init:v2.2.0-kubernetes-0.5.1
32 | spark.kubernetes.docker.image.pullPolicy       Always
33 | # Replace sparkonk8s-test.json with the actual name of your keyfile
34 | # to enable access to Google Cloud Storage.
35 | spark.hadoop.google.cloud.auth.service.account.json.keyfile   /etc/secrets/sparkonk8s-test.json
36 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/spark-defaults.conf.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Default system properties included when running spark-submit.
19 | # This is useful for setting default environmental settings.
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/spark/spark-env.sh.template:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #
 4 | # Licensed to the Apache Software Foundation (ASF) under one or more
 5 | # contributor license agreements.  See the NOTICE file distributed with
 6 | # this work for additional information regarding copyright ownership.
 7 | # The ASF licenses this file to You under the Apache License, Version 2.0
 8 | # (the "License"); you may not use this file except in compliance with
 9 | # the License.  You may obtain a copy of the License at
10 | #
11 | #    http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | # This file is sourced when running various Spark programs.
21 | # Copy it as spark-env.sh and edit that to configure Spark for your site.
22 | 
23 | # Options read when launching programs locally with
24 | # ./bin/run-example or ./bin/spark-submit
25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
28 | 
29 | # Options read by executors and drivers running inside the cluster
30 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
31 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
32 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
33 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
34 | 
35 | # Options read in YARN client mode
36 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
37 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
38 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
39 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
40 | 
41 | # Options for the daemons used in the standalone deploy mode
42 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
43 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
44 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
45 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine
46 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
47 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
48 | # - SPARK_WORKER_DIR, to set the working directory of worker processes
49 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
50 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
51 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
52 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
53 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
54 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
55 | 
56 | # Generic options for the daemons used in the standalone deploy mode
57 | # - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
58 | # - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
59 | # - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
60 | # - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
61 | # - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
62 | # - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
63 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/spark-umbrella/conf/zeppelin/.gitignore


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/configuration.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!--
 3 |   Licensed to the Apache Software Foundation (ASF) under one
 4 |   or more contributor license agreements.  See the NOTICE file
 5 |   distributed with this work for additional information
 6 |   regarding copyright ownership.  The ASF licenses this file
 7 |   to you under the Apache License, Version 2.0 (the
 8 |   "License"); you may not use this file except in compliance
 9 |   with the License.  You may obtain a copy of the License at
10 | 
11 |       http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |   Unless required by applicable law or agreed to in writing, software
14 |   distributed under the License is distributed on an "AS IS" BASIS,
15 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |   See the License for the specific language governing permissions and
17 |   limitations under the License.
18 | -->
19 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
20 | <xsl:output method="html"/>
21 | <xsl:template match="configuration">
22 | <html>
23 | <body>
24 | <table border="1">
25 | <tr>
26 |  <td>name</td>
27 |  <td>value</td>
28 |  <td>description</td>
29 | </tr>
30 | <xsl:for-each select="property">
31 | <tr>
32 |   <td><a name="{name}"><xsl:value-of select="name"/></a></td>
33 |   <td><xsl:value-of select="value"/></td>
34 |   <td><xsl:value-of select="description"/></td>
35 | </tr>
36 | </xsl:for-each>
37 | </table>
38 | </body>
39 | </html>
40 | </xsl:template>
41 | </xsl:stylesheet>
42 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/interpreter-list:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | #
18 | # [name]  [maven artifact]  [description]
19 | 
20 | alluxio         org.apache.zeppelin:zeppelin-alluxio:0.7.0              Alluxio interpreter
21 | angular         org.apache.zeppelin:zeppelin-angular:0.7.0              HTML and AngularJS view rendering
22 | beam            org.apache.zeppelin:zeppelin-beam:0.7.0                 Beam interpreter
23 | bigquery        org.apache.zeppelin:zeppelin-bigquery:0.7.0             BigQuery interpreter
24 | cassandra       org.apache.zeppelin:zeppelin-cassandra_2.11:0.7.0       Cassandra interpreter built with Scala 2.11
25 | elasticsearch   org.apache.zeppelin:zeppelin-elasticsearch:0.7.0        Elasticsearch interpreter
26 | file            org.apache.zeppelin:zeppelin-file:0.7.0                 HDFS file interpreter
27 | flink           org.apache.zeppelin:zeppelin-flink_2.11:0.7.0           Flink interpreter built with Scala 2.11
28 | hbase           org.apache.zeppelin:zeppelin-hbase:0.7.0                Hbase interpreter
29 | ignite          org.apache.zeppelin:zeppelin-ignite_2.11:0.7.0          Ignite interpreter built with Scala 2.11
30 | jdbc            org.apache.zeppelin:zeppelin-jdbc:0.7.0                 Jdbc interpreter
31 | kylin           org.apache.zeppelin:zeppelin-kylin:0.7.0                Kylin interpreter
32 | lens            org.apache.zeppelin:zeppelin-lens:0.7.0                 Lens interpreter
33 | livy            org.apache.zeppelin:zeppelin-livy:0.7.0                 Livy interpreter
34 | md              org.apache.zeppelin:zeppelin-markdown:0.7.0             Markdown support
35 | pig             org.apache.zeppelin:zeppelin-pig:0.7.0                  Pig interpreter
36 | python          org.apache.zeppelin:zeppelin-python:0.7.0               Python interpreter
37 | scio            org.apache.zeppelin:zeppelin-scio_2.11:0.7.0            Scio interpreter
38 | shell           org.apache.zeppelin:zeppelin-shell:0.7.0                Shell command
39 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/log4j.properties:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | log4j.rootLogger = INFO, dailyfile
19 | 
20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
23 | 
24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd
25 | log4j.appender.dailyfile.Threshold = INFO
26 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender
27 | log4j.appender.dailyfile.File = ${zeppelin.log.file}
28 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout
29 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
30 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/shiro.ini.template:
--------------------------------------------------------------------------------
  1 | #
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | #
 17 | 
 18 | [users]
 19 | # List of users with their password allowed to access Zeppelin.
 20 | # To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
 21 | # To enable admin user, uncomment the following line and set an appropriate password.
 22 | #admin = password1, admin
 23 | user1 = password2, role1, role2
 24 | user2 = password3, role3
 25 | user3 = password4, role2
 26 | 
 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm
 28 | [main]
 29 | ### A sample for configuring Active Directory Realm
 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
 31 | #activeDirectoryRealm.systemUsername = userNameA
 32 | 
 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html
 34 | #activeDirectoryRealm.systemPassword = passwordA
 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks
 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389
 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr"
 39 | #activeDirectoryRealm.authorizationCachingEnabled = false
 40 | 
 41 | ### A sample for configuring LDAP Directory Realm
 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
 43 | ## search base for ldap groups (only relevant for LdapGroupRealm):
 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM
 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389
 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
 47 | #ldapRealm.contextFactory.authenticationMechanism = simple
 48 | 
 49 | ### A sample PAM configuration
 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm
 51 | #pamRealm.service=sshd
 52 | 
 53 | ### A sample for configuring ZeppelinHub Realm
 54 | #zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
 55 | ## Url of ZeppelinHub
 56 | #zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
 57 | #securityManager.realms = $zeppelinHubRealm
 58 | 
 59 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
 60 | 
 61 | ### If caching of user is required then uncomment below lines
 62 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager
 63 | #securityManager.cacheManager = $cacheManager
 64 | 
 65 | ### Enables 'HttpOnly' flag in Zeppelin cookies
 66 | cookie = org.apache.shiro.web.servlet.SimpleCookie
 67 | cookie.name = JSESSIONID
 68 | cookie.httpOnly = true
 69 | ### Uncomment the below line only when Zeppelin is running over HTTPS
 70 | #cookie.secure = true
 71 | sessionManager.sessionIdCookie = $cookie
 72 | 
 73 | securityManager.sessionManager = $sessionManager
 74 | # 86,400,000 milliseconds = 24 hour
 75 | securityManager.sessionManager.globalSessionTimeout = 86400000
 76 | shiro.loginUrl = /api/login
 77 | 
 78 | [roles]
 79 | role1 = *
 80 | role2 = *
 81 | role3 = *
 82 | admin = *
 83 | 
 84 | [urls]
 85 | # This section is used for url-based security. For details see the shiro.ini documentation.
 86 | #
 87 | # You can secure interpreter, configuration and credential information by urls.
 88 | # Comment or uncomment the below urls that you want to hide:
 89 | # anon means the access is anonymous.
 90 | # authc means form based auth Security.
 91 | #
 92 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request
 93 | # in the order they are defined and the FIRST MATCH WINS.
 94 | #
 95 | # To allow anonymous access to all but the stated urls,
 96 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc)
 97 | #
 98 | /api/version = anon
 99 | /api/interpreter/** = authc, roles[admin]
100 | /api/configurations/** = authc, roles[admin]
101 | /api/credential/** = authc, roles[admin]
102 | #/** = anon
103 | /** = authc
104 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/conf/zeppelin/zeppelin-env.sh.template:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #    http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | #
 18 | 
 19 | # export JAVA_HOME=
 20 | # export MASTER=                 		# Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
 21 | # export ZEPPELIN_JAVA_OPTS      		# Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
 22 | # export ZEPPELIN_MEM            		# Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
 23 | # export ZEPPELIN_INTP_MEM       		# zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
 24 | # export ZEPPELIN_INTP_JAVA_OPTS 		# zeppelin interpreter process jvm options.
 25 | # export ZEPPELIN_SSL_PORT       		# ssl port (used when ssl environment variable is set to true)
 26 | 
 27 | # export ZEPPELIN_LOG_DIR        		# Where log files are stored.  PWD by default.
 28 | # export ZEPPELIN_PID_DIR        		# The pid files are stored. ${ZEPPELIN_HOME}/run by default.
 29 | # export ZEPPELIN_WAR_TEMPDIR    		# The location of jetty temporary directory.
 30 | # export ZEPPELIN_NOTEBOOK_DIR   		# Where notebook saved
 31 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN		# Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
 32 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE	# hide homescreen notebook from list when this value set to "true". default "false"
 33 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET        # Bucket where notebook saved
 34 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT      # Endpoint of the bucket
 35 | # export ZEPPELIN_NOTEBOOK_S3_USER          # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
 36 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID    # AWS KMS key ID
 37 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION      # AWS KMS key region
 38 | # export ZEPPELIN_NOTEBOOK_S3_SSE      # Server-side encryption enabled for notebooks
 39 | # export ZEPPELIN_NOTEBOOK_MONGO_URI				# MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost"
 40 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE		# Database name to store notebook. Default "zeppelin"
 41 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes"
 42 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT	# If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false"
 43 | # export ZEPPELIN_IDENT_STRING   		# A string representing this instance of zeppelin. $USER by default.
 44 | # export ZEPPELIN_NICENESS       		# The scheduling priority for daemons. Defaults to 0.
 45 | # export ZEPPELIN_INTERPRETER_LOCALREPO         # Local repository for interpreter's additional dependency loading
 46 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO       # Remote principal repository for interpreter's additional dependency loading
 47 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL     # Remote Node installer url for Helium dependency loader
 48 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL      # Remote Npm installer url for Helium dependency loader
 49 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL  # Remote Yarn package installer url for Helium dependency loader
 50 | # export ZEPPELIN_NOTEBOOK_STORAGE 		# Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
 51 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC	# If there are multiple notebook storages, should we treat the first one as the only source of truth?
 52 | # export ZEPPELIN_NOTEBOOK_PUBLIC   # Make notebook public by default when created, private otherwise
 53 | 
 54 | #### Spark interpreter configuration ####
 55 | 
 56 | ## Kerberos ticket refresh setting
 57 | ##
 58 | #export KINIT_FAIL_THRESHOLD                    # (optional) How many times should kinit retry. The default value is 5.
 59 | #export KERBEROS_REFRESH_INTERVAL               # (optional) The refresh interval for Kerberos ticket. The default value is 1d.
 60 | 
 61 | ## Use provided spark installation ##
 62 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
 63 | ##
 64 | # export SPARK_HOME                             # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
 65 | # export SPARK_SUBMIT_OPTIONS                   # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
 66 | # export SPARK_APP_NAME                         # (optional) The name of spark application.
 67 | 
 68 | ## Use embedded spark binaries ##
 69 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries.
 70 | ## however, it is not encouraged when you can define SPARK_HOME
 71 | ##
 72 | # Options read in YARN client mode
 73 | # export HADOOP_CONF_DIR         		# yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
 74 | # Pyspark (supported with Spark 1.2.1 and above)
 75 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
 76 | # export PYSPARK_PYTHON          		# path to the python command. must be the same path on the driver(Zeppelin) and all workers.
 77 | # export PYTHONPATH
 78 | 
 79 | ## Spark interpreter options ##
 80 | ##
 81 | # export ZEPPELIN_SPARK_USEHIVECONTEXT  # Use HiveContext instead of SQLContext if set true. true by default.
 82 | # export ZEPPELIN_SPARK_CONCURRENTSQL   # Execute multiple SQL concurrently if set true. false by default.
 83 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT  # Import implicits, UDF collection, and sql if set true. true by default.
 84 | # export ZEPPELIN_SPARK_MAXRESULT       # Max number of Spark SQL result to display. 1000 by default.
 85 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE       # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
 86 | 
 87 | 
 88 | #### HBase interpreter configuration ####
 89 | 
 90 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
 91 | 
 92 | # export HBASE_HOME=                    # (require) Under which HBase scripts and configuration should be
 93 | # export HBASE_CONF_DIR=                # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
 94 | 
 95 | #### ZeppelinHub connection configuration ####
 96 | # export ZEPPELINHUB_API_ADDRESS		# Refers to the address of the ZeppelinHub service in use
 97 | # export ZEPPELINHUB_API_TOKEN			# Refers to the Zeppelin instance token of the user
 98 | # export ZEPPELINHUB_USER_KEY			# Optional, when using Zeppelin with authentication.
 99 | 
100 | #### Zeppelin impersonation configuration
101 | # export ZEPPELIN_IMPERSONATE_CMD       # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
102 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER  #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
103 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/requirements.lock:
--------------------------------------------------------------------------------
 1 | dependencies:
 2 | - name: spark-hs
 3 |   repository: file://../spark-hs
 4 |   version: 0.1.0
 5 | - name: zeppelin-with-spark
 6 |   repository: file://../zeppelin-with-spark
 7 |   version: 0.1.0
 8 | - name: jupyter-with-spark
 9 |   repository: file://../jupyter-with-spark
10 |   version: 0.1.0
11 | - name: spark-rss
12 |   repository: file://../spark-rss
13 |   version: 0.1.0
14 | - name: spark-shuffle
15 |   repository: file://../spark-shuffle
16 |   version: 0.1.0
17 | digest: sha256:821eb2bc83ac2430ca9c0c259bcea7c5f7417ad58d31be672240cb04e652df62
18 | generated: 2018-05-11T11:47:45.512908935+05:30
19 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/requirements.yaml:
--------------------------------------------------------------------------------
 1 | # parent's requirements.yaml file
 2 | dependencies:
 3 |   - name: spark-hs
 4 |     repository: file://../spark-hs
 5 |     version: 0.1.0
 6 |     alias: historyserver
 7 |     condition: historyserver.enabled
 8 |   - name:  zeppelin-with-spark
 9 |     repository: file://../zeppelin-with-spark
10 |     version: 0.1.0
11 |     alias: zeppelin
12 |     condition: zeppelin.enabled
13 |   - name:  jupyter-with-spark
14 |     repository: file://../jupyter-with-spark
15 |     version: 0.1.0
16 |     alias: jupyter
17 |     condition: jupyter.enabled
18 |   - name:  spark-rss
19 |     repository: file://../spark-rss
20 |     version: 0.1.0
21 |     alias: rss
22 |   - name:  spark-shuffle
23 |     repository: file://../spark-shuffle
24 |     version: 0.1.0
25 |     alias: shuffle
26 | 
27 | 


--------------------------------------------------------------------------------
/charts/spark-umbrella/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | {{- if .Values.global.mountSecrets }}
 2 | apiVersion: v1
 3 | kind: Secret
 4 | metadata:
 5 |   name: history-secrets
 6 | type: Opaque
 7 | data:
 8 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
 9 | {{- end }}
10 | ---
11 | {{- if .Values.global.mountZeppelinConf }}
12 | apiVersion: v1
13 | kind: ConfigMap
14 | metadata:
15 |   name: {{ .Release.Name }}-zeppelin-configmap
16 | data:
17 | {{ (.Files.Glob "conf/zeppelin/*").AsConfig | indent 2 }}
18 | {{- end }}
19 | ---
20 | {{- if .Values.global.mountJupyterConf }}
21 | apiVersion: v1
22 | kind: ConfigMap
23 | metadata:
24 |   name: {{ .Release.Name }}-jupyter-configmap
25 | data:
26 | {{ (.Files.Glob "conf/jupyter/*").AsConfig | indent 2 }}
27 | {{- end }}
28 | ---
29 | {{- if .Values.global.mountSparkConf }}
30 | apiVersion: v1
31 | kind: ConfigMap
32 | metadata:
33 |   name: {{ .Release.Name }}-zpspark-configmap
34 | data:
35 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }}
36 | ---
37 | apiVersion: v1
38 | kind: ConfigMap
39 | metadata:
40 |   name: {{ .Release.Name }}-jp-spark-configmap
41 | data:
42 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }}
43 | {{- end }}
44 | ---
45 | {{- if .Values.global.mountSecrets }}
46 | apiVersion: v1
47 | kind: Secret
48 | metadata:
49 |   name: {{ .Release.Name }}-zp-secrets
50 | type: Opaque
51 | data:
52 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
53 | {{- end }}
54 | ---
55 | {{- if .Values.global.mountSecrets }}
56 | apiVersion: v1
57 | kind: Secret
58 | metadata:
59 |   name: {{ .Release.Name }}-jp-secrets
60 | type: Opaque
61 | data:
62 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
63 | {{- end }}


--------------------------------------------------------------------------------
/charts/spark-umbrella/values.yaml:
--------------------------------------------------------------------------------
  1 | ##################
  2 | # Configure Zeppelin, Jupyter, History server, Spark resource staging and Spark shuffle service here. 
  3 | # This umbrella chart configuration overrides the values.yaml in the sub-charts
  4 | ##################
  5 | 
  6 | ##################
  7 | # GLOBAL ATTRIBUTES FOR UMBRELLA CHART
  8 | ##################
  9 | global:
 10 |   mountSecrets: true
 11 |   mountSparkConf: true
 12 |   mountZeppelinConf: true
 13 |   mountJupyterConf: true
 14 |   serviceAccount: default
 15 |   # internal attribute. Do not change
 16 |   umbrellaChart: true
 17 | 
 18 | ##################
 19 | # CONFIGURATION FOR ZEPPELIN ENVIRONMENT
 20 | ##################
 21 | zeppelin:
 22 |   # whether to enable Zeppelin
 23 |   enabled: true
 24 |   # Any environment variables that need to be made available to the container are defined here
 25 |   # This may include environment variables used by Spark, Zeppelin
 26 | 
 27 |   # sparkonk8s-test.json is name of the keyfile to access GCS bucket got history log.
 28 |   # Change it to reflect name of your key file. Key file will always be in path /etc/secrets
 29 |   environment:
 30 |     SPARK_SUBMIT_OPTIONS: >-
 31 |        --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1
 32 |        --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1
 33 |        --conf spark.executor.instances=2
 34 | #       --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json
 35 |   sparkEventLog:
 36 |     enableHistoryEvents: false
 37 |     # eventsLogDir should point to a URI of GCS bucket where history events will be dumped
 38 |     eventLogDir: "gs://spark-history-server-store/"
 39 |   noteBookStorage:
 40 |     usePVForNoteBooks: true
 41 |     # If using PV for notebook storage, 'notebookDir' will be an
 42 |     # absolute path in the mounted persistent volume
 43 |     notebookDir: "/notebooks"
 44 | 
 45 |   ## Enable persistence using Persistent Volume Claims
 46 |   ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
 47 |   ##
 48 |   persistence:
 49 |     enabled: true
 50 |     ## If 'existingClaim' is defined, PVC must be created manually before
 51 |     ## volume will be bound
 52 |     # existingClaim:
 53 | 
 54 |     ## If defined, storageClassName: <storageClass>
 55 |     ## If set to "-", storageClassName: "", which disables dynamic provisioning
 56 |     ## If undefined (the default) or set to null, no storageClassName spec is
 57 |     ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
 58 |     ##   GKE, Azure & OpenStack)
 59 |     ##
 60 |     # storageClass: "-"
 61 |     accessMode: ReadWriteOnce
 62 |     size: 8Gi
 63 |     # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned
 64 |     keepResource: true
 65 | 
 66 |   resources: {}
 67 |     # limits:
 68 |     #  cpu: 100m
 69 |     #  memory: 128Mi
 70 |     # requests:
 71 |     #  cpu: 100m
 72 |     #  memory: 128Mi
 73 | 
 74 | ##################
 75 | # CONFIGURATION FOR JUPYTER NOTEBOOK ENVIRONMENT
 76 | ##################
 77 | jupyter:
 78 |   # Set false to exclude launching Jupyter notebook server with this Helm chart.
 79 |   enabled: true
 80 | 
 81 |   # If using PV for notebook storage, provide absolute path starting with /data/ in jupyter conf file
 82 |   # using 'c.NotebookApp.notebook_dir'. See jupyter_notebook_config.py.template
 83 | 
 84 |   sparkEventLog:
 85 |     enableHistoryEvents: false
 86 |     # eventsLogDir should point to a URI of GCS bucket where history events will be dumped
 87 |     eventLogDir: "gs://spark-history-server-store/"
 88 |     # Also, edit conf/spark/spark-defaults.conf to specify the keyfile for Google Cloud service account.
 89 | 
 90 |   persistence:
 91 |     enabled: true
 92 |     ## If 'existingClaim' is defined, PVC must be created manually before
 93 |     ## volume will be bound
 94 |     # existingClaim:
 95 | 
 96 |     ## If defined, storageClassName: <storageClass>
 97 |     ## If set to "-", storageClassName: "", which disables dynamic provisioning
 98 |     ## If undefined (the default) or set to null, no storageClassName spec is
 99 |     ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
100 |     ##   GKE, Azure & OpenStack)
101 |     ##
102 |     # storageClass: "-"
103 |     accessMode: ReadWriteOnce
104 |     size: 6Gi
105 |     # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned
106 |     keepResource: true
107 | 
108 |   resources: {}
109 |     # limits:
110 |     #  cpu: 100m
111 |     #  memory: 128Mi
112 |     # requests:
113 |     #  cpu: 100m
114 |     #  memory: 128Mi
115 | 
116 | ##################
117 | # CONFIGURATION FOR SPARK HISTORY SERVER
118 | ##################
119 | historyserver:
120 |   # whether to enable history server
121 |   enabled: false
122 |   historyServerConf:
123 |     # URI of the GCS bucket
124 |     eventsDir: "gs://spark-history-server-store/"
125 | 
126 |     # sparkonk8s-test.json is name of the keyfile to access GCS bucket got history log.
127 |     # Change it to reflect name of your key file. Key file will always be in path /etc/secret
128 |   environment:
129 |     SPARK_HISTORY_OPTS: -Dspark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json
130 | 
131 |   resources: {}
132 |     # limits:
133 |     #  cpu: 100m
134 |     #  memory: 128Mi
135 |     # requests:
136 |     #  cpu: 100m
137 |     #  memory: 128Mi
138 | 
139 | ##################
140 | # configuration for Spark Resource Staging Server
141 | ##################
142 | rss:
143 |   # properties that can be made available via configmap
144 |   properties:
145 |       spark.ssl.kubernetes.resourceStagingServer.enabled: false
146 | resources: {}
147 | #  limits:
148 | #    cpu: 100m
149 | #    memory: 1Gi
150 | #  requests:
151 | #    cpu: 100m
152 | #    memory: 256Mi
153 | 
154 | ##################
155 | # configuration for Spark Shuffle Service
156 | ##################
157 | shuffle:
158 |   shufflePodLabels:
159 |     app: spark-shuffle-service
160 |     spark-version: 2.2.0
161 |   resources: {}
162 | #    limits:
163 | #      cpu: 200m
164 | #    #  memory: 128Mi
165 | #    requests:
166 | #      cpu: 200m
167 | #    #  memory: 128Mi
168 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *~
18 | # Various IDEs
19 | .project
20 | .idea/
21 | *.tmproj
22 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | appVersion: "1.0"
 3 | description: Spark cluster managed by Kubernetes with Zeppelin Notebook environment 
 4 | name: zeppelin-with-spark
 5 | version: 0.1.0
 6 | home: https://github.com/apache-spark-on-k8s/spark
 7 | icon: http://spark.apache.org/images/spark-logo-trademark.png
 8 | maintainers:
 9 |   - name: SnappyData
10 |     email: chomp@snappydata.io
11 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/secrets/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/charts/zeppelin-with-spark/conf/secrets/.gitignore


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/spark/fairscheduler.xml.template:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <!--
 4 |    Licensed to the Apache Software Foundation (ASF) under one or more
 5 |    contributor license agreements.  See the NOTICE file distributed with
 6 |    this work for additional information regarding copyright ownership.
 7 |    The ASF licenses this file to You under the Apache License, Version 2.0
 8 |    (the "License"); you may not use this file except in compliance with
 9 |    the License.  You may obtain a copy of the License at
10 | 
11 |        http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |    Unless required by applicable law or agreed to in writing, software
14 |    distributed under the License is distributed on an "AS IS" BASIS,
15 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |    See the License for the specific language governing permissions and
17 |    limitations under the License.
18 | -->
19 | 
20 | <allocations>
21 |   <pool name="production">
22 |     <schedulingMode>FAIR</schedulingMode>
23 |     <weight>1</weight>
24 |     <minShare>2</minShare>
25 |   </pool>
26 |   <pool name="test">
27 |     <schedulingMode>FIFO</schedulingMode>
28 |     <weight>2</weight>
29 |     <minShare>3</minShare>
30 |   </pool>
31 | </allocations>
32 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/spark/log4j.properties.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Set everything to be logged to the console
19 | log4j.rootCategory=INFO, console
20 | log4j.appender.console=org.apache.log4j.ConsoleAppender
21 | log4j.appender.console.target=System.err
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
24 | 
25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the
26 | # log level for this class is used to overwrite the root logger's log level, so that
27 | # the user can have different defaults for the shell and regular Spark apps.
28 | log4j.logger.org.apache.spark.repl.Main=WARN
29 | 
30 | # Settings to quiet third party logs that are too verbose
31 | log4j.logger.org.spark_project.jetty=WARN
32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
35 | log4j.logger.org.apache.parquet=ERROR
36 | log4j.logger.parquet=ERROR
37 | 
38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
41 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/spark/spark-defaults.conf.template:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # Default system properties included when running spark-submit.
19 | # This is useful for setting default environmental settings.
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/spark/spark-env.sh.template:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #
 4 | # Licensed to the Apache Software Foundation (ASF) under one or more
 5 | # contributor license agreements.  See the NOTICE file distributed with
 6 | # this work for additional information regarding copyright ownership.
 7 | # The ASF licenses this file to You under the Apache License, Version 2.0
 8 | # (the "License"); you may not use this file except in compliance with
 9 | # the License.  You may obtain a copy of the License at
10 | #
11 | #    http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | 
20 | # This file is sourced when running various Spark programs.
21 | # Copy it as spark-env.sh and edit that to configure Spark for your site.
22 | 
23 | # Options read when launching programs locally with
24 | # ./bin/run-example or ./bin/spark-submit
25 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
26 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
27 | # - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
28 | 
29 | # Options read by executors and drivers running inside the cluster
30 | # - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
31 | # - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
32 | # - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
33 | # - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
34 | 
35 | # Options read in YARN client mode
36 | # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
37 | # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
38 | # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
39 | # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
40 | 
41 | # Options for the daemons used in the standalone deploy mode
42 | # - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
43 | # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
44 | # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
45 | # - SPARK_WORKER_CORES, to set the number of cores to use on this machine
46 | # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
47 | # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
48 | # - SPARK_WORKER_DIR, to set the working directory of worker processes
49 | # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
50 | # - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
51 | # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
52 | # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
53 | # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
54 | # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
55 | 
56 | # Generic options for the daemons used in the standalone deploy mode
57 | # - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
58 | # - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
59 | # - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
60 | # - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
61 | # - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
62 | # - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
63 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/zeppelin/configuration.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!--
 3 |   Licensed to the Apache Software Foundation (ASF) under one
 4 |   or more contributor license agreements.  See the NOTICE file
 5 |   distributed with this work for additional information
 6 |   regarding copyright ownership.  The ASF licenses this file
 7 |   to you under the Apache License, Version 2.0 (the
 8 |   "License"); you may not use this file except in compliance
 9 |   with the License.  You may obtain a copy of the License at
10 | 
11 |       http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |   Unless required by applicable law or agreed to in writing, software
14 |   distributed under the License is distributed on an "AS IS" BASIS,
15 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |   See the License for the specific language governing permissions and
17 |   limitations under the License.
18 | -->
19 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
20 | <xsl:output method="html"/>
21 | <xsl:template match="configuration">
22 | <html>
23 | <body>
24 | <table border="1">
25 | <tr>
26 |  <td>name</td>
27 |  <td>value</td>
28 |  <td>description</td>
29 | </tr>
30 | <xsl:for-each select="property">
31 | <tr>
32 |   <td><a name="{name}"><xsl:value-of select="name"/></a></td>
33 |   <td><xsl:value-of select="value"/></td>
34 |   <td><xsl:value-of select="description"/></td>
35 | </tr>
36 | </xsl:for-each>
37 | </table>
38 | </body>
39 | </html>
40 | </xsl:template>
41 | </xsl:stylesheet>
42 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/zeppelin/interpreter-list:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | #
18 | # [name]  [maven artifact]  [description]
19 | 
20 | alluxio         org.apache.zeppelin:zeppelin-alluxio:0.7.0              Alluxio interpreter
21 | angular         org.apache.zeppelin:zeppelin-angular:0.7.0              HTML and AngularJS view rendering
22 | beam            org.apache.zeppelin:zeppelin-beam:0.7.0                 Beam interpreter
23 | bigquery        org.apache.zeppelin:zeppelin-bigquery:0.7.0             BigQuery interpreter
24 | cassandra       org.apache.zeppelin:zeppelin-cassandra_2.11:0.7.0       Cassandra interpreter built with Scala 2.11
25 | elasticsearch   org.apache.zeppelin:zeppelin-elasticsearch:0.7.0        Elasticsearch interpreter
26 | file            org.apache.zeppelin:zeppelin-file:0.7.0                 HDFS file interpreter
27 | flink           org.apache.zeppelin:zeppelin-flink_2.11:0.7.0           Flink interpreter built with Scala 2.11
28 | hbase           org.apache.zeppelin:zeppelin-hbase:0.7.0                Hbase interpreter
29 | ignite          org.apache.zeppelin:zeppelin-ignite_2.11:0.7.0          Ignite interpreter built with Scala 2.11
30 | jdbc            org.apache.zeppelin:zeppelin-jdbc:0.7.0                 Jdbc interpreter
31 | kylin           org.apache.zeppelin:zeppelin-kylin:0.7.0                Kylin interpreter
32 | lens            org.apache.zeppelin:zeppelin-lens:0.7.0                 Lens interpreter
33 | livy            org.apache.zeppelin:zeppelin-livy:0.7.0                 Livy interpreter
34 | md              org.apache.zeppelin:zeppelin-markdown:0.7.0             Markdown support
35 | pig             org.apache.zeppelin:zeppelin-pig:0.7.0                  Pig interpreter
36 | python          org.apache.zeppelin:zeppelin-python:0.7.0               Python interpreter
37 | scio            org.apache.zeppelin:zeppelin-scio_2.11:0.7.0            Scio interpreter
38 | shell           org.apache.zeppelin:zeppelin-shell:0.7.0                Shell command
39 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/zeppelin/log4j.properties:
--------------------------------------------------------------------------------
 1 | #
 2 | # Licensed to the Apache Software Foundation (ASF) under one or more
 3 | # contributor license agreements.  See the NOTICE file distributed with
 4 | # this work for additional information regarding copyright ownership.
 5 | # The ASF licenses this file to You under the Apache License, Version 2.0
 6 | # (the "License"); you may not use this file except in compliance with
 7 | # the License.  You may obtain a copy of the License at
 8 | #
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | log4j.rootLogger = INFO, dailyfile
19 | 
20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
23 | 
24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd
25 | log4j.appender.dailyfile.Threshold = INFO
26 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender
27 | log4j.appender.dailyfile.File = ${zeppelin.log.file}
28 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout
29 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
30 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/zeppelin/shiro.ini.template:
--------------------------------------------------------------------------------
  1 | #
  2 | # Licensed to the Apache Software Foundation (ASF) under one or more
  3 | # contributor license agreements.  See the NOTICE file distributed with
  4 | # this work for additional information regarding copyright ownership.
  5 | # The ASF licenses this file to You under the Apache License, Version 2.0
  6 | # (the "License"); you may not use this file except in compliance with
  7 | # the License.  You may obtain a copy of the License at
  8 | #
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | #
 17 | 
 18 | [users]
 19 | # List of users with their password allowed to access Zeppelin.
 20 | # To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
 21 | # To enable admin user, uncomment the following line and set an appropriate password.
 22 | #admin = password1, admin
 23 | user1 = password2, role1, role2
 24 | user2 = password3, role3
 25 | user3 = password4, role2
 26 | 
 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm
 28 | [main]
 29 | ### A sample for configuring Active Directory Realm
 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
 31 | #activeDirectoryRealm.systemUsername = userNameA
 32 | 
 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html
 34 | #activeDirectoryRealm.systemPassword = passwordA
 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks
 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389
 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr"
 39 | #activeDirectoryRealm.authorizationCachingEnabled = false
 40 | 
 41 | ### A sample for configuring LDAP Directory Realm
 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
 43 | ## search base for ldap groups (only relevant for LdapGroupRealm):
 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM
 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389
 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
 47 | #ldapRealm.contextFactory.authenticationMechanism = simple
 48 | 
 49 | ### A sample PAM configuration
 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm
 51 | #pamRealm.service=sshd
 52 | 
 53 | ### A sample for configuring ZeppelinHub Realm
 54 | #zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
 55 | ## Url of ZeppelinHub
 56 | #zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
 57 | #securityManager.realms = $zeppelinHubRealm
 58 | 
 59 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
 60 | 
 61 | ### If caching of user is required then uncomment below lines
 62 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager
 63 | #securityManager.cacheManager = $cacheManager
 64 | 
 65 | ### Enables 'HttpOnly' flag in Zeppelin cookies
 66 | cookie = org.apache.shiro.web.servlet.SimpleCookie
 67 | cookie.name = JSESSIONID
 68 | cookie.httpOnly = true
 69 | ### Uncomment the below line only when Zeppelin is running over HTTPS
 70 | #cookie.secure = true
 71 | sessionManager.sessionIdCookie = $cookie
 72 | 
 73 | securityManager.sessionManager = $sessionManager
 74 | # 86,400,000 milliseconds = 24 hour
 75 | securityManager.sessionManager.globalSessionTimeout = 86400000
 76 | shiro.loginUrl = /api/login
 77 | 
 78 | [roles]
 79 | role1 = *
 80 | role2 = *
 81 | role3 = *
 82 | admin = *
 83 | 
 84 | [urls]
 85 | # This section is used for url-based security. For details see the shiro.ini documentation.
 86 | #
 87 | # You can secure interpreter, configuration and credential information by urls.
 88 | # Comment or uncomment the below urls that you want to hide:
 89 | # anon means the access is anonymous.
 90 | # authc means form based auth Security.
 91 | #
 92 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request
 93 | # in the order they are defined and the FIRST MATCH WINS.
 94 | #
 95 | # To allow anonymous access to all but the stated urls,
 96 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc)
 97 | #
 98 | /api/version = anon
 99 | /api/interpreter/** = authc, roles[admin]
100 | /api/configurations/** = authc, roles[admin]
101 | /api/credential/** = authc, roles[admin]
102 | #/** = anon
103 | /** = authc
104 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/conf/zeppelin/zeppelin-env.sh.template:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #    http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | #
 18 | 
 19 | # export JAVA_HOME=
 20 | # export MASTER=                 		# Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
 21 | # export ZEPPELIN_JAVA_OPTS      		# Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
 22 | # export ZEPPELIN_MEM            		# Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
 23 | # export ZEPPELIN_INTP_MEM       		# zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
 24 | # export ZEPPELIN_INTP_JAVA_OPTS 		# zeppelin interpreter process jvm options.
 25 | # export ZEPPELIN_SSL_PORT       		# ssl port (used when ssl environment variable is set to true)
 26 | 
 27 | # export ZEPPELIN_LOG_DIR        		# Where log files are stored.  PWD by default.
 28 | # export ZEPPELIN_PID_DIR        		# The pid files are stored. ${ZEPPELIN_HOME}/run by default.
 29 | # export ZEPPELIN_WAR_TEMPDIR    		# The location of jetty temporary directory.
 30 | # export ZEPPELIN_NOTEBOOK_DIR   		# Where notebook saved
 31 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN		# Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
 32 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE	# hide homescreen notebook from list when this value set to "true". default "false"
 33 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET        # Bucket where notebook saved
 34 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT      # Endpoint of the bucket
 35 | # export ZEPPELIN_NOTEBOOK_S3_USER          # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
 36 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID    # AWS KMS key ID
 37 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION      # AWS KMS key region
 38 | # export ZEPPELIN_NOTEBOOK_S3_SSE      # Server-side encryption enabled for notebooks
 39 | # export ZEPPELIN_NOTEBOOK_MONGO_URI				# MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost"
 40 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE		# Database name to store notebook. Default "zeppelin"
 41 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes"
 42 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT	# If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false"
 43 | # export ZEPPELIN_IDENT_STRING   		# A string representing this instance of zeppelin. $USER by default.
 44 | # export ZEPPELIN_NICENESS       		# The scheduling priority for daemons. Defaults to 0.
 45 | # export ZEPPELIN_INTERPRETER_LOCALREPO         # Local repository for interpreter's additional dependency loading
 46 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO       # Remote principal repository for interpreter's additional dependency loading
 47 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL     # Remote Node installer url for Helium dependency loader
 48 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL      # Remote Npm installer url for Helium dependency loader
 49 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL  # Remote Yarn package installer url for Helium dependency loader
 50 | # export ZEPPELIN_NOTEBOOK_STORAGE 		# Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
 51 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC	# If there are multiple notebook storages, should we treat the first one as the only source of truth?
 52 | # export ZEPPELIN_NOTEBOOK_PUBLIC   # Make notebook public by default when created, private otherwise
 53 | 
 54 | #### Spark interpreter configuration ####
 55 | 
 56 | ## Kerberos ticket refresh setting
 57 | ##
 58 | #export KINIT_FAIL_THRESHOLD                    # (optional) How many times should kinit retry. The default value is 5.
 59 | #export KERBEROS_REFRESH_INTERVAL               # (optional) The refresh interval for Kerberos ticket. The default value is 1d.
 60 | 
 61 | ## Use provided spark installation ##
 62 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
 63 | ##
 64 | # export SPARK_HOME                             # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
 65 | # export SPARK_SUBMIT_OPTIONS                   # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
 66 | # export SPARK_APP_NAME                         # (optional) The name of spark application.
 67 | 
 68 | ## Use embedded spark binaries ##
 69 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries.
 70 | ## however, it is not encouraged when you can define SPARK_HOME
 71 | ##
 72 | # Options read in YARN client mode
 73 | # export HADOOP_CONF_DIR         		# yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
 74 | # Pyspark (supported with Spark 1.2.1 and above)
 75 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
 76 | # export PYSPARK_PYTHON          		# path to the python command. must be the same path on the driver(Zeppelin) and all workers.
 77 | # export PYTHONPATH
 78 | 
 79 | ## Spark interpreter options ##
 80 | ##
 81 | # export ZEPPELIN_SPARK_USEHIVECONTEXT  # Use HiveContext instead of SQLContext if set true. true by default.
 82 | # export ZEPPELIN_SPARK_CONCURRENTSQL   # Execute multiple SQL concurrently if set true. false by default.
 83 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT  # Import implicits, UDF collection, and sql if set true. true by default.
 84 | # export ZEPPELIN_SPARK_MAXRESULT       # Max number of Spark SQL result to display. 1000 by default.
 85 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE       # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
 86 | 
 87 | 
 88 | #### HBase interpreter configuration ####
 89 | 
 90 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
 91 | 
 92 | # export HBASE_HOME=                    # (require) Under which HBase scripts and configuration should be
 93 | # export HBASE_CONF_DIR=                # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
 94 | 
 95 | #### ZeppelinHub connection configuration ####
 96 | # export ZEPPELINHUB_API_ADDRESS		# Refers to the address of the ZeppelinHub service in use
 97 | # export ZEPPELINHUB_API_TOKEN			# Refers to the Zeppelin instance token of the user
 98 | # export ZEPPELINHUB_USER_KEY			# Optional, when using Zeppelin with authentication.
 99 | 
100 | #### Zeppelin impersonation configuration
101 | # export ZEPPELIN_IMPERSONATE_CMD       # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
102 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER  #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
103 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/NOTES.txt:
--------------------------------------------------------------------------------
 1 | Get the application URL by running these commands:
 2 | {{ if contains "NodePort" .Values.zeppelinService.type }}
 3 |   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "zeppelin-with-spark.fullname" . }})
 4 |   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
 5 |   echo http://$NODE_IP:$NODE_PORT
 6 | {{- else if contains "LoadBalancer" .Values.zeppelinService.type }}
 7 |      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
 8 |            You can watch the status of by running 'kubectl get svc -w {{ template "zeppelin-with-spark.fullname" . }}'
 9 |   export ZEPPELIN_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "zeppelin-with-spark.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
10 |   echo "Access Zeppelin at http://$ZEPPELIN_SERVICE_IP:{{ .Values.zeppelinService.zeppelinPort }}"
11 |   echo "Access Spark at http://$ZEPPELIN_SERVICE_IP:{{ .Values.zeppelinService.sparkUIPort }} after a Spark job is run."
12 | {{- else if contains "ClusterIP" .Values.zeppelinService.type }}
13 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "zeppelin-with-spark.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
14 |   echo "Visit http://127.0.0.1:8080 to access Zeppelin server"
15 |   kubectl port-forward $POD_NAME 8080:80
16 | {{- end }}
17 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/* vim: set filetype=mustache: */}}
 2 | {{/*
 3 | Expand the name of the chart.
 4 | */}}
 5 | {{- define "zeppelin-with-spark.name" -}}
 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
 7 | {{- end -}}
 8 | 
 9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "zeppelin-with-spark.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 | 
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "zeppelin-with-spark.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: {{ .Release.Name }}-zpenv-configmap
 5 | data:
 6 |   {{- range $key, $val := .Values.environment }}
 7 |   {{ $key }}: {{ $val | quote }}
 8 |   {{- end}}
 9 |   {{- range $key, $val := .Values.noteBookStorage }}
10 |   {{ $key }}: {{ $val | quote }}
11 |   {{- end}}
12 |   {{- range $key, $val := .Values.sparkEventLog }}
13 |   {{ $key }}: {{ $val | quote }}
14 |   {{- end}}
15 | ---
16 | {{- if (not .Values.global.umbrellaChart) }}
17 | apiVersion: v1
18 | kind: ConfigMap
19 | metadata:
20 |   name: {{ .Release.Name }}-zeppelin-configmap
21 | data:
22 | {{ (.Files.Glob "conf/zeppelin/*").AsConfig | indent 2 }}
23 | {{- end }}
24 | ---
25 | {{- if (not .Values.global.umbrellaChart) }}
26 | apiVersion: v1
27 | kind: ConfigMap
28 | metadata:
29 |   name: {{ .Release.Name }}-zpspark-configmap
30 | data:
31 | {{ (.Files.Glob "conf/spark/*").AsConfig | indent 2 }}
32 | {{- end }}
33 | ---
34 | {{- if and .Values.mountSecrets (not .Values.global.umbrellaChart) }}
35 | apiVersion: v1
36 | kind: Secret
37 | metadata:
38 |   name: {{ .Release.Name }}-zp-secrets
39 | type: Opaque
40 | data:
41 | {{ (.Files.Glob "conf/secrets/*").AsSecrets | indent 2 }}
42 | {{- end }}


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/deployment.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: apps/v1beta1
  2 | kind: Deployment
  3 | metadata:
  4 |   name: {{ template "zeppelin-with-spark.fullname" . }}
  5 |   labels:
  6 |     app: {{ template "zeppelin-with-spark.name" . }}
  7 |     chart: {{ template "zeppelin-with-spark.chart" . }}
  8 |     release: {{ .Release.Name }}
  9 |     heritage: {{ .Release.Service }}
 10 | spec:
 11 |   replicas: 1
 12 |   selector:
 13 |     matchLabels:
 14 |       app: {{ template "zeppelin-with-spark.name" . }}
 15 |       release: {{ .Release.Name }}
 16 |   template:
 17 |     metadata:
 18 |       labels:
 19 |         app: {{ template "zeppelin-with-spark.name" . }}
 20 |         release: {{ .Release.Name }}
 21 |     spec:
 22 |       containers:
 23 |         - name: {{ .Chart.Name }}
 24 |           image: "{{ .Values.image.repository }}/{{ .Values.image.tag }}"
 25 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
 26 |           ports:
 27 |             - name: http
 28 |               containerPort: {{ .Values.zeppelinService.zeppelinPort }}
 29 |               protocol: TCP
 30 |             - name: web-ui
 31 |               containerPort: {{ .Values.zeppelinService.sparkUIPort }}
 32 |               protocol: TCP
 33 |           command:
 34 |             - "/bin/bash"
 35 |             - "-c"
 36 |             - >
 37 |               cp /spark_conf/* /opt/spark/conf;
 38 |               cp /zeppelin_conf/* /zeppelin/conf;
 39 |               export MASTER=k8s://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT;
 40 |               if [ "$usePVForNoteBooks" == "true" ]; then
 41 |                 export ZEPPELIN_NOTEBOOK_DIR="/data/$notebookDir"
 42 |                 mkdir -p "$ZEPPELIN_NOTEBOOK_DIR"
 43 |                 cp -nR /zeppelin/notebook/* "$ZEPPELIN_NOTEBOOK_DIR"
 44 |               fi;
 45 |               if [ "$enableHistoryEvents" == "true" ]; then
 46 |                 # currently not using PV for history events so commented out
 47 |                 #if [ "$usePVForEventsLog" == "true" ]; then
 48 |                 #  SPARK_EVENTS_DIR="/data/$eventLogDir"
 49 |                 #  mkdir -p "$SPARK_EVENTS_DIR"
 50 |                 #else
 51 |                 #  SPARK_EVENTS_DIR="$eventLogDir"
 52 |                 #fi
 53 |                 SPARK_EVENTS_DIR="$eventLogDir"
 54 |                 SPARK_EVENT_LOG_CONFIG=" --conf spark.eventLog.enabled=true --conf spark.eventLog.dir=$SPARK_EVENTS_DIR"
 55 |               fi;
 56 |               {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
 57 |               SPARK_SECRETS_CONFIG=" --conf spark.kubernetes.driver.secrets.{{ .Release.Name }}-zp-secrets=/etc/secrets \
 58 |                 --conf spark.kubernetes.executor.secrets.{{ .Release.Name }}-zp-secrets=/etc/secrets"
 59 |               {{- end }}
 60 |               export SPARK_SUBMIT_OPTIONS="$SPARK_SUBMIT_OPTIONS $SPARK_EVENT_LOG_CONFIG $SPARK_SECRETS_CONFIG \
 61 |                --conf spark.kubernetes.namespace={{ .Release.Namespace }}
 62 |                --conf spark.kubernetes.driver.pod.name=$HOSTNAME \
 63 |                --conf spark.kubernetes.authenticate.driver.serviceAccountName={{ .Values.global.serviceAccount | default .Values.serviceAccount }} \
 64 |                --conf spark.ui.port={{ .Values.zeppelinService.sparkUIPort }}";
 65 |               echo "SPARK_SUBMIT_OPTIONS are: $SPARK_SUBMIT_OPTIONS";
 66 |               rm -f /zeppelin/conf/interpreter.json;
 67 |               echo "Starting Zeppelin server ...";
 68 |               /zeppelin/bin/zeppelin.sh
 69 |           envFrom:
 70 |           - configMapRef:
 71 |               name: {{ .Release.Name }}-zpenv-configmap
 72 |           livenessProbe:
 73 |             httpGet:
 74 |               path: /
 75 |               port: http
 76 |             initialDelaySeconds: 30
 77 |           readinessProbe:
 78 |             httpGet:
 79 |               path: /
 80 |               port: http
 81 |             initialDelaySeconds: 30
 82 |           resources:
 83 | {{ toYaml .Values.resources | indent 12 }}
 84 |           volumeMounts:
 85 |           - name: data
 86 |             mountPath: /data/
 87 |           - name: zeppelin-config-properties
 88 |             mountPath: /zeppelin_conf
 89 |           - name: spark-config-properties
 90 |             mountPath: /spark_conf
 91 |           {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
 92 |           - name: secrets-volume
 93 |             mountPath: /etc/secrets
 94 |           {{- end }}
 95 |     {{- with .Values.nodeSelector }}
 96 |       nodeSelector:
 97 | {{ toYaml . | indent 8 }}
 98 |     {{- end }}
 99 |       serviceAccount: {{ .Values.global.serviceAccount | default .Values.serviceAccount }}
100 |     {{- with .Values.affinity }}
101 |       affinity:
102 | {{ toYaml . | indent 8 }}
103 |     {{- end }}
104 |     {{- with .Values.tolerations }}
105 |       tolerations:
106 | {{ toYaml . | indent 8 }}
107 |     {{- end }}
108 |       volumes:
109 |         {{- if or .Values.mountSecrets .Values.global.mountSecrets }}
110 |         - name: secrets-volume
111 |           secret:
112 |             secretName: {{ .Release.Name }}-zp-secrets
113 |         {{- end }}
114 |         - name: spark-config-properties
115 |           configMap:
116 |             name: {{ .Release.Name }}-zpspark-configmap
117 |         - name: zeppelin-config-properties
118 |           configMap:
119 |             name: {{ .Release.Name }}-zeppelin-configmap
120 |         - name: data
121 |         {{- if .Values.persistence.enabled }}
122 |           persistentVolumeClaim:
123 |             claimName: {{ .Values.persistence.existingClaim | default (include "zeppelin-with-spark.fullname" .) }}
124 |         {{- else }}
125 |           emptyDir: {}
126 |         {{- end -}}


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/pvc.yaml:
--------------------------------------------------------------------------------
 1 | {{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }}
 2 | kind: PersistentVolumeClaim
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: {{ template "zeppelin-with-spark.fullname" . }}
 6 |   labels:
 7 |     app: {{ template "zeppelin-with-spark.fullname" . }}
 8 |     chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 9 |     release: "{{ .Release.Name }}"
10 |     heritage: "{{ .Release.Service }}"
11 | {{- if .Values.persistence.keepResource }}
12 |   annotations:
13 |     "helm.sh/resource-policy": keep
14 | {{- end }}
15 | spec:
16 |   accessModes:
17 |     - {{ .Values.persistence.accessMode | quote }}
18 |   resources:
19 |     requests:
20 |       storage: {{ .Values.persistence.size | quote }}
21 | {{- if .Values.persistence.storageClass }}
22 | {{- if (eq "-" .Values.persistence.storageClass) }}
23 |   storageClassName: ""
24 | {{- else }}
25 |   storageClassName: "{{ .Values.persistence.storageClass }}"
26 | {{- end }}
27 | {{- end }}
28 | {{- end }}
29 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ template "zeppelin-with-spark.fullname" . }}
 5 |   labels:
 6 |     app: {{ template "zeppelin-with-spark.name" . }}
 7 |     chart: {{ template "zeppelin-with-spark.chart" . }}
 8 |     release: {{ .Release.Name }}
 9 |     heritage: {{ .Release.Service }}
10 | spec:
11 |   type: {{ .Values.zeppelinService.type | default "LoadBalancer" | quote }}
12 |   ports:
13 |     - port: {{ .Values.zeppelinService.zeppelinPort }}
14 |       targetPort: http
15 |       protocol: TCP
16 |       name: http
17 |     - port: {{ .Values.zeppelinService.sparkUIPort }}
18 |       targetPort: web-ui
19 |       protocol: TCP
20 |       name: web-ui
21 |   selector:
22 |     app: {{ template "zeppelin-with-spark.name" . }}
23 |     release: {{ .Release.Name }}
24 | ---
25 | 


--------------------------------------------------------------------------------
/charts/zeppelin-with-spark/values.yaml:
--------------------------------------------------------------------------------
 1 | # Default values for zeppelin-with-spark.
 2 | # This is a YAML-formatted file.
 3 | # Declare variables to be passed into your templates.
 4 | 
 5 | image:
 6 |   repository: snappydatainc
 7 |   tag: zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1-test.1
 8 |   pullPolicy: IfNotPresent
 9 | 
10 | zeppelinService:
11 |   type: LoadBalancer
12 |   zeppelinPort: 8080
13 |   sparkUIPort: 4040
14 | 
15 | serviceAccount: default
16 | 
17 | # Any environment variables that need to be made available to the container are defined here
18 | # This may include environment variables used by Spark, Zeppelin
19 | environment:
20 |   # Provide configuration parameters, use syntax as expected by spark-submit
21 |   SPARK_SUBMIT_OPTIONS: >-
22 |      --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1
23 |      --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1
24 |      --conf spark.executor.instances=2
25 | #     --conf spark.hadoop.google.cloud.auth.service.account.json.keyfile=/etc/secrets/sparkonk8s-test.json
26 | 
27 | sparkEventLog:
28 |   enableHistoryEvents: false
29 |   # eventsLogDir should point to a URI of GCS bucket where history events will be dumped
30 |   eventLogDir: "gs://spark-history-server-store/"
31 | 
32 | # if mountSecrets is set to true files in 'conf/secrets' directory will be mounted
33 | # on path '/etc/secrets' as secrets
34 | mountSecrets: false
35 | 
36 | noteBookStorage:
37 |   usePVForNoteBooks: true
38 |   # If using PV for notebook storage, 'notebookDir' will be an
39 |   # absolute path in the mounted persistent volume
40 |   notebookDir: "/notebooks"
41 | 
42 | ## Enable persistence using Persistent Volume Claims
43 | ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
44 | ##
45 | persistence:
46 |   enabled: true
47 |   ## If 'existingClaim' is defined, PVC must be created manually before
48 |   ## volume will be bound
49 |   # existingClaim:
50 | 
51 |   ## If defined, storageClassName: <storageClass>
52 |   ## If set to "-", storageClassName: "", which disables dynamic provisioning
53 |   ## If undefined (the default) or set to null, no storageClassName spec is
54 |   ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
55 |   ##   GKE, Azure & OpenStack)
56 |   ##
57 |   # storageClass: "-"
58 |   accessMode: ReadWriteOnce
59 |   size: 8Gi
60 |   # Whether to keep the PVC when chart is deleted, if PV is dynamically provisioned
61 |   keepResource: true
62 | 
63 | resources: {}
64 |   # limits:
65 |   #  cpu: 100m
66 |   #  memory: 128Mi
67 |   # requests:
68 |   #  cpu: 100m
69 |   #  memory: 128Mi
70 | 
71 | nodeSelector: {}
72 | 
73 | tolerations: []
74 | 
75 | affinity: {}
76 | 
77 | #internal attribute, do not change
78 | global:
79 |   umbrellaChart: false


--------------------------------------------------------------------------------
/dockerfiles/jupyter/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Jupyter Development Team.
 2 | # Distributed under the terms of the Modified BSD License.
 3 | 
 4 | # Refer to https://github.com/SnappyDataInc/spark-on-k8s/tree/master/docs/building-images.md#jupyter-image
 5 | # for instructions to build the Docker image.
 6 | # This Dockerfile should be present in the same directory where spark-on-k8s distribution
 7 | # directory (spark-2.2.0-k8s-0.5.0-bin-2.7.3) is kept.
 8 | 
 9 | FROM jupyter/scipy-notebook
10 | 
11 | USER root
12 | 
13 | # Copied from pyspark notebook- start
14 | # Spark dependencies
15 | ENV APACHE_SPARK_VERSION 2.2.0
16 | ENV HADOOP_VERSION 2.7
17 | 
18 | RUN apt-get -y update && \
19 |     apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
20 |     apt-get clean && \
21 |     rm -rf /var/lib/apt/lists/*
22 | 
23 | # Copied from pyspark notebook- end
24 | 
25 | ####### Begin changes for Spark-on-k8s #################
26 | 
27 | RUN mkdir -p /opt/spark && \
28 |     mkdir -p /opt/spark/work-dir \
29 |     touch /opt/spark/RELEASE && \
30 |     rm -f /bin/sh && \
31 |     ln -sv /bin/bash /bin/sh && \
32 |     chgrp root /etc/passwd && chmod ug+rw /etc/passwd
33 | 
34 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/jars /opt/spark/jars
35 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/bin /opt/spark/bin
36 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/sbin /opt/spark/sbin
37 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/conf /opt/spark/conf
38 | COPY spark-2.2.0-k8s-0.5.0-bin-2.7.3/dockerfiles/spark-base/entrypoint.sh /opt/
39 | 
40 | ADD spark-2.2.0-k8s-0.5.0-bin-2.7.3/examples /opt/spark/examples
41 | ADD spark-2.2.0-k8s-0.5.0-bin-2.7.3/python /opt/spark/python
42 | 
43 | # Copy aws and gcp jars
44 | # COPY aws_gcp_jars/hadoop-aws-2.7.3.jar /opt/spark/jars
45 | # COPY aws_gcp_jars/aws-java-sdk-1.7.4.jar /opt/spark/jars
46 | # COPY aws_gcp_jars/gcs-connector-latest-hadoop2.jar /opt/spark/jars
47 | 
48 | ENV SPARK_HOME /opt/spark
49 | 
50 | ENV PYTHON_VERSION 2.7.13
51 | ENV PYSPARK_PYTHON python
52 | ENV PYSPARK_DRIVER_PYTHON python
53 | ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH}
54 | 
55 | CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
56 |     env | grep SPARK_JAVA_OPT_ | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt && \
57 |     readarray -t SPARK_DRIVER_JAVA_OPTS < /tmp/java_opts.txt && \
58 |     if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \
59 |     if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
60 |     if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
61 |     if ! [ -z ${SPARK_MOUNTED_FILES_DIR+x} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \
62 |     if ! [ -z ${SPARK_MOUNTED_FILES_FROM_SECRET_DIR+x} ]; then cp -R "$SPARK_MOUNTED_FILES_FROM_SECRET_DIR/." .; fi && \
63 |     ${JAVA_HOME}/bin/java "${SPARK_DRIVER_JAVA_OPTS[@]}" -cp "$SPARK_CLASSPATH" -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY -Dspark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS $SPARK_DRIVER_CLASS $PYSPARK_PRIMARY $PYSPARK_FILES $SPARK_DRIVER_ARGS
64 | 
65 | 
66 | # Copied from pyspark notebook- start
67 | # Spark config
68 | ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip
69 | ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
70 | # Copied from pyspark notebook- end
71 | 
72 | RUN chown -R $NB_USER:users /opt/spark
73 | 
74 | ####### End changes for Spark-on-k8s ##########################
75 | 
76 | USER $NB_USER
77 | 
78 | # Added to anble python2 notebooks
79 | RUN conda create --quiet --yes \
80 |      -n ipykernel_py2 python=2 ipykernel && \
81 |      source activate ipykernel_py2 && \
82 |      python -m ipykernel install --user
83 | 
84 | 
85 | RUN  source activate ipykernel_py2 && \
86 |      conda install --yes \
87 |      matplotlib \
88 |      scipy \
89 |      numpy \
90 |      pandas \ 
91 |      nltk \
92 |      tensorflow && \
93 |      source activate ipykernel_py2 && \
94 |      pip install \
95 |      sklearn \
96 |      wordcloud \
97 |      treeinterpreter
98 | 
99 | 


--------------------------------------------------------------------------------
/dockerfiles/zeppelin/setSparkEnvVars.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export MASTER=k8s://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT
4 | export SPARK_SUBMIT_OPTIONS="--kubernetes-namespace default --conf spark.kubernetes.driver.pod.name=$HOSTNAME --conf spark.kubernetes.driver.docker.image=snappydatainc/spark-driver:v2.2.0-kubernetes-0.5.1 --conf spark.kubernetes.executor.docker.image=snappydatainc/spark-executor:v2.2.0-kubernetes-0.5.1"
5 | 


--------------------------------------------------------------------------------
/docs/building-images.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Building and publishing Docker images
  3 | 
  4 | ## Prerequisites
  5 | 
  6 | You should have Docker installed on your local setup from where you would want to build and publish the Docker images.
  7 | 
  8 | Refer to [this page](https://docs.docker.com/install) to get information about installing Docker.
  9 | 
 10 | ## Spark Images
 11 | 
 12 | The binaries used to build the Spark images are based on the [spark-on-k8s](https://github.com/apache-spark-on-k8s/spark) project, with few additional changes.
 13 | These have been committed into a clone of branch-2.2-kubernetes branch in above repository, and is available as a branch in SnappyData's fork of Apache Spark.
 14 | 
 15 | Get the latest branch:
 16 | 
 17 | ```bash
 18 | $ git clone https://github.com/SnappyDataInc/spark.git -b snappy/branch-2.2-kubernetes
 19 | ```
 20 | 
 21 | Go to the checkout directory and build the project using [maven](https://maven.apache.org/install.html).
 22 | Also, package the build into a tarball, which will be needed when building the Docker images for Jupyter and Apache Zeppelin.
 23 | 
 24 | ```bash
 25 | $ ./build/mvn -Pkubernetes -DskipTests clean package
 26 | $ ./dev/make-distribution.sh --name 2.7.3 --pip --tgz -Phadoop-2.7 -Phive -Phive-thriftserver -Pkubernetes
 27 | ```
 28 | 
 29 | Now that the binaries are built, you also need to download and place following jars into the directories
 30 | assembly/target/scala-2.11/jars and dist/jars of your checkout.
 31 | These are needed for enabling access to Google Cloud Storage and AWS S3 buckets, which your Spark applications may need.
 32 | 
 33 | 1. [aws-java-sdk-1.7.4.jar](http://central.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar)
 34 | 2. [hadoop-aws-2.7.3.jar](http://central.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar)
 35 | 3. [gcs-connector-latest-hadoop2.jar](https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar)
 36 | 
 37 | Now build and publish the Docker images to your DockerHub account. It may take several minutes depending upon your network speed.
 38 | 
 39 | ```bash
 40 | $ ./sbin/build-push-docker-images.sh -r <your-docker-repo-name> -t <image-tag> build
 41 | ```
 42 | 
 43 | Make sure you are logged in to your Docker Hub account before publishing the images:
 44 | 
 45 | ```bash
 46 | $ docker login
 47 | Login with your Docker ID to push and pull images from Docker Hub. If you don't have a Docker ID, head over to https://hub.docker.com to create one.
 48 | Username: <your-account-username>
 49 | Password: <password>
 50 | $ ./sbin/build-push-docker-images.sh -r <your-docker-repo-name> -t <image-tag> push
 51 | ```
 52 | 
 53 | ## Jupyter Image
 54 | 
 55 | This image will contain the Spark binaries you built above apart from the dependencies needed for Jupyter Notebook server.
 56 | 
 57 | Extract the Spark tarball generated above into a directory where you have copied the [Dockerfile for Jupyter image](../dockerfiles/jupyter/Dockerfile) to.
 58 | 
 59 | Make sure that the third party jars needed to access GCS and AWS S3 are copied to the jars directory of the extracted tarball.
 60 | 
 61 | Build and publish the Jupyter image:
 62 | 
 63 | ```bash
 64 | $ docker build -t <your-docker-repo-name>/jupyter-notebook:<image-tag> -f Dockerfile .
 65 | $ docker push <your-docker-repo-name>/jupyter-notebook:<image-tag>
 66 | ```
 67 | 
 68 | For example:
 69 | ```bash
 70 | $ docker build -t snappydatainc/jupyter-notebook:5.2.2-spark-v2.2.0-kubernetes-0.5.1 -f Dockerfile .
 71 | $ docker push snappydatainc/jupyter-notebook:5.2.2-spark-v2.2.0-kubernetes-0.5.1
 72 | ```
 73 | 
 74 | ## Zeppelin Image
 75 | 
 76 | This image will contain the Spark binaries built earlier apart from the dependencies needed for launching Apache Zeppelin server.
 77 | 
 78 | Extract the Spark tarball generated above into a directory where you have copied the [Dockerfile for Zeppelin image](../dockerfiles/zeppelin/Dockerfile) to.
 79 | Also, copy the script [setSparkEnvVars.sh](../dockerfiles/zeppelin/setSparkEnvVars.sh) to the same location.
 80 | 
 81 | Make sure that the third party jars needed to access GCS and AWS S3 are copied to the jars directory of the extracted tarball.
 82 | 
 83 | Build and publish the Zeppelin image.
 84 | 
 85 | ```bash
 86 | $ docker build -t <your-docker-repo-name>/zeppelin:<image-tag> -f Dockerfile .
 87 | $ docker push <your-docker-repo-name>/zeppelin:<image-tag>
 88 | ```
 89 | 
 90 | For example:
 91 | ```bash
 92 | $ docker build -t snappydatainc/zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1 -f Dockerfile .
 93 | $ docker push snappydatainc/zeppelin:0.7.3-spark-v2.2.0-kubernetes-0.5.1
 94 | ```
 95 | 
 96 | ## SnappyData Image
 97 | 
 98 | The SnappyData Docker image available on DockerHub is built using the OSS version of the product. Docker image with
 99 | SnappyData Enterprise bits will be available soon.
100 | 
101 | Currently, some manual steps are needed to build this image which will be automated later.
102 | 
103 | - Download the Snappydata OSS tarball of the version you need and available on
104 | [GitHub releases page](https://github.com/snappydatainc/snappydata/releases) and extract its content into a directory.
105 | 
106 | - Copy the Dockerfile and start script required for SnappyData image
107 | [from this branch](https://github.com/SnappyDataInc/snappy-cloud-tools/blob/SNAP-2280/docker) into the extracted
108 | SnappyData directory.
109 | 
110 | - Copy the [SnappyData interpreter jar](https://github.com/SnappyDataInc/zeppelin-interpreter/releases) for
111 | Apache Zeppelin into the jars directory.
112 | 
113 | - Optionally, one can also add the third party jar needed to access GCS to the jars directory. The libraries to access
114 | AWS S3 and HDFS are already included.
115 | 
116 | - Switch to the extracted directory to build and publish the SnappyData image using following commands.
117 | 
118 |     ```bash
119 |     $ cd <extracted-snappydata-directory>
120 |     $ docker build -t <your-docker-repo-name>/snappydata:<image-tag> -f Dockerfile .
121 |     $ docker push <your-docker-repo-name>/snappydata:<image-tag>
122 |     ```
123 | 
124 |     For example:
125 |     ```bash
126 |     $ docker build -t snappydatainc/snappydata:1.0.1 -f Dockerfile .
127 |     $ docker push snappydatainc/snappydata:1.0.1
128 |     ```
129 | 


--------------------------------------------------------------------------------
/k8s-helm-spark-architecture-draw.io.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/k8s-helm-spark-architecture-draw.io.png


--------------------------------------------------------------------------------
/kubernetes-how-does-it-work.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/kubernetes-how-does-it-work.1.png


--------------------------------------------------------------------------------
/spark-on-kubernetes-how-does-it-work.2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/spark-on-kubernetes-how-does-it-work.2.png


--------------------------------------------------------------------------------
/tiles/README.md:
--------------------------------------------------------------------------------
 1 | # Pivotal Cloud Foundry tile for SnappyData
 2 | 
 3 | ## Building the tile
 4 | 
 5 | - Install the [tile generator](https://docs.pivotal.io/tiledev/2-1/tile-generator.html) tool on your local machine.
 6 | 
 7 | - You also need to install [latest bosh CLI](https://bosh.io/docs/#downloads).
 8 | 
 9 | - Move to [tiles/snappydata](snappydata/) directory of the cloned repository where the tile.yml is present and build the tile.
10 |     - `$ tile build`
11 | 
12 | - Optionally, one could specify version for the tile.
13 |     - `$ tile build 1.0`
14 | 
15 | - Upload the generated .pivotal file from product/ folder to Pivotal Ops Manager.
16 | 
17 | ## Configuring the tile
18 | 
19 | Once the .pivotal file imported into Pivotal Ops Manager, add the tile to the dashboard by clicking on the '+' sign.
20 | Click on the tile to configure it.
21 | 
22 | Here, at a minimum, you need to specify 1. the credentials for the Kubernetes/PKS cluster you would launch the SnappyData chart on and also 2. select the appropriate network.
23 | The credentials to connect to Kubernetes/PKS cluster include CA Cert, Cluster Token and Cluster url. (Note that the CA cert needs to be base64 decoded after fetching it from your kubeconfig file.)
24 | 
25 | Save these configurations and return to the installation dashboard and hit 'Apply changes'.
26 | 
27 | 
28 | ## Creating and consuming a service
29 | 
30 | Once installed, users can create the service instance of SnappyData which essentially installs the Helm chart on the
31 | Kubernetes/PKS cluster provided during tile configuration.
32 | 
33 | - Install the [CF CLI](https://docs.cloudfoundry.org/cf-cli/install-go-cli.html) and log in to your Cloud Foundry's API server.
34 |     - `$ cf login -a https://<api-server-name> --skip-ssl-validation`
35 | - You can view that the SnappyData service broker is now visible.
36 |     - `$ cf service-brokers`
37 | - Create a service instance using the broker. It'll launch the SnappyData cluster on your configured Kubernetes/PKS cluster.
38 | - Currently, we have three plans for the SnappyData service: 1. small (default), 2. medium and 3. large. All of these start the cluster with one locator, one lead and two servers but with different memory.
39 |     - `$ cf create-service snappydata small snappydata_small`
40 | - We'll add to or change these plans in future.
41 | - Now that the service is available, you can bind it to any of your running apps.
42 |     - `$ cf bs <app-name> snappydata_small`
43 |     - `$ cf restage <app-name>`
44 | - Now, your app has access to information about the service via environment variable VCAP_SERVICES. Typically, all the Kubernetes services created by the chart are included in this environment variable.
45 |     - `$ cf env <app-name>`
46 | 
47 | 


--------------------------------------------------------------------------------
/tiles/snappydata/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TIBCOSoftware/snappy-on-k8s/6939c11cda65435748af7f5c4654dc2b8d518eaf/tiles/snappydata/icon.png


--------------------------------------------------------------------------------
/tiles/snappydata/tile-history.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 0.1.0
3 | 


--------------------------------------------------------------------------------
/tiles/snappydata/tile.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: snappydata
 3 | icon_file: icon.png
 4 | label: SnappyData On PKS
 5 | description: Test tile for Kibosh with SnappyData Helm chart
 6 | 
 7 | packages:
 8 | - name: snappydata
 9 |   type: kibosh
10 |   helm_chart_dir: ../../charts/snappydata
11 | 


--------------------------------------------------------------------------------
/utils/debug-pod-override-template.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "apiVersion": "v1",
 3 |     "kind": "Pod",
 4 |     "metadata": {
 5 |         "labels": {
 6 |             "run": "snappy-debug-pod"
 7 |         },
 8 |         "name": "snappy-debug-pod",
 9 |         "namespace": "spark"
10 |     },
11 |     "spec": {
12 |         "containers": [
13 |             {
14 |                 "args": [
15 |                     "bash"
16 |                 ],
17 |                 IMAGE_MARKER
18 |                 "imagePullPolicy": "IfNotPresent",
19 |                 "name": "snappy-debug-pod",
20 |                 "stdin": true,
21 |                 "stdinOnce": true,
22 |                 "tty": true,
23 |                 "volumeMounts": [
24 |                     VOLUME_MOUNTS_MARKER
25 |                 ]
26 |             }
27 |         ],
28 |         "restartPolicy": "Never",
29 |         "volumes": [
30 |              VOLUMES_MARKER
31 |         ]
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/utils/snappy-debug-pod.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | #copyright (c) 2018 SnappyData, Inc. All rights reserved.
  3 | # 
  4 | # Licensed under the Apache License, Version 2.0 (the "License"); you
  5 | # may not use this file except in compliance with the License. You
  6 | # may obtain a copy of the License at
  7 | #   
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #     
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 13 | # implied. See the License for the specific language governing
 14 | # permissions and limitations under the License. See accompanying
 15 | # LICENSE file.
 16 | #
 17 | 
 18 | usage() {
 19 | echo "Usage: snappy-debug-pod.sh --pvc <comma separated pvc list> [--namespace <namespace in which the PVCs exist>] [--image <docker cordinates on SnappyData image to be used>]"
 20 | echo "Options namespace and image are optional. Default namespace is 'spark'"
 21 | echo ""
 22 | echo "This script launches a pod in the K8S cluster with user specified persistent volumes mounted on it. User must provide list of persistent volume claims as an input for the volumes to be mounted. 
 23 | Volumes will be mounted on the path starting with /data (volume1 on /data0 and so on). This script can be used to inspect logs on volumes even when SnappyData system is not online"
 24 | echo ""
 25 | echo "Example usage: snappy-debug-pod.sh --pvc snappy-disk-claim-snappydata-leader-0,snappy-disk-claim-snappydata-server-0 --namespace default"
 26 | }
 27 | 
 28 | namespace=spark
 29 | image="snappydatainc/snappydata:1.0.1.1-test.1" 
 30 | imageString='"image": '\""$image"\"','
 31 | mountString=""
 32 | volumeString=""
 33 | gotPVC=false
 34 | 
 35 | while (( "$#" )); do
 36 |   case "$1" in
 37 |     --pvc)
 38 |       pvclist=$2
 39 |       gotPVC=true
 40 |       
 41 |       # read the comma separated PVC names into an array
 42 |       oIFS="$IFS"; IFS=, ; 
 43 |       read -r -a array <<< "$pvclist";
 44 |       IFS="$oIFS"
 45 | 
 46 |       # using PVCs provided by the user, 
 47 |       # create valid JSON string for volumes and volumeMounts atrbutes of a pod
 48 |       for index in "${!array[@]}"
 49 |       do
 50 |         if [ $index -eq 0 ]
 51 |         then
 52 |           mountString="{\"mountPath\": \"/data$index/\", \"name\": \"snappy-disk-claim$index\"}"
 53 |           volumeString="{\"name\": \"snappy-disk-claim$index\", \"persistentVolumeClaim\": {\"claimName\": \"${array[index]}\"}}"
 54 |         else
 55 |           mountString="$mountString, {\"mountPath\": \"/data$index/\", \"name\": \"snappy-disk-claim$index\"}"
 56 |           volumeString="$volumeString, {\"name\": \"snappy-disk-claim$index\", \"persistentVolumeClaim\": {\"claimName\": \"${array[index]}\"}}"
 57 |         fi  
 58 | #        echo "$index ${array[index]}"
 59 | #        echo $mountString
 60 | #        echo $volumeString
 61 |       echo "Volume for ${array[index]} will be mounted on /data$index"
 62 |       done
 63 |       shift 2
 64 |     ;;
 65 |     --namespace)
 66 |       namespace=$2
 67 |       shift 2
 68 |     ;;
 69 |     --image)
 70 |       image=$2
 71 |       imageString='"image": '\""$image"\"','
 72 |       shift 2
 73 |     ;;
 74 |     --help)
 75 |       usage
 76 |       exit 0 
 77 |     ;;  
 78 |     *)
 79 |       break
 80 |     ;;
 81 |   esac
 82 | done
 83 | 
 84 | if [ "$gotPVC" = false ] ; then
 85 |     echo 'ERROR: PVC list not provided'
 86 |     usage
 87 |     exit 1
 88 | fi
 89 | 
 90 | 
 91 | # debug-pod-override-template contains, the template JSON in which 
 92 | # JSON string for volumes and volumeMounts is added
 93 | # first create a copy to modify JSON 
 94 | cp debug-pod-override-template.json /tmp/debug-pod-override-actual.json
 95 | sed -i 's|IMAGE_MARKER|'"$imageString"'|; s|VOLUME_MOUNTS_MARKER|'"$mountString"'|; s|VOLUMES_MARKER|'"$volumeString"'|' /tmp/debug-pod-override-actual.json
 96 | 
 97 | #run the actual command that will launch a pod
 98 | overrides=$(</tmp/debug-pod-override-actual.json)
 99 | rm -f /tmp/debug-pod-override-actual.json 
100 | cmd="kubectl run -i --rm --tty snappy-debug-pod --overrides='$overrides' --image="$image" --restart=Never --namespace="$namespace" -- bash"
101 | 
102 | echo "Launching the POD"
103 | 
104 | #echo "command to be run is=$cmd"
105 | # Fix this. Can we avoid writing the command to a temp file 
106 | echo $cmd > /tmp/kubecommand
107 | chmod +x /tmp/kubecommand
108 | /tmp/kubecommand 
109 | rm -f /tmp/kubecommand
110 | #$cmd
111 | 


--------------------------------------------------------------------------------