├── .staroid
│   └── pages
│       └── overview.md
├── LICENSE
├── README.md
├── k8s
│   ├── hive-metastore.yaml
│   └── minikube.yaml
├── skaffold.yaml
└── staroid.yaml

--------------------------------------------------------------------------------
/.staroid/pages/overview.md:
--------------------------------------------------------------------------------
1 | :::: section column
2 | ::: col 60%
3 | Spark Serverless provides instant access to a Spark cluster from anywhere, without worrying about infrastructure or maintenance.
4 | 
5 | Virtually any Python environment is supported: a Python REPL running on your laptop, your favorite Python IDE, or an online notebook environment such as Colab.
6 | :::
7 | 
8 | ::: col 30%
9 | @[youtube](https://www.youtube.com/watch?v=J43qKJnp_N8&feature=youtu.be)
10 | 
11 | See [Getting started](http://open-datastudio.io/computing/spark/from_python_environment.html) to learn more.
12 | :::
13 | ::::
14 | 
15 | :::: section column
16 | 
17 | ### Apache Spark with a serverless experience
18 | 
19 | ::: col 40%
20 | #### Super easy
21 | Just a few lines of code to get a Spark cluster from your Python environment. No cluster installation or maintenance required.
22 | :::
23 | 
24 | ::: col 60%
25 | 
26 | ![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNiYAAAAAkAAxkR2eQAAAAASUVORK5CYII= =1x60)
27 | 
28 | ```python
29 | import ods
30 | spark = ods.spark("my_work1", worker_num=5).session()
31 | df = spark.read.load("...")
32 | ```
33 | 
34 | See [Getting started](http://open-datastudio.io/computing/spark/from_python_environment.html) to learn more.
35 | :::
36 | 
37 | ::: col 100%
38 | #### Don't change. Keep your tools
39 | You don't have to change your favorite tools. Run your PySpark code anywhere, from your laptop to a server, from a shell to an IDE. Keep your data pipeline unchanged. Spark Serverless provides Spark executors remotely, no matter where your application runs.
40 | :::
41 | 
42 | ::: col 100%
43 | #### As many as you want
44 | Each Spark application can have its own set of executors.
No more waiting for a busy Spark cluster to finish other applications' jobs.
45 | :::
46 | 
47 | ::: col 100%
48 | #### Multi-cloud
49 | Select the regions you want to use: AWS us-west2, GCP us-west1, and so on. We're continuously adding cloud regions to meet your needs. Run your jobs close to the data!
50 | 
51 | :::
52 | 
53 | ::: col 100%
54 | #### Pay as you go
55 | There's no control-plane cost or upfront cost. Pay only for the Spark executor instances that run your job.
56 | :::
57 | 
58 | ::::
59 | 
60 | :::: section column
61 | 
62 | ### Production ready
63 | 
64 | ::: col 30%
65 | #### Spark 3.x
66 | Enjoy the latest Spark release. When you need to, you can customize the Spark container image.
67 | :::
68 | 
69 | ::: col 30%
70 | #### Spark UI
71 | You can access the Spark UI by simply clicking the spark-ui link on the Staroid management console. No complex proxy setup required.
72 | :::
73 | 
74 | ::: col 30%
75 | #### Delta Lake
76 | Simply pass the `delta=True` parameter when you initialize the Spark session.
77 | Delta Lake will be configured automatically.
78 | :::
79 | 
80 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 | 
3 | Copyright 2020 The Open-datastudio Spark-serverless Authors
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Spark Serverless
2 | 
3 | Spark Serverless provides Apache Spark in a serverless configuration.
4 | 
5 | This repository is deployed to a [staroid](https://staroid.com) cluster using the [ods](https://github.com/open-datastudio/ods) Python package.
6 | 
7 | ## Quick start
8 | 
9 | Please visit https://github.com/open-datastudio/ods.
10 | 
11 | ## How it works
12 | 
13 | [Spark on Kubernetes](http://spark.apache.org/docs/latest/running-on-kubernetes.html) provides both client mode and cluster mode. That means if your Spark application has access to the Kubernetes API server, it can create the driver and executors itself.
14 | 
15 | Therefore, this project focuses on providing a Kubernetes API endpoint to the Spark application,
16 | with Spark configuration optimized for Kubernetes and each cloud provider.
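
The hand-off described above can be sketched in a few lines. This is a minimal illustration (not this project's actual code) of the client-mode configuration a Spark application needs once it has a Kubernetes API endpoint; the endpoint URL, namespace, and container image below are hypothetical placeholders.

```python
# Sketch of the client-mode setup: given a Kubernetes API endpoint,
# the application assembles the spark.kubernetes.* settings that let
# it create its own executor pods. All concrete values here are
# hypothetical examples.

def client_mode_conf(k8s_api_endpoint, namespace, image, executors=5):
    """Build the Spark-on-Kubernetes settings for a client-mode app."""
    return {
        # Point Spark at the Kubernetes API server (note the "k8s://" scheme).
        "spark.master": f"k8s://{k8s_api_endpoint}",
        # Executor pods are created in this namespace with this image.
        "spark.kubernetes.namespace": namespace,
        "spark.kubernetes.container.image": image,
        # Spark conf values are strings.
        "spark.executor.instances": str(executors),
    }

conf = client_mode_conf("https://10.0.0.1:6443", "spark", "apache/spark:3.1.1")
```

In practice each key/value pair would be applied through `SparkSession.builder.config(key, value)` before calling `.getOrCreate()`; this project's role is to supply the endpoint and a tuned version of this configuration so you don't maintain it by hand.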
17 | 
18 | ### Spark client mode application
19 | 
20 | ![](http://open-datastudio.io/_images/spark-serverless-client-mode.png)
21 | 
22 | ### Spark cluster mode application
23 | 
24 | ![](http://open-datastudio.io/_images/spark-serverless-cluster-mode.png)
25 | 
26 | 
27 | ## Get supported
28 | 
29 | Don't hesitate to create an [issue](https://github.com/open-datastudio/spark-serverless/issues) or [join our Slack channel](https://join.slack.com/t/opendatastudio/shared_invite/zt-fy2dsmb7-E9_UrBAh4UA47lzN5sUHUA).
30 | 
31 | For commercial support, please [contact staroid](https://staroid.com/site/contact).
--------------------------------------------------------------------------------
/k8s/hive-metastore.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 |   name: hive-metastore-info
5 |   labels:
6 |     # imported from https://github.com/open-datastudio/hive-metastore/blob/master/k8s-hive.yaml.
7 |     # see https://docs.staroid.com/project/dependency.html to learn more about export/import.
8 |     dependency.staroid.com/import: hive-metastore-info
9 | data:
10 |   HIVE_METASTORE_NAMESPACE: default
11 |   HIVE_METASTORE_SERVICE: hive-metastore-service
--------------------------------------------------------------------------------
/k8s/minikube.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: policy/v1beta1
2 | kind: PodSecurityPolicy
3 | metadata:
4 |   name: staroid-psp
5 | spec:
6 |   privileged: false
7 |   allowPrivilegeEscalation: false
8 |   hostNetwork: false
9 |   hostIPC: false
10 |   hostPID: false
11 |   readOnlyRootFilesystem: false
12 |   runAsUser:
13 |     rule: 'MustRunAsNonRoot'
14 |   runAsGroup:
15 |     rule: 'MustRunAs'
16 |     ranges:
17 |       - min: 1
18 |         max: 65535
19 |   seLinux:
20 |     rule: 'RunAsAny'
21 |   supplementalGroups:
22 |     rule: 'MustRunAs'
23 |     ranges:
24 |       - min: 1
25 |         max: 65535
26 |   fsGroup:
27 |     rule: 'RunAsAny'
28 |   volumes:
29 |     -
'configMap'
30 |     - 'emptyDir'
31 |     - 'secret'
32 |     - 'persistentVolumeClaim'
33 | ---
34 | kind: Role
35 | apiVersion: rbac.authorization.k8s.io/v1
36 | metadata:
37 |   name: staroid-role
38 | rules:
39 |   - apiGroups: [""]
40 |     resources: ["pods", "pods/log", "pods/exec", "pods/binding", "services", "secrets", "configmaps", "persistentvolumeclaims"]
41 |     verbs: ["create", "get", "update", "patch", "list", "delete", "watch"]
42 |   - apiGroups: ["apps"]
43 |     resources: ["deployments"]
44 |     verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
45 |   - apiGroups: ["batch"]
46 |     resources: ["jobs", "cronjobs"]
47 |     verbs: ["create", "get", "update", "patch", "list", "delete", "watch"]
48 |   - apiGroups: ['policy']
49 |     resources: ['podsecuritypolicies']
50 |     verbs: ['use']
51 |     resourceNames: ['staroid-psp']
52 | ---
53 | kind: RoleBinding
54 | apiVersion: rbac.authorization.k8s.io/v1
55 | metadata:
56 |   name: staroid-rolebinding
57 | roleRef:
58 |   kind: Role
59 |   name: staroid-role
60 |   apiGroup: rbac.authorization.k8s.io
61 | subjects:
62 |   - kind: ServiceAccount
63 |     name: default
--------------------------------------------------------------------------------
/skaffold.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: skaffold/v2beta1
2 | kind: Config
3 | deploy:
4 |   kubectl:
5 |     manifests:
6 |       - k8s/hive-metastore.yaml
7 | profiles:
8 |   - name: minikube
9 |     patches:
10 |       - op: add
11 |         path: /deploy/kubectl/manifests/0
12 |         value: ./k8s/minikube.yaml
--------------------------------------------------------------------------------
/staroid.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: beta/v1
2 | starRank:
3 |   rate: 1.1
4 | build:
5 |   skaffold:
6 |     file: skaffold.yaml
7 | deploy:
8 |   dependencies:
9 |     - project: open-datastudio/hive-metastore
10 |       level: OPTIONAL
11 | implements: open-datastudio/spark-serverless
12 | 
--------------------------------------------------------------------------------