├── .github
│   └── workflows
│       ├── build-docs.yml
│       └── publish-docs.yml
├── .gitignore
├── LICENSE
├── README.md
└── docs
    ├── .nojekyll
    ├── Makefile
    ├── _static
    │   ├── cuda-logo.png
    │   ├── dask-logo.png
    │   ├── delta-logo.png
    │   ├── jupyter-logo.png
    │   ├── open-datastudio-logo.png
    │   ├── open-datastudio-logo.svg
    │   ├── overview.png
    │   ├── ray-logo.png
    │   ├── ray-usecase.png
    │   ├── spark-logo.png
    │   ├── spark-serverless-client-mode.png
    │   ├── spark-serverless-cluster-mode.png
    │   ├── spark-usecase.png
    │   └── zeppelin-logo.svg
    ├── _templates
    │   └── layout.html
    ├── about
    │   ├── index.rst
    │   └── overview.rst
    ├── business-intelligence
    │   ├── index.rst
    │   ├── metabase.rst
    │   └── superset.rst
    ├── computing
    │   ├── dask.rst
    │   ├── flink.rst
    │   ├── index.rst
    │   ├── ray
    │   │   ├── from_cluster_launcher.rst
    │   │   ├── from_staroid_management_console.rst
    │   │   └── index.rst
    │   └── spark
    │       ├── from_ods_zeppelin.rst
    │       ├── from_python_environment.rst
    │       ├── index.rst
    │       ├── instances.rst
    │       └── spark_ui.rst
    ├── conf.py
    ├── data-lake
    │   ├── delta.rst
    │   ├── hive-metastore.rst
    │   ├── index.rst
    │   └── spark-thriftserver.rst
    ├── index.rst
    ├── machine-learning
    │   ├── index.rst
    │   ├── mlflow-model-serving.rst
    │   └── mlflow-server.rst
    ├── notebook
    │   ├── index.rst
    │   ├── jupyter.rst
    │   └── zeppelin.rst
    ├── ref.rst
    ├── requirements.txt
    └── support
        └── index.rst
/.github/workflows/build-docs.yml:
--------------------------------------------------------------------------------
1 | name: build-docs
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | docs:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | - uses: ammaraskar/sphinx-action@master
14 | with:
15 | docs-folder: "./docs"
16 |
--------------------------------------------------------------------------------
/.github/workflows/publish-docs.yml:
--------------------------------------------------------------------------------
1 | name: publish-docs
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | docs:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: checkout master
13 | uses: actions/checkout@v2
14 | - name: build docs
15 | uses: ammaraskar/sphinx-action@master
16 | with:
17 | docs-folder: "./docs"
18 | - name: checkout gh-pages branch
19 | uses: actions/checkout@v2
20 | with:
21 | ref: gh-pages
22 | path: gh-pages
23 | - name: Commit documentation changes
24 | run: |
25 | cp -r docs/_build/html/* gh-pages/
26 | cd gh-pages
27 | git config --local user.email "action@github.com"
28 | git config --local user.name "GitHub Action"
29 | git add .
30 | git commit -m "Update documentation" -a || true
31 | # The above command will fail if no changes were present, so we ignore
32 | # the return code.
33 | - name: Push changes
34 | uses: ad-m/github-push-action@master
35 | with:
36 | branch: gh-pages
37 | directory: gh-pages
38 | github_token: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | docs/_build
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright 2020 The Open-datastudio Authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
7 | # Open data studio
8 |
9 | Open data studio is an open initiative to put machine learning and large-scale data processing open-source software a click away for everyone.
10 |
11 | ## Documentation
12 |
13 | Please visit [open-datastudio.io](https://open-datastudio.io)
14 |
15 | ## Projects
16 |
17 | | Component | Project | Description | Integration Status |
18 | | ------- | --------- | ----------- | ------- |
19 | | Notebook | [jupyter](https://github.com/open-datastudio/jupyter) | Jupyter Lab | Integrated |
20 | | | [zeppelin](https://github.com/open-datastudio/zeppelin) | Apache Zeppelin integrated with Apache Spark on Kubernetes | Integrated |
21 | | Data Lake | [hive-metastore](https://github.com/open-datastudio/hive-metastore) | Hive metastore server backed by a PostgreSQL database | Integrated |
22 | | | [spark-thriftserver](https://github.com/open-datastudio/spark-thriftserver) | Spark cluster on Kubernetes for ODBC/JDBC connection | Integrated |
23 | | Computing | [ray-cluster](https://github.com/open-datastudio/ray-cluster) | [Ray](https://ray.io/) cluster | Integrated |
24 | | | [spark-serverless](https://github.com/open-datastudio/spark-serverless) | On-demand [Spark](https://spark.apache.org) cluster from anywhere | Integrated |
25 | | Machine learning | [mlflow-server](https://github.com/open-datastudio/mlflow-server) | [MLflow](https://mlflow.org/) remote tracking server and UI | Integrated |
26 | | | [mlflow-model-serving](https://github.com/open-datastudio/mlflow-model-serving) | Deploy models from mlflow-server and get an endpoint | Integrated |
27 | | Business Intelligence | [metabase](https://github.com/open-datastudio/metabase) | Metabase Business Intelligence | Integrated |
28 | | | [superset](https://github.com/open-datastudio/superset) | Apache Superset Business Intelligence | Integrated |
29 | | Misc | [spark](https://github.com/open-datastudio/spark) | Does not integrate with Staroid; publishes a Docker image for other projects | - |
30 |
31 |
32 |
33 | ## How to contribute?
34 |
35 | You can create issues or pull requests to contribute to individual repositories under [open-datastudio](https://github.com/open-datastudio).
36 |
37 | If you'd like to create a new integration project here, please create an [issue](https://github.com/open-datastudio/datastudio/issues) in this repository.
38 |
39 | We need your help!
40 |
41 | ## Community
42 |
43 | * Open data studio slack channel - [Join](https://join.slack.com/t/opendatastudio/shared_invite/zt-jq449y9j-DIPBteeWC15xBbQAqi4J4g)
44 |
45 | ## License
46 |
47 | Open data studio is a collection of open source projects.
48 | A LICENSE file is included in each repository.
49 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = _build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
20 |
--------------------------------------------------------------------------------
/docs/_static/cuda-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/cuda-logo.png
--------------------------------------------------------------------------------
/docs/_static/dask-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/dask-logo.png
--------------------------------------------------------------------------------
/docs/_static/delta-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/delta-logo.png
--------------------------------------------------------------------------------
/docs/_static/jupyter-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/jupyter-logo.png
--------------------------------------------------------------------------------
/docs/_static/open-datastudio-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/open-datastudio-logo.png
--------------------------------------------------------------------------------
/docs/_static/open-datastudio-logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/docs/_static/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/overview.png
--------------------------------------------------------------------------------
/docs/_static/ray-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/ray-logo.png
--------------------------------------------------------------------------------
/docs/_static/ray-usecase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/ray-usecase.png
--------------------------------------------------------------------------------
/docs/_static/spark-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/spark-logo.png
--------------------------------------------------------------------------------
/docs/_static/spark-serverless-client-mode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/spark-serverless-client-mode.png
--------------------------------------------------------------------------------
/docs/_static/spark-serverless-cluster-mode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/spark-serverless-cluster-mode.png
--------------------------------------------------------------------------------
/docs/_static/spark-usecase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-datastudio/datastudio/5055579adf969ad6d7491454b30ab2fedbaaa067/docs/_static/spark-usecase.png
--------------------------------------------------------------------------------
/docs/_static/zeppelin-logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
78 |
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {% block footer %}
4 | {{ super() }}
5 |
6 |
7 |
14 |
15 |
16 |
26 | {% endblock %}
27 |
--------------------------------------------------------------------------------
/docs/about/index.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | About
3 | ===============
4 |
5 | .. toctree::
6 | :maxdepth: 2
7 |
8 |    Overview <overview>
9 |
10 | .. include:: ../ref.rst
--------------------------------------------------------------------------------
/docs/about/overview.rst:
--------------------------------------------------------------------------------
1 | Overview
2 | ==================================
3 |
4 | Open data studio is an open initiative to put machine learning and large-scale data processing open-source software a click away for everyone.
5 |
6 | Why open data studio?
7 | ------------------------
8 |
9 | The ability to use data has become a key differentiator for businesses.
10 |
11 | To use data, we need tools. Thankfully, machine learning, data analytics, and large-scale data processing are areas where open source software dominates. Many great tools are available to build your ML/data pipeline.
12 |
13 | However, architecting, installing, integrating, and maintaining these tools to build your pipeline has become a new challenge, and it takes a lot of investment to get the proper setup, configuration, experience, and best practices.
14 | We know not every organization can invest as much as the few trillion-dollar companies to build an ML/data pipeline.
15 |
16 | Open data studio's goal is to minimize this gap so any organization can have the
17 | same ability from day one.
18 |
19 |
20 | |overview-img|
21 |
22 | .. include:: ../ref.rst
23 |
24 | .. |overview-img| image:: ../_static/overview.png
25 | :width: 700px
26 | :alt: Open data studio overview
27 |
--------------------------------------------------------------------------------
/docs/business-intelligence/index.rst:
--------------------------------------------------------------------------------
1 | =====================
2 | Business Intelligence
3 | =====================
4 |
5 | Business Intelligence tools are designed to make sense of the huge quantities of data that organizations accumulate over time.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 |    Metabase <metabase>
11 |    Superset <superset>
12 |
13 | .. include:: ../ref.rst
14 |
--------------------------------------------------------------------------------
/docs/business-intelligence/metabase.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Metabase
3 | =================
4 |
5 | Metabase is the easy, open source way for everyone in your company to ask questions and learn from data.
6 | Open data studio makes it easy to deploy on the cloud.
7 |
8 | Key features
9 |
10 | - Click to deploy. No setup required.
11 | - Postgres is pre-configured
12 | - Connect to databases on a private network using a `secure tunnel `__
13 |
14 | .. _metabase.Quickstart:
15 |
16 | Metabase Quickstart
17 | -------------------
18 |
19 | .. image:: https://staroid.com/api/run/button.svg
20 | :target: https://staroid.com/g/open-datastudio/metabase
21 |
22 | **Screenshots**
23 |
24 | |metabase-screenshot|
25 |
26 | =============================== ===================================================================
27 | Launch page https://staroid.com/g/open-datastudio/metabase
28 | Open data studio repository https://github.com/open-datastudio/metabase
29 | Original repository https://github.com/metabase/metabase
30 | Documentation https://www.metabase.com/docs/latest/
31 | =============================== ===================================================================
32 |
33 | .. include:: ../ref.rst
34 |
35 | .. |metabase-screenshot| image:: https://github.com/metabase/metabase/raw/master/docs/metabase-product-screenshot.png
36 | :width: 700px
37 | :alt: Metabase
38 |
--------------------------------------------------------------------------------
/docs/business-intelligence/superset.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Superset
3 | ===============
4 |
5 | .. raw:: html
6 |
7 |
8 |
9 | |
10 |
11 | Apache Superset is a modern, enterprise-ready business intelligence web application.
12 | Open data studio makes it easy to deploy on the cloud.
13 |
14 | Key features
15 |
16 | - Click to deploy. No setup required.
17 | - Postgres and Redis are pre-configured.
18 | - Connect to databases on a private network using a `secure tunnel `__
19 |
20 |
21 | Superset Quickstart
22 | -------------------
23 |
24 | .. image:: https://staroid.com/api/run/button.svg
25 | :target: https://staroid.com/g/open-datastudio/superset
26 |
27 | **Screenshots**
28 |
29 | |superset-screenshot|
30 |
31 |
32 | =============================== ===================================================================
33 | Launch page https://staroid.com/g/open-datastudio/superset
34 | Open data studio repository https://github.com/open-datastudio/superset
35 | Original repository https://github.com/apache/superset
36 | Documentation https://superset.apache.org/
37 | =============================== ===================================================================
38 |
39 | .. include:: ../ref.rst
40 |
41 | .. |superset-screenshot| image:: https://raw.githubusercontent.com/apache/incubator-superset/master/superset-frontend/images/screenshots/bank_dash.png
42 | :width: 700px
43 | :alt: Apache superset
44 |
--------------------------------------------------------------------------------
/docs/computing/dask.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Dask
3 | ==============
4 |
5 | https://github.com/open-datastudio/dask-cluster
6 |
7 | Dask support is planned.
8 |
9 | .. include:: ../ref.rst
10 |
--------------------------------------------------------------------------------
/docs/computing/flink.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Apache Flink
3 | ==============
4 |
5 | Apache Flink support is planned.
6 |
7 | .. include:: ../ref.rst
8 |
--------------------------------------------------------------------------------
/docs/computing/index.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Computing
3 | ===============
4 |
5 | Large scale, parallel/distributed computing.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | Spark serverless
11 | Ray cluster
12 |
13 | Dask (planned)
14 | Flink (Planned)
15 |
16 | .. include:: ../ref.rst
17 |
--------------------------------------------------------------------------------
/docs/computing/ray/from_cluster_launcher.rst:
--------------------------------------------------------------------------------
1 | =========================================
2 | Ray cluster from Ray Cluster Launcher CLI
3 | =========================================
4 |
5 | The Ray master branch includes a `Ray cluster launcher for Staroid `_.
6 | This allows creating a Ray cluster using the standard ``ray up`` CLI command.
7 |
8 | Install Ray and dependency libraries
9 | ------------------------------------
10 |
11 | First, install Ray (1.1.0 or newer) and its Python dependency packages.
12 |
13 | .. code-block:: bash
14 |
15 | $ pip install ray staroid kubernetes
16 |
17 | Configure Staroid access token
18 | ------------------------------
19 |
20 | Then, configure the Staroid access token. `Get an access token `_ and set the
21 | ``STAROID_ACCESS_TOKEN`` environment variable.
22 |
23 | .. code-block:: bash
24 |
25 | $ export STAROID_ACCESS_TOKEN=[your access token]
26 |
27 | Cluster configuration file
28 | --------------------------
29 |
30 | Example Ray cluster launcher configuration files for Staroid are available in the Ray source tree.
31 |
32 | .. code-block:: bash
33 |
34 | $ git clone https://github.com/ray-project/ray.git
35 | $ ls ray/python/ray/autoscaler/staroid/example-*.yaml
36 |
37 | Open the example configurations and modify them as needed.
38 |
39 | Start a Ray cluster
40 | -------------------
41 |
42 | Now, you can create a Ray cluster using ``ray up`` command.
43 |
44 | .. code-block:: bash
45 |
46 | $ ray up ray/python/ray/autoscaler/staroid/example-full.yaml
47 |
48 | Once the cluster is up and running, you can attach your shell to the Ray head node.
49 |
50 | .. code-block:: bash
51 |
52 | $ ray attach ray/python/ray/autoscaler/staroid/example-full.yaml
53 |
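Once attached, your shell runs on the head node where Ray is already running. As a quick check, here is a minimal sketch of a task you could run there (``address="auto"`` is Ray's standard way to join an already-running cluster; the task itself is only an illustration):

.. code-block:: python

    import ray

    # Join the cluster that 'ray up' started on this node.
    ray.init(address="auto")

    @ray.remote
    def square(x):
        return x * x

    # Fan the tasks out across the cluster and collect the results.
    print(ray.get([square.remote(i) for i in range(10)]))
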
54 | Ray instance management menu
55 | ----------------------------
56 |
57 | Check `Instance management menu `_.
58 | You'll see your Ray cluster instances.
59 |
60 | .. image:: https://user-images.githubusercontent.com/1540981/101430734-71d83780-38ba-11eb-94d4-f7b20f0135ae.png
61 | :width: 600
62 |
63 | You'll find links to the Ray dashboard and the Jupyter notebook.
64 |
65 |
66 | Shutdown Ray cluster
67 | --------------------
68 |
69 | To shut down the cluster:
70 |
71 | .. code-block:: bash
72 |
73 | $ ray down ray/python/ray/autoscaler/staroid/example-full.yaml
74 |
--------------------------------------------------------------------------------
/docs/computing/ray/from_staroid_management_console.rst:
--------------------------------------------------------------------------------
1 | ===============================================
2 | Ray cluster from Staroid management console GUI
3 | ===============================================
4 |
5 | A Ray cluster can be managed from the `Instance management menu `_
6 | without using the Ray CLI (command line interface).
7 |
8 | Start a Ray cluster from GUI
9 | ----------------------------
10 |
11 | Click the ``Launch`` button in the `Instance management menu `_.
12 |
13 | .. image:: https://user-images.githubusercontent.com/1540981/101434974-65ef7400-38c0-11eb-8647-22a4a11ca2e1.png
14 | :width: 500
15 | :alt: Ray cluster launch dialog
16 |
17 | In the launch dialog, you can configure the name of your Ray cluster instance, the maximum number of workers, and so on.
18 | Once launched, you can see the status of your Ray cluster instance.
19 |
20 | .. note::
21 |
22 | A Ray cluster takes a few seconds to a couple of minutes to fully initialize.
23 | During initialization, it provisions nodes, downloads the Ray container image, and executes bootstrap commands.
24 |
25 | Access Ray dashboard and Jupyter notebook
26 | -----------------------------------------
27 |
28 | Once your Ray cluster instance is fully initialized,
29 | you'll see links to the Ray dashboard and the Jupyter notebook.
30 |
31 | .. image:: https://user-images.githubusercontent.com/1540981/101435650-8f5ccf80-38c1-11eb-8619-ea448c33a50e.png
32 | :width: 600
33 |
34 | In the Jupyter notebook, the Ray environment is pre-configured, so you can simply run
35 |
36 | .. code-block:: python
37 |
38 | import ray
39 | ray.init() # no 'address' parameter required :)
40 |
41 | and start using the Ray cluster.
42 |
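To confirm that workers have joined, a quick resource check with Ray's standard API makes a minimal sanity test (a sketch; the output depends on your cluster size):

.. code-block:: python

    # Continuing in the same notebook session after ray.init():
    # total CPUs/GPUs/memory aggregated across head and worker nodes.
    print(ray.cluster_resources())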
43 |
44 | Stop Ray cluster instance
45 | --------------------------
46 |
47 | In the `Instance management menu `_,
48 | you can find ``Stop`` (``Start``) and ``Terminate`` buttons.
49 |
50 | Stop
51 | Stops the Ray head and all workers. The cluster can be (re)started later. Data stored in the persistent volume is not removed.
52 |
53 | Terminate
54 | Stops the Ray head and all workers permanently. The cluster cannot be restarted. Data stored in the persistent volume is also removed.
55 |
--------------------------------------------------------------------------------
/docs/computing/ray/index.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Ray cluster
3 | ==============
4 |
5 | .. image:: https://staroid.com/api/run/button.svg
6 | :target: https://staroid.com/g/open-datastudio/ray-cluster
7 |
8 | Ray is a distributed execution framework that makes it easy to scale your applications and to leverage state-of-the-art machine learning libraries.
9 |
10 | Key features
11 |
12 | - Fully managed
13 | - Manage Ray clusters using the `Ray Cluster Launcher `_ (``ray up ...``)
14 | - Comes with Jupyter notebook
15 | - Ray Dashboard link to the cluster
16 | - GPU workers
17 |
18 | |
19 |
20 | |ray-logo|
21 |
22 | Getting started with Ray cluster
23 | --------------------------------
24 |
25 | .. toctree::
26 | :maxdepth: 1
27 |
28 |    Ray cluster from Ray Cluster Launcher CLI <from_cluster_launcher>
29 |    Ray cluster from Staroid management console GUI <from_staroid_management_console>
30 |
31 | |
32 | |
33 |
34 | =============================== ===================================================================
35 | Open data studio Ray Cluster https://github.com/open-datastudio/ray-cluster
36 | Original repository https://github.com/ray-project/ray
37 | =============================== ===================================================================
38 |
39 |
40 | .. |ray-logo| image:: ../../_static/ray-logo.png
41 | :width: 150px
42 | :alt: Ray
43 |
44 | .. include:: ../../ref.rst
45 |
--------------------------------------------------------------------------------
/docs/computing/spark/from_ods_zeppelin.rst:
--------------------------------------------------------------------------------
1 | ---------------------------------------------------
2 | Spark cluster from Open data studio Zeppelin
3 | ---------------------------------------------------
4 |
5 | .. raw:: html
6 |
7 |
8 |
9 | |
10 |
11 | Open data studio :ref:`Apache Zeppelin` integrates Spark 3.x out of the box.
12 | No extra installation or initialization steps are required.
13 |
14 | .. image:: https://user-images.githubusercontent.com/1540981/80290438-cf3bc180-86f9-11ea-8c1f-d2dedcd48a86.png
15 | :width: 600
16 |
17 | Launch a notebook and use the Spark interpreter. A Spark cluster will be created automatically.
18 |
19 | .. code-block:: bash
20 | :caption: configure spark executors
21 |
22 | %spark.conf
23 | spark.executor.instances 3
24 |
25 |
26 | .. code-block:: scala
27 | :caption: run spark api
28 |
29 | %spark
30 | // 'sc' and 'spark' are automatically created
31 | spark.read.json(...)
32 |
33 |
34 | Check :ref:`Apache Zeppelin` for more details.
35 |
--------------------------------------------------------------------------------
/docs/computing/spark/from_python_environment.rst:
--------------------------------------------------------------------------------
1 | ---------------------------------------------------
2 | Spark cluster from your python environment
3 | ---------------------------------------------------
4 |
5 | .. raw:: html
6 |
7 |
8 |
9 | |
10 |
11 | Try in Google Colab:
12 |
13 | .. image:: https://colab.research.google.com/assets/colab-badge.svg
14 |    :target: https://colab.research.google.com/github/open-datastudio/ods/blob/master/notebook/open-data-studio.ipynb
14 |
15 |
16 | |
17 |
18 | Install
19 | --------------------------
20 |
21 | Install the `ods `_ package using pip.
22 |
23 | .. code-block:: bash
24 |
25 | $ pip install ods
26 |
27 | Then get an `access token `_ and set the ``STAROID_ACCESS_TOKEN`` environment variable.
28 |
29 | .. code-block:: bash
30 |
31 | $ export STAROID_ACCESS_TOKEN="<your access token>"
32 |
33 | For alternative ways to configure access token, check `staroid-python `_.
34 |
35 | Create Kubernetes cluster
36 | --------------------------
37 |
38 | Go to `staroid.com `_ -> Products -> Kubernetes (SKE) -> New Kubernetes cluster.
39 |
40 | .. image:: https://user-images.githubusercontent.com/1540981/87723637-ede8ac00-c76e-11ea-98d3-b6f8d972453d.png
41 | :width: 400
42 |
43 | Then configure the Kubernetes cluster name after importing the Python library.
44 |
45 | .. code-block:: python
46 |
47 | import ods
48 | # 'ske' is the name of kubernetes cluster created from staroid.com.
49 | # Alternatively, you can set the 'STAROID_SKE' environment variable.
50 | ods.init(ske="data-team1")
51 |
52 |
53 | Create PySpark session
54 | -----------------------
55 |
56 | Spark serverless enables you to create an interactive PySpark session with executors running remotely on the cloud.
57 |
58 | .. code-block:: python
59 |
60 | import ods
61 | # 'ske' is the name of kubernetes cluster created from staroid.com.
62 | # Alternatively, you can set the 'STAROID_SKE' environment variable.
63 | ods.init(ske="data-team1")
64 |
65 | # get spark session with 3 initial worker nodes, delta lake enabled
66 | spark = ods.spark("my-cluster", worker_num=3, delta=True).session()
67 |
68 | # Do your work with Spark session
69 | df = spark.read.load(...)
70 |
71 | Now you can use the Spark session with 3 remotely running executors.
72 |
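As a quick end-to-end check that the remote executors are reachable, here is a minimal computation that needs no external data (standard PySpark API; the numbers are only an illustration):

.. code-block:: python

    # Build a small DataFrame on the remote executors and aggregate it.
    df = spark.range(0, 1000)
    print(df.selectExpr("sum(id)").collect())  # [Row(sum(id)=499500)]
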
73 | .. note::
74 |
75 | No application packaging or submit step is required. Everything runs interactively.
76 |
77 |
78 | .. include:: ../../ref.rst
79 |
--------------------------------------------------------------------------------
/docs/computing/spark/index.rst:
--------------------------------------------------------------------------------
1 | ================
2 | Spark serverless
3 | ================
4 |
5 | Apache Spark is a unified analytics engine for large-scale data processing.
6 | Open data studio provides instant access to the Spark cluster from anywhere without thinking about infrastructure and maintenance.
7 |
8 | Key features
9 |
10 | - Spark 3.x
11 | - Delta lake support
12 | - Fully managed
13 | - Spark UI access
14 |
15 | |
16 |
17 | |spark-logo|
18 |
19 | Spark serverless cluster
20 | -----------------------------
21 |
22 | .. toctree::
23 | :maxdepth: 1
24 |
25 |    Getting Started (Python environment) <from_python_environment>
26 |    Access Spark UI <spark_ui>
27 |    Managing cluster instances <instances>
28 |
29 | |
30 |
31 | =============================== ===================================================================
32 | Open data studio spark service https://github.com/open-datastudio/spark-serverless
33 | Original repository https://github.com/apache/spark
34 | =============================== ===================================================================
35 |
36 | .. |spark-logo| image:: ../../_static/spark-logo.png
37 | :width: 150px
38 | :alt: Apache spark
39 |
40 | .. include:: ../../ref.rst
41 |
--------------------------------------------------------------------------------
/docs/computing/spark/instances.rst:
--------------------------------------------------------------------------------
1 | ===========================
2 | Managing cluster instances
3 | ===========================
4 |
5 | Spark serverless requires no complex management or maintenance of the Spark cluster.
6 | Upgrading, scaling out, optimization, and other complex tasks are handled automatically.
7 | Enjoy a **zero maintenance** serverless experience.
8 |
9 | All you need to do are simple tasks, such as starting or stopping cluster instances when needed.
10 |
11 | Cluster instance management operations can be done
12 | either programmatically using the Python client library or with a few clicks from the `Instance management menu `_.
13 |
14 | Create a new Spark cluster instance
15 | -----------------------------------
16 |
17 | You can create multiple Spark serverless cluster instances in
18 | one or more Kubernetes clusters (SKE). See the :ref:`Create Kubernetes cluster` section to create an SKE.
19 |
20 | You can create a cluster instance by creating a Spark session from your Python environment.
21 |
22 | Create spark session with the default configuration
23 | .. code-block:: python
24 |
25 | import ods
26 | ods.init(ske="my-ske")
27 | spark = ods.spark("my-cluster").session()
28 |
29 | Create spark session with 3 initial worker nodes
30 | .. code-block:: python
31 |
32 | import ods
33 | ods.init(ske="my-ske")
34 | spark = ods.spark("my-cluster", worker_num=3).session()
35 |
36 | Create spark session with delta lake support
37 | .. code-block:: python
38 |
39 | import ods
40 | ods.init(ske="my-ske")
41 | spark = ods.spark("my-cluster", delta=True).session()
42 |
43 |
44 | .. note::
45 |
46 | Run ``pip install ods`` to install the ods library.
47 | Python versions 3.6, 3.7, and 3.8 are supported.
48 |
49 | Done! You have a Spark session connected to executors running remotely on the cloud.
50 | No application packaging or job submission to the cluster is required.
51 |
52 | Your Spark session is capable of interactive computing.
53 | That means you can use it in a Python REPL or in a notebook, as shown below.
54 |
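For instance, each statement below runs immediately on the remote executors and returns results to your REPL (a minimal sketch using the session created above; the data is only an illustration):

.. code-block:: python

    # Interactive exploration: no packaging or job submission step.
    df = spark.range(0, 10000)
    df.groupBy((df.id % 10).alias("bucket")).count().show()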
55 |
56 | .. note::
57 |
58 | It may take a few seconds to a few minutes for executors to become fully ready. See the next section to monitor executor status.
59 |
60 |
61 | Spark cluster instance management menu
62 | --------------------------------------
63 |
64 | Open the `Instance management menu `_
65 | and you'll find the Spark cluster instance automatically created by the Spark session.
66 | You can also :ref:`Access Spark UI` from here.
67 |
68 | .. note::
69 |
70 | A log console and a shell terminal are also provided for more advanced usage.
71 |
72 |
73 | Stop Spark cluster instance
74 | -----------------------------
75 |
76 | In the `Instance management menu `_,
77 | you can find ``Stop`` (``Start``) and ``Terminate`` buttons.
78 |
79 | Stop
80 | Stops all executors. The cluster can be (re)started later. Data stored in the persistent volume is not removed.
81 |
82 | Python API equivalent is
83 |
84 | .. code-block:: python
85 |
86 | # 'spark' is spark session created from 'spark = ods.spark("my-cluster").session()'
87 | spark.stop()
88 |
89 | Terminate
90 | Stops all executors permanently. The cluster cannot be restarted. Data stored in the persistent volume is also removed.
91 |
92 | Python API equivalent is
93 |
94 | .. code-block:: python
95 |
96 | ods.spark("my-cluster").delete()
97 |
98 |
--------------------------------------------------------------------------------
/docs/computing/spark/spark_ui.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Access Spark UI
3 | ===============
4 |
5 | Access Spark UI locally
6 | -----------------------
7 |
8 | Because Spark serverless keeps the Spark driver running in your Python environment (client side),
9 | you can simply browse to ``localhost:4040`` (or subsequent port numbers) to access the Spark UI when working on your laptop.
10 |
11 |
12 | Access Spark UI remotely
13 | ------------------------
14 |
15 | If you're in an environment where access to local ports is limited
16 | (for example, a cloud notebook environment such as Google Colab), or you'd like to share
17 | your Spark UI with your team, you can find a Spark UI link when you open the detail view of your instance
18 | in the `Instance management menu `_.
19 |
20 | .. image:: https://user-images.githubusercontent.com/1540981/100956146-af108400-34cc-11eb-9ee5-1e8dd9937694.png
21 | :width: 600
22 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | # import os
16 | # import sys
17 | # sys.path.insert(0, os.path.abspath('.'))
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = u'Open Data Studio'
23 | copyright = u'Open Data Studio Authors'
24 | author = u'Open Data Studio Authors'
25 |
26 | # The short X.Y version
27 | version = u''
28 | # The full version, including alpha/beta/rc tags
29 | release = u''
30 |
31 |
32 | # -- General configuration ---------------------------------------------------
33 |
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | 'sphinx.ext.autosectionlabel',
43 | 'aafigure.sphinxext'
44 | ]
45 |
46 | # Add any paths that contain templates here, relative to this directory.
47 | templates_path = ['_templates']
48 |
49 | # The suffix(es) of source filenames.
50 | # You can specify multiple suffix as a list of string:
51 | #
52 | # source_suffix = ['.rst', '.md']
53 | source_suffix = '.rst'
54 |
55 | # The master toctree document.
56 | master_doc = 'index'
57 |
58 | # The language for content autogenerated by Sphinx. Refer to documentation
59 | # for a list of supported languages.
60 | #
61 | # This is also used if you do content translation via gettext catalogs.
62 | # Usually you set "language" from the command line for these cases.
63 | language = None
64 |
65 | # List of patterns, relative to source directory, that match files and
66 | # directories to ignore when looking for source files.
67 | # This pattern also affects html_static_path and html_extra_path.
68 | exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store']
69 |
70 | # The name of the Pygments (syntax highlighting) style to use.
71 | pygments_style = None
72 |
73 |
74 | # -- Options for HTML output -------------------------------------------------
75 |
76 | # The theme to use for HTML and HTML Help pages. See the documentation for
77 | # a list of builtin themes.
78 | #
79 | html_theme = 'sphinx_rtd_theme'
80 |
81 | # Theme options are theme-specific and customize the look and feel of a theme
82 | # further. For a list of options available for each theme, see the
83 | # documentation.
84 | #
85 | # html_theme_options = {}
86 |
87 | # Add any paths that contain custom static files (such as style sheets) here,
88 | # relative to this directory. They are copied after the builtin static files,
89 | # so a file named "default.css" will overwrite the builtin "default.css".
90 | html_static_path = ['_static']
91 |
92 | # Custom sidebar templates, must be a dictionary that maps document names
93 | # to template names.
94 | #
95 | # The default sidebars (for documents that don't match any pattern) are
96 | # defined by theme itself. Builtin themes are using these templates by
97 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
98 | # 'searchbox.html']``.
99 | #
100 | # html_sidebars = {}
101 | html_logo = '_static/open-datastudio-logo.svg'
102 |
103 | # Enable link of 'View page source'
104 | #html_show_sourcelink = False
105 | # Add 'Edit on Github' link instead of 'View page source'
106 | # reference:https://docs.readthedocs.io/en/latest/vcs.html
107 | html_context = {
108 | # Enable the "Edit on GitHub" link within the header of each page.
109 | 'display_github': True,
110 | # Set the following variables to generate the resulting github URL for each page.
111 | # Format Template: https://{{ github_host|default("github.com") }}/{{ github_user }}
112 | #/{{ github_repo }}/blob/{{ github_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }}
113 | #https://github.com/runawayhorse001/SphinxGithub/blob/master/doc/index.rst
114 | 'github_user': 'open-datastudio',
115 | 'github_repo': 'datastudio',
116 | 'github_version': 'master/docs/',
117 | }
118 |
119 | # -- Options for HTMLHelp output ---------------------------------------------
120 |
121 | # Output file base name for HTML help builder.
122 | htmlhelp_basename = 'OpenDataStudioDoc'
123 |
124 |
125 | # -- Options for LaTeX output ------------------------------------------------
126 |
127 | latex_elements = {
128 | # The paper size ('letterpaper' or 'a4paper').
129 | #
130 | # 'papersize': 'letterpaper',
131 |
132 | # The font size ('10pt', '11pt' or '12pt').
133 | #
134 | # 'pointsize': '10pt',
135 |
136 | # Additional stuff for the LaTeX preamble.
137 | #
138 | # 'preamble': '',
139 |
140 | # Latex figure (float) alignment
141 | #
142 | # 'figure_align': 'htbp',
143 | }
144 |
145 | # Grouping the document tree into LaTeX files. List of tuples
146 | # (source start file, target name, title,
147 | # author, documentclass [howto, manual, or own class]).
148 | latex_documents = [
149 | (master_doc, 'OPENDATASTUDIO.tex', u'Open Data Studio Documentation',
150 | u'Open Data Studio', 'manual'),
151 | ]
152 |
153 |
154 | # -- Options for manual page output ------------------------------------------
155 |
156 | # One entry per manual page. List of tuples
157 | # (source start file, name, description, authors, manual section).
158 | man_pages = [
159 | (master_doc, 'open data studio', u'Open Data Studio Documentation',
160 | [author], 1)
161 | ]
162 |
163 |
164 | # -- Options for Texinfo output ----------------------------------------------
165 |
166 | # Grouping the document tree into Texinfo files. List of tuples
167 | # (source start file, target name, title, author,
168 | # dir menu entry, description, category)
169 | texinfo_documents = [
170 | (master_doc, 'Open Data Studio', u'Open Data Studio Documentation',
171 | author, 'Open Data Studio', 'Cloud data tools',
172 | 'Miscellaneous'),
173 | ]
174 |
175 |
176 | # -- Options for Epub output -------------------------------------------------
177 |
178 | # Bibliographic Dublin Core info.
179 | epub_title = project
180 |
181 | # The unique identifier of the text. This can be a ISBN number
182 | # or the project homepage.
183 | #
184 | # epub_identifier = ''
185 |
186 | # A unique identification for the text.
187 | #
188 | # epub_uid = ''
189 |
190 | # A list of files that should not be packed into the epub file.
191 | epub_exclude_files = ['search.html']
192 |
--------------------------------------------------------------------------------
/docs/data-lake/delta.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Delta Lake
3 | ==============
4 |
5 | Delta Lake is an open-source storage layer that brings ACID
6 | transactions to Apache Spark™ and big data workloads.
7 |
8 | Open data studio provides Delta Lake in the following Spark environments:
9 |
10 | ================================================== ==========================================
11 | Service Note
12 | ================================================== ==========================================
13 | :ref:`Apache Zeppelin` Through ``%spark`` interpreter
14 | :ref:`Spark cluster from your python environment` ``ods.spark("cluster-name", delta=True)``
15 | ================================================== ==========================================
16 |
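For example, from a Python environment, a Delta-enabled session can write and read Delta tables with the standard ``delta`` data source (a minimal sketch; the ``/tmp/delta/events`` path and the ``data-team1`` SKE name are hypothetical placeholders):

.. code-block:: python

    import ods
    ods.init(ske="data-team1")

    # 'delta=True' enables the Delta Lake data source on the cluster.
    spark = ods.spark("my-cluster", delta=True).session()

    # Write a small DataFrame as a Delta table, then read it back.
    spark.range(0, 100).write.format("delta").save("/tmp/delta/events")
    print(spark.read.format("delta").load("/tmp/delta/events").count())
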
17 | .. include:: ../ref.rst
18 |
--------------------------------------------------------------------------------
/docs/data-lake/hive-metastore.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Hive metastore
3 | ==============
4 |
5 | Apache Hive Metastore is the central metadata repository for Apache Hive, Apache Spark, and more.
6 | Open data studio makes it easy to deploy on the cloud.
7 |
8 | Key features
9 |
10 | - Based on Hive 3.1.2
11 | - Click to deploy. No setup required
12 | - Configured with a PostgreSQL database
13 | - Connect from :ref:`Spark thriftserver` and :ref:`Apache Zeppelin`. No configuration required
14 |
15 | =============================== ===================================================================
16 | Launch page https://staroid.com/g/open-datastudio/hive-metastore
17 | Open data studio repository https://github.com/open-datastudio/hive-metastore
18 | Original repository https://github.com/apache/hive
19 | Documentation http://hive.apache.org/
20 | =============================== ===================================================================
21 |
22 |
23 | Hive metastore Quickstart
24 | -------------------------
25 |
26 | .. image:: https://staroid.com/api/run/button.svg
27 | :target: https://staroid.com/g/open-datastudio/hive-metastore
28 |
29 |
30 | Get hive-metastore address
31 | ---------------------------------
32 |
33 | To learn how to get the Hive metastore server address after deployment,
34 | see the `README `_.
35 |
36 | .. include:: ../ref.rst
37 |
--------------------------------------------------------------------------------
/docs/data-lake/index.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Data Lake
3 | ===============
4 |
5 | A data lake is a centralized repository that allows you to store all your structured and unstructured data at any scale.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 |    Delta lake <delta>
11 |    Hive Metastore <hive-metastore>
12 |    Spark Thriftserver <spark-thriftserver>
13 |
14 | .. include:: ../ref.rst
15 |
--------------------------------------------------------------------------------
/docs/data-lake/spark-thriftserver.rst:
--------------------------------------------------------------------------------
1 | ==================
2 | Spark thriftserver
3 | ==================
4 |
5 | Spark thrift server allows multiple remote clients to access Spark.
6 | It provides a generic JDBC endpoint that lets any client, including BI tools, connect and access the power of Spark.
7 | Open data studio makes it easy to deploy on the cloud.
8 |
9 | Key features
10 |
11 | - Allows clients to execute SQL queries on Apache Spark over the JDBC and ODBC protocols.
12 | - Spark 3.0
13 | - Spark cluster is automatically configured on Kubernetes
14 | - Connect to :ref:`Hive metastore`. No configuration required
15 |
16 | =============================== ===================================================================
17 | Launch page https://staroid.com/g/open-datastudio/spark-thriftserver
18 | Open data studio repository https://github.com/open-datastudio/spark-thriftserver
19 | Original repository https://github.com/apache/spark
20 | Documentation https://spark.apache.org/docs/latest/sql-distributed-sql-engine.html
21 | =============================== ===================================================================
22 |
23 | Spark thrift-server Quickstart
24 | ------------------------------
25 |
26 | .. image:: https://staroid.com/api/run/button.svg
27 | :target: https://staroid.com/g/open-datastudio/spark-thriftserver
28 |
29 |
30 | Get spark-thriftserver address
31 | -------------------------------------
32 |
33 | The `spark-thriftserver-info `_ ConfigMap is created
34 | after deployment. The ConfigMap includes the spark-thriftserver JDBC URL to connect to.
35 | Other projects can import this ConfigMap. `Learn more `__.
36 |
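Once you have the JDBC URL, any Hive-compatible client can connect. Here is a minimal sketch using the third-party ``pyhive`` package (an assumption, not part of this repository; the host and port are hypothetical placeholders taken from the JDBC URL):

.. code-block:: python

    from pyhive import hive

    # Connect to the thrift server endpoint from the ConfigMap's JDBC URL.
    conn = hive.connect(host="spark-thriftserver.example.com", port=10000)
    cursor = conn.cursor()
    cursor.execute("SHOW TABLES")
    print(cursor.fetchall())
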
37 | .. include:: ../ref.rst
38 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | |ods-logo| Open Data Studio
2 | ==================================
3 |
4 | Open data studio is a fully managed computing service on the Staroid_ cloud,
5 | built with an open source development model.
6 |
7 | That means you can enjoy all the benefits of software as a service
8 | without giving up the ability to understand the code, contribute, and improve it like any other open source software.
9 |
10 |
11 | |
12 |
13 |
14 |
15 | Use cases
16 | ------------
17 |
18 | |spark-usecase|
19 |
20 | * From a Python shell/IDE/notebook on your laptop, interactively process massive data on your data lake with :ref:`Spark serverless`.
21 | * Connect your BI tools via JDBC using :ref:`Spark thriftserver`. An on-demand Spark cluster is automatically configured for you.
22 | * Visualize your data in an interactive notebook using :ref:`Apache Zeppelin`. An on-demand Spark cluster is automatically configured for you.
23 |
24 | |
25 |
26 | |ray-usecase|
27 |
28 | * Use the ``ray up`` command to launch a fully managed :ref:`Ray cluster` on the cloud.
29 | * Deploy your model using Ray Serve with an authenticated REST API endpoint.
30 | * Launch a GPU-accelerated :ref:`Jupyter` instance on the cloud.
31 |
32 |
33 | .. |spark-usecase| image:: ./_static/spark-usecase.png
34 | :width: 650px
35 | :alt: Spark use case
36 |
37 | .. |ray-usecase| image:: ./_static/ray-usecase.png
38 | :width: 500px
39 | :alt: Ray use case
40 |
41 | Technology
42 | ------------
43 |
44 | Use all the latest machine learning technology in a single place.
45 | Open data studio continues to integrate the best technologies for machine learning.
46 |
47 | |spark-logo| |ray-logo| |delta-logo| |cuda-logo| |jupyter-logo| |zeppelin-logo|
48 |
49 | .. |spark-logo| image:: ./_static/spark-logo.png
50 | :width: 80px
51 | :alt: Apache spark
52 |
53 | .. |ray-logo| image:: ./_static/ray-logo.png
54 | :width: 100px
55 | :alt: Ray
56 |
57 | .. |delta-logo| image:: ./_static/delta-logo.png
58 | :width: 70px
59 | :alt: Delta lake
60 |
61 | .. |cuda-logo| image:: ./_static/cuda-logo.png
62 | :width: 70px
63 | :alt: Nvidia CUDA
64 |
65 | .. |jupyter-logo| image:: ./_static/jupyter-logo.png
66 | :width: 60px
67 | :alt: Jupyter notebook
68 |
69 | .. |zeppelin-logo| image:: ./_static/zeppelin-logo.svg
70 | :width: 80px
71 | :alt: Zeppelin notebook
72 |
73 | |
74 |
75 | Ease of use
76 | -----------
77 |
78 | Access to the latest machine learning technology shouldn't be more than a few clicks or a few lines of code away.
79 |
80 | .. code-block:: python
81 | :caption: Learn more about :ref:`Spark cluster from your python environment`
82 |
83 | # import open data studio library
84 | import ods
85 |
86 | # create a spark cluster on the cloud with 3 initial workers
87 | spark = ods.spark("my-spark", worker_num=3).session()
88 |
89 | # run spark task
90 | df = spark.read.load("...")
91 |
92 |
93 | .. code-block:: bash
94 | :caption: Learn more about :ref:`Ray cluster from Ray Cluster Launcher CLI`
95 |
96 | $ # install ray and staroid package
97 | $ pip install ray staroid kubernetes
98 |
99 | $ # switch to nightly build
100 | $ ray install-nightly
101 |
102 | $ # get autoscaler yaml files
103 | $ git clone https://github.com/ray-project/ray.git
104 |
105 | $ # spin-up cluster on the cloud and attach
106 | $ ray up ray/python/ray/autoscaler/staroid/example-full.yaml
107 | $ ray attach ray/python/ray/autoscaler/staroid/example-full.yaml
108 |
109 |
110 | |
111 |
112 |
113 | Fully managed
114 | -------------
115 |
116 | Save time and reduce risk.
117 | Open data studio is maintained by committers of the open source projects and industry experts,
118 | on top of Staroid_, a secure, reliable, and high-performance cloud platform.
119 |
120 | |
121 |
122 | Open source
123 | -----------
124 |
125 | Open data studio is an open source project.
126 | You can easily read the source code, understand how it works, and get involved.
127 | When you need to, fork it and get your own version of the managed service!
128 |
129 | Also, every time you launch a project, the developers of that project get funded via StarRank_.
130 |
131 | Community
132 | ---------
133 |
134 | * Open data studio github - https://github.com/open-datastudio
135 | * Open data studio slack channel - `Join `_
136 | * Issue tracker - you can find an 'Issues' menu on each project. If you're not sure where to file, create an issue `here `_
137 |
138 |
139 | |
140 |
141 | .. toctree::
142 | :maxdepth: 2
143 |
144 | about/index
145 | notebook/index
146 | data-lake/index
147 | computing/index
148 | machine-learning/index
149 | business-intelligence/index
150 |
151 | .. include:: ./ref.rst
152 |
153 | .. |ods-logo| image:: ./_static/open-datastudio-logo.png
154 | :width: 60px
155 | :alt: Open Datastudio
156 |
--------------------------------------------------------------------------------
/docs/machine-learning/index.rst:
--------------------------------------------------------------------------------
1 | ================
2 | Machine Learning
3 | ================
4 |
5 | Manage and serve your models.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 |    MLflow server <mlflow-server>
11 |    MLflow model serving <mlflow-model-serving>
12 |
13 | .. include:: ../ref.rst
14 |
--------------------------------------------------------------------------------
/docs/machine-learning/mlflow-model-serving.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | MLflow model serving
3 | ====================
4 |
5 | Deploy models from :ref:`MLflow server`.
6 |
7 | Key features
8 |
9 | - Click to deploy. No setup required
10 | - Deploy models from :ref:`MLflow server`
11 |
12 | =============================== ===================================================================
13 | Launch page https://staroid.com/g/open-datastudio/mlflow-model-serving
14 | Open data studio repository https://github.com/open-datastudio/mlflow-model-serving
15 | Original repository https://github.com/mlflow/mlflow
16 | Documentation https://mlflow.org/docs/latest/index.html
17 | =============================== ===================================================================
18 |
19 | MLflow model serving Quickstart
20 | -------------------------------
21 |
22 | .. image:: https://staroid.com/api/run/button.svg
23 | :target: https://staroid.com/g/open-datastudio/mlflow-model-serving
24 |
25 | **Screenshots**
26 |
27 | |mlflow-model-serving-screenshot|
28 |
29 | .. include:: ../ref.rst
30 |
31 | .. |mlflow-model-serving-screenshot| image:: https://user-images.githubusercontent.com/1540981/89857151-256d2d00-db50-11ea-9512-4e69e7f0cf89.png
32 | :width: 700px
33 | :alt: MLflow model serving
34 |
35 |
36 |
--------------------------------------------------------------------------------
/docs/machine-learning/mlflow-server.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | MLflow server
3 | ==============
4 |
5 | `MLflow `_ is an open source platform for managing the end-to-end machine learning lifecycle.
6 | Open data studio makes it easy to deploy on the cloud.
7 |
8 | Key features
9 |
10 | - Click to deploy. No setup required
11 | - PostgreSQL backend store
12 | - Remote tracking server
13 | - Model registry
14 | - Tracking UI
15 | - Connect from :ref:`Jupyter`. No configuration required
16 |
17 | =============================== ===================================================================
18 | Launch page https://staroid.com/g/open-datastudio/mlflow-server
19 | Open data studio repository https://github.com/open-datastudio/mlflow-server
20 | Original repository https://github.com/mlflow/mlflow
21 | Documentation https://mlflow.org/docs/latest/index.html
22 | =============================== ===================================================================
23 |
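With the remote tracking server deployed, logging runs from any Python environment uses the standard MLflow client API (a minimal sketch; the tracking URI is a hypothetical placeholder for your server's address):

.. code-block:: python

    import mlflow

    # Point the client at the deployed remote tracking server.
    mlflow.set_tracking_uri("https://my-mlflow-server.example.com")

    with mlflow.start_run():
        mlflow.log_param("alpha", 0.5)
        mlflow.log_metric("rmse", 0.27)
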
24 | MLflow Server Quickstart
25 | ------------------------
26 |
27 | .. image:: https://staroid.com/api/run/button.svg
28 | :target: https://staroid.com/g/open-datastudio/mlflow-server
29 |
30 |
31 | **Screenshots**
32 |
33 | |mlflow-screenshot|
34 |
35 | |mlflow-screenshot2|
36 |
37 | .. include:: ../ref.rst
38 |
39 | .. |mlflow-screenshot| image:: https://github.com/mlflow/mlflow/blob/f39a90d5fdbe588f5f4414d9d88af4f97b8f3de3/docs/source/_static/images/quickstart-ui-screenshot.png?raw=true
40 | :width: 700px
41 | :alt: MLflow
42 |
43 | .. |mlflow-screenshot2| image:: https://github.com/mlflow/mlflow/blob/f39a90d5fdbe588f5f4414d9d88af4f97b8f3de3/docs/source/_static/images/tutorial-compare.png?raw=true
44 | :width: 700px
45 | :alt: MLflow
46 |
--------------------------------------------------------------------------------
/docs/notebook/index.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Notebook
3 | ==============
4 |
5 | Notebooks are a form of interactive computing, in which users write and execute code, visualize the results, and share insights.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 |    Jupyter <jupyter>
11 |    Zeppelin <zeppelin>
12 |
13 | .. include:: ../ref.rst
14 |
--------------------------------------------------------------------------------
/docs/notebook/jupyter.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Jupyter
3 | ==============
4 |
5 | .. raw:: html
6 |
7 |
8 |
9 | |
10 |
11 | The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain live code, equations, visualizations and narrative text.
12 | Open data studio makes it easy to deploy on the cloud.
13 |
14 | Key features
15 |
16 | - Click to deploy. No setup required
17 | - Jupyter Lab
18 | - NVIDIA GPU, CUDA support for machine learning
19 | - Works with MLflow remote tracking server and artifact store out of the box
20 | - Persistent storage for ``~/work``
21 | - Connect to :ref:`MLflow server`. No configuration required
22 |
23 |
24 | Jupyter Quickstart
25 | -------------------
26 |
27 | .. image:: https://staroid.com/api/run/button.svg
28 | :target: https://staroid.com/g/open-datastudio/jupyter
29 |
30 | **Screenshots**
31 |
32 | |jupyter-screenshot|
33 |
34 | =============================== ===================================================================
35 | Launch page https://staroid.com/g/open-datastudio/jupyter
36 | Open data studio repository https://github.com/open-datastudio/jupyter
37 | Original repository https://github.com/jupyter/jupyter
38 | Documentation https://jupyter.readthedocs.io/en/latest/
39 | =============================== ===================================================================
40 |
41 | .. include:: ../ref.rst
42 |
43 | .. |jupyter-screenshot| image:: https://jupyter.org/assets/jupyterpreview.png
44 | :width: 700px
45 | :alt: Jupyter
46 |
--------------------------------------------------------------------------------
/docs/notebook/zeppelin.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Apache Zeppelin
3 | ===============
4 |
5 | .. raw:: html
6 |
7 |
8 |
9 | |
10 |
11 | Apache Zeppelin is a web-based notebook that enables data-driven, interactive data analytics and collaborative documents with SQL, Scala and more.
12 | Open data studio makes it easy to deploy on the cloud.
13 |
14 | Key features
15 |
16 | - Click to deploy. No setup required
17 | - Spark on Kubernetes is pre-configured
18 | - Spark 3.x and spark-ui
19 | - Comes with Spark, Python, and JDBC interpreters
20 | - Connect to :ref:`Hive metastore`. No configuration required
21 |
22 | |
23 |
24 | |zeppelin-logo|
25 |
26 | Zeppelin Quickstart
27 | -------------------
28 |
29 | Visit https://staroid.com/g/open-datastudio/zeppelin and click the ``Launch`` button.
30 |
31 | .. image:: https://staroid.com/api/run/button.svg
32 | :target: https://staroid.com/g/open-datastudio/zeppelin
33 |
34 | **Screenshots**
35 |
36 | Data visualization
37 | |zeppelin-screenshot|
38 |
39 | Spark cluster is automatically configured
40 |
41 | .. image:: https://user-images.githubusercontent.com/1540981/80290438-cf3bc180-86f9-11ea-8c1f-d2dedcd48a86.png
42 | :width: 700
43 |
49 | Spark UI access
50 |
51 | .. image:: https://user-images.githubusercontent.com/1540981/80290443-d8c52980-86f9-11ea-999c-eeafab25cf38.png
52 | :width: 700
53 |
54 | File manager
55 |
56 | .. image:: https://user-images.githubusercontent.com/1540981/82079532-d79f7080-9697-11ea-99c5-5787f070dce9.gif
57 | :width: 700
58 |
59 | =============================== ===================================================================
60 | Launch page https://staroid.com/g/open-datastudio/zeppelin
61 | Open data studio repository https://github.com/open-datastudio/zeppelin
62 | Original repository https://github.com/apache/zeppelin
63 | Documentation http://zeppelin.apache.org/docs/latest/
64 | =============================== ===================================================================
65 |
66 | .. include:: ../ref.rst
67 |
68 | .. |zeppelin-logo| image:: ../_static/zeppelin-logo.svg
69 | :width: 150px
70 | :alt: Apache zeppelin
71 |
72 | .. |zeppelin-screenshot| image:: http://zeppelin.apache.org/docs/0.8.2/assets/themes/zeppelin/img/notebook.png
73 | :width: 700px
74 | :alt: Apache zeppelin
75 |
--------------------------------------------------------------------------------
/docs/ref.rst:
--------------------------------------------------------------------------------
1 | .. _Staroid: https://staroid.com
2 | .. _staroid.com: https://staroid.com
3 | .. _StarRank: https://staroid.com/site/starrank
4 | .. _skaffold.yaml: https://skaffold.dev/docs/references/yaml/
5 | .. _Skaffold: https://skaffold.dev
6 | .. _Apache Superset: https://superset.apache.org
7 | .. _ods quick start: https://github.com/open-datastudio/ods#quick-start
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx_rtd_theme
2 | aafigure
3 |
--------------------------------------------------------------------------------
/docs/support/index.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Support
3 | =========
4 |
5 | Open data studio is commercially supported by Staroid_.
6 |
7 | For enterprise support, please `contact `_.
8 |
9 | .. include:: ../ref.rst
--------------------------------------------------------------------------------