├── server ├── app │ ├── __init__.py │ ├── auth │ │ ├── __init__.py │ │ └── auth.py │ ├── models │ │ ├── __init__.py │ │ ├── user.py │ │ ├── directory.py │ │ ├── notebook.py │ │ ├── spark_app.py │ │ └── spark_app_config.py │ ├── routes │ │ ├── __init__.py │ │ ├── kernel.py │ │ ├── login.py │ │ ├── session.py │ │ ├── directory.py │ │ ├── spark_app.py │ │ └── notebook.py │ └── services │ │ ├── __init__.py │ │ ├── kernel.py │ │ ├── user.py │ │ └── session.py ├── tests │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── test_user_model.py │ │ ├── test_directory_model.py │ │ ├── test_notebook_model.py │ │ ├── test_spark_app_model.py │ │ └── test_spark_app_config_model.py │ ├── routes │ │ ├── __init__.py │ │ └── test_spark_app_route.py │ └── services │ │ ├── __init__.py │ │ ├── test_kernel_service.py │ │ └── test_directory_service.py ├── database.py ├── requirements.txt ├── Dockerfile ├── config.py └── run.py ├── docker ├── nginx │ ├── Dockerfile │ └── nginx.conf ├── history-server │ └── Dockerfile ├── notebook │ ├── gcs_save_hook.py │ ├── jupyter_notebook_config.py │ └── Dockerfile ├── airflow │ └── Dockerfile ├── spark │ └── Dockerfile └── postgres │ └── init.sql ├── webapp ├── .babelrc ├── public │ ├── robots.txt │ ├── favicon.ico │ ├── logo192.png │ ├── logo512.png │ ├── env.template.js │ ├── manifest.json │ └── index.html ├── src │ ├── components │ │ ├── notebook │ │ │ ├── content │ │ │ │ ├── cell │ │ │ │ │ ├── CellType.js │ │ │ │ │ ├── CellExecuteResultType.js │ │ │ │ │ ├── CellStatus.js │ │ │ │ │ ├── result │ │ │ │ │ │ ├── OutputType.js │ │ │ │ │ │ ├── ErrorResult.js │ │ │ │ │ │ ├── DisplayResult.js │ │ │ │ │ │ ├── CodeResult.js │ │ │ │ │ │ └── TextResult.js │ │ │ │ │ ├── header │ │ │ │ │ │ ├── TypeSelect.js │ │ │ │ │ │ ├── RunButton.js │ │ │ │ │ │ ├── MoreButton.js │ │ │ │ │ │ └── CellHeader.js │ │ │ │ │ ├── content │ │ │ │ │ │ ├── MarkdownEditor.js │ │ │ │ │ │ └── CodeEditor.js │ │ │ │ │ └── CellSideButtons.js │ │ │ │ ├── ContentType.js │ │ │ │ ├── 
Runs.js │ │ │ │ ├── Code.js │ │ │ │ └── NotebookToolbar.js │ │ │ └── header │ │ │ │ ├── SparkApplicationId.js │ │ │ │ ├── move │ │ │ │ ├── MoveButton.js │ │ │ │ └── MoveDialog.js │ │ │ │ ├── NotebookTitle.js │ │ │ │ ├── NotebookKernel.js │ │ │ │ └── NotebookHeader.js │ │ ├── Scheduler.js │ │ ├── HistoryServer.js │ │ ├── sidebar │ │ │ ├── workspace │ │ │ │ ├── Back.js │ │ │ │ ├── item │ │ │ │ │ ├── DeleteDialog.js │ │ │ │ │ ├── Item.js │ │ │ │ │ ├── RenameDialog.js │ │ │ │ │ └── MoreButton.js │ │ │ │ ├── header │ │ │ │ │ ├── CreateFolderDialog.js │ │ │ │ │ ├── CreateNotebookDialog.js │ │ │ │ │ ├── CreateButton.js │ │ │ │ │ └── WorkspaceSidebarHeader.js │ │ │ │ └── WorkspaceSidebar.js │ │ │ ├── create │ │ │ │ └── CreateSidebar.js │ │ │ └── account │ │ │ │ └── AccountSidebar.js │ │ └── auth │ │ │ └── LoginForm.js │ ├── reportWebVitals.js │ ├── styles │ │ ├── index.css │ │ └── App.css │ ├── index.js │ ├── config.js │ ├── utils │ │ └── StringUtils.js │ ├── models │ │ ├── KernelModel.js │ │ ├── SessionModel.js │ │ ├── SparkAppConfigModel.js │ │ ├── DirectoryModel.js │ │ └── SparkModel.js │ └── assets │ │ └── spark-start.svg ├── jest.config.js ├── .env.test ├── entrypoint.sh ├── .env.dev ├── test │ ├── setupTests.js │ └── component │ │ └── notebook │ │ └── cell │ │ └── header │ │ └── RunButton.test.js ├── config-overrides.js ├── Dockerfile └── package.json ├── resources └── images │ ├── logo.png │ ├── spark-ui.png │ ├── architecture.jpg │ └── notebook-spark-integration.png ├── helm ├── data-platform │ ├── templates │ │ ├── spark-serviceaccount.yaml │ │ ├── notebook-service.yaml │ │ ├── spark-history-server-service.yaml │ │ ├── spark-role-binding.yaml │ │ ├── spark-role.yaml │ │ ├── notebook-cluster-ip.yaml │ │ ├── spark-history-server-deployment.yaml │ │ ├── notebook-spark-ui.yaml │ │ └── notebook-deployment.yaml │ ├── .helmignore │ └── Chart.yaml └── spark-ui │ ├── templates │ └── spark-ui-service.yaml │ └── Chart.yaml ├── bin ├── upload_sample_dataset.sh ├── 
build_docker.sh ├── setup.sh ├── uninstall_helm_chart.sh ├── env_template.sh ├── delete_gcp.sh ├── install_helm_chart.sh ├── setup_security.sh └── connect_gcp.sh ├── examples ├── airflow_demo.py └── user_0@gmail.com │ ├── word-count │ ├── Dockerfile │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── example.txt │ │ │ └── scala │ │ │ └── WordCount.scala │ └── pom.xml │ └── demo.ipynb ├── .github └── workflows │ ├── test-webapp.yml │ ├── build-docker-server.yml │ ├── build-docker-webapp.yml │ ├── build-docker-spark.yml │ ├── build-docker-airflow.yml │ ├── build-docker-notebook.yml │ ├── build-docker-history-server.yml │ ├── integration-test.yml │ └── build-examples.yml ├── dags ├── demo.py └── sg_resale_flat_prices.py └── GCP.md /server/app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/app/auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/app/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/app/routes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/app/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/tests/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/tests/routes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/tests/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:alpine 2 | COPY nginx.conf /etc/nginx/nginx.conf 3 | -------------------------------------------------------------------------------- /server/database.py: -------------------------------------------------------------------------------- 1 | from flask_sqlalchemy import SQLAlchemy 2 | 3 | db = SQLAlchemy() -------------------------------------------------------------------------------- /webapp/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env", "@babel/preset-react"] 3 | } -------------------------------------------------------------------------------- /webapp/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /resources/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/resources/images/logo.png -------------------------------------------------------------------------------- /webapp/public/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/webapp/public/favicon.ico -------------------------------------------------------------------------------- /webapp/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/webapp/public/logo192.png -------------------------------------------------------------------------------- /webapp/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/webapp/public/logo512.png -------------------------------------------------------------------------------- /resources/images/spark-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/resources/images/spark-ui.png -------------------------------------------------------------------------------- /resources/images/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/resources/images/architecture.jpg -------------------------------------------------------------------------------- /webapp/src/components/notebook/content/cell/CellType.js: -------------------------------------------------------------------------------- 1 | export const CellType = { 2 | CODE: 'code', 3 | MARKDOWN: 'markdown', 4 | }; -------------------------------------------------------------------------------- /webapp/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | transform: { 3 | '^.+\\.jsx?$': 'babel-jest', 4 | }, 5 | testEnvironment: 'jsdom', 6 | }; -------------------------------------------------------------------------------- /resources/images/notebook-spark-integration.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/HEAD/resources/images/notebook-spark-integration.png -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: spark 5 | namespace: spark-dev 6 | -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==3.0.3 2 | Flask-Cors==4.0.1 3 | requests==2.32.2 4 | Flask-SQLAlchemy==3.1.1 5 | psycopg2-binary==2.9.9 6 | flask-jwt-extended==4.6.0 -------------------------------------------------------------------------------- /webapp/src/components/notebook/content/cell/CellExecuteResultType.js: -------------------------------------------------------------------------------- 1 | export const CellExecuteResultType = { 2 | ERROR: 'error', 3 | SUCCESS: 'success', 4 | } -------------------------------------------------------------------------------- /webapp/src/components/notebook/content/ContentType.js: -------------------------------------------------------------------------------- 1 | import Config from "./Config"; 2 | 3 | export const ContentType = { 4 | CODE: 'code', 5 | Config: 'config', 6 | Runs: 'runs', 7 | }; -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | WORKDIR /server 4 | 5 | COPY . 
/server 6 | 7 | RUN pip install -r requirements.txt 8 | 9 | EXPOSE 5002 10 | 11 | CMD ["python", "run.py"] -------------------------------------------------------------------------------- /webapp/src/components/notebook/content/cell/CellStatus.js: -------------------------------------------------------------------------------- 1 | export const CellStatus = { 2 | INITIALIZING: 'initializing', 3 | WAITING: 'waiting', 4 | CONNECTING: 'connecting', 5 | BUSY: 'busy', 6 | IDLE: 'idle', 7 | }; -------------------------------------------------------------------------------- /webapp/src/components/notebook/content/cell/result/OutputType.js: -------------------------------------------------------------------------------- 1 | export const OutputType = { 2 | EXECUTE_RESULT: 'execute_result', 3 | ERROR: 'error', 4 | STREAM: 'stream', 5 | STATUS: 'status', 6 | DISPLAY_DATA: 'display_data', 7 | }; -------------------------------------------------------------------------------- /helm/data-platform/templates/notebook-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: notebook 5 | spec: 6 | selector: 7 | app: notebook 8 | ports: 9 | - port: 80 10 | targetPort: 8888 11 | type: LoadBalancer 12 | -------------------------------------------------------------------------------- /webapp/.env.test: -------------------------------------------------------------------------------- 1 | REACT_APP_SERVER_BASE_URL=server:5002 2 | REACT_APP_JUPYTER_BASE_URL=notebook:8888 3 | REACT_APP_SPARK_UI_BASE_URL=history-server:18080 4 | REACT_APP_AIRFLOW_BASE_URL=airflow-webserver:8080 5 | REACT_APP_USERNAME=user_0 6 | REACT_APP_PASSWORD=12345A -------------------------------------------------------------------------------- /webapp/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Replace placeholders in your built React app with 
actual environment variables 4 | envsubst < /usr/share/nginx/html/env.template.js > /usr/share/nginx/html/env.js 5 | 6 | # Start the nginx server 7 | nginx -g 'daemon off;' 8 | -------------------------------------------------------------------------------- /webapp/.env.dev: -------------------------------------------------------------------------------- 1 | REACT_APP_SERVER_BASE_URL=http://localhost:5002 2 | REACT_APP_JUPYTER_BASE_URL=http://localhost:8888 3 | REACT_APP_SPARK_UI_BASE_URL=http://localhost:18081 4 | REACT_APP_AIRFLOW_BASE_URL=http://localhost:8090 5 | REACT_APP_USERNAME=user_0 6 | REACT_APP_PASSWORD=12345A -------------------------------------------------------------------------------- /webapp/test/setupTests.js: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-history-server-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: spark-history-server 5 | spec: 6 | type: LoadBalancer 7 | ports: 8 | - port: 18080 9 | targetPort: 18080 10 | selector: 11 | app: spark-history-server 12 | -------------------------------------------------------------------------------- /bin/upload_sample_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the path to the file you want to upload 4 | FILE_PATH="examples/word-count/src/main/resources/example.txt" 5 | 6 | # Use gsutil to upload the file to the GCP bucket 7 | gsutil cp $FILE_PATH 
# Timestamp, logger name, level and message for every record.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Configure the root logger once at import time; INFO and above are emitted.
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Log a start message followed by a completion message at INFO level."""
    for message in ("Starting task...", "Task completed successfully!"):
        logger.info(message)


if __name__ == "__main__":
    main()
/**
 * Forward web-vitals measurements to a caller-supplied handler.
 *
 * Nothing happens unless `onPerfEntry` is a function. When it is, the
 * `web-vitals` module is imported dynamically (so it is only requested when a
 * handler was actually provided) and each of its getCLS, getFID, getFCP,
 * getLCP and getTTFB collectors is invoked with the handler, in that order.
 *
 * @param {Function} [onPerfEntry] Callback handed to every collector.
 */
const reportWebVitals = onPerfEntry => {
  // Guard clause: ignore a missing or non-callable handler.
  if (!(onPerfEntry && onPerfEntry instanceof Function)) {
    return;
  }

  import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
    const collectors = [getCLS, getFID, getFCP, getLCP, getTTFB];
    collectors.forEach(collect => collect(onPerfEntry));
  });
};

export default reportWebVitals;
Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /webapp/config-overrides.js: -------------------------------------------------------------------------------- 1 | module.exports = function override(config, env) { 2 | config.resolve.fallback = { 3 | ...config.resolve.fallback, 4 | "path": require.resolve("path-browserify"), // Or false if you choose not to polyfill 5 | "crypto": false, // Or false if you choose not to polyfill 6 | "querystring": require.resolve("querystring-es3"), 7 | "stream": false, 8 | "buffer": require.resolve("buffer/"), 9 | }; 10 | return config; 11 | }; 12 | -------------------------------------------------------------------------------- /docker/history-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/spark:3.5.0 2 | 3 | # Set environment variables 4 | ENV SPARK_HOME=/opt/spark 5 | # Install GCS Connector 6 | RUN wget https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.0/gcs-connector-hadoop3-2.2.0-shaded.jar -P $SPARK_HOME/jars/ 7 | 8 | # Expose port 18080 for History Server UI 9 | EXPOSE 18080 10 | 11 | # Start the History Server 12 | CMD ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"] 13 | -------------------------------------------------------------------------------- /bin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source bin/env.sh 3 | source bin/connect_gcp.sh 4 | 
source bin/upload_sample_dataset.sh 5 | 6 | # Install Helm chart 7 | source bin/install_helm_chart.sh 8 | 9 | # Setup cluster security 10 | source bin/setup_security.sh 11 | 12 | kubectl get namespace "$GKE_NAMESPACE" > /dev/null 2>&1 13 | if [ $? -ne 0 ]; then 14 | echo "Creating namespace: $GKE_NAMESPACE" 15 | kubectl create namespace "$GKE_NAMESPACE" 16 | else 17 | echo "Namespace $GKE_NAMESPACE already exists" 18 | fi -------------------------------------------------------------------------------- /bin/uninstall_helm_chart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if the release exists 4 | if helm list -n $GKE_NAMESPACE | grep -q $RELEASE_DATA_PLATFORM_NAME; then 5 | helm uninstall $RELEASE_DATA_PLATFORM_NAME -n $GKE_NAMESPACE 6 | else 7 | echo "Release '$RELEASE_DATA_PLATFORM_NAME' not found." 8 | fi 9 | 10 | if helm list -n $GKE_NAMESPACE | grep -q $RELEASE_SPARK_UI_NAME; then 11 | helm uninstall $RELEASE_SPARK_UI_NAME -n $GKE_NAMESPACE 12 | else 13 | echo "Release '$RELEASE_SPARK_UI_NAME' not found." 
def gcs_save_hook(os_path, model, contents_manager, **kwargs):
    """Sync the local notebook directory to a GCS bucket after a save.

    The destination is ``gs://<BUCKET_NAME>/notebooks``, where ``BUCKET_NAME``
    is read from the environment and falls back to ``"default-bucket-name"``.
    The sync is delegated to ``gsutil -m rsync -r`` over ``/home/jovyan/``.

    Args:
        os_path: Unused here; accepted to satisfy the hook signature.
        model: Unused here; accepted to satisfy the hook signature.
        contents_manager: Unused here; accepted to satisfy the hook signature.
        **kwargs: Ignored.

    Returns:
        None.

    Raises:
        FileNotFoundError: If the ``gsutil`` executable cannot be found.
    """
    bucket_name = os.environ.get("BUCKET_NAME", "default-bucket-name")
    # Construct the GCS bucket path
    gcs_path = f"gs://{bucket_name}/notebooks"

    local_notebook_dir = "/home/jovyan/"
    # Matches every path that does NOT end in ".ipynb"; passed to ``-x`` so
    # that only notebooks are synced. Raw string: "\." is not a valid Python
    # escape and triggers a SyntaxWarning on Python 3.12+ in a plain literal.
    exclude_pattern = r'^(?!.*\.ipynb$).*$'
    # Sync from local to GCS. The exit status is intentionally not checked
    # (check=False) so a failed upload never turns into an exception.
    subprocess.run(
        ["gsutil", "-m", "rsync", "-r", "-x", exclude_pattern, local_notebook_dir, gcs_path],
        check=False,
    )
# Blueprint grouping the HTTP endpoints that operate on a single kernel.
kernel_blueprint = Blueprint('kernel', __name__)

logging.basicConfig(level=logging.INFO)


# NOTE(review): the URL rules below need a ``<kernel_id>`` variable because the
# view functions take ``kernel_id`` as a required argument; without it Flask
# calls the view with no arguments and raises TypeError. The default (string)
# converter is used — switch to ``<path:kernel_id>`` if ids can contain "/".
@kernel_blueprint.route('/kernel/<kernel_id>', methods=['GET'])
def get_kernel_by_id(kernel_id):
    """Handle ``GET /kernel/<kernel_id>``.

    Args:
        kernel_id: Kernel identifier captured from the URL path.

    Returns:
        The value returned by ``Kernel.get_kernel_by_id``, passed through
        unchanged.
    """
    logging.info(f"Getting kernel with id: {kernel_id}")
    return Kernel.get_kernel_by_id(kernel_id)


@kernel_blueprint.route('/kernel/restart/<kernel_id>', methods=['POST'])
def restart_kernel(kernel_id):
    """Handle ``POST /kernel/restart/<kernel_id>``.

    Args:
        kernel_id: Kernel identifier captured from the URL path.

    Returns:
        The value returned by ``Kernel.restart_kernel``, passed through
        unchanged.
    """
    logging.info(f"Restarting kernel with id: {kernel_id}")
    return Kernel.restart_kernel(kernel_id)
{/* Adjust marginLeft based on your sidebar width */} 7 | {showScheduler && ( 8 |