├── .github └── workflows │ ├── build-docker-airflow.yml │ ├── build-docker-history-server.yml │ ├── build-docker-notebook.yml │ ├── build-docker-server.yml │ ├── build-docker-spark.yml │ ├── build-docker-webapp.yml │ ├── build-examples.yml │ ├── integration-test.yml │ └── test-webapp.yml ├── .gitignore ├── GCP.md ├── LICENSE ├── README.md ├── bin ├── build_docker.sh ├── connect_gcp.sh ├── delete_gcp.sh ├── env_template.sh ├── install_helm_chart.sh ├── setup.sh ├── setup_security.sh ├── submit_spark_app.sh ├── uninstall_helm_chart.sh └── upload_sample_dataset.sh ├── dags ├── demo.py └── sg_resale_flat_prices.py ├── datasets └── ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv ├── docker-compose.yaml ├── docker ├── airflow │ ├── Dockerfile │ ├── airflow.cfg │ └── webserver_config.py ├── history-server │ └── Dockerfile ├── nginx │ ├── Dockerfile │ └── nginx.conf ├── notebook │ ├── Dockerfile │ ├── gcs_save_hook.py │ ├── jupyter_notebook_config.py │ └── startup.py ├── postgres │ └── init.sql └── spark │ └── Dockerfile ├── examples ├── airflow_demo.py └── user_0@gmail.com │ ├── demo.ipynb │ ├── notebook.ipynb │ ├── quickstart.ipynb │ ├── sg-resale-flat-prices │ └── sg-resale-flat-prices.ipynb │ └── word-count │ ├── Dockerfile │ ├── pom.xml │ └── src │ └── main │ ├── resources │ └── example.txt │ └── scala │ └── WordCount.scala ├── helm ├── data-platform │ ├── .helmignore │ ├── Chart.yaml │ └── templates │ │ ├── notebook-cluster-ip.yaml │ │ ├── notebook-deployment.yaml │ │ ├── notebook-service.yaml │ │ ├── notebook-spark-ui.yaml │ │ ├── spark-history-server-deployment.yaml │ │ ├── spark-history-server-service.yaml │ │ ├── spark-role-binding.yaml │ │ ├── spark-role.yaml │ │ └── spark-serviceaccount.yaml └── spark-ui │ ├── Chart.yaml │ └── templates │ └── spark-ui-service.yaml ├── resources └── images │ ├── architecture.jpg │ ├── logo.png │ ├── logo_black.svg │ ├── notebook-spark-integration.png │ └── spark-ui.png ├── server ├── Dockerfile ├── app │ ├── __init__.py │ ├── auth │ │ ├── __init__.py │ │ └── auth.py │ ├── models │ │ ├── __init__.py │ │ ├── directory.py │ │ ├── notebook.py │ │ ├── spark_app.py │ │ ├── spark_app_config.py │ │ └── user.py │ ├── routes │ │ ├── __init__.py │ │ ├── directory.py │ │ ├── kernel.py │ │ ├── login.py │ │ ├── notebook.py │ │ ├── session.py │ │ └── spark_app.py │ └── services │ │ ├── __init__.py │ │ ├── directory.py │ │ ├── kernel.py │ │ ├── notebook.py │ │ ├── session.py │ │ ├── spark_app.py │ │ └── user.py ├── config.py ├── database.py ├── requirements.txt ├── run.py └── tests │ ├── __init__.py │ ├── models │ ├── __init__.py │ ├── test_directory_model.py │ ├── test_notebook_model.py │ ├── test_spark_app_config_model.py │ ├── test_spark_app_model.py │ └── test_user_model.py │ ├── routes │ ├── __init__.py │ ├── test_notebook_route.py │ └── test_spark_app_route.py │ └── services │ ├── __init__.py │ ├── test_directory_service.py │ ├── test_kernel_service.py │ ├── test_notebook_service.py │ ├── test_session_service.py │ └── test_spark_app_service.py └── webapp ├── .babelrc ├── .env.dev ├── .env.test ├── Dockerfile ├── config-overrides.js ├── entrypoint.sh ├── jest.config.js ├── package-lock.json ├── package.json ├── public ├── env.template.js ├── favicon.ico ├── index.html ├── logo192.png ├── logo512.png ├── manifest.json └── robots.txt ├── src ├── App.js ├── assets │ ├── logo_#222.svg │ ├── logo_#333.svg │ ├── logo_black.svg │ ├── spark-logo-rev.svg │ └── spark-start.svg ├── components │ ├── HistoryServer.js │ ├── Scheduler.js │ ├── auth │ │ └── 
LoginForm.js │ ├── notebook │ │ ├── Notebook.js │ │ ├── content │ │ │ ├── Code.js │ │ │ ├── Config.js │ │ │ ├── ContentType.js │ │ │ ├── NotebookToolbar.js │ │ │ ├── Runs.js │ │ │ └── cell │ │ │ │ ├── Cell.js │ │ │ │ ├── CellExecuteResultType.js │ │ │ │ ├── CellSideButtons.js │ │ │ │ ├── CellStatus.js │ │ │ │ ├── CellType.js │ │ │ │ ├── content │ │ │ │ ├── CodeEditor.js │ │ │ │ └── MarkdownEditor.js │ │ │ │ ├── header │ │ │ │ ├── CellHeader.js │ │ │ │ ├── MoreButton.js │ │ │ │ ├── RunButton.js │ │ │ │ └── TypeSelect.js │ │ │ │ └── result │ │ │ │ ├── CodeResult.js │ │ │ │ ├── DisplayResult.js │ │ │ │ ├── ErrorResult.js │ │ │ │ ├── OutputType.js │ │ │ │ └── TextResult.js │ │ └── header │ │ │ ├── NotebookHeader.js │ │ │ ├── NotebookKernel.js │ │ │ ├── NotebookTitle.js │ │ │ ├── SparkApplicationId.js │ │ │ └── move │ │ │ ├── MoveButton.js │ │ │ └── MoveDialog.js │ └── sidebar │ │ ├── Sidebar.js │ │ ├── account │ │ └── AccountSidebar.js │ │ ├── create │ │ └── CreateSidebar.js │ │ └── workspace │ │ ├── Back.js │ │ ├── WorkspaceSidebar.js │ │ ├── header │ │ ├── CreateButton.js │ │ ├── CreateFolderDialog.js │ │ ├── CreateNotebookDialog.js │ │ └── WorkspaceSidebarHeader.js │ │ └── item │ │ ├── DeleteDialog.js │ │ ├── Item.js │ │ ├── MoreButton.js │ │ └── RenameDialog.js ├── config.js ├── index.js ├── models │ ├── DirectoryModel.js │ ├── KernelModel.js │ ├── NotebookModel.js │ ├── SessionModel.js │ ├── SparkAppConfigModel.js │ └── SparkModel.js ├── reportWebVitals.js ├── styles │ ├── App.css │ └── index.css └── utils │ └── StringUtils.js └── test ├── component └── notebook │ └── cell │ └── header │ └── RunButton.test.js └── setupTests.js /.github/workflows/build-docker-airflow.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker - Airflow 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'docker/airflow/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/airflow:$timestamp -f docker/airflow/Dockerfile ./docker/airflow 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/airflow:$timestamp ${{ secrets.DOCKERHUB_USERNAME }}/airflow:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/airflow:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/airflow:latest 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-history-server.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker - History Server 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'docker/history-server/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/history-server:$timestamp -f docker/history-server/Dockerfile ./docker/history-server 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/history-server:$timestamp ${{ 
secrets.DOCKERHUB_USERNAME }}/history-server:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/history-server:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/history-server:latest 28 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-notebook.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker - Notebook 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'docker/notebook/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/notebook:$timestamp -f docker/notebook/Dockerfile ./docker/notebook 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/notebook:$timestamp ${{ secrets.DOCKERHUB_USERNAME }}/notebook:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/notebook:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/notebook:latest -------------------------------------------------------------------------------- /.github/workflows/build-docker-server.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker - Server 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'server/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/server:$timestamp -f server/Dockerfile ./server 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/server:$timestamp ${{ secrets.DOCKERHUB_USERNAME }}/server:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/server:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/server:latest 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-spark.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker - Spark 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'docker/spark/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/spark:$timestamp -f docker/spark/Dockerfile ./docker/spark 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/spark:$timestamp ${{ secrets.DOCKERHUB_USERNAME }}/spark:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/spark:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/spark:latest 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/build-docker-webapp.yml: -------------------------------------------------------------------------------- 1 | name: Build 
Docker - WebApp 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'webapp/**' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: Log in to Docker Hub 14 | uses: docker/login-action@v1 15 | with: 16 | username: ${{ secrets.DOCKERHUB_USERNAME }} 17 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 18 | 19 | - name: Docker Build & Push 20 | run: | 21 | timestamp=$(date +"%Y%m%d%H%M%S") 22 | 23 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/webapp:$timestamp -f webapp/Dockerfile ./webapp 24 | docker tag ${{ secrets.DOCKERHUB_USERNAME }}/webapp:$timestamp ${{ secrets.DOCKERHUB_USERNAME }}/webapp:latest 25 | 26 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/webapp:$timestamp 27 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/webapp:latest 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/build-examples.yml: -------------------------------------------------------------------------------- 1 | name: Build Examples 2 | 3 | # Controls when the workflow will run 4 | on: 5 | push: 6 | paths: 7 | - 'examples/**' 8 | 9 | pull_request: 10 | paths: 11 | - 'examples/**' 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | inputs: 16 | deploy-example: 17 | type: choice 18 | description: Example to Deploy 19 | options: 20 | - None 21 | - WordCount 22 | 23 | jobs: 24 | build-examples: 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 29 | - uses: actions/checkout@v3 30 | 31 | # Runs a set of commands using the runners shell 32 | - name: Maven Package 33 | run: | 34 | cd examples/user_0@gmail.com/word-count 35 | mvn clean package 36 | 37 | version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) 38 | timestamp=$(date +"%Y%m%d%H%M%S") 39 | version_with_timestamp="${version}-${timestamp}" 40 | echo "Version with timestamp: $version_with_timestamp" 41 | 42 | echo "VERSION=$version" >> $GITHUB_ENV 43 | echo "VERSION_WITH_TIMESTAMP=$version_with_timestamp" >> $GITHUB_ENV 44 | 45 | - name: Log in to Docker Hub 46 | uses: docker/login-action@v1 47 | with: 48 | username: ${{ secrets.DOCKERHUB_USERNAME }} 49 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 50 | 51 | - name: Docker Build & Push 52 | run: | 53 | cd examples/user_0@gmail.com/word-count 54 | docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/word-count:$VERSION_WITH_TIMESTAMP --build-arg VERSION=$VERSION . 
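# Only the timestamped tag gets pushed; unlike the service image workflows above, no floating "latest" tag is published for the example image.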
55 | docker push ${{ secrets.DOCKERHUB_USERNAME }}/word-count:$VERSION_WITH_TIMESTAMP 56 | 57 | -------------------------------------------------------------------------------- /.github/workflows/integration-test.yml: -------------------------------------------------------------------------------- 1 | name: Integration Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | 9 | services: 10 | postgres: 11 | image: postgres:13 12 | env: 13 | POSTGRES_USER: server 14 | POSTGRES_PASSWORD: password-server 15 | POSTGRES_DB: server_db 16 | ports: 17 | - 5432:5432 18 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 19 | 20 | notebook: 21 | image: wenyixu101/notebook:latest 22 | env: 23 | JUPYTER_ENABLE_LAB: "no" 24 | ports: 25 | - "8888:8888" 26 | options: --health-cmd "curl -f http://localhost:8888 || exit 1" --health-interval 10s --health-timeout 5s --health-retries 5 27 | 28 | 29 | steps: 30 | - name: Checkout code 31 | uses: actions/checkout@v2 32 | 33 | - name: Set up Python 34 | uses: actions/setup-python@v2 35 | with: 36 | python-version: '3.11' 37 | 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | pip install -r requirements.txt 42 | working-directory: server 43 | 44 | - name: Run tests 45 | run: | 46 | python -m unittest discover tests 47 | working-directory: server 48 | env: 49 | ENV: integration -------------------------------------------------------------------------------- /.github/workflows/test-webapp.yml: -------------------------------------------------------------------------------- 1 | name: Test Web App 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'webapp/**' 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v2 15 | 16 | - name: Install dependencies 17 | run: npm install 18 | working-directory: webapp 19 | 20 | - name: Run tests 21 | run: npx jest 22 | working-directory: webapp -------------------------------------------------------------------------------- /GCP.md: -------------------------------------------------------------------------------- 1 | ## Quickstart 2 | ### Notebook 3 | #### Step 1: Setup Configuration 4 | ```bash 5 | cp bin/env_template.sh bin/env.sh 6 | ``` 7 | Fill in the `env.sh` file with your own configurations. 8 | 9 | #### Step 2: Create a Kubernetes cluster on GCP 10 | ```bash 11 | source bin/setup.sh 12 | ``` 13 | 14 | #### Step 3: Create a Jupyter Notebook 15 | A service `notebook` will be created on the Kubernetes cluster. 16 | 17 | #### Step 4: Check Spark Integration 18 | ![Notebook Spark integration](resources/images/notebook-spark-integration.png) 19 | 20 | Check Spark information by running the following code in a notebook cell: 21 | ```python 22 | start() 23 | ``` 24 | 25 | #### Step 5: Check Spark UI 26 | ![Spark UI](resources/images/spark-ui.png) 27 | 28 | Check Spark UI by clicking the link in the notebook cell output.
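If the link does not render, you can also look up the exposed endpoints directly. A minimal sketch, assuming the `notebook-spark-ui` service name from the Helm templates and the `GKE_NAMESPACE` value from your `env.sh`:
```bash
# List the LoadBalancer services created by the data-platform chart.
kubectl get svc notebook notebook-spark-ui spark-history-server -n "$GKE_NAMESPACE"

# Build the Spark UI URL from the external IP (the first notebook session binds port 4040).
echo "http://$(kubectl get svc notebook-spark-ui -n "$GKE_NAMESPACE" \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}'):4040"
```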
29 | 30 | ## Docker Image 31 | - [all-spark-notebook](https://hub.docker.com/repository/docker/wenyixu101/all-spark-notebook/general) 32 | - Based on jupyter/all-spark-notebook:spark-3.5.0 33 | - Includes the Google Cloud SDK and the GCS connector 34 | - Includes a PySpark startup script 35 | - Includes a notebook save hook to save notebooks to GCS 36 | 37 | - [spark-history-server](https://hub.docker.com/repository/docker/wenyixu101/spark-history-server) 38 | - Based on apache/spark:3.5.0 39 | - Includes the GCS connector -------------------------------------------------------------------------------- /bin/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DOCKER_ACCOUNT="wenyixu101" 4 | DOCKER_PATH="./kubernetes" 5 | DOCKERFILE="Dockerfile.spark-ui" 6 | 7 | DOCKER_IMAGE="kubectl:0.0.3" 8 | 9 | docker build --no-cache . -t ${DOCKER_ACCOUNT}/${DOCKER_IMAGE} -f ${DOCKER_PATH}/${DOCKERFILE} 10 | docker push ${DOCKER_ACCOUNT}/${DOCKER_IMAGE} 11 | 12 | -------------------------------------------------------------------------------- /bin/connect_gcp.sh: -------------------------------------------------------------------------------- 1 | # Authenticate with Google Cloud 2 | echo "Authenticating with Google Cloud..." 3 | 4 | # Check if already logged in 5 | CURRENT_ACCOUNT=$(gcloud config get-value account) 6 | 7 | if [ -z "$CURRENT_ACCOUNT" ]; then 8 | echo "No active account, authenticating with Google Cloud..." 9 | gcloud auth login 10 | else 11 | echo "Already logged in as $CURRENT_ACCOUNT" 12 | fi 13 | 14 | gcloud config set project $GCP_PROJECT_ID 15 | 16 | # Check if the cluster already exists 17 | if gcloud container clusters describe $GKE_CLUSTER_NAME --zone $GKE_CLUSTER_ZONE --project $GCP_PROJECT_ID > /dev/null 2>&1; then 18 | echo "Cluster $GKE_CLUSTER_NAME already exists." 19 | else 20 | echo "Creating cluster $GKE_CLUSTER_NAME..." 21 | gcloud container clusters create $GKE_CLUSTER_NAME \ 22 | --zone $GKE_CLUSTER_ZONE \ 23 | --project $GCP_PROJECT_ID \ 24 | --num-nodes $GKE_CLUSTER_NUM_NODES \ 25 | --machine-type $GKE_CLUSTER_MACHINE_TYPE \ 26 | --workload-pool=$GCP_PROJECT_ID.svc.id.goog 27 | fi 28 | 29 | # Connect to the cluster 30 | echo "Getting credentials for cluster $GKE_CLUSTER_NAME..." 31 | gcloud container clusters get-credentials $GKE_CLUSTER_NAME --zone $GKE_CLUSTER_ZONE --project $GCP_PROJECT_ID 32 | 33 | # Now kubectl is configured to use your GKE cluster 34 | echo "Connected to GKE cluster: $GKE_CLUSTER_NAME" 35 | 36 | # Get GKE endpoint info 37 | export KUBERNETES_API_SERVER_HOST=$(gcloud container clusters describe $GKE_CLUSTER_NAME --zone $GKE_CLUSTER_ZONE --format='value(endpoint)') 38 | echo "Kubernetes API server host: $KUBERNETES_API_SERVER_HOST" 39 | 40 | # Check if the bucket already exists 41 | if gsutil ls -b "gs://$BUCKET_NAME" >/dev/null 2>&1; then 42 | echo "Bucket gs://$BUCKET_NAME already exists." 43 | else 44 | echo "Bucket gs://$BUCKET_NAME does not exist. Creating the bucket." 45 | gsutil mb -l $BUCKET_LOCATION -c $BUCKET_STORAGE_CLASS "gs://$BUCKET_NAME" 46 | fi 47 | 48 | # Create event-logs folder 49 | if gsutil ls "gs://$BUCKET_NAME/event-logs" >/dev/null 2>&1; then 50 | echo "Folder gs://$BUCKET_NAME/event-logs already exists." 51 | else 52 | echo "Folder gs://$BUCKET_NAME/event-logs does not exist. Creating the folder."
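# GCS has no real folders: the event-logs prefix only exists once at least one object is stored under it, so seed it by copying the bundled sample event logs.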
53 | gsutil cp -r ./resources/event-logs gs://$BUCKET_NAME/event-logs 54 | fi -------------------------------------------------------------------------------- /bin/delete_gcp.sh: -------------------------------------------------------------------------------- 1 | # Check if the GKE cluster exists 2 | if gcloud container clusters list --project $GCP_PROJECT_ID --zone $GKE_CLUSTER_ZONE --filter="name=$GKE_CLUSTER_NAME" | grep -q $GKE_CLUSTER_NAME; then 3 | echo "Cluster $GKE_CLUSTER_NAME exists. Deleting the cluster." 4 | 5 | # Delete the cluster 6 | gcloud container clusters delete $GKE_CLUSTER_NAME --zone $GKE_CLUSTER_ZONE --project $GCP_PROJECT_ID 7 | else 8 | echo "Cluster $GKE_CLUSTER_NAME does not exist or you don't have permission to access it." 9 | fi 10 | 11 | # Check if the bucket exists 12 | if gsutil ls -b "gs://$BUCKET_NAME" >/dev/null 2>&1; then 13 | echo "Bucket gs://$BUCKET_NAME exists. Deleting the bucket and its contents." 14 | 15 | # Delete all objects in the bucket 16 | gsutil -m rm -r "gs://$BUCKET_NAME/**" 17 | 18 | # Remove the bucket 19 | gsutil rb "gs://$BUCKET_NAME" 20 | else 21 | echo "Bucket gs://$BUCKET_NAME does not exist or you don't have permission to access it." 22 | fi -------------------------------------------------------------------------------- /bin/env_template.sh: -------------------------------------------------------------------------------- 1 | export GCP_PROJECT_ID=PLACEHOLDER 2 | export GCP_SA_NAME=PLACEHOLDER 3 | export GCP_SA_DISPLAY_NAME=PLACEHOLDER 4 | export GCP_SA_KEY_FILE=PLACEHOLDER 5 | 6 | export GKE_CLUSTER_NAME=PLACEHOLDER 7 | export GKE_CLUSTER_ZONE=PLACEHOLDER 8 | export GKE_CLUSTER_NUM_NODES=PLACEHOLDER 9 | export GKE_CLUSTER_MACHINE_TYPE=PLACEHOLDER 10 | export GKE_NAMESPACE=PLACEHOLDER 11 | export GKE_SA_NAME=PLACEHOLDER 12 | 13 | export BUCKET_NAME=PLACEHOLDER 14 | export BUCKET_LOCATION=PLACEHOLDER 15 | export BUCKET_STORAGE_CLASS=PLACEHOLDER 16 | 17 | export KUBERNETES_API_SERVER_PORT=PLACEHOLDER 18 | 19 | # Helm 20 | export RELEASE_DATA_PLATFORM_NAME=PLACEHOLDER 21 | export CHART_DATA_PLATFORM_NAME=PLACEHOLDER 22 | export CHART_DATA_PLATFORM_VERSION=PLACEHOLDER 23 | 24 | export RELEASE_SPARK_UI_NAME=PLACEHOLDER 25 | export CHART_SPARK_UI_NAME=PLACEHOLDER 26 | export CHART_SPARK_UI_VERSION=PLACEHOLDER 27 | 28 | export SPARK_VERSION=PLACEHOLDER 29 | export SCALA_VERSION=PLACEHOLDER -------------------------------------------------------------------------------- /bin/install_helm_chart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if the release exists 4 | if helm list -n $GKE_NAMESPACE | grep -q $RELEASE_DATA_PLATFORM_NAME; then 5 | echo "Upgrading release '$RELEASE_DATA_PLATFORM_NAME'..." 6 | 7 | # Upgrade the Helm release 8 | helm upgrade $RELEASE_DATA_PLATFORM_NAME $CHART_DATA_PLATFORM_NAME \ 9 | -f $CHART_DATA_PLATFORM_NAME/Values.yaml \ 10 | --namespace $GKE_NAMESPACE \ 11 | --version $CHART_DATA_PLATFORM_VERSION \ 12 | --install # The --install flag ensures it installs if not present 13 | 14 | echo "Upgrade completed." 15 | else 16 | echo "Release '$RELEASE_DATA_PLATFORM_NAME' not found. Installing..." 
17 | # Install the Helm chart as a new release 18 | helm install $RELEASE_DATA_PLATFORM_NAME $CHART_DATA_PLATFORM_NAME \ 19 | -f $CHART_DATA_PLATFORM_NAME/Values.yaml \ 20 | --namespace $GKE_NAMESPACE \ 21 | --version $CHART_DATA_PLATFORM_VERSION \ 22 | --create-namespace # Creates the namespace if it doesn't exist 23 | fi 24 | -------------------------------------------------------------------------------- /bin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source bin/env.sh 3 | source bin/connect_gcp.sh 4 | source bin/upload_sample_dataset.sh 5 | 6 | # Install Helm chart 7 | source bin/install_helm_chart.sh 8 | 9 | # Setup cluster security 10 | source bin/setup_security.sh 11 | 12 | kubectl get namespace "$GKE_NAMESPACE" > /dev/null 2>&1 13 | if [ $? -ne 0 ]; then 14 | echo "Creating namespace: $GKE_NAMESPACE" 15 | kubectl create namespace "$GKE_NAMESPACE" 16 | else 17 | echo "Namespace $GKE_NAMESPACE already exists" 18 | fi -------------------------------------------------------------------------------- /bin/setup_security.sh: -------------------------------------------------------------------------------- 1 | # Create service account 2 | gcloud iam service-accounts create $GCP_SA_NAME --display-name "$GCP_SA_DISPLAY_NAME" 3 | 4 | # Grant permissions to the service account 5 | gcloud projects add-iam-policy-binding $GCP_PROJECT_ID \ 6 | --member "serviceAccount:$GCP_SA_NAME@$GCP_PROJECT_ID.iam.gserviceaccount.com" \ 7 | --role "roles/storage.admin" 8 | 9 | # Download the service account key 10 | gcloud iam service-accounts keys create $GCP_SA_KEY_FILE \ 11 | --iam-account $GCP_SA_NAME@$GCP_PROJECT_ID.iam.gserviceaccount.com 12 | 13 | # Enable Workload Identity on the cluster 14 | gcloud container clusters update $GKE_CLUSTER_NAME \ 15 | --zone $GKE_CLUSTER_ZONE \ 16 | --workload-pool=$GCP_PROJECT_ID.svc.id.goog 17 | 18 | # Bind the Kubernetes service account to the Google Cloud service account 19 | gcloud iam service-accounts add-iam-policy-binding \ 20 | --role roles/iam.workloadIdentityUser \ 21 | --member "serviceAccount:$GCP_PROJECT_ID.svc.id.goog[$GKE_NAMESPACE/$GKE_SA_NAME]" \ 22 | $GCP_SA_NAME@$GCP_PROJECT_ID.iam.gserviceaccount.com 23 | 24 | # Annotate the Kubernetes service account 25 | kubectl annotate serviceaccount $GKE_SA_NAME \ 26 | --namespace $GKE_NAMESPACE \ 27 | iam.gke.io/gcp-service-account=$GCP_SA_NAME@$GCP_PROJECT_ID.iam.gserviceaccount.com 28 | -------------------------------------------------------------------------------- /bin/uninstall_helm_chart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if the release exists 4 | if helm list -n $GKE_NAMESPACE | grep -q $RELEASE_DATA_PLATFORM_NAME; then 5 | helm uninstall $RELEASE_DATA_PLATFORM_NAME -n $GKE_NAMESPACE 6 | else 7 | echo "Release '$RELEASE_DATA_PLATFORM_NAME' not found." 8 | fi 9 | 10 | if helm list -n $GKE_NAMESPACE | grep -q $RELEASE_SPARK_UI_NAME; then 11 | helm uninstall $RELEASE_SPARK_UI_NAME -n $GKE_NAMESPACE 12 | else 13 | echo "Release '$RELEASE_SPARK_UI_NAME' not found." 
14 | fi -------------------------------------------------------------------------------- /bin/upload_sample_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the path to the file you want to upload 4 | FILE_PATH="examples/word-count/src/main/resources/example.txt" 5 | 6 | # Use gsutil to upload the file to the GCP bucket 7 | gsutil cp $FILE_PATH gs://$BUCKET_NAME/applications/word-count/input/example.txt -------------------------------------------------------------------------------- /dags/demo.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.operators.bash import BashOperator 3 | from datetime import datetime 4 | 5 | default_args = { 6 | 'owner': 'airflow', 7 | 'start_date': datetime(2023, 1, 1), 8 | 'catchup': True 9 | } 10 | 11 | dag = DAG( 12 | 'demo_dag', 13 | default_args=default_args, 14 | description='A simple DAG for demo', 15 | schedule_interval='@daily', 16 | catchup=False, 17 | ) 18 | 19 | run_script = BashOperator( 20 | task_id='display_logs', 21 | bash_command='python /opt/airflow/examples/airflow_demo.py', 22 | dag=dag, 23 | ) 24 | 25 | run_script -------------------------------------------------------------------------------- /dags/sg_resale_flat_prices.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.providers.papermill.operators.papermill import PapermillOperator 3 | from datetime import datetime, timedelta 4 | 5 | default_args = { 6 | 'owner': 'airflow', 7 | 'depends_on_past': False, 8 | 'email_on_failure': False, 9 | 'email_on_retry': False, 10 | 'retries': 1, 11 | 'retry_delay': timedelta(minutes=5), 12 | 'start_date': datetime(2023, 1, 1), 13 | 'catchup': True 14 | } 15 | 16 | with DAG( 17 | 'SG_Resale_Flat_Prices', 18 | default_args=default_args, 19 | description='DAG for analysis on Singapore resale flat prices', 20 | schedule_interval=timedelta(days=1), 21 | catchup=False, 22 | ) as dag: 23 | 24 | run_notebook = PapermillOperator( 25 | task_id='sg_resale_flat_prices_notebook', 26 | input_nb='/opt/airflow/examples/sg-resale-flat-prices/sg-resale-flat-prices.ipynb', 27 | output_nb='/opt/airflow/examples/sg-resale-flat-prices/output/output-notebook-{{ execution_date }}.ipynb', 28 | parameters={ 29 | 'spark_master': 'spark://spark-master:7077' 30 | }, 31 | ) 32 | 33 | run_notebook -------------------------------------------------------------------------------- /docker/airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:2.9.1-python3.11 2 | 3 | USER root 4 | 5 | # Install OpenJDK 17 6 | RUN apt-get update && \ 7 | apt-get install -y openjdk-17-jdk && \ 8 | rm -rf /var/lib/apt/lists/* 9 | 10 | USER airflow 11 | 12 | RUN pip install --upgrade pip && \ 13 | pip install apache-airflow-providers-papermill ipython jupyter ipykernel papermill pandas numpy matplotlib seaborn pyspark==3.5.0 14 | 15 | # Add and install the Python 3 kernel 16 | RUN python3 -m ipykernel install --user --name python3 --display-name "Python 3" 17 | 18 | # Set JAVA_HOME environment variable 19 | ENV JAVA_HOME /usr/lib/jvm/java-17-openjdk-amd64 20 | 21 | COPY webserver_config.py /opt/airflow/webserver_config.py 22 | COPY airflow.cfg /opt/airflow/airflow.cfg 23 | 24 | -------------------------------------------------------------------------------- /docker/history-server/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM apache/spark:3.5.0 2 | 3 | # Set environment variables 4 | ENV SPARK_HOME=/opt/spark 5 | # Install GCS Connector 6 | RUN wget https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.0/gcs-connector-hadoop3-2.2.0-shaded.jar -P $SPARK_HOME/jars/ 7 | 8 | # Expose port 18080 for History Server UI 9 | EXPOSE 18080 10 | 11 | # Start the History Server 12 | CMD ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"] 13 | -------------------------------------------------------------------------------- /docker/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:alpine 2 | COPY nginx.conf /etc/nginx/nginx.conf 3 | -------------------------------------------------------------------------------- /docker/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | events {} 2 | 3 | http { 4 | server { 5 | listen 80; 6 | 7 | location / { 8 | proxy_pass http://history-server:18080; 9 | proxy_hide_header X-Frame-Options; 10 | add_header X-Frame-Options "ALLOW-FROM http://localhost:3000" always; # Add the new header 11 | add_header X-Frame-Options "ALLOW-FROM http://webapp:5001" always; # Add the new header 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /docker/notebook/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyter/all-spark-notebook:spark-3.5.0 2 | 3 | USER root 4 | 5 | # Install necessary packages for Google Cloud SDK 6 | RUN apt-get update -y && \ 7 | apt-get install -y curl gcc python3-dev apt-transport-https lsb-release gnupg && \ 8 | rm -rf /var/lib/apt/lists/* 9 | 10 | # Install pyspark 11 | RUN pip install pyspark 12 | 13 | # Add Google Cloud SDK to the sources list 14 | RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list 15 | 16 | # Import Google's public key 17 | RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - 18 | 19 | # Install Google Cloud SDK 20 | RUN apt-get update -y && \ 21 | apt-get install -y google-cloud-sdk && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | # Install the Google Cloud Storage Python library 25 | RUN pip install --upgrade google-cloud-storage \ 26 | kubernetes 27 | 28 | # GCS connector 29 | RUN wget -P /usr/local/spark/jars/ \ 30 | https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.6/gcs-connector-hadoop3-2.2.6-shaded.jar 31 | 32 | # Create a new directory for the IPython profile 33 | RUN mkdir -p /home/jovyan/.custom_ipython_profile/profile_default/startup/ && \ 34 | chown -R jovyan:users /home/jovyan/.custom_ipython_profile && \ 35 | chmod -R 775 /home/jovyan/.custom_ipython_profile 36 | 37 | # Copy the custom IPython profile to the new directory 38 | COPY startup.py /home/jovyan/.custom_ipython_profile/profile_default/startup/ 39 | 40 | # Copy the save hook script and configuration file into the container 41 | COPY gcs_save_hook.py /home/jovyan/.jupyter/ 42 | COPY jupyter_notebook_config.py /home/jovyan/.jupyter/ 43 | 44 | # Switch back to the jovyan user 45 | USER jovyan 46 | 47 | # Set environment variable to use the custom IPython profile directory 48 | ENV 
IPYTHONDIR=/home/jovyan/.custom_ipython_profile 49 | 50 | # Set JUPYTER_CONFIG_DIR to point to the directory with the config file 51 | ENV JUPYTER_CONFIG_DIR /home/jovyan/.jupyter/ 52 | 53 | # Add the JUPYTER_CONFIG_DIR to the PYTHONPATH 54 | ENV PYTHONPATH "${PYTHONPATH}:${JUPYTER_CONFIG_DIR}" 55 | 56 | ENV HOME_DIR="/home/jovyan" 57 | ENV BUCKET_NAME="data-platform-bucket-20231126" 58 | ENV NAMESPACE="spark-dev" 59 | ENV SERVICE_ACCOUNT="spark" 60 | ENV EXECUTOR_IMAGE="wenyixu101/spark:3.5.0-python3.11" 61 | ENV WEBUI_SERVICE_NAME="notebook-spark-ui" 62 | 63 | CMD ["jupyter", "notebook", "--ip='0.0.0.0'", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token=''", "--NotebookApp.password=''"] 64 | 65 | 66 | -------------------------------------------------------------------------------- /docker/notebook/gcs_save_hook.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | 4 | def gcs_save_hook(os_path, model, contents_manager, **kwargs): 5 | """Save hook function for uploading notebook to GCS.""" 6 | bucket_name = os.environ.get("BUCKET_NAME", "default-bucket-name") 7 | # Construct the GCS bucket path 8 | gcs_path = f"gs://{bucket_name}/notebooks" 9 | 10 | local_notebook_dir = "/home/jovyan/" 11 | exclude_pattern = '^(?!.*\.ipynb$).*$' 12 | # Sync from local to GCS 13 | subprocess.run(["gsutil", "-m", "rsync", "-r", "-x", exclude_pattern, local_notebook_dir, gcs_path]) 14 | -------------------------------------------------------------------------------- /docker/notebook/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # Import the custom save hook from the same directory 2 | from gcs_save_hook import gcs_save_hook 3 | 4 | c = get_config() 5 | # c.FileContentsManager.post_save_hook = gcs_save_hook 6 | 7 | c.NotebookApp.tornado_settings = { 8 | 'headers': { 9 | 'Content-Security-Policy': "frame-ancestors 'self' http://localhost:5001 http://localhost:3000 http://localhost:5002 http://localhost:8888;", 10 | 'X-Frame-Options': 'ALLOW-FROM http://localhost:5001 http://localhost:3000 http://localhost:5002 http://localhost:8888', 11 | } 12 | } 13 | c.NotebookApp.allow_origin_pat = 'http://localhost:(3000|5001|5002|8888)' # Allows requests from your React app 14 | c.NotebookApp.allow_credentials = True # Allows cookies to be sent 15 | c.NotebookApp.disable_check_xsrf = True 16 | -------------------------------------------------------------------------------- /docker/postgres/init.sql: -------------------------------------------------------------------------------- 1 | CREATE USER server WITH PASSWORD 'password-server'; 2 | 3 | CREATE DATABASE server_db; 4 | 5 | \c server_db 6 | 7 | CREATE TABLE users ( 8 | id SERIAL PRIMARY KEY, 9 | name VARCHAR(100) NOT NULL, 10 | password_hash VARCHAR(255) NOT NULL, 11 | email VARCHAR(100) NOT NULL UNIQUE 12 | ); 13 | 14 | CREATE TABLE notebooks ( 15 | id SERIAL PRIMARY KEY, 16 | name VARCHAR(100) NOT NULL, 17 | path VARCHAR(100) NOT NULL, 18 | user_id INT REFERENCES users(id) 19 | ); 20 | 21 | CREATE TABLE directories ( 22 | id SERIAL PRIMARY KEY, 23 | name VARCHAR(100) NOT NULL, 24 | path VARCHAR(100) NOT NULL, 25 | user_id INT REFERENCES users(id) 26 | ); 27 | 28 | CREATE TABLE spark_apps ( 29 | spark_app_id VARCHAR(100) PRIMARY KEY, 30 | notebook_id INT REFERENCES notebooks(id), 31 | user_id INT REFERENCES users(id), 32 | status VARCHAR(100), 33 | created_at TIMESTAMP 34 | ); 35 | 36 | CREATE TABLE 
spark_app_config ( 37 | id SERIAL PRIMARY KEY, 38 | notebook_id INT REFERENCES notebooks(id), 39 | driver_memory VARCHAR(100), 40 | driver_memory_overhead VARCHAR(100), 41 | driver_cores INT, 42 | executor_memory VARCHAR(100), 43 | executor_memory_overhead VARCHAR(100), 44 | executor_memory_fraction FLOAT, 45 | executor_cores INT, 46 | executor_instances INT, 47 | dynamic_allocation_enabled BOOLEAN, 48 | executor_instances_min INT, 49 | executor_instances_max INT, 50 | shuffle_service_enabled BOOLEAN, 51 | executor_idle_timeout INT, 52 | queue VARCHAR(100) 53 | ); 54 | 55 | GRANT ALL PRIVILEGES ON TABLE users TO server; 56 | GRANT ALL PRIVILEGES ON SEQUENCE users_id_seq TO server; 57 | 58 | GRANT ALL PRIVILEGES ON TABLE notebooks TO server; 59 | GRANT ALL PRIVILEGES ON SEQUENCE notebooks_id_seq TO server; 60 | 61 | GRANT ALL PRIVILEGES ON TABLE directories TO server; 62 | GRANT ALL PRIVILEGES ON SEQUENCE directories_id_seq TO server; 63 | 64 | GRANT ALL PRIVILEGES ON TABLE spark_apps TO server; 65 | 66 | GRANT ALL PRIVILEGES ON TABLE spark_app_config TO server; 67 | GRANT ALL PRIVILEGES ON SEQUENCE spark_app_config_id_seq TO server; 68 | 69 | -- Add some initial data 70 | -- user_0 -12345A 71 | INSERT INTO users (name, password_hash, email) VALUES 72 | ('user_0', 'scrypt:32768:8:1$1k6HpQA8N58PkDz7$db383b0d69d7a2f6893116b1955da70cb217173dc44ce169acf57cfe6a79f63118ad7515563a0b4f8f39dda49510d061acdba26be8f7c8786c161dd54d7a91c1', 'user_0@gmail.com'), 73 | ('user_1', 'pbkdf2:sha256:150000$3Z6Z6Z6Z$e3', 'user_1@gmail.com'); 74 | 75 | INSERT INTO notebooks (name, path, user_id) VALUES 76 | ('demo.ipynb', 'work/user_0@gmail.com/demo.ipynb', 1), 77 | ('notebook.ipynb', 'work/user_0@gmail.com/notebook.ipynb', 1), 78 | ('quickstart.ipynb', 'work/user_0@gmail.com/quickstart.ipynb', 1), 79 | ('sg-resale-flat-prices.ipynb', 'work/user_0@gmail.com/sg-resale-flat-prices/sg-resale-flat-prices.ipynb', 1); 80 | 81 | INSERT INTO directories (name, path, user_id) VALUES 82 | ('user_0@gmail.com', '/work/user_0@gmail.com', 1), 83 | ('word-count', '/work/user_0@gmail.com/word-count', 1), 84 | ('sg-resale-flat-prices', '/work/user_0@gmail.com/sg-resale-flat-prices', 1), 85 | ('output', '/work/user_0@gmail.com/sg-resale-flat-prices/output', 1), 86 | ('user_1@gmail.com', '/work/user_1@gmail.com', 2); 87 | 88 | -------------------------------------------------------------------------------- /docker/spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/spark:3.5.0 2 | 3 | USER root 4 | 5 | # Install Python 3.11 and pip 6 | RUN apt-get update && \ 7 | apt-get install -y software-properties-common && \ 8 | add-apt-repository ppa:deadsnakes/ppa && \ 9 | apt-get install -y python3.11 python3.11-venv python3.11-dev && \ 10 | apt-get clean && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | # Set Python 3.11 as the default Python version 14 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \ 15 | update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 16 | 17 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ 18 | python3.11 get-pip.py 19 | 20 | RUN mkdir -p /opt/data/spark-warehouse 21 | RUN chmod -R 777 /opt/data/spark-warehouse 22 | 23 | USER spark 24 | -------------------------------------------------------------------------------- /examples/airflow_demo.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 4 | logger = logging.getLogger(__name__) 5 | 6 | def main(): 7 | logger.info("Starting task...") 8 | logger.info("Task completed successfully!") 9 | 10 | if __name__ == "__main__": 11 | main() -------------------------------------------------------------------------------- /examples/user_0@gmail.com/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "isExecuted": true, 6 | "metadata": {}, 7 | "source": [ 8 | "# Demo Notebook\n", 9 | "\n", 10 | "- This is just a demo notebook\n", 11 | "- For testing only" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "isExecuted": false, 17 | "lastExecutionResult": "success", 18 | "lastExecutionTime": "2024-12-10 10:26:03", 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "\n", 25 | "
\n", 26 | "

Spark Session Information

\n", 27 | "

Config: {'spark.driver.memory': '1g', 'spark.driver.cores': 1, 'spark.executor.memory': '1g', 'spark.executor.cores': 1, 'spark.executor.instances': 1, 'spark.dynamicAllocation.enabled': False}

\n", 28 | "

Application ID: app-20241210080310-0003

\n", 29 | "

Spark UI: http://localhost:18080/history/app-20241210080310-0003

\n", 30 | "
\n", 31 | " " 32 | ], 33 | "text/plain": [ 34 | "Custom Spark Session (App ID: app-20241210080310-0003) - UI: http://0edb0a63b2fb:4040" 35 | ] 36 | }, 37 | "execution_count": 11, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "spark = create_spark(\"work/user_0@gmail.com/demo.ipynb\")\n", 44 | "spark" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "isExecuted": true, 51 | "lastExecutionResult": "success", 52 | "lastExecutionTime": "2024-12-10 12:27:14", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "spark.stop()" 57 | ] 58 | } 59 | ], 60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 3 (ipykernel)", 63 | "language": "python", 64 | "name": "python3" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 3 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython3", 76 | "version": "3.11.6" 77 | }, 78 | "uuid": "647a82de-693a-48f0-ae3a-64a771d83da5" 79 | }, 80 | "nbformat": 4, 81 | "nbformat_minor": 4 82 | } 83 | -------------------------------------------------------------------------------- /examples/user_0@gmail.com/word-count/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Apache Spark base image 2 | FROM apache/spark:3.5.0 3 | 4 | ARG VERSION 5 | ARG JAR_FILE=target/word-count-${VERSION}.jar 6 | 7 | ADD ${JAR_FILE} /opt/spark/app.jar 8 | 9 | # Set the default command for the container to run your Spark application 10 | # Adjust the command below according to your application's requirements 11 | CMD ["ls", "-l", "/opt/spark"] 12 | -------------------------------------------------------------------------------- /examples/user_0@gmail.com/word-count/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.example 4 | word-count 5 | 1.0-SNAPSHOT 6 | 7 | 8 | 1.8 9 | 1.8 10 | 2.12.18 11 | 2.12 12 | 3.5.0 13 | hadoop3-2.2.0 14 | 30.1-jre:compile 15 | 16 | 17 | 18 | 19 | 20 | org.apache.spark 21 | spark-core_${scala.compat.version} 22 | ${spark.version} 23 | 24 | 25 | com.google.guava 26 | guava 27 | 28 | 29 | 30 | 31 | org.apache.spark 32 | spark-sql_${scala.compat.version} 33 | ${spark.version} 34 | 35 | 36 | com.google.guava 37 | guava 38 | 39 | 40 | 41 | 42 | com.google.cloud.bigdataoss 43 | gcs-connector 44 | ${gcs.connector.version} 45 | 51 | 52 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | net.alchim31.maven 65 | scala-maven-plugin 66 | 4.5.6 67 | 68 | 69 | 70 | compile 71 | testCompile 72 | 73 | 74 | 75 | 76 | 77 | 78 | org.apache.maven.plugins 79 | maven-shade-plugin 80 | 3.2.4 81 | 82 | 83 | package 84 | 85 | shade 86 | 87 | 88 | 89 | 90 | 91 | com.google.common 92 | shaded.com.google.common 93 | 94 | 95 | 96 | 97 | 98 | *:* 99 | 100 | META-INF/*.SF 101 | META-INF/*.DSA 102 | META-INF/*.RSA 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /examples/user_0@gmail.com/word-count/src/main/resources/example.txt: -------------------------------------------------------------------------------- 1 | Hello world, this is a sample text file for Spark word count. 2 | This file contains multiple words, some of which are repeated. 3 | Repeated words are useful for testing the word count functionality. 
4 | The word 'word' is an example of a repeated word in this file. 5 | Spark is a powerful tool for processing large amounts of data. 6 | This is the end of the sample text file. -------------------------------------------------------------------------------- /examples/user_0@gmail.com/word-count/src/main/scala/WordCount.scala: -------------------------------------------------------------------------------- 1 | import org.apache.spark.sql.SparkSession 2 | import java.lang.Thread.sleep 3 | 4 | object WordCount { 5 | def main(args: Array[String]) { 6 | val inputPath = args(0) 7 | val outputPath = args(1) 8 | 9 | // Create SparkSession 10 | val spark = SparkSession 11 | .builder 12 | .appName("Word Count") 13 | .getOrCreate() 14 | 15 | import spark.implicits._ 16 | 17 | val textFile = spark.read.text(inputPath).as[String] 18 | 19 | // Split each line into words 20 | val words = textFile.flatMap(line => line.split("\\W+")) // Split on non-word characters 21 | 22 | // Count each word 23 | val wordCounts = words.groupBy("value").count() 24 | 25 | wordCounts 26 | .write 27 | .format("csv") 28 | .option("path", outputPath) 29 | .save() 30 | 31 | // sleep(1000) 32 | // Stop the SparkSession 33 | spark.stop() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /helm/data-platform/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /helm/data-platform/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: data-platform 3 | description: A Helm chart for Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 
24 | appVersion: "1.16.0" 25 | -------------------------------------------------------------------------------- /helm/data-platform/templates/notebook-cluster-ip.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | cloud.google.com/neg: '{"ingress":true}' 6 | name: notebook-cluster-ip 7 | spec: 8 | clusterIP: None 9 | clusterIPs: 10 | - None 11 | internalTrafficPolicy: Cluster 12 | selector: 13 | app: notebook 14 | ports: 15 | - port: 7077 # The port the Spark driver listens on 16 | name: driver-port 17 | protocol: TCP 18 | targetPort: 7077 19 | - name: driver-rpc-port 20 | port: 7078 21 | targetPort: 7078 22 | protocol: TCP 23 | - name: blockmanager 24 | port: 7079 25 | protocol: TCP 26 | targetPort: 7079 27 | - name: http 28 | port: 80 29 | targetPort: 8889 30 | protocol: TCP 31 | type: ClusterIP -------------------------------------------------------------------------------- /helm/data-platform/templates/notebook-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: notebook 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: notebook 10 | template: 11 | metadata: 12 | labels: 13 | app: notebook 14 | spec: 15 | serviceAccountName: spark 16 | containers: 17 | - name: notebook 18 | image: wenyixu101/all-spark-notebook:latest 19 | imagePullPolicy: Always 20 | command: ["/bin/bash", "-c", "start-notebook.sh"] 21 | ports: 22 | - containerPort: 8888 23 | name: notebook 24 | # Spark UI 25 | - containerPort: 4040 26 | - containerPort: 4041 27 | - containerPort: 4042 28 | - containerPort: 4043 29 | - containerPort: 4044 30 | - containerPort: 4045 31 | - containerPort: 4046 32 | - containerPort: 4047 33 | - containerPort: 4048 34 | - containerPort: 4049 35 | - containerPort: 4050 36 | env: 37 | - name: JUPYTER_ENABLE_LAB 38 | value: "no" 39 | - name: NOTEBOOK_ARGS 40 | value: "--NotebookApp.token='' --NotebookApp.password='' --NotebookApp.disable_check_xsrf=True" 41 | -------------------------------------------------------------------------------- /helm/data-platform/templates/notebook-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: notebook 5 | spec: 6 | selector: 7 | app: notebook 8 | ports: 9 | - port: 80 10 | targetPort: 8888 11 | type: LoadBalancer 12 | -------------------------------------------------------------------------------- /helm/data-platform/templates/notebook-spark-ui.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: notebook-spark-ui 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: notebook 9 | ports: 10 | - name: spark-ui-4040 11 | port: 4040 12 | targetPort: 4040 13 | - name: spark-ui-4041 14 | port: 4041 15 | targetPort: 4041 16 | - name: spark-ui-4042 17 | port: 4042 18 | targetPort: 4042 19 | - name: spark-ui-4043 20 | port: 4043 21 | targetPort: 4043 22 | - name: spark-ui-4044 23 | port: 4044 24 | targetPort: 4044 25 | - name: spark-ui-4045 26 | port: 4045 27 | targetPort: 4045 28 | - name: spark-ui-4046 29 | port: 4046 30 | targetPort: 4046 31 | - name: spark-ui-4047 32 | port: 4047 33 | targetPort: 4047 34 | - name: spark-ui-4048 35 | port: 4048 36 | targetPort: 4048 37 | - name: spark-ui-4049 38 | port: 4049 39 | targetPort: 4049 40 
| - name: spark-ui-4050 41 | port: 4050 42 | targetPort: 4050 43 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-history-server-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: spark-history-server 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: spark-history-server 10 | template: 11 | metadata: 12 | labels: 13 | app: spark-history-server 14 | spec: 15 | containers: 16 | - name: spark-history-server 17 | image: wenyixu101/spark-history-server:latest 18 | command: ["/bin/sh", "-c"] 19 | args: ["/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"] 20 | env: 21 | - name: SPARK_HISTORY_OPTS 22 | value: "-Dspark.history.fs.logDirectory=gs://{{ .Values.bucket.name }}/event-logs/ -Dspark.eventLog.enabled=true" 23 | ports: 24 | - containerPort: 18080 25 | serviceAccountName: spark 26 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-history-server-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: spark-history-server 5 | spec: 6 | type: LoadBalancer 7 | ports: 8 | - port: 18080 9 | targetPort: 18080 10 | selector: 11 | app: spark-history-server 12 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding # Use ClusterRoleBinding for a ClusterRole 3 | metadata: 4 | name: spark-role-binding 5 | namespace: spark-dev 6 | subjects: 7 | - kind: ServiceAccount 8 | name: spark 9 | namespace: spark-dev 10 | roleRef: 11 | kind: Role # Use ClusterRole for a ClusterRoleBinding 12 | name: spark-role 13 | apiGroup: rbac.authorization.k8s.io 14 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role # Use ClusterRole if you need cluster-wide access 3 | metadata: 4 | namespace: spark-dev 5 | name: spark-role 6 | rules: 7 | - apiGroups: [""] 8 | resources: ["pods"] 9 | verbs: ["create", "get", "list", "watch", "delete"] 10 | - apiGroups: [""] 11 | resources: ["services"] 12 | verbs: ["create", "get", "delete"] 13 | - apiGroups: [""] 14 | resources: ["configmaps"] 15 | verbs: ["create", "get", "delete"] 16 | - apiGroups: [""] 17 | resources: ["persistentvolumeclaims"] 18 | verbs: ["list", "get"] 19 | -------------------------------------------------------------------------------- /helm/data-platform/templates/spark-serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: spark 5 | namespace: spark-dev 6 | -------------------------------------------------------------------------------- /helm/spark-ui/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: spark-ui 3 | description: A Helm chart for Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 
6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 24 | appVersion: "1.16.0" 25 | -------------------------------------------------------------------------------- /helm/spark-ui/templates/spark-ui-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Values.serviceName }} 5 | namespace: spark-dev 6 | spec: 7 | type: LoadBalancer # Use NodePort if LoadBalancer is not required 8 | ports: 9 | - port: 4040 10 | targetPort: 4040 11 | name: spark-ui 12 | selector: 13 | app: spark 14 | spark-role: driver 15 | name: {{ .Values.appName }} 16 | -------------------------------------------------------------------------------- /resources/images/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/resources/images/architecture.jpg -------------------------------------------------------------------------------- /resources/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/resources/images/logo.png -------------------------------------------------------------------------------- /resources/images/notebook-spark-integration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/resources/images/notebook-spark-integration.png -------------------------------------------------------------------------------- /resources/images/spark-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/resources/images/spark-ui.png -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | WORKDIR /server 4 | 5 | COPY . 
/server 6 | 7 | RUN pip install -r requirements.txt 8 | 9 | EXPOSE 5002 10 | 11 | CMD ["python", "run.py"] -------------------------------------------------------------------------------- /server/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/app/__init__.py -------------------------------------------------------------------------------- /server/app/auth/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/app/auth/__init__.py -------------------------------------------------------------------------------- /server/app/auth/auth.py: -------------------------------------------------------------------------------- 1 | from flask import request, Response, g 2 | from app.models.user import UserModel 3 | from flask_jwt_extended import get_jwt_identity 4 | from functools import wraps 5 | import json 6 | 7 | def password_required(f): 8 | @wraps(f) 9 | def decorated(*args, **kwargs): 10 | auth = request.authorization 11 | if not auth or not auth.username or not auth.password: 12 | return Response( 13 | response=json.dumps({'message': 'Missing credentials'}), 14 | status=401 15 | ) 16 | 17 | user = UserModel.query.filter_by(name=auth.username).first() 18 | if not user or not user.check_password(auth.password): 19 | return Response( 20 | response=json.dumps({'message': 'Invalid credentials'}), 21 | status=401 22 | ) 23 | 24 | g.user = user 25 | return f(*args, **kwargs) 26 | return decorated 27 | 28 | def identify_user(f): 29 | @wraps(f) 30 | def decorated(*args, **kwargs): 31 | user_name = get_jwt_identity() 32 | g.user = UserModel.query.filter_by(name=user_name).first() 33 | return f(*args, **kwargs) 34 | return decorated 35 | -------------------------------------------------------------------------------- /server/app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/app/models/__init__.py -------------------------------------------------------------------------------- /server/app/models/directory.py: -------------------------------------------------------------------------------- 1 | from database import db 2 | 3 | class DirectoryModel(db.Model): 4 | 5 | __tablename__ = 'directories' 6 | 7 | id = db.Column(db.Integer, primary_key=True, autoincrement=True) 8 | name = db.Column(db.String, nullable=False) 9 | path = db.Column(db.String, nullable=False) 10 | user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False) 11 | 12 | def __init__(self, name, path, user_id): 13 | self.name = name 14 | self.path = path 15 | self.user_id = user_id 16 | 17 | def to_dict(self): 18 | return { 19 | 'id': self.id, 20 | 'name': self.name, 21 | 'path': self.path, 22 | 'user_id': self.user_id 23 | } -------------------------------------------------------------------------------- /server/app/models/notebook.py: -------------------------------------------------------------------------------- 1 | from database import db 2 | 3 | class NotebookModel(db.Model): 4 | 5 | __tablename__ = 'notebooks' 6 | 7 | id = db.Column(db.Integer, primary_key=True, autoincrement=True) 8 | name = db.Column(db.String, nullable=False) 9 | path = db.Column(db.String, nullable=False) 10 | 
user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False) 11 | 12 | def __init__(self, name, path, user_id): 13 | self.name = name 14 | self.path = path 15 | self.user_id = user_id 16 | 17 | def to_dict(self): 18 | return { 19 | 'id': self.id, 20 | 'name': self.name, 21 | 'path': self.path, 22 | 'user_id': self.user_id 23 | } 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /server/app/models/spark_app.py: -------------------------------------------------------------------------------- 1 | from database import db 2 | 3 | 4 | class SparkAppModel(db.Model): 5 | 6 | __tablename__ = 'spark_apps' 7 | 8 | spark_app_id = db.Column(db.String, primary_key=True, nullable=False) 9 | notebook_id = db.Column(db.Integer, db.ForeignKey('notebooks.id'), nullable=False) 10 | user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False) 11 | status = db.Column(db.String, nullable=True) 12 | created_at = db.Column(db.DateTime, nullable=False) 13 | 14 | def __init__(self, spark_app_id, notebook_id, user_id, created_at): 15 | self.spark_app_id = spark_app_id 16 | self.notebook_id = notebook_id 17 | self.user_id = user_id 18 | self.created_at = created_at 19 | 20 | def set_status(self, status): 21 | self.status = status 22 | db.session.commit() 23 | 24 | def to_dict(self): 25 | return { 26 | 'spark_app_id': self.spark_app_id, 27 | 'notebook_id': self.notebook_id, 28 | 'user_id': self.user_id, 29 | 'status': self.status, 30 | 'created_at': self.created_at.strftime("%Y-%m-%d %H:%M:%S") 31 | } -------------------------------------------------------------------------------- /server/app/models/spark_app_config.py: -------------------------------------------------------------------------------- 1 | from database import db 2 | 3 | class SparkAppConfigModel(db.Model): 4 | 5 | __tablename__ = 'spark_app_config' 6 | 7 | id = db.Column(db.Integer, primary_key=True, nullable=False) 8 | notebook_id = db.Column(db.Integer, db.ForeignKey('notebooks.id'), nullable=False) 9 | driver_memory = db.Column(db.String, nullable=True) 10 | driver_memory_overhead = db.Column(db.String, nullable=True) 11 | driver_cores = db.Column(db.Integer, nullable=True) 12 | 13 | executor_memory = db.Column(db.String, nullable=True) 14 | executor_memory_overhead = db.Column(db.String, nullable=True) 15 | executor_memory_fraction = db.Column(db.Float, nullable=True) 16 | executor_cores = db.Column(db.Integer, nullable=True) 17 | executor_instances = db.Column(db.Integer, nullable=True) 18 | 19 | dynamic_allocation_enabled = db.Column(db.Boolean, nullable=True) 20 | executor_instances_min = db.Column(db.Integer, nullable=True) 21 | executor_instances_max = db.Column(db.Integer, nullable=True) 22 | 23 | shuffle_service_enabled = db.Column(db.Boolean, nullable=True) 24 | executor_idle_timeout = db.Column(db.Integer, nullable=True) 25 | queue = db.Column(db.String, nullable=True) 26 | 27 | def __init__(self, notebook_id, driver_memory=None, driver_memory_overhead=None, driver_cores=None, 28 | executor_memory=None, executor_memory_overhead=None, executor_memory_fraction=None, 29 | executor_cores=None, executor_instances=None, dynamic_allocation_enabled=None, 30 | executor_instances_min=None, executor_instances_max=None, shuffle_service_enabled=None, 31 | executor_idle_timeout=None, queue=None): 32 | self.notebook_id = notebook_id 33 | self.driver_memory = driver_memory 34 | self.driver_memory_overhead = driver_memory_overhead 35 | self.driver_cores = driver_cores 36 | 
self.executor_memory = executor_memory 37 | self.executor_memory_overhead = executor_memory_overhead 38 | self.executor_memory_fraction = executor_memory_fraction 39 | self.executor_cores = executor_cores 40 | self.executor_instances = executor_instances 41 | self.dynamic_allocation_enabled = dynamic_allocation_enabled 42 | self.executor_instances_min = executor_instances_min 43 | self.executor_instances_max = executor_instances_max 44 | self.shuffle_service_enabled = shuffle_service_enabled 45 | self.executor_idle_timeout = executor_idle_timeout 46 | self.queue = queue 47 | 48 | def to_dict(self): 49 | return { 50 | 'notebook_id': self.notebook_id, 51 | 'driver_memory': self.driver_memory, 52 | 'driver_memory_overhead': self.driver_memory_overhead, 53 | 'driver_cores': self.driver_cores, 54 | 'executor_memory': self.executor_memory, 55 | 'executor_memory_overhead': self.executor_memory_overhead, 56 | 'executor_memory_fraction': self.executor_memory_fraction, 57 | 'executor_cores': self.executor_cores, 58 | 'executor_instances': self.executor_instances, 59 | 'dynamic_allocation_enabled': self.dynamic_allocation_enabled, 60 | 'executor_instances_min': self.executor_instances_min, 61 | 'executor_instances_max': self.executor_instances_max, 62 | 'shuffle_service_enabled': self.shuffle_service_enabled, 63 | 'executor_idle_timeout': self.executor_idle_timeout, 64 | 'queue': self.queue 65 | } 66 | -------------------------------------------------------------------------------- /server/app/models/user.py: -------------------------------------------------------------------------------- 1 | from database import db 2 | from werkzeug.security import generate_password_hash, check_password_hash 3 | 4 | class UserModel(db.Model): 5 | 6 | __tablename__ = 'users' 7 | 8 | id = db.Column(db.Integer, primary_key=True, autoincrement=True) 9 | name = db.Column(db.String, unique=True, nullable=False) 10 | password_hash = db.Column(db.String, nullable=False) 11 | email = db.Column(db.String, unique=True, nullable=False) 12 | 13 | def set_password(self, password): 14 | self.password_hash = generate_password_hash(password) 15 | 16 | def check_password(self, password): 17 | return check_password_hash(self.password_hash, password) 18 | 19 | 20 | -------------------------------------------------------------------------------- /server/app/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/app/routes/__init__.py -------------------------------------------------------------------------------- /server/app/routes/directory.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from app.services.directory import Directory 3 | import logging 4 | 5 | directory_blueprint = Blueprint('directory', __name__) 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | @directory_blueprint.route('/directory/<path:directory_path>', methods=['GET']) 10 | def get_directory_content(directory_path): 11 | return Directory.get_content_by_path(path=directory_path) 12 | 13 | @directory_blueprint.route('/directory', methods=['POST']) 14 | def create_directory(): 15 | data = request.get_json() 16 | directory_path = data.get('directoryPath', None) 17 | return Directory.create_directory(directory_path=directory_path) 18 | 19 | @directory_blueprint.route('/directory/<path:directory_path>', methods=['DELETE']) 20 | def delete_directory(directory_path): 21 | 
return Directory.delete_directory_by_path(directory_path=directory_path) 22 | 23 | @directory_blueprint.route('/directory/<path:directory_path>', methods=['PATCH']) 24 | def rename_directory(directory_path): 25 | data = request.get_json() 26 | new_directory_path = data.get('newPath', None) 27 | return Directory.rename_directory_by_path(directory_path=directory_path, new_directory_path=new_directory_path) -------------------------------------------------------------------------------- /server/app/routes/kernel.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from app.services.kernel import Kernel 3 | import logging 4 | 5 | kernel_blueprint = Blueprint('kernel', __name__) 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | @kernel_blueprint.route('/kernel/<kernel_id>', methods=['GET']) 10 | def get_kernel_by_id(kernel_id): 11 | logging.info(f"Getting kernel with id: {kernel_id}") 12 | return Kernel.get_kernel_by_id(kernel_id) 13 | 14 | @kernel_blueprint.route('/kernel/restart/<kernel_id>', methods=['POST']) 15 | def restart_kernel(kernel_id): 16 | logging.info(f"Restarting kernel with id: {kernel_id}") 17 | return Kernel.restart_kernel(kernel_id) 18 | -------------------------------------------------------------------------------- /server/app/routes/login.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, Response, g 2 | from flask_jwt_extended import create_access_token 3 | from app.auth.auth import password_required 4 | import logging 5 | import json 6 | 7 | login_blueprint = Blueprint('login', __name__) 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | @login_blueprint.route('/login', methods=['POST']) 12 | @password_required 13 | def login(): 14 | logging.info(f"Logging in user: {g.user.name}") 15 | access_token = create_access_token(identity=g.user.name) 16 | 17 | return Response( 18 | response=json.dumps({ 19 | 'message': 'Login successful', 20 | 'name': g.user.name, 21 | 'email': g.user.email, 22 | 'access_token': access_token 23 | }), 24 | status=200 25 | ) 26 | 27 | -------------------------------------------------------------------------------- /server/app/routes/notebook.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request, g 2 | from app.services.notebook import Notebook 3 | from flask_jwt_extended import jwt_required 4 | from app.auth.auth import identify_user 5 | import logging 6 | 7 | notebook_blueprint = Blueprint('notebook', __name__) 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | @notebook_blueprint.route('/notebook') 12 | def notebook(): 13 | return jsonify( 14 | { 15 | "message": "notebook endpoint" 16 | } 17 | ) 18 | 19 | @notebook_blueprint.route('/notebook/all', methods=['GET']) 20 | @jwt_required() 21 | @identify_user 22 | def get_all_notebooks(): 23 | logging.info(f"Getting all notebooks by user: {g.user.name}") 24 | return Notebook.get_all_notebooks() 25 | 26 | @notebook_blueprint.route('/notebook/<path:notebook_path>', methods=['GET']) 27 | @jwt_required() 28 | @identify_user 29 | def get_notebook_by_path(notebook_path): 30 | logging.info(f"Getting notebook with path: {notebook_path} by user: {g.user.name}") 31 | return Notebook.get_notebook_by_path(notebook_path=notebook_path) 32 | 33 | @notebook_blueprint.route('/notebook', methods=['POST']) 34 | @jwt_required() 35 | @identify_user 36 | def create_notebook(): 37 | data = request.get_json() 38 | notebook_name = 
data.get('name', None) 39 | notebook_path = data.get('path', None) 40 | logging.info(f"Creating notebook with name: {notebook_name} and path: {notebook_path} by user {g.user.name}") 41 | return Notebook.create_notebook_with_init_cells(notebook_name=notebook_name, notebook_path=notebook_path) 42 | 43 | @notebook_blueprint.route('/notebook/<path:notebook_path>', methods=['PUT']) 44 | @jwt_required() 45 | @identify_user 46 | def update_notebook(notebook_path): 47 | data = request.get_json() 48 | content = data.get('content', None) 49 | logging.info(f"Updating notebook with path: {notebook_path} by user: {g.user.name}") 50 | return Notebook.update_notebook(notebook_path=notebook_path, content=content) 51 | 52 | @notebook_blueprint.route('/notebook/<path:notebook_path>', methods=['DELETE']) 53 | @jwt_required() 54 | @identify_user 55 | def delete_notebook(notebook_path): 56 | logging.info(f"Deleting notebook with path: {notebook_path}") 57 | return Notebook.delete_notebook_by_path(notebook_path=notebook_path) 58 | 59 | @notebook_blueprint.route('/notebook/<path:notebook_path>', methods=['PATCH']) 60 | @jwt_required() 61 | @identify_user 62 | def rename_or_move_notebook(notebook_path): 63 | data = request.get_json() 64 | if 'newName' in data: 65 | logging.info(f"Renaming notebook with path: {notebook_path} to {data['newName']}") 66 | new_notebook_name = data.get('newName', None) 67 | return Notebook.rename_notebook_by_path(notebook_path=notebook_path, new_notebook_name=new_notebook_name) 68 | elif 'newPath' in data: 69 | logging.info(f"Moving notebook with path: {notebook_path} to {data['newPath']}") 70 | new_notebook_path = data.get('newPath', None) 71 | return Notebook.move_notebook(notebook_path=notebook_path, new_notebook_path=new_notebook_path) 72 | 73 | @notebook_blueprint.route('/notebook/spark_app/<path:notebook_path>', methods=['GET']) 74 | @jwt_required() 75 | @identify_user 76 | def get_spark_app_by_notebook_path(notebook_path): 77 | logging.info(f"Get spark apps by notebook path: {notebook_path}") 78 | return Notebook.get_spark_app_by_notebook_path(notebook_path) 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /server/app/routes/session.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, request 2 | from app.services.session import Session 3 | import logging 4 | 5 | session_blueprint = Blueprint('session', __name__) 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | @session_blueprint.route('/session', methods=['POST']) 10 | def create_session(): 11 | logging.info(f"Create session") 12 | data = request.get_json() 13 | notebook_path = data.get('notebookPath', None) 14 | return Session.create_session(notebook_path) 15 | 16 | @session_blueprint.route('/session/all', methods=['GET']) 17 | def get_all_sessions(): 18 | return Session.get_all_sessions() 19 | 20 | @session_blueprint.route('/session/<path:notebook_path>', methods=['GET']) 21 | def get_session_by_path(notebook_path): 22 | logging.info(f"Get session by path: {notebook_path}") 23 | return Session.get_session_by_path(notebook_path=notebook_path) 24 | 25 | -------------------------------------------------------------------------------- /server/app/routes/spark_app.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, request 2 | from app.services.spark_app import SparkApp 3 | from flask_jwt_extended import jwt_required 4 | from app.auth.auth import identify_user 5 | import logging 6 | 7 | spark_app_blueprint = Blueprint('spark_app', 
__name__) 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | @spark_app_blueprint.route('/spark_app/<path:notebook_path>/config', methods=['GET']) 12 | def get_spark_app_config(notebook_path): 13 | logging.info(f"Getting spark app config for notebook path: {notebook_path}") 14 | return SparkApp.get_spark_app_config_by_notebook_path(notebook_path) 15 | 16 | @spark_app_blueprint.route('/spark_app/<path:notebook_path>/config', methods=['POST']) 17 | def update_spark_app_config(notebook_path): 18 | logging.info(f"Updating spark app config for notebook path: {notebook_path}") 19 | data = request.get_json() 20 | return SparkApp.update_spark_app_config_by_notebook_path(notebook_path, data) 21 | 22 | @spark_app_blueprint.route('/spark_app/<spark_app_id>/status', methods=['GET']) 23 | def get_spark_app_status(spark_app_id): 24 | logging.info(f"Getting spark app status for app id: {spark_app_id}") 25 | return SparkApp.get_spark_app_status(spark_app_id) 26 | 27 | @spark_app_blueprint.route('/spark_app/<spark_app_id>', methods=['POST']) 28 | @jwt_required() 29 | @identify_user 30 | def create_spark_app(spark_app_id): 31 | logging.info(f"Creating spark app with id: {spark_app_id}") 32 | data = request.get_json() 33 | notebook_path = data.get('notebookPath') 34 | return SparkApp.create_spark_app(spark_app_id=spark_app_id, notebook_path=notebook_path) -------------------------------------------------------------------------------- /server/app/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/app/services/__init__.py -------------------------------------------------------------------------------- /server/app/services/kernel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from flask import Response 3 | import requests 4 | import json 5 | from flask import current_app as app 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class Kernel: 10 | 11 | @staticmethod 12 | def get_kernel_by_id(kernel_id): 13 | try: 14 | response = requests.get(app.config['JUPYTER_KERNEL_API_PATH'] + f"/{kernel_id}") 15 | except Exception as e: 16 | logger.error(f"Met exception getting kernel: {e}") 17 | return Response( 18 | response=json.dumps({'message': 'Error getting kernel from Jupyter Server: ' + str(e)}), 19 | status=404) 20 | 21 | if response.status_code != 200: 22 | logger.error(f"Error getting kernel: {response.content}") 23 | return Response( 24 | response=json.dumps({'message': 'Error getting kernel'}), 25 | status=404) 26 | 27 | return Response( 28 | response=response.content, 29 | status=200, 30 | mimetype='application/json' 31 | ) 32 | 33 | @staticmethod 34 | def restart_kernel(kernel_id): 35 | path = app.config['JUPYTER_KERNEL_API_PATH'] + f"/{kernel_id}/restart" 36 | try: 37 | response = requests.post(path) 38 | except Exception as e: 39 | logger.error(f"Met exception restarting kernel: {e}") 40 | return Response( 41 | response=json.dumps({'message': 'Error restarting kernel: ' + str(e)}), 42 | status=404) 43 | 44 | if response.status_code != 200: 45 | return Response( 46 | response=json.dumps({'message': 'Error restarting kernel'}), 47 | status=404) 48 | 49 | return Response( 50 | response=response.content, 51 | status=200, 52 | mimetype='application/json' 53 | ) -------------------------------------------------------------------------------- /server/app/services/session.py: -------------------------------------------------------------------------------- 1 | 
import logging 2 | from flask import Response 3 | import requests 4 | import json 5 | from flask import current_app as app 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class Session: 12 | 13 | @staticmethod 14 | def get_all_sessions(): 15 | try: 16 | response = requests.get(app.config['JUPYTER_SESSION_API_PATH']) 17 | except Exception as e: 18 | logger.error(f"Met exception getting all sessions: {e}") 19 | return Response( 20 | response=json.dumps({'message': 'Error getting all sessions from Jupyter Server: ' + str(e)}), 21 | status=404) 22 | 23 | return Response( 24 | response=response.content, 25 | status=200, 26 | mimetype='application/json' 27 | ) 28 | 29 | @staticmethod 30 | def get_session_by_path(notebook_path: str) -> Response: 31 | logger.info(f"Getting session for {notebook_path}") 32 | 33 | all_sessions = Session.get_all_sessions() 34 | if all_sessions.status_code != 200: 35 | return Response( 36 | response=json.dumps({'message': 'Error getting all sessions'}), 37 | status=404) 38 | 39 | sessions = json.loads(all_sessions.data.decode('utf-8')) 40 | session = [x for x in sessions if x["path"] == notebook_path] 41 | 42 | if len(session) == 0: 43 | return Response( 44 | response=json.dumps({'message': 'Session not found'}), 45 | status=404) 46 | elif len(session) > 1: 47 | return Response( 48 | response=json.dumps({'message': 'Multiple sessions found'}), 49 | status=404) 50 | else: 51 | return Response( 52 | response=json.dumps(session[0]), 53 | status=200, 54 | mimetype='application/json' 55 | ) 56 | 57 | 58 | @staticmethod 59 | def create_session(notebook_path: str) -> Response: 60 | logger.info(f"Creating session for {notebook_path}") 61 | jupyter_api_path = app.config['JUPYTER_SESSION_API_PATH'] 62 | 63 | data = { 64 | "notebook": { 65 | "path": notebook_path 66 | }, 67 | "kernel": { 68 | "id": None, 69 | "name": "python3" 70 | } 71 | } 72 | 73 | try: 74 | response = requests.post( 75 | jupyter_api_path, 76 | json=data 77 | ) 78 | except Exception as e: 79 | logger.error(f"Met exception creating session: {e}") 80 | return Response( 81 | response=json.dumps({'message': 'Error creating session in Jupyter Server: ' + str(e)}), 82 | status=404) 83 | 84 | return Response( 85 | response=response.content, 86 | status=200, 87 | mimetype='application/json' 88 | ) 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /server/app/services/user.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from flask import Response 3 | import requests 4 | import json 5 | from flask import current_app as app 6 | from database import db 7 | from app.models.user import UserModel 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | class User: 12 | 13 | @staticmethod 14 | def get_mock_user(): 15 | mock_user = UserModel.query.filter_by(name='testuser0').first() 16 | if mock_user is None: 17 | mock_user = UserModel(name='testuser0', email='testuser0@example.com') 18 | password = 'test_password' 19 | mock_user.set_password(password) 20 | db.session.add(mock_user) 21 | db.session.commit() 22 | 23 | return mock_user 24 | 25 | @staticmethod 26 | def get_user_by_name(name): 27 | user = UserModel.query.filter_by(name=name).first() 28 | return user 29 | 30 | @staticmethod 31 | def get_user_by_email(email): 32 | user = UserModel.query.filter_by(email=email).first() 33 | return user 34 | 35 | @staticmethod 36 | def create_user(name, email, password): 37 | user = UserModel(name=name, email=email) 38 | 
user.set_password(password) 39 | db.session.add(user) 40 | db.session.commit() 41 | return user 42 | 43 | @staticmethod 44 | def delete_user(name): 45 | user = UserModel.query.filter_by(name=name).first() 46 | db.session.delete(user) 47 | db.session.commit() 48 | return Response( 49 | response=json.dumps({'message': 'User deleted successfully'}), 50 | status=200 51 | ) 52 | 53 | @staticmethod 54 | def update_user(name, email, password): 55 | user = UserModel.query.filter_by(name=name).first() 56 | user.email = email 57 | user.set_password(password) 58 | db.session.commit() 59 | return Response( 60 | response=json.dumps({'message': 'User updated successfully'}), 61 | status=200 62 | ) 63 | 64 | @staticmethod 65 | def get_all_users(): 66 | users = UserModel.query.all() 67 | return users 68 | 69 | @staticmethod 70 | def validate_user_by_name(name, password): 71 | user = UserModel.query.filter_by(name=name).first() 72 | if user is None: 73 | return False 74 | return user.check_password(password) 75 | 76 | @staticmethod 77 | def validate_user_by_email(email, password): 78 | user = UserModel.query.filter_by(email=email).first() 79 | if user is None: 80 | return False 81 | return user.check_password(password) 82 | -------------------------------------------------------------------------------- /server/config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | DEBUG = False 3 | TESTING = False 4 | SQLALCHEMY_DATABASE_URI = 'postgresql://user:password@localhost/production_db' 5 | JUPYTER_SERVER_PATH = 'http://localhost:8888' 6 | JUPYTER_CONTENT_API_PATH = JUPYTER_SERVER_PATH + '/api/contents' 7 | JUPYTER_SESSION_API_PATH = JUPYTER_SERVER_PATH + '/api/sessions' 8 | JUPYTER_KERNEL_API_PATH = JUPYTER_SERVER_PATH + '/api/kernels' 9 | JUPYTER_DEFAULT_PATH = 'work' 10 | 11 | class ProductionConfig(Config): 12 | pass 13 | 14 | class DevelopmentConfig(Config): 15 | DEBUG = True 16 | SQLALCHEMY_DATABASE_URI = 'postgresql://server:password-server@localhost:5432/server_db' 17 | JUPYTER_SERVER_PATH = 'http://localhost:8888' 18 | JUPYTER_CONTENT_API_PATH = JUPYTER_SERVER_PATH + '/api/contents' 19 | JUPYTER_SESSION_API_PATH = JUPYTER_SERVER_PATH + '/api/sessions' 20 | JUPYTER_KERNEL_API_PATH = JUPYTER_SERVER_PATH + '/api/kernels' 21 | JUPYTER_DEFAULT_PATH = 'work' 22 | 23 | class TestingConfig(Config): 24 | TESTING = True 25 | SQLALCHEMY_DATABASE_URI = 'postgresql://server:password-server@postgres:5432/server_db' 26 | JUPYTER_SERVER_PATH = 'http://notebook:8888' 27 | JUPYTER_CONTENT_API_PATH = JUPYTER_SERVER_PATH + '/api/contents' 28 | JUPYTER_SESSION_API_PATH = JUPYTER_SERVER_PATH + '/api/sessions' 29 | JUPYTER_KERNEL_API_PATH = JUPYTER_SERVER_PATH + '/api/kernels' 30 | JUPYTER_DEFAULT_PATH = 'work' 31 | 32 | class IntegrationTestingConfig(Config): 33 | TESTING = True 34 | SQLALCHEMY_DATABASE_URI = 'postgresql://server:password-server@localhost:5432/server_db' 35 | JUPYTER_SERVER_PATH = 'http://localhost:8888' 36 | JUPYTER_CONTENT_API_PATH = JUPYTER_SERVER_PATH + '/api/contents' 37 | JUPYTER_SESSION_API_PATH = JUPYTER_SERVER_PATH + '/api/sessions' 38 | JUPYTER_KERNEL_API_PATH = JUPYTER_SERVER_PATH + '/api/kernels' 39 | JUPYTER_DEFAULT_PATH = 'work' 40 | -------------------------------------------------------------------------------- /server/database.py: -------------------------------------------------------------------------------- 1 | from flask_sqlalchemy import SQLAlchemy 2 | 3 | db = SQLAlchemy() 
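A quick end-to-end sketch of the auth flow wired up above: password_required issues a JWT in /login, and jwt_required plus identify_user then guard the notebook routes. This snippet is illustrative and not part of the repo; it assumes the server from run.py is listening on localhost:5002 and that the testuser0/test_password account already exists (e.g. created via User.get_mock_user).

import requests

BASE_URL = 'http://localhost:5002'  # assumed local dev server, see run.py

# 1. Log in with HTTP basic auth; the response body carries a JWT access token.
login = requests.post(f'{BASE_URL}/login', auth=('testuser0', 'test_password'))
token = login.json()['access_token']

# 2. Present the token as a bearer credential on JWT-protected endpoints.
headers = {'Authorization': f'Bearer {token}'}
notebooks = requests.get(f'{BASE_URL}/notebook/all', headers=headers)
print(notebooks.status_code, notebooks.json())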
-------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==3.0.3 2 | Flask-Cors==4.0.1 3 | requests==2.32.2 4 | Flask-SQLAlchemy==3.1.1 5 | psycopg2-binary==2.9.9 6 | flask-jwt-extended==4.6.0 -------------------------------------------------------------------------------- /server/run.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, Response 2 | from flask_cors import CORS 3 | from database import db 4 | import os 5 | import json 6 | from app.routes.notebook import notebook_blueprint 7 | from app.routes.directory import directory_blueprint 8 | from app.routes.session import session_blueprint 9 | from app.routes.kernel import kernel_blueprint 10 | from app.routes.spark_app import spark_app_blueprint 11 | from app.routes.login import login_blueprint 12 | from flask_jwt_extended import JWTManager 13 | from config import DevelopmentConfig, IntegrationTestingConfig, TestingConfig 14 | 15 | def create_app(): 16 | app = Flask(__name__) 17 | if os.environ.get('ENV', 'development') == 'development': 18 | app.config.from_object(DevelopmentConfig) 19 | elif os.environ.get('ENV', 'development') == 'testing': 20 | app.config.from_object(TestingConfig) 21 | elif os.environ.get('ENV', 'development') == 'integration': 22 | app.config.from_object(IntegrationTestingConfig) 23 | 24 | # Set the secret key for JWT 25 | try: 26 | from app_secrets import JWT_SECRET_KEY 27 | except ImportError: 28 | JWT_SECRET_KEY = os.environ.get('JWT_SECRET_KEY', 'default_secret_key') 29 | 30 | app.config['JWT_SECRET_KEY'] = JWT_SECRET_KEY 31 | jwt = JWTManager(app) 32 | @jwt.expired_token_loader 33 | def my_expired_token_callback(jwt_header, jwt_payload): 34 | return Response( 35 | response=json.dumps({'message': 'Token has expired'}), 36 | status=401) 37 | 38 | db.init_app(app) 39 | 40 | allowed_origins = ["http://localhost:5001", "http://localhost:3000"] 41 | CORS(app, resources={ 42 | r"/*": {"origins": allowed_origins} 43 | }) 44 | 45 | return app 46 | 47 | app = create_app() 48 | 49 | app.register_blueprint(notebook_blueprint) 50 | app.register_blueprint(directory_blueprint) 51 | app.register_blueprint(session_blueprint) 52 | app.register_blueprint(kernel_blueprint) 53 | app.register_blueprint(spark_app_blueprint) 54 | app.register_blueprint(login_blueprint) 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | app.run(debug=True, host='0.0.0.0', port=5002) 60 | -------------------------------------------------------------------------------- /server/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/tests/__init__.py -------------------------------------------------------------------------------- /server/tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/tests/models/__init__.py -------------------------------------------------------------------------------- /server/tests/models/test_directory_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from run import create_app 4 | from database import db 5 | from 
app.models.directory import DirectoryModel 6 | from app.models.user import UserModel 7 | 8 | class DirectoryModelTestCase(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.app = create_app() 12 | self.client = self.app.test_client() 13 | with self.app.app_context(): 14 | db.create_all() 15 | 16 | def tearDown(self): 17 | with self.app.app_context(): 18 | db.session.remove() 19 | db.drop_all() 20 | 21 | def test_directory_model(self): 22 | with self.app.app_context(): 23 | # Create user first 24 | user = UserModel(name='testuser', email='testuser@example.com') 25 | password = 'test_password' 26 | user.set_password(password) 27 | db.session.add(user) 28 | db.session.commit() 29 | 30 | # Create directory 31 | directory = DirectoryModel(name='Test Directory', path='/path/to/directory', user_id=user.id) 32 | db.session.add(directory) 33 | db.session.commit() 34 | 35 | self.assertIsNotNone(directory.id) 36 | self.assertEqual(directory.name, 'Test Directory') 37 | self.assertEqual(directory.path, '/path/to/directory') 38 | 39 | directory_dict = directory.to_dict() 40 | self.assertEqual(directory_dict, { 41 | 'id': directory.id, 42 | 'name': 'Test Directory', 43 | 'path': '/path/to/directory', 44 | 'user_id': user.id 45 | }) -------------------------------------------------------------------------------- /server/tests/models/test_notebook_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from run import create_app 4 | from database import db 5 | from app.models.notebook import NotebookModel 6 | from app.models.user import UserModel 7 | 8 | class NotebookModelTestCase(unittest.TestCase): 9 | def setUp(self): 10 | self.app = create_app() 11 | self.client = self.app.test_client() 12 | with self.app.app_context(): 13 | db.create_all() 14 | 15 | def tearDown(self): 16 | with self.app.app_context(): 17 | db.session.remove() 18 | db.drop_all() 19 | 20 | def test_notebook_model(self): 21 | with self.app.app_context(): 22 | # Create user first 23 | user = UserModel(name='testuser', email='testuser@example.com') 24 | password = 'test_password' 25 | user.set_password(password) 26 | db.session.add(user) 27 | db.session.commit() 28 | 29 | # Create notebook 30 | notebook = NotebookModel(name='Test Notebook', path='/path/to/notebook', user_id=user.id) 31 | db.session.add(notebook) 32 | db.session.commit() 33 | 34 | self.assertIsNotNone(notebook.id) 35 | self.assertEqual(notebook.name, 'Test Notebook') 36 | self.assertEqual(notebook.path, '/path/to/notebook') 37 | 38 | notebook_dict = notebook.to_dict() 39 | self.assertEqual(notebook_dict, { 40 | 'id': notebook.id, 41 | 'name': 'Test Notebook', 42 | 'path': '/path/to/notebook', 43 | 'user_id': user.id 44 | }) 45 | 46 | if __name__ == '__main__': 47 | unittest.main() -------------------------------------------------------------------------------- /server/tests/models/test_spark_app_config_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from run import create_app 4 | from database import db 5 | from app.models.spark_app_config import SparkAppConfigModel 6 | from app.models.notebook import NotebookModel 7 | from app.models.user import UserModel 8 | 9 | class SparkAppConfigModelTestCase(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.app = create_app() 13 | self.client = self.app.test_client() 14 | with self.app.app_context(): 15 | db.create_all() 16 | 17 | 
def tearDown(self): 18 | with self.app.app_context(): 19 | db.session.remove() 20 | db.drop_all() 21 | 22 | def test_spark_app_config_model(self): 23 | with self.app.app_context(): 24 | # Create user 25 | user = UserModel(name='testuser', email='testuser@example.com') 26 | password = 'test_password' 27 | user.set_password(password) 28 | db.session.add(user) 29 | db.session.commit() 30 | 31 | self.assertEqual(user.id, 1) 32 | 33 | # Create notebook 34 | notebook = NotebookModel(name = 'test_notebook', path='test_notebook', user_id=1) 35 | db.session.add(notebook) 36 | db.session.commit() 37 | 38 | self.assertEqual(notebook.id, 1) 39 | 40 | # Create spark app config 41 | spark_app_config = SparkAppConfigModel( 42 | notebook_id=1, 43 | driver_memory='1g', 44 | driver_memory_overhead='1g', 45 | driver_cores=1, 46 | executor_memory='1g', 47 | executor_memory_overhead='1g', 48 | executor_memory_fraction=1.0, 49 | executor_cores=1, 50 | executor_instances=1, 51 | dynamic_allocation_enabled=True, 52 | executor_instances_min=1, 53 | executor_instances_max=1, 54 | shuffle_service_enabled=True, 55 | executor_idle_timeout=1, 56 | queue='test_queue' 57 | ) 58 | db.session.add(spark_app_config) 59 | db.session.commit() 60 | 61 | spark_app_config_dict = spark_app_config.to_dict() 62 | self.assertEqual(spark_app_config_dict['notebook_id'], 1) 63 | self.assertEqual(spark_app_config_dict['driver_memory'], '1g') 64 | self.assertEqual(spark_app_config_dict['driver_memory_overhead'], '1g') 65 | self.assertEqual(spark_app_config_dict['driver_cores'], 1) 66 | self.assertEqual(spark_app_config_dict['executor_memory'], '1g') 67 | self.assertEqual(spark_app_config_dict['executor_memory_overhead'], '1g') 68 | self.assertEqual(spark_app_config_dict['executor_memory_fraction'], 1.0) 69 | self.assertEqual(spark_app_config_dict['executor_cores'], 1) 70 | self.assertEqual(spark_app_config_dict['executor_instances'], 1) 71 | self.assertEqual(spark_app_config_dict['dynamic_allocation_enabled'], True) 72 | self.assertEqual(spark_app_config_dict['executor_instances_min'], 1) 73 | self.assertEqual(spark_app_config_dict['executor_instances_max'], 1) 74 | self.assertEqual(spark_app_config_dict['shuffle_service_enabled'], True) 75 | self.assertEqual(spark_app_config_dict['executor_idle_timeout'], 1) 76 | self.assertEqual(spark_app_config_dict['queue'], 'test_queue') -------------------------------------------------------------------------------- /server/tests/models/test_spark_app_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from run import create_app 4 | from database import db 5 | from app.models.spark_app import SparkAppModel 6 | from app.models.notebook import NotebookModel 7 | from app.models.user import UserModel 8 | import datetime 9 | 10 | class SparkAppModelTestCase(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.app = create_app() 14 | self.client = self.app.test_client() 15 | with self.app.app_context(): 16 | db.create_all() 17 | 18 | def tearDown(self): 19 | with self.app.app_context(): 20 | db.session.remove() 21 | db.drop_all() 22 | 23 | def test_spark_app_model(self): 24 | with self.app.app_context(): 25 | # Create user first 26 | user = UserModel(name='testuser', email='testuser@example.com') 27 | password = 'test_password' 28 | user.set_password(password) 29 | db.session.add(user) 30 | db.session.commit() 31 | 32 | # Create notebook 33 | notebook = NotebookModel(name='Test Notebook', path='Test Path', 
user_id=user.id) 34 | db.session.add(notebook) 35 | db.session.commit() 36 | 37 | spark_app = SparkAppModel( 38 | spark_app_id='spark_app0000', 39 | notebook_id=notebook.id, 40 | user_id=user.id, 41 | created_at='2021-01-01 00:00:00') 42 | db.session.add(spark_app) 43 | db.session.commit() 44 | 45 | spark_app_dict = spark_app.to_dict() 46 | self.assertEqual(spark_app_dict['spark_app_id'], 'spark_app0000') 47 | self.assertEqual(spark_app_dict['notebook_id'], notebook.id) 48 | self.assertEqual(spark_app_dict['user_id'], user.id) 49 | self.assertEqual(spark_app_dict['status'], None) 50 | self.assertEqual(spark_app_dict['created_at'], '2021-01-01 00:00:00') -------------------------------------------------------------------------------- /server/tests/models/test_user_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from run import create_app 4 | from database import db 5 | from app.models.user import UserModel 6 | 7 | class UserModelTestCase(unittest.TestCase): 8 | 9 | def setUp(self): 10 | self.app = create_app() 11 | self.client = self.app.test_client() 12 | with self.app.app_context(): 13 | db.create_all() 14 | 15 | def tearDown(self): 16 | with self.app.app_context(): 17 | db.session.remove() 18 | db.drop_all() 19 | 20 | def test_user_model(self): 21 | with self.app.app_context(): 22 | user = UserModel(name='testuser', email='testuser@example.com') 23 | password = 'test_password' 24 | user.set_password(password) 25 | db.session.add(user) 26 | db.session.commit() 27 | assert user.id is not None 28 | assert user.name == 'testuser' 29 | assert user.email == 'testuser@example.com' 30 | 31 | def test_password_setter(self): 32 | with self.app.app_context(): 33 | user = UserModel(name='testuser', email='testuser@example.com') 34 | password = 'test_password' 35 | user.set_password(password) 36 | db.session.add(user) 37 | db.session.commit() 38 | assert user.password_hash is not None 39 | 40 | def test_check_password(self): 41 | with self.app.app_context(): 42 | user = UserModel(name='testuser', email='testuser@example.com') 43 | password = 'test_password' 44 | user.set_password(password) 45 | db.session.add(user) 46 | db.session.commit() 47 | assert user.check_password(password) 48 | assert not user.check_password('wrong_password') -------------------------------------------------------------------------------- /server/tests/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/tests/routes/__init__.py -------------------------------------------------------------------------------- /server/tests/routes/test_spark_app_route.py: -------------------------------------------------------------------------------- 1 | # import unittest 2 | # import json 3 | # from flask_cors import CORS 4 | # from flask import g 5 | # from database import db 6 | # from run import create_app 7 | # from app.routes.spark_app import spark_app_blueprint 8 | # from app.routes.login import login_blueprint 9 | # from app.services.directory import Directory 10 | # from app.models.user import UserModel 11 | # from app.services.user import User 12 | # from app.models.spark_app import SparkAppModel 13 | # from app.models.notebook import NotebookModel 14 | 15 | # class SparkAppRouteTestCase(unittest.TestCase): 16 | 17 | # def setUp(self): 18 | # self.app = create_app() 19 | # 
self.app.register_blueprint(spark_app_blueprint) 20 | # self.app.register_blueprint(login_blueprint) 21 | # self.client = self.app.test_client() 22 | # with self.app.app_context(): 23 | # db.create_all() 24 | # user = UserModel(name='test_user', email='test_email') 25 | # user.set_password('test_password') 26 | # db.session.add(user) 27 | # db.session.commit() 28 | 29 | # def tearDown(self): 30 | # with self.app.app_context(): 31 | # db.session.remove() 32 | # db.drop_all() 33 | 34 | # def login_and_get_token(self): 35 | # with self.app.app_context(): 36 | # response = self.client.post('/login', auth=('test_user', 'test_password')) 37 | # return json.loads(response.data)['access_token'] 38 | 39 | # # def test_create_spark_app(self): 40 | # # with self.app.app_context(): 41 | # # # Create Notebook 42 | # # notebook = NotebookModel(name='Test Notebook', path='/path/to/notebook', user_id=1) 43 | # # db.session.add(notebook) 44 | # # db.session.commit() 45 | 46 | # # # Create Spark App 47 | # # spark_app_id = 'app_0001' 48 | # # path = f'/spark-app/app_0001' 49 | 50 | # # # data = { 51 | # # # 'notebookPath': notebook.path 52 | # # # } 53 | 54 | # # # token = self.login_and_get_token() 55 | # # # headers = { 56 | # # # 'Authorization': f'Bearer {token}', 57 | # # # } 58 | 59 | # # response = self.client.post( 60 | # # path, 61 | # # # headers=headers, 62 | # # # json=json.dumps(data), 63 | # # ) 64 | 65 | # # print(response.data) 66 | # # # self.assertEqual(response.status_code, 200) 67 | # # # self.assertEqual(json.loads(response.data)['spark_app_id'], spark_app_id) 68 | # # # self.assertEqual(json.loads(response.data)['notebook_id'], notebook.id) 69 | # # # self.assertEqual(json.loads(response.data)['user_id'], notebook.user_id) 70 | 71 | # def test_get_spark_app_config_by_notebook_path(self): 72 | # with self.app.app_context(): 73 | # token = self.login_and_get_token() 74 | # headers = { 75 | # 'Authorization': f'Bearer {token}', 76 | # } 77 | 78 | # # response = self.client.get('/spark-app/path_to_notebook/config', headers=headers) 79 | # # print(response.data) 80 | 81 | # response = self.client.get('/spark-app') 82 | # print(response.data) 83 | -------------------------------------------------------------------------------- /server/tests/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/server/tests/services/__init__.py -------------------------------------------------------------------------------- /server/tests/services/test_directory_service.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from flask import g 4 | from run import create_app 5 | from database import db 6 | from app.models.directory import DirectoryModel 7 | from app.services.user import User 8 | from app.services.directory import Directory 9 | import json 10 | 11 | class DirectoryServiceTestCase(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.app = create_app() 15 | self.client = self.app.test_client() 16 | with self.app.app_context(): 17 | db.create_all() 18 | 19 | def tearDown(self): 20 | with self.app.app_context(): 21 | db.session.remove() 22 | db.drop_all() 23 | 24 | def test_create_directory(self): 25 | with self.app.app_context(): 26 | 27 | response_0 = Directory.get_content_by_path('work') 28 | self.assertEqual(response_0.status_code, 200) 29 | 30 | g.user = 
User.get_mock_user() 31 | 32 | # Create directory 33 | response_1 = Directory.create_directory('work/test_create_directory') 34 | self.assertEqual(response_1.status_code, 201) 35 | directoryFromDB = DirectoryModel.query.filter_by(path='work/test_create_directory').first() 36 | self.assertIsNotNone(directoryFromDB) 37 | self.assertEqual(directoryFromDB.name, 'test_create_directory') 38 | 39 | # Check if created directory could be detected 40 | response_2 = Directory.get_content_by_path('work') 41 | self.assertEqual(response_2.status_code, 200) 42 | self.assertEqual(len([x for x in json.loads(response_2.data)['content'] if x['name'] == 'test_create_directory']), 1) 43 | 44 | def test_delete_directory_by_path(self): 45 | with self.app.app_context(): 46 | 47 | g.user = User.get_mock_user() 48 | 49 | # Create directory 50 | response_0 = Directory.create_directory('work/test_delete_directory_by_path') 51 | directoryFromDB = DirectoryModel.query.filter_by(path='work/test_delete_directory_by_path').first() 52 | self.assertIsNotNone(directoryFromDB) 53 | self.assertEqual(directoryFromDB.name, 'test_delete_directory_by_path') 54 | 55 | response_1 = Directory.get_content_by_path('work') 56 | self.assertEqual(response_1.status_code, 200) 57 | self.assertEqual(len([x for x in json.loads(response_1.data)['content'] if x['name'] == 'test_delete_directory_by_path']), 1) 58 | 59 | # Delete directory 60 | response_2 = Directory.delete_directory_by_path('work/test_delete_directory_by_path') 61 | self.assertEqual(response_2.status_code, 200) 62 | 63 | # Check if deleted directory could not be detected 64 | response_3 = Directory.get_content_by_path('work') 65 | self.assertEqual(response_3.status_code, 200) 66 | self.assertEqual(len([x for x in json.loads(response_3.data)['content'] if x['name'] == 'test_delete_directory_by_path']), 0) 67 | 68 | def test_rename_directory_by_path(self): 69 | with self.app.app_context(): 70 | 71 | g.user = User.get_mock_user() 72 | 73 | response_0 = Directory.get_content_by_path('work') 74 | contents = json.loads(response_0.data)['content'] 75 | content_0 = [x for x in contents if x['name'] == 'updated_name'] 76 | self.assertEqual(response_0.status_code, 200) 77 | self.assertEqual(content_0, []) 78 | 79 | # Create directory 80 | response_0 = Directory.create_directory('work/original_name') 81 | directoryFromDB = DirectoryModel.query.filter_by(path='work/original_name').first() 82 | self.assertIsNotNone(directoryFromDB) 83 | self.assertEqual(directoryFromDB.name, 'original_name') 84 | 85 | # Rename directory 86 | response_1 = Directory.rename_directory_by_path('work/original_name', 'work/updated_name') 87 | self.assertEqual(response_1.status_code, 200) 88 | 89 | directoryFromDB = DirectoryModel.query.filter_by(path='work/updated_name').first() 90 | self.assertIsNotNone(directoryFromDB) 91 | self.assertEqual(directoryFromDB.name, 'updated_name') 92 | 93 | # Check if renamed directory could be detected 94 | response_2 = Directory.get_content_by_path('work') 95 | contents = json.loads(response_2.data)['content'] 96 | content_original = [x for x in contents if x['name'] == 'original_name'] 97 | content_updated = [x for x in contents if x['name'] == 'updated_name'] 98 | 99 | self.assertEqual(len(content_original), 0) 100 | self.assertEqual(len(content_updated), 1) 101 | self.assertEqual(content_updated[0]['name'], 'updated_name') 102 | self.assertEqual(content_updated[0]['path'], 'work/updated_name') 103 | 104 | 
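The service tests above (and the kernel tests that follow) talk to a live Postgres and Jupyter server through the API paths in config.py, so they behave as integration tests rather than pure unit tests. A minimal sketch of one way to run them from the server/ directory, assuming the docker-compose services are up and relying on the ENV switch in run.py's create_app:

import os
import unittest

# Assumption: 'testing' selects TestingConfig (postgres/notebook hostnames)
# in create_app; use 'integration' for the localhost-based config instead.
os.environ['ENV'] = 'testing'

# Discover and run the service-level test cases under tests/services.
suite = unittest.defaultTestLoader.discover('tests/services', pattern='test_*_service.py')
unittest.TextTestRunner(verbosity=2).run(suite)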
-------------------------------------------------------------------------------- /server/tests/services/test_kernel_service.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from flask_cors import CORS 3 | from flask import g 4 | from run import create_app 5 | from database import db 6 | from app.models.user import UserModel 7 | from app.services.kernel import Kernel 8 | from app.services.notebook import Notebook 9 | from app.services.session import Session 10 | import json 11 | 12 | class KernelServiceTestCase(unittest.TestCase): 13 | 14 | def setUp(self): 15 | self.app = create_app() 16 | self.client = self.app.test_client() 17 | with self.app.app_context(): 18 | db.create_all() 19 | 20 | def tearDown(self): 21 | with self.app.app_context(): 22 | db.session.remove() 23 | db.drop_all() 24 | 25 | def test_get_kernel_by_id(self): 26 | with self.app.app_context(): 27 | # Get non-exist kernel 28 | response_0 = Kernel.get_kernel_by_id('kernel_id') 29 | self.assertEqual(response_0.status_code, 404) 30 | 31 | # Create User 32 | user = UserModel(name='testuser', email='testuser@example.com') 33 | password = 'test_password' 34 | user.set_password(password) 35 | g.user = user 36 | 37 | # Create Notebook 38 | response_1 = Notebook.create_notebook_with_init_cells(notebook_name='Notebook_1.ipynb', notebook_path='') 39 | self.assertEqual(response_1.status_code, 200) 40 | 41 | notebook_1 = json.loads(response_1.data.decode('utf-8')) 42 | notebook_path_1 = notebook_1['path'] 43 | 44 | # Create Session 45 | response_2 = Session.create_session(notebook_path_1) 46 | self.assertEqual(response_2.status_code, 200) 47 | session = json.loads(response_2.data.decode('utf-8')) 48 | kernelId = session['kernel']['id'] 49 | 50 | # Get kernel 51 | response_3 = Kernel.get_kernel_by_id(kernelId) 52 | self.assertEqual(response_3.status_code, 200) 53 | 54 | def test_restart_kernel(self): 55 | with self.app.app_context(): 56 | user = UserModel(name='testuser0', email='testuser0@example.com') 57 | password = 'test_password' 58 | user.set_password(password) 59 | db.session.add(user) 60 | db.session.commit() 61 | g.user = user 62 | 63 | # Restart non-exist kernel 64 | response_0 = Kernel.restart_kernel('kernel_id') 65 | self.assertEqual(response_0.status_code, 404) 66 | 67 | # Create Notebook 68 | response_1 = Notebook.create_notebook_with_init_cells(notebook_name='Notebook_1.ipynb', notebook_path='') 69 | self.assertEqual(response_1.status_code, 200) 70 | 71 | notebook_1 = json.loads(response_1.data.decode('utf-8')) 72 | notebook_path_1 = notebook_1['path'] 73 | 74 | # Create Session 75 | response_2 = Session.create_session(notebook_path_1) 76 | self.assertEqual(response_2.status_code, 200) 77 | session = json.loads(response_2.data.decode('utf-8')) 78 | kernelId = session['kernel']['id'] 79 | 80 | # Restart kernel 81 | response_3 = Kernel.restart_kernel(kernelId) 82 | self.assertEqual(response_3.status_code, 200) 83 | -------------------------------------------------------------------------------- /webapp/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env", "@babel/preset-react"] 3 | } -------------------------------------------------------------------------------- /webapp/.env.dev: -------------------------------------------------------------------------------- 1 | REACT_APP_SERVER_BASE_URL=http://localhost:5002 2 | REACT_APP_JUPYTER_BASE_URL=http://localhost:8888 3 | 
REACT_APP_SPARK_UI_BASE_URL=http://localhost:18081 4 | REACT_APP_AIRFLOW_BASE_URL=http://localhost:8090 5 | REACT_APP_USERNAME=user_0 6 | REACT_APP_PASSWORD=12345A -------------------------------------------------------------------------------- /webapp/.env.test: -------------------------------------------------------------------------------- 1 | REACT_APP_SERVER_BASE_URL=server:5002 2 | REACT_APP_JUPYTER_BASE_URL=notebook:8888 3 | REACT_APP_SPARK_UI_BASE_URL=history-server:18080 4 | REACT_APP_AIRFLOW_BASE_URL=airflow-webserver:8080 5 | REACT_APP_USERNAME=user_0 6 | REACT_APP_PASSWORD=12345A -------------------------------------------------------------------------------- /webapp/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Build the React application 2 | FROM node:18 as build 3 | WORKDIR /app 4 | 5 | # Copy package files 6 | COPY package*.json ./ 7 | 8 | # Clean and setup npm 9 | RUN npm cache clean --force && \ 10 | npm set legacy-peer-deps=true 11 | 12 | # Install dependencies in a specific order 13 | RUN npm install && \ 14 | npm install @jridgewell/gen-mapping@0.3.2 && \ 15 | npm install @babel/generator@7.23.0 && \ 16 | npm install @babel/traverse@7.23.0 17 | 18 | # Copy the rest of the application 19 | COPY . . 20 | 21 | # Build with increased memory limit 22 | ENV NODE_OPTIONS="--max-old-space-size=4096" 23 | RUN npm rebuild && npm run build 24 | 25 | # Stage 2: Serve the app with nginx 26 | FROM nginx:stable-alpine 27 | COPY --from=build /app/build /usr/share/nginx/html 28 | COPY entrypoint.sh /entrypoint.sh 29 | COPY public/env.template.js /usr/share/nginx/html/env.template.js 30 | RUN chmod +x /entrypoint.sh 31 | EXPOSE 80 32 | ENTRYPOINT ["/entrypoint.sh"] 33 | -------------------------------------------------------------------------------- /webapp/config-overrides.js: -------------------------------------------------------------------------------- 1 | module.exports = function override(config, env) { 2 | config.resolve.fallback = { 3 | ...config.resolve.fallback, 4 | "path": require.resolve("path-browserify"), // Or false if you choose not to polyfill 5 | "crypto": false, // Or false if you choose not to polyfill 6 | "querystring": require.resolve("querystring-es3"), 7 | "stream": false, 8 | "buffer": require.resolve("buffer/"), 9 | }; 10 | return config; 11 | }; 12 | -------------------------------------------------------------------------------- /webapp/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Replace placeholders in your built React app with actual environment variables 4 | envsubst < /usr/share/nginx/html/env.template.js > /usr/share/nginx/html/env.js 5 | 6 | # Start the nginx server 7 | nginx -g 'daemon off;' 8 | -------------------------------------------------------------------------------- /webapp/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | transform: { 3 | '^.+\\.jsx?$': 'babel-jest', 4 | }, 5 | testEnvironment: 'jsdom', 6 | }; -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webapp", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@coreui/icons": "^3.0.1", 7 | "@coreui/icons-react": "^2.2.1", 8 | "@coreui/react": "^5.1.0", 9 | "@datalayer/icons-react": "^0.3.0", 10 | 
"@emotion/react": "^11.11.4", 11 | "@emotion/styled": "^11.11.5", 12 | "@jupyter/web-components": "^0.16.1", 13 | "@jupyterlab/ui-components": "^4.2.2", 14 | "@mui/lab": "^5.0.0-alpha.170", 15 | "@mui/material": "^5.3.0", 16 | "@mui/x-tree-view": "^7.7.0", 17 | "@testing-library/jest-dom": "^5.17.0", 18 | "@testing-library/user-event": "^13.5.0", 19 | "ace-builds": "^1.34.2", 20 | "ansi_up": "^6.0.2", 21 | "ansi-to-html": "^0.7.2", 22 | "brace": "^0.11.1", 23 | "buffer": "^6.0.3", 24 | "crypto-browserify": "^3.12.0", 25 | "path-browserify": "^1.0.1", 26 | "process": "^0.11.10", 27 | "querystring-es3": "^0.2.1", 28 | "react": "^18.3.1", 29 | "react-ace": "^11.0.1", 30 | "react-app-rewired": "^2.2.1", 31 | "react-dom": "^18.3.1", 32 | "react-icons": "^4.3.1", 33 | "react-markdown": "^9.0.1", 34 | "react-router-dom": "^6.23.1", 35 | "react-scripts": "5.0.1", 36 | "remark-gfm": "^4.0.0", 37 | "web-vitals": "^2.1.4" 38 | }, 39 | "scripts": { 40 | "start": "react-app-rewired start", 41 | "build": "react-app-rewired build", 42 | "test": "react-app-rewired test", 43 | "eject": "react-scripts eject" 44 | }, 45 | "eslintConfig": { 46 | "extends": [ 47 | "react-app", 48 | "react-app/jest" 49 | ] 50 | }, 51 | "browserslist": { 52 | "production": [ 53 | ">0.2%", 54 | "not dead", 55 | "not op_mini all" 56 | ], 57 | "development": [ 58 | "last 1 chrome version", 59 | "last 1 firefox version", 60 | "last 1 safari version" 61 | ] 62 | }, 63 | "devDependencies": { 64 | "@babel/core": "^7.24.7", 65 | "@babel/preset-env": "^7.24.7", 66 | "@babel/preset-react": "^7.24.7", 67 | "@testing-library/react": "^16.0.0", 68 | "jest": "^27.5.1", 69 | "react-app-rewired": "^2.2.1" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /webapp/public/env.template.js: -------------------------------------------------------------------------------- 1 | window._env_ = { 2 | REACT_APP_JUPYTER_BASE_URL: "$REACT_APP_JUPYTER_BASE_URL", 3 | REACT_APP_SERVER_BASE_URL: "$REACT_APP_SERVER_BASE_URL", 4 | REACT_APP_SPARK_UI_BASE_URL: "$REACT_APP_SPARK_UI_BASE_URL", 5 | REACT_APP_AIRFLOW_BASE_URL: "$REACT_APP_AIRFLOW_BASE_URL", 6 | REACT_APP_USERNAME: "$REACT_APP_USERNAME", 7 | REACT_APP_PASSWORD: "$REACT_APP_PASSWORD", 8 | }; 9 | -------------------------------------------------------------------------------- /webapp/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/webapp/public/favicon.ico -------------------------------------------------------------------------------- /webapp/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | React App 28 | 29 | 30 | 31 | 32 |
33 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /webapp/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/webapp/public/logo192.png -------------------------------------------------------------------------------- /webapp/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuwenyihust/PawMark/3d346d0497ba7aff70a94349bacaf2c2ac6b6139/webapp/public/logo512.png -------------------------------------------------------------------------------- /webapp/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /webapp/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /webapp/src/assets/spark-start.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /webapp/src/components/HistoryServer.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import config from '../config'; 3 | 4 | function HistoryServer({ showHistoryServer }) { 5 | return ( 6 |
{/* Adjust marginLeft based on your sidebar width */} 7 | {showHistoryServer && ( 8 |