├── .github
│   └── workflows
│       ├── docker-publish.yaml
│       ├── lint.yaml
│       └── release.yaml
├── .gitignore
├── LICENSE
├── README.md
├── docker
│   ├── Dockerfile
│   ├── README.md
│   ├── launch_spark_history_server_locally.sh
│   └── pom.xml
├── images
│   ├── spark-webui-executors.png
│   └── spark-webui-home.png
└── stable
    └── spark-history-server
        ├── .helmignore
        ├── Chart.yaml
        ├── templates
        │   ├── NOTES.txt
        │   ├── _helpers.tpl
        │   ├── configmap.yaml
        │   ├── deployment.yaml
        │   ├── ingress.yaml
        │   ├── service.yaml
        │   ├── serviceaccount.yaml
        │   └── tests
        │       └── test-connection.yaml
        └── values.yaml

--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yaml:
--------------------------------------------------------------------------------
1 | name: Build and Push Multi-Arch Docker Image to GHCR
2 | 
3 | on:
4 |   push:
5 |     branches: [main]
6 |     paths:
7 |       - 'stable/spark-history-server/Chart.yaml'
8 |       - 'docker/Dockerfile'
9 |       - '**/*.sh'
10 |       - '**/*.yaml'
11 |   workflow_dispatch:
12 |     inputs:
13 |       override_version:
14 |         description: 'Optional override for appVersion (e.g. 1.3.2)'
15 |         required: false
16 | 
17 | env:
18 |   IMAGE_NAME: spark-history-server
19 | 
20 | jobs:
21 |   build-and-push:
22 |     runs-on: ubuntu-latest
23 | 
24 |     permissions:
25 |       contents: read
26 |       packages: write
27 |       id-token: write
28 | 
29 |     steps:
30 |       - name: Checkout code
31 |         uses: actions/checkout@v3
32 | 
33 |       - name: Set up Docker Buildx
34 |         uses: docker/setup-buildx-action@v3
35 | 
36 |       - name: Log in to GitHub Container Registry (GHCR)
37 |         uses: docker/login-action@v3
38 |         with:
39 |           registry: ghcr.io
40 |           username: ${{ github.actor }}
41 |           password: ${{ secrets.GITHUB_TOKEN }}
42 | 
43 |       - name: Set image namespace (lowercase)
44 |         id: repo
45 |         run: |
46 |           owner_lower=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
47 |           echo "owner=${owner_lower}" >> $GITHUB_OUTPUT
48 | 
49 |       - name: Determine image version (manual or Chart.yaml)
50 |         id: extract_version
51 |         run: |
52 |           if [ -n "${{ github.event.inputs.override_version }}" ]; then
53 |             VERSION="${{ github.event.inputs.override_version }}"
54 |           else
55 |             VERSION=$(grep -E '^appVersion:[[:space:]]*' stable/spark-history-server/Chart.yaml | awk '{print $2}')
56 |           fi
57 |           if [ -z "$VERSION" ]; then
58 |             echo "❌ ERROR: Version could not be determined"
59 |             exit 1
60 |           fi
61 |           echo "✅ Using version: $VERSION"
62 |           echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
63 | 
64 |       - name: Check if image tag exists in GHCR
65 |         id: tagcheck
66 |         env:
67 |           GHCR_USER: ${{ steps.repo.outputs.owner }}
68 |           IMAGE: ${{ env.IMAGE_NAME }}
69 |           TAG: ${{ steps.extract_version.outputs.VERSION }}
70 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
71 |         run: |
72 |           echo "🔍 Checking for ghcr.io/${GHCR_USER}/${IMAGE}:${TAG}"
73 |           STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
74 |             -H "Authorization: Bearer ${GH_TOKEN}" \
75 |             -H "Accept: application/vnd.oci.image.manifest.v1+json" \
76 |             https://ghcr.io/v2/${GHCR_USER}/${IMAGE}/manifests/${TAG})
77 | 
78 |           echo "GHCR tag check returned HTTP status: $STATUS"
79 |           if [ "$STATUS" = "200" ]; then
80 |             echo "Image already exists. Skipping build."
81 |             echo "skip_build=true" >> $GITHUB_OUTPUT
82 |           else
83 |             echo "Image not found. Proceeding with build."
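            # Any non-200 status (typically 404) simply means the tag has not been published yet, so fall through and build.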
84 | echo "skip_build=false" >> $GITHUB_OUTPUT 85 | fi 86 | 87 | - name: Build and push multi-arch image to GHCR 88 | if: steps.tagcheck.outputs.skip_build == 'false' 89 | uses: docker/build-push-action@v5 90 | with: 91 | context: ./docker 92 | push: true 93 | platforms: linux/amd64,linux/arm64 94 | tags: | 95 | ghcr.io/${{ steps.repo.outputs.owner }}/${{ env.IMAGE_NAME }}:${{ steps.extract_version.outputs.VERSION }} 96 | ghcr.io/${{ steps.repo.outputs.owner }}/${{ env.IMAGE_NAME }}:latest 97 | labels: | 98 | org.opencontainers.image.source=https://github.com/${{ github.repository }} 99 | org.opencontainers.image.version=${{ steps.extract_version.outputs.VERSION }} 100 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint Charts 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'stable/**' 7 | 8 | workflow_dispatch: 9 | 10 | jobs: 11 | lint-chart: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v1 16 | - name: Run chart-testing (lint) 17 | uses: helm/chart-testing-action@main 18 | with: 19 | command: lint 20 | config: .github/ct.yaml 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'stable/**' 9 | release: 10 | types: [published, created] 11 | 12 | jobs: 13 | release: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | - name: Publish Helm charts 19 | uses: stefanprodan/helm-gh-pages@v1.4.1 20 | with: 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | charts_dir: stable -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 💥 Spark History Server (Spark Web UI) 💥
2 | 
3 | Spark History Server is a web user interface for monitoring the metrics and performance of [Apache Spark](https://spark.apache.org/) jobs.
4 | 
5 | ## 🚀 Features
6 | 
7 | - Helm Chart bootstraps Spark History Server in [Amazon EKS](https://aws.amazon.com/eks/) or any [Kubernetes](https://kubernetes.io/) cluster
8 | - Configured to read [Spark Event Logs](https://spark.apache.org/docs/latest/monitoring.html#applying-compaction-on-rolling-event-log-files) from [Amazon S3](https://aws.amazon.com/s3/) buckets
9 | - Uses [IRSA (IAM Roles for Service Accounts)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) for secure S3 access
10 | - Multi-architecture support (amd64, arm64)
11 | - Supports both versioned and latest tags
12 | - [Local Docker](https://github.com/kubedai/spark-history-server/tree/main/docker) deployment option available
13 | 
14 | ## 📋 Prerequisites
15 | 
16 | - :white_check_mark: Kubernetes 1.19+
17 | - :white_check_mark: [Helm 3+](https://helm.sh/docs/intro/install/)
18 | - :white_check_mark: [AWS CLI](https://aws.amazon.com/cli/) configured with appropriate credentials
19 | - :white_check_mark: [eksctl](https://docs.aws.amazon.com/eks/latest/userguide/eksctl.html) (for EKS clusters)
20 | 
21 | ## 🔧 Installation
22 | 
23 | ### 1. Create IRSA (IAM Role for Service Account)
24 | 
25 | Run the following command to create AWS IRSA:
26 | 
27 | ```bash
28 | eksctl create iamserviceaccount \
29 |   --cluster=<CLUSTER_NAME> \
30 |   --name=spark-history-server \
31 |   --namespace=spark-history-server \
32 |   --attach-policy-arn=<IAM_POLICY_ARN>
33 | ```
34 | 
35 | **Example:**
36 | ```bash
37 | eksctl create iamserviceaccount \
38 |   --cluster=eks-demo-cluster \
39 |   --name=spark-history-server \
40 |   --namespace=spark-history-server \
41 |   --attach-policy-arn=arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
42 | ```
43 | 
44 | ### 2. Configure values.yaml
45 | 
46 | Update the following in your `values.yaml`:
47 | 
48 | ```yaml
49 | serviceAccount:
50 |   create: false
51 |   annotations:
52 |     eks.amazonaws.com/role-arn: "<IRSA_IAM_ROLE_ARN>"
53 |   name: "spark-history-server"
54 | 
55 | sparkHistoryOpts: "-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
56 | ```
57 | 
58 | ### 3. Install the Chart
59 | 
60 | ```bash
61 | # Add the Helm repository
62 | helm repo add kubedai https://kubedai.github.io/spark-history-server
63 | helm repo update
64 | 
65 | # Install the chart
66 | helm install spark-history-server kubedai/spark-history-server \
67 |   --namespace spark-history-server \
68 |   --create-namespace \
69 |   -f values.yaml
70 | ```
71 | 
72 | ## 🔍 Accessing Spark WebUI
73 | 
74 | ### Option 1: Using Port Forward
75 | 
76 | ```bash
77 | kubectl port-forward services/spark-history-server 18085:80 -n spark-history-server
78 | ```
79 | 
80 | Then access the UI at `http://localhost:18085/`
81 | 
82 | ### Option 2: Using Ingress (if enabled)
83 | 
84 | Configure ingress in `values.yaml`:
85 | 
86 | ```yaml
87 | ingress:
88 |   enabled: true
89 |   ingressClassName: nginx # or your preferred ingress class
90 |   hosts:
91 |     - host: spark-history.example.com
92 |       paths:
93 |         - /
94 | ```
95 | 
96 | ## 📸 UI Screenshots
97 | 
98 | ### Home Page
99 | 
100 | ![Spark Web UI Homepage](images/spark-webui-home.png)
101 | 
102 | 
103 | ### Executors Page
104 | 
105 | ![Spark Web UI Executors page](images/spark-webui-executors.png)
106 | 
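## ✍️ Writing Event Logs from Spark Jobs

The history server only displays what your jobs record, so each Spark application must write its event logs to the same S3 location the chart reads from. Below is a minimal sketch of enabling this at submit time — the bucket, prefix, and application JAR are placeholders, not values from this repository:

```bash
spark-submit \
  --conf spark.eventLog.enabled=true \
  --conf spark.eventLog.dir=s3a://<BUCKET_NAME>/<PREFIX>/ \
  --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
  <YOUR_APPLICATION>.jar
```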
107 | 108 | ## 🔄 Upgrading 109 | 110 | ```bash 111 | helm upgrade spark-history-server kubedai/spark-history-server \ 112 | --namespace spark-history-server 113 | ``` 114 | 115 | ## 🗑️ Uninstalling 116 | 117 | ```bash 118 | helm uninstall spark-history-server --namespace spark-history-server 119 | ``` 120 | 121 | ## 🧱 Contributing 122 | 123 | To update the Docker image version published to **GitHub Container Registry (GHCR)**: 124 | 125 | 1. **Fork this repository** 126 | 2. **Bump the `appVersion:`** field in `stable/spark-history-server/Chart.yaml` 127 | 3. **Raise a Pull Request (PR)** targeting the `main` branch 128 | 129 | Once merged, GitHub Actions will automatically: 130 | - Build multi-architecture Docker image (`linux/amd64`, `linux/arm64`) 131 | - Push to GHCR: [`ghcr.io/kubedai/spark-history-server`](https://github.com/kubedai/spark-history-server/pkgs/container/spark-history-server) 132 | - Tag with both version and `latest` 133 | 134 | You can also manually trigger the workflow from GitHub Actions with an optional version override. 135 | 136 | ## ⚙️ Configuration 137 | 138 | Key configuration options in `values.yaml`: 139 | 140 | | Parameter | Description | Default | 141 | |-----------|-------------|---------| 142 | | `image.repository` | Image repository | `ghcr.io/kubedai/spark-history-server` | 143 | | `image.tag` | Image tag | `latest` | 144 | | `serviceAccount.create` | Create service account | `true` | 145 | | `sparkHistoryOpts` | Spark history server options | `""` | 146 | | `resources` | Pod resource requests/limits | See values.yaml | 147 | 148 | ## 🤝 Community 149 | 150 | Give us a star ⭐️ if you find this project useful! 151 | 152 | ## 📝 License 153 | 154 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 155 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image with Maven and Amazon Corretto 8 (Java 8) 2 | # This is suitable for building Spark-compatible Java applications and resolving Maven dependencies 3 | FROM maven:3.6-amazoncorretto-8 4 | 5 | # Define a UID for the non-root user to run Spark History Server securely 6 | ARG spark_uid=1000 7 | 8 | # Set working directory for the build phase 9 | WORKDIR /tmp/ 10 | 11 | # Install essential utilities: 12 | # - procps: for basic process inspection (e.g., `ps`) 13 | # - curl: for downloading Spark binaries 14 | # - tar: for unpacking Spark archive 15 | RUN yum install -y procps curl tar && yum clean all 16 | 17 | # Copy the Maven POM file into the image 18 | # This is used to resolve all Hadoop, AWS SDK, and other dependencies 19 | COPY pom.xml /tmp 20 | 21 | # Download the Apache Spark binary (without Hadoop) to allow for custom Hadoop integrations 22 | # Unpack Spark into /opt/spark, which is the conventional install path 23 | RUN curl -o ./spark-3.5.1-bin-without-hadoop.tgz https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-without-hadoop.tgz && \ 24 | tar -xzf spark-3.5.1-bin-without-hadoop.tgz && \ 25 | mv spark-3.5.1-bin-without-hadoop /opt/spark && \ 26 | rm spark-3.5.1-bin-without-hadoop.tgz 27 | 28 | # Use Maven to resolve and copy all runtime dependencies from the pom.xml 29 | # into Spark's JAR directory. This ensures S3/Hadoop/AWS SDK integration. 30 | # Then remove conflicting or outdated jars to prevent runtime classloader issues. 
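# (Several of the jars removed below, e.g. the standalone aws-java-sdk-* artifacts, duplicate classes
#  already packaged inside the aws-java-sdk-bundle dependency resolved from pom.xml.)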
31 | RUN mvn dependency:copy-dependencies -DoutputDirectory=/opt/spark/jars/ && \ 32 | rm -f /opt/spark/jars/jsr305-3.0.0.jar && \ 33 | rm -f /opt/spark/jars/jersey-*-1.19.jar && \ 34 | rm -f /opt/spark/jars/joda-time-2.8.1.jar && \ 35 | rm -f /opt/spark/jars/jmespath-java-*.jar && \ 36 | rm -f /opt/spark/jars/aws-java-sdk-core-*.jar && \ 37 | rm -f /opt/spark/jars/aws-java-sdk-kms-*.jar && \ 38 | rm -f /opt/spark/jars/aws-java-sdk-s3-*.jar && \ 39 | rm -f /opt/spark/jars/ion-java-1.0.2.jar 40 | 41 | # Create Spark logs directory and assign ownership to the non-root user 42 | # Also inject the user into /etc/passwd (required by some JVM tools and shells) 43 | RUN mkdir -p /opt/spark/logs && \ 44 | chown -R ${spark_uid}:${spark_uid} /opt/spark && \ 45 | echo "${spark_uid}:x:${spark_uid}:${spark_uid}:anonymous uid:/opt/spark:/bin/false" >> /etc/passwd 46 | 47 | # Switch to non-root user for security best practices 48 | USER ${spark_uid} 49 | 50 | # Set working directory to Spark home 51 | WORKDIR /opt/spark 52 | 53 | # Use bash as the entrypoint to allow Helm, K8s, or CI to pass runtime commands via CMD 54 | # For example: 55 | # CMD ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"] 56 | ENTRYPOINT ["/bin/bash", "-c"] -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # 🐳 Spark History Server Docker Image 2 | 3 | This guide explains how to build and run the Spark History Server using Docker. 4 | 5 | ## 📋 Prerequisites 6 | 7 | - Git 8 | - Docker client 9 | - AWS credentials configured (if using S3) 10 | 11 | ## 🔧 Building the Docker Image 12 | 13 | ### 1. Clone the Repository 14 | 15 | ```bash 16 | git clone https://github.com/kubedai/spark-history-server.git 17 | cd spark-history-server/docker 18 | ``` 19 | 20 | ### 2. Build the Image 21 | 22 | Build the Docker image using the provided Dockerfile: 23 | 24 | ```bash 25 | docker build -t spark-history-server:latest . 26 | ``` 27 | 28 | > Note: You can replace `spark-history-server:latest` with your preferred image name and tag. 29 | 30 | ### 3. 
Push to Registry (Optional)
31 | 
32 | If you want to push the image to a container registry:
33 | 
34 | ```bash
35 | # Tag the image for your registry
36 | docker tag spark-history-server:latest <YOUR_REGISTRY>/spark-history-server:latest
37 | 
38 | # Push to registry
39 | docker push <YOUR_REGISTRY>/spark-history-server:latest
40 | ```
41 | 
42 | ## 🚀 Running Locally
43 | 
44 | ### Using the Helper Script
45 | 
46 | The repository includes a helper script to run the Spark History Server locally:
47 | 
48 | ```bash
49 | # Show help
50 | ./launch_spark_history_server_locally.sh help
51 | 
52 | # Run with default settings
53 | ./launch_spark_history_server_locally.sh
54 | ```
55 | 
56 | ### Manual Run
57 | 
58 | You can also run the container manually. Because the image's entrypoint is `/bin/bash -c`, pass the history server launch command explicitly:
59 | 
60 | ```bash
61 | docker run -d \
62 |   --name spark-history-server \
63 |   -p 18080:18080 \
64 |   -e SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=s3a://your-bucket/your-prefix/" \
65 |   spark-history-server:latest \
66 |   "/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"
67 | ```
68 | 
69 | ## 🔍 Accessing the UI
70 | 
71 | Once running, access the Spark History Server UI at:
72 | - http://localhost:18080
73 | 
74 | ## ⚙️ Configuration
75 | 
76 | Key environment variables:
77 | - `SPARK_HISTORY_OPTS`: Spark History Server configuration options
78 | - `SPARK_CONF`: Additional Spark configuration properties
79 | 
80 | Example with custom configuration:
81 | 
82 | ```bash
83 | docker run -d \
84 |   --name spark-history-server \
85 |   -p 18080:18080 \
86 |   -e SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=s3a://your-bucket/your-prefix/" \
87 |   -e SPARK_CONF="spark.history.ui.port=18080" \
88 |   spark-history-server:latest \
89 |   "/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"
90 | ```
91 | 
92 | ## 🔄 Updating the Image
93 | 
94 | To update to a newer version:
95 | 
96 | ```bash
97 | # Pull the latest changes
98 | git pull
99 | 
100 | # Rebuild the image
101 | docker build -t spark-history-server:latest .
102 | ```
103 | 
104 | ## 🧹 Cleanup
105 | 
106 | To remove the container and image:
107 | 
108 | ```bash
109 | # Stop and remove container
110 | docker stop spark-history-server
111 | docker rm spark-history-server
112 | 
113 | # Remove image
114 | docker rmi spark-history-server:latest
115 | ```
116 | 
--------------------------------------------------------------------------------
/docker/launch_spark_history_server_locally.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | 
4 | print_help(){
5 |   # Display Help
6 |   echo "A helper script to start/stop your local Spark History Server"
7 |   echo
8 |   echo "Syntax: sh launch_spark_history_server_locally.sh {start|stop|restart|status|help} {options}"
9 |   echo
10 |   echo "actions:"
11 |   echo "start      start your local Spark History Server at 'localhost:18080'"
12 |   echo "stop       stop the running Spark History Server"
13 |   echo "restart    restart the Spark History Server"
14 |   echo "status     print the current running container details"
15 |   echo "help       print this message"
16 |   echo
17 |   echo "start/restart options"
18 |   echo "-sb or --S3_BUCKET             S3 bucket name where you have your Spark EventLogs. eg: my-bucket in 's3://my-bucket/events/'. REQUIRED with 'start/restart'"
19 |   echo "-sp or --S3_PREFIX             S3 bucket prefix where you have your Spark EventLogs. eg: 'spark/history/events' in 's3://my-bucket/spark/history/events/'. REQUIRED with 'start/restart'"
20 |   echo "-r or --AWS_REGION             Your AWS Region where the S3 bucket is in. Default: us-east-1"
21 |   echo "-cn or --CONTAINER_NAME        Your Custom Container Name. 
Default: spark-history-server" 22 | echo "-du or --DOCKER_USER The local user used to build/publish the docker image. Default: $USER, the current logged in user" 23 | echo "-ak or --AWS_ACCESS_KEY_ID AWS access key id for authentication, optional: you may export ENV variables for the same" 24 | echo "-as or --AWS_SECRET_ACCESS_KEY AWS secret access key for authentication, optional: you may export ENV variables for the same" 25 | echo "-at or --AWS_SESSION_TOKEN AWS session token for authentication, optional: you may export ENV variables for the same" 26 | echo 27 | echo "eg:" 28 | echo "sh launch_spark_history_server_locally.sh start -sb my-bucket -sp spark/history/events" 29 | echo "sh launch_spark_history_server_locally.sh stop" 30 | echo "sh launch_spark_history_server_locally.sh restart -sb my-bucket -sp spark/history/events" 31 | echo "sh launch_spark_history_server_locally.sh status" 32 | echo "sh launch_spark_history_server_locally.sh help" 33 | echo 34 | } 35 | 36 | do_start(){ 37 | S3_BUCKET=$1 38 | S3_BUCKET_PREFIX=$2 39 | LOCAL_USER=$3 40 | 41 | LOG_DIR="s3a://$S3_BUCKET/$S3_BUCKET_PREFIX" 42 | DOCKER_IMAGE="$LOCAL_USER/spark-web-ui:latest" 43 | 44 | docker run -itd --name $CONTAINER_NAME -e SPARK_DAEMON_MEMORY="2g" -e SPARK_DAEMON_JAVA_OPTS="-XX:+UseG1GC" -e SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \ 45 | -Dspark.history.fs.logDirectory=$LOG_DIR \ 46 | -Dspark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID \ 47 | -Dspark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY \ 48 | -Dspark.hadoop.fs.s3a.session.token=$AWS_SESSION_TOKEN \ 49 | -Dspark.hadoop.fs.s3a.endpoint=$ENDPOINT \ 50 | -Dspark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider" -p 18080:18080 $DOCKER_IMAGE "/opt/spark/bin/spark-class $CLASS" 51 | sleep 10 52 | echo "Spark History Server running @ http://localhost:18080 " 53 | echo 54 | } 55 | 56 | do_stop(){ 57 | set +e 58 | docker stop $CONTAINER_NAME 59 | docker rm $CONTAINER_NAME 60 | set -e 61 | sleep 10 62 | } 63 | 64 | do_status(){ 65 | docker ps --filter "name=$CONTAINER_NAME" 66 | } 67 | 68 | # ENTER AWS CREDENTIALS TO RUN THE DOCKER IMAGE LOCALLY 69 | while [ $# -gt 0 ]; do 70 | case "$1" in 71 | start|stop|restart|status|help) 72 | ACTION="$1" 73 | ;; 74 | -sb|--S3_BUCKET) 75 | S3_BUCKET="$2" 76 | shift 77 | ;; 78 | -sp|--S3_PREFIX) 79 | S3_BUCKET_PREFIX="$2" 80 | shift 81 | ;; 82 | -r|--AWS_REGION) 83 | AWS_REGION="$2" 84 | shift 85 | ;; 86 | -cn|--CONTAINER_NAME) 87 | CONTAINER_NAME="$2" 88 | shift 89 | ;; 90 | -du|--DOCKER_USER) 91 | DOCKER_USER="$2" 92 | shift 93 | ;; 94 | -ak|--AWS_ACCESS_KEY_ID) 95 | AWS_ACCESS_KEY_ID="$2" 96 | shift 97 | ;; 98 | -as|--AWS_SECRET_ACCESS_KEY) 99 | AWS_SECRET_ACCESS_KEY="$2" 100 | shift 101 | ;; 102 | -at|--AWS_SESSION_TOKEN) 103 | AWS_SESSION_TOKEN="$2" 104 | shift 105 | ;; 106 | *) 107 | printf "* Error: Invalid argument.*\n" 108 | print_help 109 | exit 1 110 | esac 111 | shift 112 | done 113 | 114 | if [ -z "$AWS_REGION" ]; then AWS_REGION="us-east-1" ; fi 115 | if [ -z "$CONTAINER_NAME" ]; then CONTAINER_NAME="spark-history-server" ; fi 116 | if [ -z "$DOCKER_USER" ]; then DOCKER_USER="$USER" ; fi 117 | 118 | CLASS="org.apache.spark.deploy.history.HistoryServer" 119 | ENDPOINT="s3.$AWS_REGION.amazonaws.com" 120 | 121 | if [ -n "$AWS_ACCESS_KEY_ID" ]; then export AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" ; fi 122 | if [ -n "$AWS_SECRET_ACCESS_KEY" ]; then export AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" ; fi 123 | if [ -n "$AWS_SESSION_TOKEN" ]; then export 
AWS_SESSION_TOKEN="$AWS_SESSION_TOKEN" ; fi
124 | 
125 | #echo "Submitted args: $NAME $ACTION $S3_BUCKET $S3_BUCKET_PREFIX $AWS_REGION $CONTAINER_NAME $DOCKER_USER $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY $AWS_SESSION_TOKEN"
126 | 
127 | case $ACTION in
128 |   status)
129 |     echo "Print status: "
130 |     do_status
131 |     ;;
132 |   start)
133 |     echo "Starting Spark History Server: "
134 |     do_start $S3_BUCKET $S3_BUCKET_PREFIX $DOCKER_USER
135 |     do_status
136 |     ;;
137 |   stop)
138 |     echo "Stopping Spark History Server: "
139 |     do_stop
140 |     ;;
141 |   restart)
142 |     echo "Restarting Spark History Server: "
143 |     do_stop
144 |     do_start $S3_BUCKET $S3_BUCKET_PREFIX $DOCKER_USER
145 |     do_status
146 |     ;;
147 |   *)
148 |     print_help
149 |     exit 1
150 | esac
151 | 
152 | exit 0
--------------------------------------------------------------------------------
/docker/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |   <groupId>com.amazonaws</groupId>
7 |   <artifactId>SparkHistoryServerForEKS</artifactId>
8 |   <version>1.0-SNAPSHOT</version>
9 |   <packaging>jar</packaging>
10 | 
11 |   <properties>
12 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
13 |     <java.version>1.8</java.version>
14 |     <hadoop.version>3.4.1</hadoop.version>
15 |     <awssdk.version>1.12.783</awssdk.version>
16 |     <httpclient.version>4.5.13</httpclient.version>
17 |     <jackson.version>2.10.5</jackson.version>
18 |     <jackson.databind.version>2.12.7.1</jackson.databind.version>
19 |   </properties>
20 | 
21 |   <dependencyManagement>
22 |     <dependencies>
23 |       <dependency>
24 |         <groupId>com.amazonaws</groupId>
25 |         <artifactId>aws-java-sdk-bom</artifactId>
26 |         <version>${awssdk.version}</version>
27 |         <type>pom</type>
28 |         <scope>import</scope>
29 |       </dependency>
30 |     </dependencies>
31 |   </dependencyManagement>
32 | 
33 |   <dependencies>
34 |     <dependency>
35 |       <groupId>org.apache.hadoop</groupId>
36 |       <artifactId>hadoop-common</artifactId>
37 |       <version>${hadoop.version}</version>
38 |       <scope>provided</scope>
39 |     </dependency>
40 | 
41 |     <dependency>
42 |       <groupId>org.apache.hadoop</groupId>
43 |       <artifactId>hadoop-client</artifactId>
44 |       <version>${hadoop.version}</version>
45 |       <scope>provided</scope>
46 |     </dependency>
47 | 
48 |     <dependency>
49 |       <groupId>org.apache.hadoop</groupId>
50 |       <artifactId>hadoop-aws</artifactId>
51 |       <version>${hadoop.version}</version>
52 |       <scope>provided</scope>
53 |       <exclusions>
54 |         <exclusion>
55 |           <groupId>com.fasterxml.jackson.core</groupId>
56 |           <artifactId>jackson-core</artifactId>
57 |         </exclusion>
58 |         <exclusion>
59 |           <groupId>com.fasterxml.jackson.core</groupId>
60 |           <artifactId>jackson-databind</artifactId>
61 |         </exclusion>
62 |         <exclusion>
63 |           <groupId>com.fasterxml.jackson.core</groupId>
64 |           <artifactId>jackson-annotations</artifactId>
65 |         </exclusion>
66 |       </exclusions>
67 |     </dependency>
68 | 
69 |     <dependency>
70 |       <groupId>com.amazonaws</groupId>
71 |       <artifactId>aws-java-sdk-bundle</artifactId>
72 |       <version>${awssdk.version}</version>
73 |       <exclusions>
74 |         <exclusion>
75 |           <groupId>com.amazonaws</groupId>
76 |           <artifactId>aws-java-sdk-core</artifactId>
77 |         </exclusion>
78 |         <exclusion>
79 |           <groupId>com.amazonaws</groupId>
80 |           <artifactId>aws-java-sdk-s3</artifactId>
81 |         </exclusion>
82 |       </exclusions>
83 |     </dependency>
84 | 
85 |     <dependency>
86 |       <groupId>org.apache.httpcomponents</groupId>
87 |       <artifactId>httpclient</artifactId>
88 |       <version>${httpclient.version}</version>
89 |     </dependency>
90 | 
91 |     <dependency>
92 |       <groupId>com.fasterxml.jackson.core</groupId>
93 |       <artifactId>jackson-core</artifactId>
94 |       <version>${jackson.version}</version>
95 |       <scope>provided</scope>
96 |     </dependency>
97 | 
98 |     <dependency>
99 |       <groupId>com.fasterxml.jackson.core</groupId>
100 |       <artifactId>jackson-databind</artifactId>
101 |       <version>${jackson.databind.version}</version>
102 |       <scope>provided</scope>
103 |     </dependency>
104 | 
105 |     <dependency>
106 |       <groupId>com.fasterxml.jackson.core</groupId>
107 |       <artifactId>jackson-annotations</artifactId>
108 |       <version>${jackson.version}</version>
109 |       <scope>provided</scope>
110 |     </dependency>
111 |   </dependencies>
112 | </project>
--------------------------------------------------------------------------------
/images/spark-webui-executors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KubedAI/spark-history-server/991838638947f3ec03049e47be458a80cfed13aa/images/spark-webui-executors.png
--------------------------------------------------------------------------------
/images/spark-webui-home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KubedAI/spark-history-server/991838638947f3ec03049e47be458a80cfed13aa/images/spark-webui-home.png
--------------------------------------------------------------------------------
/stable/spark-history-server/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /stable/spark-history-server/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: spark-history-server 3 | description: A Helm chart for deploying Spark History Server in Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 1.3.2 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | appVersion: 1.3.2 24 | 25 | home: https://github.com/kubedai/spark-history-server 26 | sources: 27 | - https://github.com/kubedai/spark-history-server 28 | keywords: 29 | - spark 30 | - kubernetes 31 | - helm 32 | - history-server 33 | - monitoring 34 | - analytics 35 | 36 | maintainers: 37 | - name: vara-bonthu 38 | email: vara.bonthu@gmail.com -------------------------------------------------------------------------------- /stable/spark-history-server/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | {{- if .Values.ingress.enabled }} 3 | {{- range $host := .Values.ingress.hosts }} 4 | {{- range .paths }} 5 | http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ . }} 6 | {{- end }} 7 | {{- end }} 8 | {{- else if contains "NodePort" .Values.service.type }} 9 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "spark-history-server.fullname" . }}) 10 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 11 | echo http://$NODE_IP:$NODE_PORT 12 | {{- else if contains "LoadBalancer" .Values.service.type }} 13 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 14 | You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "spark-history-server.fullname" . }}' 15 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "spark-history-server.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16 |   echo http://$SERVICE_IP:{{ .Values.service.externalPort }}
17 | {{- else if contains "ClusterIP" .Values.service.type }}
18 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "spark-history-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19 |   echo "Visit http://127.0.0.1:8080 to use your application"
20 |   kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:{{ .Values.service.internalPort }}
21 | {{- end }}
22 | 
--------------------------------------------------------------------------------
/stable/spark-history-server/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "spark-history-server.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 | 
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "spark-history-server.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 | 
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "spark-history-server.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 | 
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "spark-history-server.labels" -}}
37 | helm.sh/chart: {{ include "spark-history-server.chart" . }}
38 | {{ include "spark-history-server.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 | 
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "spark-history-server.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "spark-history-server.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 | 
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "spark-history-server.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "spark-history-server.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 | 
64 | {{/*
65 | Return the appropriate apiVersion for deployment.
66 | */}}
67 | {{- define "spark-history-server.deployment.apiVersion" -}}
68 | {{- print "apps/v1" -}}
69 | {{- end -}}
--------------------------------------------------------------------------------
/stable/spark-history-server/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | kind: ConfigMap
2 | apiVersion: v1
3 | metadata:
4 |   labels:
5 |     {{- include "spark-history-server.labels" . 
| nindent 4 }} 6 | name: {{ template "spark-history-server.fullname" . }} 7 | data: 8 | spark-defaults.conf: 9 | {{- toYaml .Values.sparkConf | nindent 4 }} 10 | 11 | log4j.properties: 12 | {{- toYaml .Values.log4jConfig | nindent 4 }} 13 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: {{ template "spark-history-server.deployment.apiVersion" . }} 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "spark-history-server.fullname" . }} 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | strategy: 10 | type: RollingUpdate 11 | rollingUpdate: 12 | maxUnavailable: 50% 13 | maxSurge: 1 14 | selector: 15 | matchLabels: 16 | {{- include "spark-history-server.selectorLabels" . | nindent 6 }} 17 | template: 18 | metadata: 19 | {{- with .Values.podAnnotations }} 20 | annotations: 21 | {{- toYaml . | nindent 8 }} 22 | {{- end }} 23 | labels: 24 | {{- include "spark-history-server.selectorLabels" . | nindent 8 }} 25 | {{- with .Values.podLabels }} 26 | {{- toYaml . | nindent 8 }} 27 | {{- end }} 28 | spec: 29 | {{- with .Values.imagePullSecrets }} 30 | imagePullSecrets: 31 | {{- toYaml . | nindent 8 }} 32 | {{- end }} 33 | serviceAccountName: {{ include "spark-history-server.serviceAccountName" . }} 34 | securityContext: 35 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 36 | volumes: 37 | - name: config-volume 38 | configMap: 39 | name: {{ template "spark-history-server.fullname" . }} 40 | containers: 41 | - name: {{ .Chart.Name }} 42 | securityContext: 43 | {{- toYaml .Values.securityContext | nindent 12 }} 44 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 45 | imagePullPolicy: {{ .Values.image.pullPolicy }} 46 | command: # Launches Spark history server from Docker container 47 | - '/opt/spark/sbin/start-history-server.sh' 48 | env: 49 | - name: SPARK_NO_DAEMONIZE 50 | value: "false" 51 | - name: SPARK_HISTORY_OPTS 52 | value: {{ .Values.sparkHistoryOpts }} 53 | - name: SPARK_CONF_DIR 54 | value: /opt/spark/conf 55 | volumeMounts: 56 | - name: config-volume 57 | mountPath: /opt/spark/conf/spark-defaults.conf 58 | subPath: spark-defaults.conf 59 | - name: config-volume 60 | mountPath: /opt/spark/conf/log4j.properties 61 | subPath: log4j.properties 62 | ports: 63 | - name: http 64 | containerPort: {{ .Values.service.internalPort }} 65 | protocol: TCP 66 | terminationMessagePath: /dev/termination-log 67 | terminationMessagePolicy: File 68 | livenessProbe: 69 | {{- toYaml .Values.livenessProbe | nindent 12 }} 70 | readinessProbe: 71 | {{- toYaml .Values.readinessProbe | nindent 12 }} 72 | resources: 73 | {{- toYaml .Values.resources | nindent 12 }} 74 | {{- with .Values.nodeSelector }} 75 | nodeSelector: 76 | {{- toYaml . | nindent 8 }} 77 | {{- end }} 78 | {{- with .Values.affinity }} 79 | affinity: 80 | {{- toYaml . | nindent 8 }} 81 | {{- end }} 82 | {{- with .Values.tolerations }} 83 | tolerations: 84 | {{- toYaml . 
| nindent 8 }} 85 | {{- end }} 86 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $serviceExternalPort := .Values.service.externalPort -}} 3 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} 4 | apiVersion: networking.k8s.io/v1 5 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} 6 | apiVersion: networking.k8s.io/v1beta1 7 | {{- else -}} 8 | apiVersion: extensions/v1beta1 9 | {{- end }} 10 | kind: Ingress 11 | metadata: 12 | name: {{ template "spark-history-server.fullname" . }} 13 | labels: 14 | {{- include "spark-history-server.labels" . | nindent 4 }} 15 | {{- with .Values.ingress.annotations }} 16 | annotations: 17 | {{- toYaml . | nindent 4 }} 18 | {{- end }} 19 | spec: 20 | ingressClassName: {{ .Values.ingress.ingressClassName }} 21 | {{- if .Values.ingress.tls }} 22 | tls: 23 | {{- range .Values.ingress.tls }} 24 | - hosts: 25 | {{- range .hosts }} 26 | - {{ . | quote }} 27 | {{- end }} 28 | secretName: {{ .secretName }} 29 | {{- end }} 30 | {{- end }} 31 | rules: 32 | {{- range .Values.ingress.hosts }} 33 | - host: {{ .host | quote }} 34 | http: 35 | paths: 36 | {{- range .paths }} 37 | - path: {{ . }} 38 | pathType: Prefix 39 | backend: 40 | service: 41 | name: {{ include "spark-history-server.fullname" $ }} 42 | port: 43 | number: {{ $serviceExternalPort }} 44 | {{- end }} 45 | {{- end }} 46 | {{- end }} 47 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "spark-history-server.fullname" . }} 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.externalPort }} 11 | targetPort: {{ .Values.service.internalPort }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "spark-history-server.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "spark-history-server.serviceAccountName" . }} 6 | labels: 7 | {{- include "spark-history-server.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- end }} 13 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/tests/test-connection.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: "{{ include "spark-history-server.fullname" . }}-test-connection" 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | annotations: 8 | "helm.sh/hook": test-success 9 | spec: 10 | containers: 11 | - name: wget 12 | image: busybox 13 | command: ['wget'] 14 | args: ['{{ include "spark-history-server.fullname" . 
}}:{{ .Values.service.externalPort }}']
15 |   restartPolicy: Never
16 | 
--------------------------------------------------------------------------------
/stable/spark-history-server/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for spark-history-server.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 | 
5 | replicaCount: 1
6 | 
7 | image:
8 |   repository: ghcr.io/kubedai/spark-history-server
9 |   pullPolicy: Always
10 |   # Overrides the image tag whose default is the chart appVersion.
11 |   tag: latest
12 | 
13 | imagePullSecrets: []
14 | nameOverride: ""
15 | fullnameOverride: ""
16 | 
17 | serviceAccount:
18 |   create: true
19 |   name: spark-history-server-sa
20 |   # annotations:
21 |   #   # IRSA role attached to service account
22 |   #   eks.amazonaws.com/role-arn: <IRSA_IAM_ROLE_ARN>
23 | 
24 | 
25 | # Enter the S3 bucket with the Spark Event logs location.
26 | # Ensure the IRSA role has permissions to read the files in the given S3 bucket
27 | # sparkHistoryOpts: "-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
28 | sparkHistoryOpts: ""
29 | sparkConf: |-
30 |   spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.WebIdentityTokenCredentialsProvider
31 |   spark.history.fs.eventLog.rolling.maxFilesToRetain=5
32 |   spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
33 |   spark.eventLog.enabled=true
34 |   spark.history.ui.port=18080
35 | 
36 | log4jConfig: |-
37 |   log4j.rootCategory=INFO, console
38 |   log4j.appender.console=org.apache.log4j.ConsoleAppender
39 |   log4j.appender.console.target=System.out
40 |   log4j.appender.console.layout=org.apache.log4j.PatternLayout
41 |   log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
42 |   log4j.logger.org.apache.spark=INFO
43 |   log4j.logger.org.apache.hadoop=INFO
44 |   log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
45 |   log4j.logger.org.apache.spark.deploy.history.FsHistoryProvider=DEBUG
46 | 
47 | podAnnotations: {}
48 | 
49 | # Extra custom labels for pods
50 | podLabels: {}
51 | 
52 | podSecurityContext:
53 |   runAsUser: 1000
54 |   fsGroup: 1000
55 | 
56 | securityContext:
57 |   capabilities:
58 |     drop:
59 |       - ALL
60 |   readOnlyRootFilesystem: true
61 |   runAsNonRoot: true
62 |   runAsUser: 1000
63 | 
64 | service:
65 |   externalPort: 80
66 |   internalPort: 18080
67 |   type: ClusterIP
68 | 
69 | ingress:
70 |   enabled: false
71 |   annotations: {}
72 |   ## Set below values when ingress is enabled
73 |   # ingressClassName: <INGRESS_CLASS_NAME>
74 |   # hosts:
75 |   #   - host: <HOST_NAME>
76 |   #     paths:
77 |   #       - <PATH>
78 | 
79 | resources:
80 |   limits:
81 |     cpu: 200m
82 |     memory: 2G
83 |   requests:
84 |     cpu: 100m
85 |     memory: 1G
86 | 
87 | livenessProbe:
88 |   httpGet:
89 |     path: /
90 |     port: 18080
91 |     scheme: HTTP
92 |   timeoutSeconds: 5
93 |   periodSeconds: 30
94 |   successThreshold: 1
95 |   failureThreshold: 3
96 | 
97 | readinessProbe:
98 |   httpGet:
99 |     path: /
100 |     port: 18080
101 |     scheme: HTTP
102 |   timeoutSeconds: 5
103 |   periodSeconds: 30
104 |   successThreshold: 1
105 |   failureThreshold: 3
106 | 
107 | nodeSelector: {}
108 | 
109 | tolerations: []
110 | 
111 | affinity: {}
--------------------------------------------------------------------------------
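A quick usage sketch tying the defaults above together — installing the chart with the S3 event-log location supplied inline. The bucket and prefix are placeholders; everything else mirrors the chart's published values:

```bash
# Install (or upgrade) the chart, overriding the event-log location inline.
helm upgrade --install spark-history-server kubedai/spark-history-server \
  --namespace spark-history-server \
  --create-namespace \
  --set sparkHistoryOpts="-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
```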