├── .github
│   └── workflows
│       ├── docker-publish.yaml
│       ├── lint.yaml
│       └── release.yaml
├── .gitignore
├── LICENSE
├── README.md
├── docker
│   ├── Dockerfile
│   ├── README.md
│   ├── launch_spark_history_server_locally.sh
│   └── pom.xml
├── images
│   ├── spark-webui-executors.png
│   └── spark-webui-home.png
└── stable
    └── spark-history-server
        ├── .helmignore
        ├── Chart.yaml
        ├── templates
        │   ├── NOTES.txt
        │   ├── _helpers.tpl
        │   ├── configmap.yaml
        │   ├── deployment.yaml
        │   ├── ingress.yaml
        │   ├── service.yaml
        │   ├── serviceaccount.yaml
        │   └── tests
        │       └── test-connection.yaml
        └── values.yaml

--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yaml:
--------------------------------------------------------------------------------
1 | name: Build and Push Multi-Arch Docker Image to GHCR
2 | 
3 | on:
4 |   push:
5 |     branches: [main]
6 |     paths:
7 |       - 'stable/spark-history-server/Chart.yaml'
8 |       - 'docker/Dockerfile'
9 |       - '**/*.sh'
10 |       - '**/*.yaml'
11 |   workflow_dispatch:
12 |     inputs:
13 |       override_version:
14 |         description: 'Optional override for appVersion (e.g. 1.3.2)'
15 |         required: false
16 | 
17 | env:
18 |   IMAGE_NAME: spark-history-server
19 | 
20 | jobs:
21 |   build-and-push:
22 |     runs-on: ubuntu-latest
23 | 
24 |     permissions:
25 |       contents: read
26 |       packages: write
27 |       id-token: write
28 | 
29 |     steps:
30 |       - name: Checkout code
31 |         uses: actions/checkout@v3
32 | 
33 |       - name: Set up Docker Buildx
34 |         uses: docker/setup-buildx-action@v3
35 | 
36 |       - name: Log in to GitHub Container Registry (GHCR)
37 |         uses: docker/login-action@v3
38 |         with:
39 |           registry: ghcr.io
40 |           username: ${{ github.actor }}
41 |           password: ${{ secrets.GITHUB_TOKEN }}
42 | 
43 |       - name: Set image namespace (lowercase)
44 |         id: repo
45 |         run: |
46 |           owner_lower=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
47 |           echo "owner=${owner_lower}" >> $GITHUB_OUTPUT
48 | 
49 |       - name: Determine image version (manual or Chart.yaml)
50 |         id: extract_version
51 |         run: |
52 |           if [ -n "${{ github.event.inputs.override_version }}" ]; then
53 |             VERSION="${{ github.event.inputs.override_version }}"
54 |           else
55 |             VERSION=$(grep -E '^appVersion:[[:space:]]*' stable/spark-history-server/Chart.yaml | awk '{print $2}')
56 |           fi
57 |           if [ -z "$VERSION" ]; then
58 |             echo "❌ ERROR: Version could not be determined"
59 |             exit 1
60 |           fi
61 |           echo "✅ Using version: $VERSION"
62 |           echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
63 | 
64 |       - name: Check if image tag exists in GHCR
65 |         id: tagcheck
66 |         env:
67 |           GHCR_USER: ${{ steps.repo.outputs.owner }}
68 |           IMAGE: ${{ env.IMAGE_NAME }}
69 |           TAG: ${{ steps.extract_version.outputs.VERSION }}
70 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
71 |         run: |
72 |           echo "🔍 Checking for ghcr.io/${GHCR_USER}/${IMAGE}:${TAG}"
73 |           STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
74 |             -H "Authorization: Bearer ${GH_TOKEN}" \
75 |             -H "Accept: application/vnd.oci.image.manifest.v1+json" \
76 |             https://ghcr.io/v2/${GHCR_USER}/${IMAGE}/manifests/${TAG})
77 | 
78 |           echo "GHCR tag check returned HTTP status: $STATUS"
79 |           if [ "$STATUS" = "200" ]; then
80 |             echo "Image already exists. Skipping build."
81 |             echo "skip_build=true" >> $GITHUB_OUTPUT
82 |           else
83 |             echo "Image not found. Proceeding with build."
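            # Any non-200 status (typically 404) simply means the tag has not been published yet, so fall through and build.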
84 | echo "skip_build=false" >> $GITHUB_OUTPUT 85 | fi 86 | 87 | - name: Build and push multi-arch image to GHCR 88 | if: steps.tagcheck.outputs.skip_build == 'false' 89 | uses: docker/build-push-action@v5 90 | with: 91 | context: ./docker 92 | push: true 93 | platforms: linux/amd64,linux/arm64 94 | tags: | 95 | ghcr.io/${{ steps.repo.outputs.owner }}/${{ env.IMAGE_NAME }}:${{ steps.extract_version.outputs.VERSION }} 96 | ghcr.io/${{ steps.repo.outputs.owner }}/${{ env.IMAGE_NAME }}:latest 97 | labels: | 98 | org.opencontainers.image.source=https://github.com/${{ github.repository }} 99 | org.opencontainers.image.version=${{ steps.extract_version.outputs.VERSION }} 100 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint Charts 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'stable/**' 7 | 8 | workflow_dispatch: 9 | 10 | jobs: 11 | lint-chart: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v1 16 | - name: Run chart-testing (lint) 17 | uses: helm/chart-testing-action@main 18 | with: 19 | command: lint 20 | config: .github/ct.yaml 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'stable/**' 9 | release: 10 | types: [published, created] 11 | 12 | jobs: 13 | release: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | - name: Publish Helm charts 19 | uses: stefanprodan/helm-gh-pages@v1.4.1 20 | with: 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | charts_dir: stable -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 💥 Spark History Server (Spark Web UI) 💥
2 | 
3 | Spark History Server is a web user interface for monitoring the metrics and performance of [Apache Spark](https://spark.apache.org/) jobs.
4 | 
5 | ## 🚀 Features
6 | 
7 | - Helm Chart bootstraps Spark History Server in [Amazon EKS](https://aws.amazon.com/eks/) or any [Kubernetes](https://kubernetes.io/) cluster
8 | - Configured to read [Spark Event Logs](https://spark.apache.org/docs/latest/monitoring.html#applying-compaction-on-rolling-event-log-files) from [Amazon S3](https://aws.amazon.com/s3/) buckets
9 | - Uses [IRSA (IAM Roles for Service Accounts)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) for secure S3 access
10 | - Multi-architecture support (amd64, arm64)
11 | - Supports both versioned and latest tags
12 | - [Local Docker](https://github.com/kubedai/spark-history-server/tree/main/docker) deployment option available
13 | 
14 | ## 📋 Prerequisites
15 | 
16 | - :white_check_mark: Kubernetes 1.19+
17 | - :white_check_mark: [Helm 3+](https://helm.sh/docs/intro/install/)
18 | - :white_check_mark: [AWS CLI](https://aws.amazon.com/cli/) configured with appropriate credentials
19 | - :white_check_mark: [eksctl](https://docs.aws.amazon.com/eks/latest/userguide/eksctl.html) (for EKS clusters)
20 | 
21 | ## 🔧 Installation
22 | 
23 | ### 1. Create IRSA (IAM Role for Service Account)
24 | 
25 | Run the following command to create AWS IRSA:
26 | 
27 | ```bash
28 | eksctl create iamserviceaccount \
29 |   --cluster=<CLUSTER_NAME> \
30 |   --name=spark-history-server \
31 |   --namespace=spark-history-server \
32 |   --attach-policy-arn=<IAM_POLICY_ARN>
33 | ```
34 | 
35 | **Example:**
36 | ```bash
37 | eksctl create iamserviceaccount \
38 |   --cluster=eks-demo-cluster \
39 |   --name=spark-history-server \
40 |   --namespace=spark-history-server \
41 |   --attach-policy-arn=arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
42 | ```
43 | 
44 | ### 2. Configure values.yaml
45 | 
46 | Update the following in your `values.yaml`:
47 | 
48 | ```yaml
49 | serviceAccount:
50 |   create: false
51 |   annotations:
52 |     eks.amazonaws.com/role-arn: "<IRSA_IAM_ROLE_ARN>"
53 |   name: "spark-history-server"
54 | 
55 | sparkHistoryOpts: "-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
56 | ```
57 | 
58 | ### 3. Install the Chart
59 | 
60 | ```bash
61 | # Add the Helm repository
62 | helm repo add kubedai https://kubedai.github.io/spark-history-server
63 | helm repo update
64 | 
65 | # Install the chart
66 | helm install spark-history-server kubedai/spark-history-server \
67 |   --namespace spark-history-server \
68 |   --create-namespace \
69 |   -f values.yaml
70 | ```
71 | 
72 | ## 🔍 Accessing Spark WebUI
73 | 
74 | ### Option 1: Using Port Forward
75 | 
76 | ```bash
77 | kubectl port-forward services/spark-history-server 18085:80 -n spark-history-server
78 | ```
79 | 
80 | Then access the UI at `http://localhost:18085/`
81 | 
82 | ### Option 2: Using Ingress (if enabled)
83 | 
84 | Configure ingress in `values.yaml`:
85 | 
86 | ```yaml
87 | ingress:
88 |   enabled: true
89 |   ingressClassName: nginx # or your preferred ingress class
90 |   hosts:
91 |     - host: spark-history.example.com
92 |       paths:
93 |         - /
94 | ```
95 | 
96 | ## 📸 UI Screenshots
97 | 
98 | ### Home Page
99 | 
100 | ![Spark Web UI Homepage](images/spark-webui-home.png)
101 | 
102 | 
103 | ### Executors Page
104 | 
105 | ![Spark Web UI Executors page](images/spark-webui-executors.png)
106 | 
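## ✍️ Writing Event Logs from Spark Jobs

The history server only displays what your jobs record, so each Spark application must write its event logs to the same S3 location the chart reads from. Below is a minimal sketch of enabling this at submit time — the bucket, prefix, and application JAR are placeholders, not values from this repository:

```bash
spark-submit \
  --conf spark.eventLog.enabled=true \
  --conf spark.eventLog.dir=s3a://<BUCKET_NAME>/<PREFIX>/ \
  --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
  <YOUR_APPLICATION>.jar
```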
107 | 108 | ## 🔄 Upgrading 109 | 110 | ```bash 111 | helm upgrade spark-history-server kubedai/spark-history-server \ 112 | --namespace spark-history-server 113 | ``` 114 | 115 | ## 🗑️ Uninstalling 116 | 117 | ```bash 118 | helm uninstall spark-history-server --namespace spark-history-server 119 | ``` 120 | 121 | ## 🧱 Contributing 122 | 123 | To update the Docker image version published to **GitHub Container Registry (GHCR)**: 124 | 125 | 1. **Fork this repository** 126 | 2. **Bump the `appVersion:`** field in `stable/spark-history-server/Chart.yaml` 127 | 3. **Raise a Pull Request (PR)** targeting the `main` branch 128 | 129 | Once merged, GitHub Actions will automatically: 130 | - Build multi-architecture Docker image (`linux/amd64`, `linux/arm64`) 131 | - Push to GHCR: [`ghcr.io/kubedai/spark-history-server`](https://github.com/kubedai/spark-history-server/pkgs/container/spark-history-server) 132 | - Tag with both version and `latest` 133 | 134 | You can also manually trigger the workflow from GitHub Actions with an optional version override. 135 | 136 | ## ⚙️ Configuration 137 | 138 | Key configuration options in `values.yaml`: 139 | 140 | | Parameter | Description | Default | 141 | |-----------|-------------|---------| 142 | | `image.repository` | Image repository | `ghcr.io/kubedai/spark-history-server` | 143 | | `image.tag` | Image tag | `latest` | 144 | | `serviceAccount.create` | Create service account | `true` | 145 | | `sparkHistoryOpts` | Spark history server options | `""` | 146 | | `resources` | Pod resource requests/limits | See values.yaml | 147 | 148 | ## 🤝 Community 149 | 150 | Give us a star ⭐️ if you find this project useful! 151 | 152 | ## 📝 License 153 | 154 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 155 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image with Maven and Amazon Corretto 8 (Java 8) 2 | # This is suitable for building Spark-compatible Java applications and resolving Maven dependencies 3 | FROM maven:3.6-amazoncorretto-8 4 | 5 | # Define a UID for the non-root user to run Spark History Server securely 6 | ARG spark_uid=1000 7 | 8 | # Set working directory for the build phase 9 | WORKDIR /tmp/ 10 | 11 | # Install essential utilities: 12 | # - procps: for basic process inspection (e.g., `ps`) 13 | # - curl: for downloading Spark binaries 14 | # - tar: for unpacking Spark archive 15 | RUN yum install -y procps curl tar && yum clean all 16 | 17 | # Copy the Maven POM file into the image 18 | # This is used to resolve all Hadoop, AWS SDK, and other dependencies 19 | COPY pom.xml /tmp 20 | 21 | # Download the Apache Spark binary (without Hadoop) to allow for custom Hadoop integrations 22 | # Unpack Spark into /opt/spark, which is the conventional install path 23 | RUN curl -o ./spark-3.5.1-bin-without-hadoop.tgz https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-without-hadoop.tgz && \ 24 | tar -xzf spark-3.5.1-bin-without-hadoop.tgz && \ 25 | mv spark-3.5.1-bin-without-hadoop /opt/spark && \ 26 | rm spark-3.5.1-bin-without-hadoop.tgz 27 | 28 | # Use Maven to resolve and copy all runtime dependencies from the pom.xml 29 | # into Spark's JAR directory. This ensures S3/Hadoop/AWS SDK integration. 30 | # Then remove conflicting or outdated jars to prevent runtime classloader issues. 
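# (Several of the jars removed below, e.g. the standalone aws-java-sdk-* artifacts, duplicate classes
#  already packaged inside the aws-java-sdk-bundle dependency resolved from pom.xml.)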
31 | RUN mvn dependency:copy-dependencies -DoutputDirectory=/opt/spark/jars/ && \ 32 | rm -f /opt/spark/jars/jsr305-3.0.0.jar && \ 33 | rm -f /opt/spark/jars/jersey-*-1.19.jar && \ 34 | rm -f /opt/spark/jars/joda-time-2.8.1.jar && \ 35 | rm -f /opt/spark/jars/jmespath-java-*.jar && \ 36 | rm -f /opt/spark/jars/aws-java-sdk-core-*.jar && \ 37 | rm -f /opt/spark/jars/aws-java-sdk-kms-*.jar && \ 38 | rm -f /opt/spark/jars/aws-java-sdk-s3-*.jar && \ 39 | rm -f /opt/spark/jars/ion-java-1.0.2.jar 40 | 41 | # Create Spark logs directory and assign ownership to the non-root user 42 | # Also inject the user into /etc/passwd (required by some JVM tools and shells) 43 | RUN mkdir -p /opt/spark/logs && \ 44 | chown -R ${spark_uid}:${spark_uid} /opt/spark && \ 45 | echo "${spark_uid}:x:${spark_uid}:${spark_uid}:anonymous uid:/opt/spark:/bin/false" >> /etc/passwd 46 | 47 | # Switch to non-root user for security best practices 48 | USER ${spark_uid} 49 | 50 | # Set working directory to Spark home 51 | WORKDIR /opt/spark 52 | 53 | # Use bash as the entrypoint to allow Helm, K8s, or CI to pass runtime commands via CMD 54 | # For example: 55 | # CMD ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"] 56 | ENTRYPOINT ["/bin/bash", "-c"] -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # 🐳 Spark History Server Docker Image 2 | 3 | This guide explains how to build and run the Spark History Server using Docker. 4 | 5 | ## 📋 Prerequisites 6 | 7 | - Git 8 | - Docker client 9 | - AWS credentials configured (if using S3) 10 | 11 | ## 🔧 Building the Docker Image 12 | 13 | ### 1. Clone the Repository 14 | 15 | ```bash 16 | git clone https://github.com/kubedai/spark-history-server.git 17 | cd spark-history-server/docker 18 | ``` 19 | 20 | ### 2. Build the Image 21 | 22 | Build the Docker image using the provided Dockerfile: 23 | 24 | ```bash 25 | docker build -t spark-history-server:latest . 26 | ``` 27 | 28 | > Note: You can replace `spark-history-server:latest` with your preferred image name and tag. 29 | 30 | ### 3. 
Push to Registry (Optional)
31 | 
32 | If you want to push the image to a container registry:
33 | 
34 | ```bash
35 | # Tag the image for your registry
36 | docker tag spark-history-server:latest <YOUR_REGISTRY>/spark-history-server:latest
37 | 
38 | # Push to registry
39 | docker push <YOUR_REGISTRY>/spark-history-server:latest
40 | ```
41 | 
42 | ## 🚀 Running Locally
43 | 
44 | ### Using the Helper Script
45 | 
46 | The repository includes a helper script to run the Spark History Server locally:
47 | 
48 | ```bash
49 | # Show help
50 | ./launch_spark_history_server_locally.sh help
51 | 
52 | # Run with default settings
53 | ./launch_spark_history_server_locally.sh
54 | ```
55 | 
56 | ### Manual Run
57 | 
58 | You can also run the container manually. Because the image's entrypoint is `/bin/bash -c`, pass the history server launch command explicitly:
59 | 
60 | ```bash
61 | docker run -d \
62 |   --name spark-history-server \
63 |   -p 18080:18080 \
64 |   -e SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=s3a://your-bucket/your-prefix/" \
65 |   spark-history-server:latest \
66 |   "/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"
67 | ```
68 | 
69 | ## 🔍 Accessing the UI
70 | 
71 | Once running, access the Spark History Server UI at:
72 | - http://localhost:18080
73 | 
74 | ## ⚙️ Configuration
75 | 
76 | Key environment variables:
77 | - `SPARK_HISTORY_OPTS`: Spark History Server configuration options
78 | - `SPARK_CONF`: Additional Spark configuration properties
79 | 
80 | Example with custom configuration:
81 | 
82 | ```bash
83 | docker run -d \
84 |   --name spark-history-server \
85 |   -p 18080:18080 \
86 |   -e SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=s3a://your-bucket/your-prefix/" \
87 |   -e SPARK_CONF="spark.history.ui.port=18080" \
88 |   spark-history-server:latest \
89 |   "/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"
90 | ```
91 | 
92 | ## 🔄 Updating the Image
93 | 
94 | To update to a newer version:
95 | 
96 | ```bash
97 | # Pull the latest changes
98 | git pull
99 | 
100 | # Rebuild the image
101 | docker build -t spark-history-server:latest .
102 | ```
103 | 
104 | ## 🧹 Cleanup
105 | 
106 | To remove the container and image:
107 | 
108 | ```bash
109 | # Stop and remove container
110 | docker stop spark-history-server
111 | docker rm spark-history-server
112 | 
113 | # Remove image
114 | docker rmi spark-history-server:latest
115 | ```
116 | 
--------------------------------------------------------------------------------
/docker/launch_spark_history_server_locally.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | 
4 | print_help(){
5 |   # Display Help
6 |   echo "A helper script to start/stop your local Spark History Server"
7 |   echo
8 |   echo "Syntax: sh launch_spark_history_server_locally.sh {start|stop|restart|status|help} {options}"
9 |   echo
10 |   echo "actions:"
11 |   echo "start      start your local Spark History Server at 'localhost:18080'"
12 |   echo "stop       stop the running Spark History Server"
13 |   echo "restart    restart the Spark History Server"
14 |   echo "status     print the current running container details"
15 |   echo "help       print this message"
16 |   echo
17 |   echo "start/restart options"
18 |   echo "-sb or --S3_BUCKET             S3 bucket name where you have your Spark EventLogs. eg: my-bucket in 's3://my-bucket/events/'. REQUIRED with 'start/restart'"
19 |   echo "-sp or --S3_PREFIX             S3 bucket prefix where you have your Spark EventLogs. eg: 'spark/history/events' in 's3://my-bucket/spark/history/events/'. REQUIRED with 'start/restart'"
20 |   echo "-r or --AWS_REGION             Your AWS Region where the S3 bucket is in. Default: us-east-1"
21 |   echo "-cn or --CONTAINER_NAME        Your Custom Container Name. 
Default: spark-history-server" 22 | echo "-du or --DOCKER_USER The local user used to build/publish the docker image. Default: $USER, the current logged in user" 23 | echo "-ak or --AWS_ACCESS_KEY_ID AWS access key id for authentication, optional: you may export ENV variables for the same" 24 | echo "-as or --AWS_SECRET_ACCESS_KEY AWS secret access key for authentication, optional: you may export ENV variables for the same" 25 | echo "-at or --AWS_SESSION_TOKEN AWS session token for authentication, optional: you may export ENV variables for the same" 26 | echo 27 | echo "eg:" 28 | echo "sh launch_spark_history_server_locally.sh start -sb my-bucket -sp spark/history/events" 29 | echo "sh launch_spark_history_server_locally.sh stop" 30 | echo "sh launch_spark_history_server_locally.sh restart -sb my-bucket -sp spark/history/events" 31 | echo "sh launch_spark_history_server_locally.sh status" 32 | echo "sh launch_spark_history_server_locally.sh help" 33 | echo 34 | } 35 | 36 | do_start(){ 37 | S3_BUCKET=$1 38 | S3_BUCKET_PREFIX=$2 39 | LOCAL_USER=$3 40 | 41 | LOG_DIR="s3a://$S3_BUCKET/$S3_BUCKET_PREFIX" 42 | DOCKER_IMAGE="$LOCAL_USER/spark-web-ui:latest" 43 | 44 | docker run -itd --name $CONTAINER_NAME -e SPARK_DAEMON_MEMORY="2g" -e SPARK_DAEMON_JAVA_OPTS="-XX:+UseG1GC" -e SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS \ 45 | -Dspark.history.fs.logDirectory=$LOG_DIR \ 46 | -Dspark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID \ 47 | -Dspark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY \ 48 | -Dspark.hadoop.fs.s3a.session.token=$AWS_SESSION_TOKEN \ 49 | -Dspark.hadoop.fs.s3a.endpoint=$ENDPOINT \ 50 | -Dspark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider" -p 18080:18080 $DOCKER_IMAGE "/opt/spark/bin/spark-class $CLASS" 51 | sleep 10 52 | echo "Spark History Server running @ http://localhost:18080 " 53 | echo 54 | } 55 | 56 | do_stop(){ 57 | set +e 58 | docker stop $CONTAINER_NAME 59 | docker rm $CONTAINER_NAME 60 | set -e 61 | sleep 10 62 | } 63 | 64 | do_status(){ 65 | docker ps --filter "name=$CONTAINER_NAME" 66 | } 67 | 68 | # ENTER AWS CREDENTIALS TO RUN THE DOCKER IMAGE LOCALLY 69 | while [ $# -gt 0 ]; do 70 | case "$1" in 71 | start|stop|restart|status|help) 72 | ACTION="$1" 73 | ;; 74 | -sb|--S3_BUCKET) 75 | S3_BUCKET="$2" 76 | shift 77 | ;; 78 | -sp|--S3_PREFIX) 79 | S3_BUCKET_PREFIX="$2" 80 | shift 81 | ;; 82 | -r|--AWS_REGION) 83 | AWS_REGION="$2" 84 | shift 85 | ;; 86 | -cn|--CONTAINER_NAME) 87 | CONTAINER_NAME="$2" 88 | shift 89 | ;; 90 | -du|--DOCKER_USER) 91 | DOCKER_USER="$2" 92 | shift 93 | ;; 94 | -ak|--AWS_ACCESS_KEY_ID) 95 | AWS_ACCESS_KEY_ID="$2" 96 | shift 97 | ;; 98 | -as|--AWS_SECRET_ACCESS_KEY) 99 | AWS_SECRET_ACCESS_KEY="$2" 100 | shift 101 | ;; 102 | -at|--AWS_SESSION_TOKEN) 103 | AWS_SESSION_TOKEN="$2" 104 | shift 105 | ;; 106 | *) 107 | printf "* Error: Invalid argument.*\n" 108 | print_help 109 | exit 1 110 | esac 111 | shift 112 | done 113 | 114 | if [ -z "$AWS_REGION" ]; then AWS_REGION="us-east-1" ; fi 115 | if [ -z "$CONTAINER_NAME" ]; then CONTAINER_NAME="spark-history-server" ; fi 116 | if [ -z "$DOCKER_USER" ]; then DOCKER_USER="$USER" ; fi 117 | 118 | CLASS="org.apache.spark.deploy.history.HistoryServer" 119 | ENDPOINT="s3.$AWS_REGION.amazonaws.com" 120 | 121 | if [ -n "$AWS_ACCESS_KEY_ID" ]; then export AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" ; fi 122 | if [ -n "$AWS_SECRET_ACCESS_KEY" ]; then export AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" ; fi 123 | if [ -n "$AWS_SESSION_TOKEN" ]; then export 
AWS_SESSION_TOKEN="$AWS_SESSION_TOKEN" ; fi
124 | 
125 | #echo "Submitted args: $NAME $ACTION $S3_BUCKET $S3_BUCKET_PREFIX $AWS_REGION $CONTAINER_NAME $DOCKER_USER $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY $AWS_SESSION_TOKEN"
126 | 
127 | case $ACTION in
128 |   status)
129 |     echo "Print status: "
130 |     do_status
131 |     ;;
132 |   start)
133 |     echo "Starting Spark History Server: "
134 |     do_start $S3_BUCKET $S3_BUCKET_PREFIX $DOCKER_USER
135 |     do_status
136 |     ;;
137 |   stop)
138 |     echo "Stopping Spark History Server: "
139 |     do_stop
140 |     ;;
141 |   restart)
142 |     echo "Restarting Spark History Server: "
143 |     do_stop
144 |     do_start $S3_BUCKET $S3_BUCKET_PREFIX $DOCKER_USER
145 |     do_status
146 |     ;;
147 |   *)
148 |     print_help
149 |     exit 1
150 | esac
151 | 
152 | exit 0
--------------------------------------------------------------------------------
/docker/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |   <groupId>com.amazonaws</groupId>
7 |   <artifactId>SparkHistoryServerForEKS</artifactId>
8 |   <version>1.0-SNAPSHOT</version>
9 |   <packaging>jar</packaging>
10 | 
11 |   <properties>
12 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
13 |     <java.version>1.8</java.version>
14 |     <hadoop.version>3.4.1</hadoop.version>
15 |     <awssdk.version>1.12.783</awssdk.version>
16 |     <httpclient.version>4.5.13</httpclient.version>
17 |     <jackson.version>2.10.5</jackson.version>
18 |     <jackson.databind.version>2.12.7.1</jackson.databind.version>
19 |   </properties>
20 | 
21 |   <dependencyManagement>
22 |     <dependencies>
23 |       <dependency>
24 |         <groupId>com.amazonaws</groupId>
25 |         <artifactId>aws-java-sdk-bom</artifactId>
26 |         <version>${awssdk.version}</version>
27 |         <type>pom</type>
28 |         <scope>import</scope>
29 |       </dependency>
30 |     </dependencies>
31 |   </dependencyManagement>
32 | 
33 |   <dependencies>
34 |     <dependency>
35 |       <groupId>org.apache.hadoop</groupId>
36 |       <artifactId>hadoop-common</artifactId>
37 |       <version>${hadoop.version}</version>
38 |       <scope>provided</scope>
39 |     </dependency>
40 | 
41 |     <dependency>
42 |       <groupId>org.apache.hadoop</groupId>
43 |       <artifactId>hadoop-client</artifactId>
44 |       <version>${hadoop.version}</version>
45 |       <scope>provided</scope>
46 |     </dependency>
47 | 
48 |     <dependency>
49 |       <groupId>org.apache.hadoop</groupId>
50 |       <artifactId>hadoop-aws</artifactId>
51 |       <version>${hadoop.version}</version>
52 |       <scope>provided</scope>
53 |       <exclusions>
54 |         <exclusion>
55 |           <groupId>com.fasterxml.jackson.core</groupId>
56 |           <artifactId>jackson-core</artifactId>
57 |         </exclusion>
58 |         <exclusion>
59 |           <groupId>com.fasterxml.jackson.core</groupId>
60 |           <artifactId>jackson-databind</artifactId>
61 |         </exclusion>
62 |         <exclusion>
63 |           <groupId>com.fasterxml.jackson.core</groupId>
64 |           <artifactId>jackson-annotations</artifactId>
65 |         </exclusion>
66 |       </exclusions>
67 |     </dependency>
68 | 
69 |     <dependency>
70 |       <groupId>com.amazonaws</groupId>
71 |       <artifactId>aws-java-sdk-bundle</artifactId>
72 |       <version>${awssdk.version}</version>
73 |       <exclusions>
74 |         <exclusion>
75 |           <groupId>com.amazonaws</groupId>
76 |           <artifactId>aws-java-sdk-core</artifactId>
77 |         </exclusion>
78 |         <exclusion>
79 |           <groupId>com.amazonaws</groupId>
80 |           <artifactId>aws-java-sdk-s3</artifactId>
81 |         </exclusion>
82 |       </exclusions>
83 |     </dependency>
84 | 
85 |     <dependency>
86 |       <groupId>org.apache.httpcomponents</groupId>
87 |       <artifactId>httpclient</artifactId>
88 |       <version>${httpclient.version}</version>
89 |     </dependency>
90 | 
91 |     <dependency>
92 |       <groupId>com.fasterxml.jackson.core</groupId>
93 |       <artifactId>jackson-core</artifactId>
94 |       <version>${jackson.version}</version>
95 |       <scope>provided</scope>
96 |     </dependency>
97 | 
98 |     <dependency>
99 |       <groupId>com.fasterxml.jackson.core</groupId>
100 |       <artifactId>jackson-databind</artifactId>
101 |       <version>${jackson.databind.version}</version>
102 |       <scope>provided</scope>
103 |     </dependency>
104 | 
105 |     <dependency>
106 |       <groupId>com.fasterxml.jackson.core</groupId>
107 |       <artifactId>jackson-annotations</artifactId>
108 |       <version>${jackson.version}</version>
109 |       <scope>provided</scope>
110 |     </dependency>
111 |   </dependencies>
112 | </project>
--------------------------------------------------------------------------------
/images/spark-webui-executors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KubedAI/spark-history-server/991838638947f3ec03049e47be458a80cfed13aa/images/spark-webui-executors.png
--------------------------------------------------------------------------------
/images/spark-webui-home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KubedAI/spark-history-server/991838638947f3ec03049e47be458a80cfed13aa/images/spark-webui-home.png
--------------------------------------------------------------------------------
/stable/spark-history-server/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /stable/spark-history-server/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: spark-history-server 3 | description: A Helm chart for deploying Spark History Server in Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 1.3.2 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | appVersion: 1.3.2 24 | 25 | home: https://github.com/kubedai/spark-history-server 26 | sources: 27 | - https://github.com/kubedai/spark-history-server 28 | keywords: 29 | - spark 30 | - kubernetes 31 | - helm 32 | - history-server 33 | - monitoring 34 | - analytics 35 | 36 | maintainers: 37 | - name: vara-bonthu 38 | email: vara.bonthu@gmail.com -------------------------------------------------------------------------------- /stable/spark-history-server/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | {{- if .Values.ingress.enabled }} 3 | {{- range $host := .Values.ingress.hosts }} 4 | {{- range .paths }} 5 | http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ . }} 6 | {{- end }} 7 | {{- end }} 8 | {{- else if contains "NodePort" .Values.service.type }} 9 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "spark-history-server.fullname" . }}) 10 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 11 | echo http://$NODE_IP:$NODE_PORT 12 | {{- else if contains "LoadBalancer" .Values.service.type }} 13 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 14 | You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "spark-history-server.fullname" . }}' 15 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "spark-history-server.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16 |   echo http://$SERVICE_IP:{{ .Values.service.externalPort }}
17 | {{- else if contains "ClusterIP" .Values.service.type }}
18 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "spark-history-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19 |   echo "Visit http://127.0.0.1:8080 to use your application"
20 |   kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:{{ .Values.service.internalPort }}
21 | {{- end }}
22 | 
--------------------------------------------------------------------------------
/stable/spark-history-server/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "spark-history-server.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 | 
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "spark-history-server.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 | 
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "spark-history-server.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 | 
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "spark-history-server.labels" -}}
37 | helm.sh/chart: {{ include "spark-history-server.chart" . }}
38 | {{ include "spark-history-server.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 | 
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "spark-history-server.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "spark-history-server.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 | 
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "spark-history-server.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "spark-history-server.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 | 
64 | {{/*
65 | Return the appropriate apiVersion for deployment.
66 | */}}
67 | {{- define "spark-history-server.deployment.apiVersion" -}}
68 | {{- print "apps/v1" -}}
69 | {{- end -}}
--------------------------------------------------------------------------------
/stable/spark-history-server/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | kind: ConfigMap
2 | apiVersion: v1
3 | metadata:
4 |   labels:
5 |     {{- include "spark-history-server.labels" . 
| nindent 4 }} 6 | name: {{ template "spark-history-server.fullname" . }} 7 | data: 8 | spark-defaults.conf: 9 | {{- toYaml .Values.sparkConf | nindent 4 }} 10 | 11 | log4j.properties: 12 | {{- toYaml .Values.log4jConfig | nindent 4 }} 13 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: {{ template "spark-history-server.deployment.apiVersion" . }} 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "spark-history-server.fullname" . }} 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | strategy: 10 | type: RollingUpdate 11 | rollingUpdate: 12 | maxUnavailable: 50% 13 | maxSurge: 1 14 | selector: 15 | matchLabels: 16 | {{- include "spark-history-server.selectorLabels" . | nindent 6 }} 17 | template: 18 | metadata: 19 | {{- with .Values.podAnnotations }} 20 | annotations: 21 | {{- toYaml . | nindent 8 }} 22 | {{- end }} 23 | labels: 24 | {{- include "spark-history-server.selectorLabels" . | nindent 8 }} 25 | {{- with .Values.podLabels }} 26 | {{- toYaml . | nindent 8 }} 27 | {{- end }} 28 | spec: 29 | {{- with .Values.imagePullSecrets }} 30 | imagePullSecrets: 31 | {{- toYaml . | nindent 8 }} 32 | {{- end }} 33 | serviceAccountName: {{ include "spark-history-server.serviceAccountName" . }} 34 | securityContext: 35 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 36 | volumes: 37 | - name: config-volume 38 | configMap: 39 | name: {{ template "spark-history-server.fullname" . }} 40 | containers: 41 | - name: {{ .Chart.Name }} 42 | securityContext: 43 | {{- toYaml .Values.securityContext | nindent 12 }} 44 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 45 | imagePullPolicy: {{ .Values.image.pullPolicy }} 46 | command: # Launches Spark history server from Docker container 47 | - '/opt/spark/sbin/start-history-server.sh' 48 | env: 49 | - name: SPARK_NO_DAEMONIZE 50 | value: "false" 51 | - name: SPARK_HISTORY_OPTS 52 | value: {{ .Values.sparkHistoryOpts }} 53 | - name: SPARK_CONF_DIR 54 | value: /opt/spark/conf 55 | volumeMounts: 56 | - name: config-volume 57 | mountPath: /opt/spark/conf/spark-defaults.conf 58 | subPath: spark-defaults.conf 59 | - name: config-volume 60 | mountPath: /opt/spark/conf/log4j.properties 61 | subPath: log4j.properties 62 | ports: 63 | - name: http 64 | containerPort: {{ .Values.service.internalPort }} 65 | protocol: TCP 66 | terminationMessagePath: /dev/termination-log 67 | terminationMessagePolicy: File 68 | livenessProbe: 69 | {{- toYaml .Values.livenessProbe | nindent 12 }} 70 | readinessProbe: 71 | {{- toYaml .Values.readinessProbe | nindent 12 }} 72 | resources: 73 | {{- toYaml .Values.resources | nindent 12 }} 74 | {{- with .Values.nodeSelector }} 75 | nodeSelector: 76 | {{- toYaml . | nindent 8 }} 77 | {{- end }} 78 | {{- with .Values.affinity }} 79 | affinity: 80 | {{- toYaml . | nindent 8 }} 81 | {{- end }} 82 | {{- with .Values.tolerations }} 83 | tolerations: 84 | {{- toYaml . 
| nindent 8 }} 85 | {{- end }} 86 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $serviceExternalPort := .Values.service.externalPort -}} 3 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} 4 | apiVersion: networking.k8s.io/v1 5 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} 6 | apiVersion: networking.k8s.io/v1beta1 7 | {{- else -}} 8 | apiVersion: extensions/v1beta1 9 | {{- end }} 10 | kind: Ingress 11 | metadata: 12 | name: {{ template "spark-history-server.fullname" . }} 13 | labels: 14 | {{- include "spark-history-server.labels" . | nindent 4 }} 15 | {{- with .Values.ingress.annotations }} 16 | annotations: 17 | {{- toYaml . | nindent 4 }} 18 | {{- end }} 19 | spec: 20 | ingressClassName: {{ .Values.ingress.ingressClassName }} 21 | {{- if .Values.ingress.tls }} 22 | tls: 23 | {{- range .Values.ingress.tls }} 24 | - hosts: 25 | {{- range .hosts }} 26 | - {{ . | quote }} 27 | {{- end }} 28 | secretName: {{ .secretName }} 29 | {{- end }} 30 | {{- end }} 31 | rules: 32 | {{- range .Values.ingress.hosts }} 33 | - host: {{ .host | quote }} 34 | http: 35 | paths: 36 | {{- range .paths }} 37 | - path: {{ . }} 38 | pathType: Prefix 39 | backend: 40 | service: 41 | name: {{ include "spark-history-server.fullname" $ }} 42 | port: 43 | number: {{ $serviceExternalPort }} 44 | {{- end }} 45 | {{- end }} 46 | {{- end }} 47 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "spark-history-server.fullname" . }} 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.externalPort }} 11 | targetPort: {{ .Values.service.internalPort }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "spark-history-server.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "spark-history-server.serviceAccountName" . }} 6 | labels: 7 | {{- include "spark-history-server.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- end }} 13 | -------------------------------------------------------------------------------- /stable/spark-history-server/templates/tests/test-connection.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: "{{ include "spark-history-server.fullname" . }}-test-connection" 5 | labels: 6 | {{- include "spark-history-server.labels" . | nindent 4 }} 7 | annotations: 8 | "helm.sh/hook": test-success 9 | spec: 10 | containers: 11 | - name: wget 12 | image: busybox 13 | command: ['wget'] 14 | args: ['{{ include "spark-history-server.fullname" . 
}}:{{ .Values.service.externalPort }}']
15 |   restartPolicy: Never
16 | 
--------------------------------------------------------------------------------
/stable/spark-history-server/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for spark-history-server.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 | 
5 | replicaCount: 1
6 | 
7 | image:
8 |   repository: ghcr.io/kubedai/spark-history-server
9 |   pullPolicy: Always
10 |   # Overrides the image tag whose default is the chart appVersion.
11 |   tag: latest
12 | 
13 | imagePullSecrets: []
14 | nameOverride: ""
15 | fullnameOverride: ""
16 | 
17 | serviceAccount:
18 |   create: true
19 |   name: spark-history-server-sa
20 |   # annotations:
21 |   #   # IRSA role attached to service account
22 |   #   eks.amazonaws.com/role-arn: <IRSA_IAM_ROLE_ARN>
23 | 
24 | 
25 | # Enter the S3 bucket with the Spark Event logs location.
26 | # Ensure the IRSA role has permissions to read the files in the given S3 bucket
27 | # sparkHistoryOpts: "-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
28 | sparkHistoryOpts: ""
29 | sparkConf: |-
30 |   spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.WebIdentityTokenCredentialsProvider
31 |   spark.history.fs.eventLog.rolling.maxFilesToRetain=5
32 |   spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
33 |   spark.eventLog.enabled=true
34 |   spark.history.ui.port=18080
35 | 
36 | log4jConfig: |-
37 |   log4j.rootCategory=INFO, console
38 |   log4j.appender.console=org.apache.log4j.ConsoleAppender
39 |   log4j.appender.console.target=System.out
40 |   log4j.appender.console.layout=org.apache.log4j.PatternLayout
41 |   log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
42 |   log4j.logger.org.apache.spark=INFO
43 |   log4j.logger.org.apache.hadoop=INFO
44 |   log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
45 |   log4j.logger.org.apache.spark.deploy.history.FsHistoryProvider=DEBUG
46 | 
47 | podAnnotations: {}
48 | 
49 | # Extra custom labels for pods
50 | podLabels: {}
51 | 
52 | podSecurityContext:
53 |   runAsUser: 1000
54 |   fsGroup: 1000
55 | 
56 | securityContext:
57 |   capabilities:
58 |     drop:
59 |       - ALL
60 |   readOnlyRootFilesystem: true
61 |   runAsNonRoot: true
62 |   runAsUser: 1000
63 | 
64 | service:
65 |   externalPort: 80
66 |   internalPort: 18080
67 |   type: ClusterIP
68 | 
69 | ingress:
70 |   enabled: false
71 |   annotations: {}
72 |   ## Set below values when ingress is enabled
73 |   # ingressClassName: <INGRESS_CLASS_NAME>
74 |   # hosts:
75 |   #   - host: <HOST_NAME>
76 |   #     paths:
77 |   #       - <PATH>
78 | 
79 | resources:
80 |   limits:
81 |     cpu: 200m
82 |     memory: 2G
83 |   requests:
84 |     cpu: 100m
85 |     memory: 1G
86 | 
87 | livenessProbe:
88 |   httpGet:
89 |     path: /
90 |     port: 18080
91 |     scheme: HTTP
92 |   timeoutSeconds: 5
93 |   periodSeconds: 30
94 |   successThreshold: 1
95 |   failureThreshold: 3
96 | 
97 | readinessProbe:
98 |   httpGet:
99 |     path: /
100 |     port: 18080
101 |     scheme: HTTP
102 |   timeoutSeconds: 5
103 |   periodSeconds: 30
104 |   successThreshold: 1
105 |   failureThreshold: 3
106 | 
107 | nodeSelector: {}
108 | 
109 | tolerations: []
110 | 
111 | affinity: {}
--------------------------------------------------------------------------------
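A quick usage sketch tying the defaults above together — installing the chart with the S3 event-log location supplied inline. The bucket and prefix are placeholders; everything else mirrors the chart's published values:

```bash
# Install (or upgrade) the chart, overriding the event-log location inline.
helm upgrade --install spark-history-server kubedai/spark-history-server \
  --namespace spark-history-server \
  --create-namespace \
  --set sparkHistoryOpts="-Dspark.history.fs.logDirectory=s3a://<BUCKET_NAME>/<PREFIX>/"
```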