├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── LICENSE-SAMPLECODE
├── LICENSE-SUMMARY
├── README.md
├── chart
└── kube-scheduler
│ ├── Chart.yaml
│ ├── templates
│ ├── _helpers.tpl
│ ├── clusterrole.yaml
│ ├── clusterrolebinding.yaml
│ ├── configmap.yaml
│ ├── deployment.yaml
│ └── serviceaccount.yaml
│ └── values.yaml
├── content
├── best-practices-and-recommendations
│ └── eks-best-practices.md
├── cost-optimization
│ └── docs
│ │ ├── cost-optimization.md
│ │ ├── cost-tracking.md
│ │ ├── index.md
│ │ ├── node-decommission.md
│ │ └── resources
│ │ ├── images
│ │ ├── ca.png
│ │ ├── karpenter.png
│ │ ├── node_decom.gif
│ │ ├── pvc_reuse.gif
│ │ └── reuse.gif
│ │ ├── scripts
│ │ └── create-bucket-data-export.sh
│ │ └── sql-statements
│ │ ├── compute-cost_view.sql
│ │ ├── data-export-table.sql
│ │ ├── overall-cost_view.sql
│ │ ├── query-over-all_view.sql
│ │ ├── vc-cost_view.sql
│ │ └── vc-lookup.sql
├── index.md
├── metastore-integrations
│ └── docs
│ │ ├── aws-glue.md
│ │ ├── hive-metastore.md
│ │ └── index.md
├── node-placement
│ └── docs
│ │ ├── eks-node-placement.md
│ │ ├── fargate-node-placement.md
│ │ └── index.md
├── outposts
│ ├── emr-containers-on-outposts.md
│ ├── index.md
│ └── resources
│ │ └── outposts_eks_network.png
├── performance
│ └── docs
│ │ ├── binpack.md
│ │ ├── dra.md
│ │ ├── index.md
│ │ ├── karpenter.md
│ │ └── resources
│ │ └── images
│ │ ├── after-binpack.png
│ │ ├── before-binpack.png
│ │ ├── binpack.gif
│ │ └── nonbinpack.gif
├── scalability
│ └── docs
│ │ ├── graphana-dashboard.md
│ │ ├── index.md
│ │ ├── known-factors-spark-operator.md
│ │ ├── known-factors-start-job-run.md
│ │ ├── load-test-for-spark-operator.md
│ │ ├── load-test-for-start-job-run-api.md
│ │ ├── resources
│ │ └── images
│ │ │ ├── EMR_Spark_Operator_Benchmark.png
│ │ │ ├── aws-cni-metrics.png
│ │ │ ├── eks-control-plane.png
│ │ │ ├── emr-on-eks-job-dashboard.png
│ │ │ └── spark-operator-dashboard.png
│ │ └── scalaiblity-glossary.md
├── security
│ └── docs
│ │ ├── index.md
│ │ ├── resources
│ │ ├── Dockerfile
│ │ ├── S3ListObjects_v1.jar
│ │ ├── client-role-2-policy.json
│ │ ├── client-role-2-trust-policy.json
│ │ ├── custom-entrypoint.sh
│ │ ├── driver-pod-template.yaml
│ │ ├── executor-pod-template.yaml
│ │ ├── images
│ │ │ ├── emr-on-eks-fargate.png
│ │ │ ├── emr-on-eks-network-communication.png
│ │ │ ├── emr-on-eks-self-and-managed.png
│ │ │ ├── role-chain.png
│ │ │ └── shared-responsibility-model.png
│ │ ├── job-exec-role-1-policy.json
│ │ ├── job-exec-role-1-trust-policy.json
│ │ ├── mix-spark-boto3.py
│ │ └── only-boto3.py
│ │ └── spark
│ │ ├── chain-role.md
│ │ ├── data-encryption.md
│ │ ├── encryption.md
│ │ ├── network-security.md
│ │ └── secrets.md
├── storage
│ ├── docs
│ │ ├── index.md
│ │ └── spark
│ │ │ ├── ebs.md
│ │ │ ├── fsx-lustre.md
│ │ │ └── instance-store.md
│ └── resources
│ │ └── FSx_Lustre_SG.png
├── submit-applications
│ ├── docs
│ │ └── spark
│ │ │ ├── index.md
│ │ │ ├── java-and-scala.md
│ │ │ ├── multi-arch-image.md
│ │ │ ├── pyspark.md
│ │ │ ├── sparkr.md
│ │ │ └── sparksql.md
│ └── resources
│ │ ├── images
│ │ └── pyspark-packaged-example-zip-folder-structure.png
│ │ └── pyspark-packaged-dependency-src.zip
└── troubleshooting
│ ├── docs
│ ├── change-log-level.md
│ ├── connect-spark-ui.md
│ ├── eks-cluster-auto-scaler.md
│ ├── index.md
│ ├── karpenter.md
│ ├── rbac-permissions-errors.md
│ ├── reverse-proxy-sparkui.md
│ ├── self-hosted-shs.md
│ └── where-to-look-for-spark-logs.md
│ └── resources
│ └── screen-shot-spark-ui-driver.png
├── mkdocs.yml
└── tools
├── emr-vertical-autoscaling
├── grafana-dashboard-model.json
└── prometheus-helm-values.yaml
├── k8s-rbac-policies
├── emr-containers.yaml
└── rbac_patch.py
└── start-job-run-converter
├── README.md
└── startJobRunConverter.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | #IDE
34 | .idea/
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Scrapy stuff:
56 | .scrapy
57 |
58 | # PyBuilder
59 | target/
60 |
61 | # IPython Notebook
62 | .ipynb_checkpoints
63 |
64 | # pyenv
65 | .python-version
66 |
67 | # virtualenv
68 | venv/
69 | ENV/
70 |
71 | # MkDocs documentation
72 | site/
73 | .DS_Store
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
--------------------------------------------------------------------------------
/LICENSE-SAMPLECODE:
--------------------------------------------------------------------------------
1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
4 | software and associated documentation files (the "Software"), to deal in the Software
5 | without restriction, including without limitation the rights to use, copy, modify,
6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
7 | permit persons to whom the Software is furnished to do so.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/LICENSE-SUMMARY:
--------------------------------------------------------------------------------
1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | The documentation is made available under the Creative Commons Attribution-ShareAlike 4.0 International License. See the LICENSE file.
4 |
5 | The sample code within this documentation is made available under the MIT-0 license. See the LICENSE-SAMPLECODE file.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Amazon EMR on Amazon EKS Best Practices
2 |
3 | A best practices guide for submitting Spark applications, Hive Metastore integration, security, storage options, debugging options, and performance considerations.
4 |
5 | Return to [Live Docs](https://aws.github.io/aws-emr-containers-best-practices/).
6 |
7 | ## License Summary
8 |
9 | The documentation is made available under the Creative Commons Attribution-ShareAlike 4.0 International License. See the LICENSE file.
10 |
11 | The sample code within this documentation is made available under the MIT-0 license. See the LICENSE-SAMPLECODE file.
12 |
13 | ## How to make a change
14 | 1. [Fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository)
15 | 2. Make your change and double-check that [mkdocs.yml](./mkdocs.yml) is updated accordingly (see the example `nav` snippet at the end of this section).
16 | 3. Install the MkDocs command-line tool and the Material theme if needed:
17 | ```bash
18 | pip install mkdocs
19 | pip install mkdocs-material # material theme
20 | ```
21 | 4. MkDocs comes with a built-in dev-server that lets you preview your documentation as you work on it. Make sure you're in the same directory as the `mkdocs.yml` configuration file, then run the command:
22 | ```bash
23 | mkdocs serve
24 | ```
25 | 5. Open up http://127.0.0.1:8000/ in your browser, and you'll see the best practice website being displayed locally.
26 | 6. Adjust your document changes in real time.
27 | 7. When everything looks good and you're ready to deploy the change, run the command to build/compile the website content:
28 | ```bash
29 | mkdocs build
30 | ```
31 | 8. This will refresh the directory `./site`. Take a look inside the directory and make sure your changes are included.
32 | ```bash
33 | ls site
34 | ```
35 | 9. Commit the change to GitHub and send us a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
36 | 
37 | 10. With repo admin permissions, we can merge the pull request into the main branch.
38 | 
39 | 11. Most importantly, as a repo admin, we must run the deploy command to copy the `./site` content to the `gh-pages` branch and push it to GitHub. Without this step, the website content won't be refreshed.
40 | ```bash
41 | mkdocs gh-deploy
42 | ```
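If your change adds a new page, it also needs an entry under `nav` in `mkdocs.yml`. A minimal sketch (the section title and page path below are hypothetical; use your page's real title and its path relative to the docs directory):
```yaml
nav:
  - Cost Optimization:
      # Hypothetical entry; point it at the Markdown file you added under content/
      - Cost Tracking: cost-optimization/docs/cost-tracking.md
```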
--------------------------------------------------------------------------------
/chart/kube-scheduler/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: kube-scheduler
3 | description: A Helm chart for deploying a custom Kubernetes scheduler
4 | version: 0.1.0
5 | appVersion: "1.0.0"
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "kube-scheduler.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "kube-scheduler.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "kube-scheduler.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "kube-scheduler.labels" -}}
37 | helm.sh/chart: {{ include "kube-scheduler.chart" . }}
38 | {{ include "kube-scheduler.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "kube-scheduler.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "kube-scheduler.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "kube-scheduler.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "kube-scheduler.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/clusterrole.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRole
3 | metadata:
4 | name: {{ include "kube-scheduler.fullname" . }}
5 | labels:
6 | {{- include "kube-scheduler.labels" . | nindent 4 }}
7 | rules:
8 | - apiGroups:
9 | - ""
10 | - events.k8s.io
11 | resources:
12 | - events
13 | verbs:
14 | - create
15 | - patch
16 | - update
17 | - apiGroups:
18 | - ""
19 | resources:
20 | - configmaps
21 | verbs:
22 | - get
23 | - list
24 | - watch
25 | - apiGroups:
26 | - coordination.k8s.io
27 | resources:
28 | - leases
29 | verbs:
30 | - create
31 | - get
32 | - list
33 | - update
34 | - apiGroups:
35 | - coordination.k8s.io
36 | resourceNames:
37 | - kube-scheduler
38 | resources:
39 | - leases
40 | verbs:
41 | - get
42 | - update
43 | - apiGroups:
44 | - ""
45 | resources:
46 | - endpoints
47 | verbs:
48 | - create
49 | - apiGroups:
50 | - ""
51 | resourceNames:
52 | - kube-scheduler
53 | resources:
54 | - endpoints
55 | verbs:
56 | - get
57 | - update
58 | - apiGroups:
59 | - ""
60 | resources:
61 | - nodes
62 | verbs:
63 | - get
64 | - list
65 | - watch
66 | - apiGroups:
67 | - ""
68 | resources:
69 | - pods
70 | verbs:
71 | - delete
72 | - get
73 | - list
74 | - watch
75 | - apiGroups:
76 | - ""
77 | resources:
78 | - bindings
79 | - pods/binding
80 | verbs:
81 | - create
82 | - apiGroups:
83 | - ""
84 | resources:
85 | - pods/status
86 | verbs:
87 | - patch
88 | - update
89 | - apiGroups:
90 | - ""
91 | resources:
92 | - replicationcontrollers
93 | - services
94 | verbs:
95 | - get
96 | - list
97 | - watch
98 | - apiGroups:
99 | - apps
100 | - extensions
101 | resources:
102 | - replicasets
103 | verbs:
104 | - get
105 | - list
106 | - watch
107 | - apiGroups:
108 | - apps
109 | resources:
110 | - statefulsets
111 | verbs:
112 | - get
113 | - list
114 | - watch
115 | - apiGroups:
116 | - policy
117 | resources:
118 | - poddisruptionbudgets
119 | verbs:
120 | - get
121 | - list
122 | - watch
123 | - apiGroups:
124 | - ""
125 | resources:
126 | - persistentvolumeclaims
127 | - persistentvolumes
128 | verbs:
129 | - get
130 | - list
131 | - watch
132 | - apiGroups:
133 | - authentication.k8s.io
134 | resources:
135 | - tokenreviews
136 | verbs:
137 | - create
138 | - apiGroups:
139 | - authorization.k8s.io
140 | resources:
141 | - subjectaccessreviews
142 | verbs:
143 | - create
144 | - apiGroups:
145 | - storage.k8s.io
146 | resources:
147 | - csinodes
148 | verbs:
149 | - get
150 | - list
151 | - watch
152 | - apiGroups:
153 | - ""
154 | resources:
155 | - namespaces
156 | verbs:
157 | - get
158 | - list
159 | - watch
160 | - apiGroups:
161 | - storage.k8s.io
162 | resources:
163 | - csidrivers
164 | verbs:
165 | - get
166 | - list
167 | - watch
168 | - apiGroups:
169 | - storage.k8s.io
170 | resources:
171 | - csistoragecapacities
172 | verbs:
173 | - get
174 | - list
175 | - watch
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: {{ include "kube-scheduler.fullname" . }}-as-kube-scheduler
5 | labels:
6 | {{- include "kube-scheduler.labels" . | nindent 4 }}
7 | subjects:
8 | - kind: ServiceAccount
9 | name: {{ include "kube-scheduler.serviceAccountName" . }}
10 | namespace: {{ .Release.Namespace }}
11 | roleRef:
12 | kind: ClusterRole
13 | name: {{ include "kube-scheduler.fullname" . }}
14 | apiGroup: rbac.authorization.k8s.io
15 |
16 | ---
17 | apiVersion: rbac.authorization.k8s.io/v1
18 | kind: ClusterRoleBinding
19 | metadata:
20 | name: {{ include "kube-scheduler.fullname" . }}-as-volume-scheduler
21 | labels:
22 | {{- include "kube-scheduler.labels" . | nindent 4 }}
23 | subjects:
24 | - kind: ServiceAccount
25 | name: {{ include "kube-scheduler.serviceAccountName" . }}
26 | namespace: {{ .Release.Namespace }}
27 | roleRef:
28 | kind: ClusterRole
29 | name: system:volume-scheduler
30 | apiGroup: rbac.authorization.k8s.io
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: {{ include "kube-scheduler.fullname" . }}-config
5 | labels:
6 | {{- include "kube-scheduler.labels" . | nindent 4 }}
7 | data:
8 | kube-scheduler-config.yaml: |
9 | apiVersion: kubescheduler.config.k8s.io/v1
10 | kind: KubeSchedulerConfiguration
11 | profiles:
12 | - pluginConfig:
13 | - args:
14 | apiVersion: kubescheduler.config.k8s.io/v1
15 | kind: NodeResourcesFitArgs
16 | scoringStrategy:
17 | resources:
18 | - name: cpu
19 | weight: 1
20 | - name: memory
21 | weight: 1
22 | type: MostAllocated
23 | name: NodeResourcesFit
24 | plugins:
25 | score:
26 | enabled:
27 | - name: NodeResourcesFit
28 | weight: 1
29 | disabled:
30 | - name: "*"
31 | multiPoint:
32 | enabled:
33 | - name: NodeResourcesFit
34 | weight: 1
35 | schedulerName: {{ include "kube-scheduler.fullname" . }}
36 | leaderElection:
37 | leaderElect: true
38 | resourceNamespace: {{ .Release.Namespace }}
39 | resourceName: {{ include "kube-scheduler.fullname" . }}
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: {{ include "kube-scheduler.fullname" . }}
5 | namespace: {{ .Release.Namespace }}
6 | labels:
7 | {{- include "kube-scheduler.labels" . | nindent 4 }}
8 | spec:
9 | replicas: {{ .Values.replicaCount }}
10 | selector:
11 | matchLabels:
12 | {{- include "kube-scheduler.selectorLabels" . | nindent 6 }}
13 | template:
14 | metadata:
15 | labels:
16 | {{- include "kube-scheduler.selectorLabels" . | nindent 8 }}
17 | spec:
18 | serviceAccountName: {{ include "kube-scheduler.serviceAccountName" . }}
19 | {{- with .Values.affinity }}
20 | affinity:
21 | {{- toYaml . | nindent 8 }}
22 | {{- end }}
23 | {{- with .Values.nodeSelector }}
24 | nodeSelector:
25 | {{- toYaml . | nindent 8 }}
26 | {{- end }}
27 | {{- with .Values.tolerations }}
28 | tolerations:
29 | {{- toYaml . | nindent 8 }}
30 | {{- end }}
31 | containers:
32 | - name: {{ .Chart.Name }}
33 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
34 | imagePullPolicy: {{ .Values.image.pullPolicy }}
35 | command:
36 | - /usr/local/bin/kube-scheduler
37 | - --bind-address=0.0.0.0
38 | - --config=/etc/kubernetes/kube-scheduler/kube-scheduler-config.yaml
39 | - --v=5
40 | livenessProbe:
41 | httpGet:
42 | path: /healthz
43 | port: 10259
44 | scheme: HTTPS
45 | initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
46 | periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
47 | readinessProbe:
48 | httpGet:
49 | path: /healthz
50 | port: 10259
51 | scheme: HTTPS
52 | initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
53 | periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
54 | resources:
55 | {{- toYaml .Values.resources | nindent 12 }}
56 | securityContext:
57 | privileged: false
58 | volumeMounts:
59 | - name: config-volume
60 | mountPath: /etc/kubernetes/kube-scheduler
61 | volumes:
62 | - name: config-volume
63 | configMap:
64 | name: {{ include "kube-scheduler.fullname" . }}-config
--------------------------------------------------------------------------------
/chart/kube-scheduler/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.serviceAccount.create -}}
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 | name: {{ include "kube-scheduler.serviceAccountName" . }}
6 | labels:
7 | {{- include "kube-scheduler.labels" . | nindent 4 }}
8 | {{- end }}
--------------------------------------------------------------------------------
/chart/kube-scheduler/values.yaml:
--------------------------------------------------------------------------------
1 | nameOverride: ""
2 | fullnameOverride: ""
3 |
4 | replicaCount: 2
5 |
6 | image:
7 | repository: public.ecr.aws/eks-distro/kubernetes/kube-scheduler
8 | tag: v1.31.0-eks-1-31-latest
9 | pullPolicy: IfNotPresent
10 |
11 | serviceAccount:
12 | create: true
13 | name: ""
14 |
15 | resources:
16 | requests:
17 | cpu: '1'
18 |
19 | livenessProbe:
20 | initialDelaySeconds: 15
21 | periodSeconds: 10
22 | readinessProbe:
23 | initialDelaySeconds: 15
24 | periodSeconds: 10
25 |
26 | nodeSelector: {}
27 | tolerations: []
28 | affinity: {}
--------------------------------------------------------------------------------
/content/cost-optimization/docs/cost-tracking.md:
--------------------------------------------------------------------------------
1 | # Cost Tracking
2 |
3 | In AWS, users can gain detailed insight into the cost of their usage by leveraging [Data Exports](https://docs.aws.amazon.com/cur/latest/userguide/what-is-data-exports.html). It allows organizations to create customized exports of the AWS Cost and Usage Report (CUR) 2.0, offering daily or hourly usage insights along with rates, costs, and usage attributes across all chargeable AWS services. The standard data export option delivers customized cost data to Amazon S3 on a recurring basis. With Data Exports, users can also track the cost incurred by the pods running in their EKS clusters.
4 | 
5 | In this section we show how to use the Data Exports data to track cost at the Virtual Cluster level, for both the compute and the Amazon EMR on EKS uplift, giving you a comprehensive view of the cost incurred by your jobs.
6 |
7 |
8 | ## Create Data Exports
9 |
10 | To create a Data Export report you can execute the following [shell script](https://github.com/aws/aws-emr-containers-best-practices/blob/main/content/cost-optimization/docs/resources/scripts/create-bucket-data-export.sh), or you can create one by following the [AWS documentation](https://docs.aws.amazon.com/cur/latest/userguide/dataexports-create-standard.html).
11 |
12 |
13 | ```sh
14 | sh create-bucket-data-export.sh NAME-OF-S3-BUCKET-TO-CREATE ACCOUNT-ID REPORT-NAME
15 | ```
16 |
17 | > ***NOTE***: If you create it by following the AWS documentation, make sure to select `split cost allocation` and `resource-id` to be included in the Data Export.
18 |
19 | ## Create the cost views
20 |
21 | To get the total cost we will use Amazon Athena to query the cost data produced by the Data Export report. Using Athena, we will first create a table over the exported data, then a lookup table that maps each Amazon EMR on EKS Virtual Cluster to its namespace. Afterward, we will create two views: one representing the compute cost and a second containing the EMR on EKS uplift. Finally, we will create a view that combines the EMR on EKS uplift and the compute cost as a union of the two earlier views.
22 |
23 |
24 | ### Create the data exports report table
25 |
26 | You can use the following [query](https://github.com/aws/aws-emr-containers-best-practices/tree/main/content/cost-optimization/docs/resources/sql-statements/data-export-table.sql) to create the data export table. If you used the provided script, you only need to replace the S3 bucket name; if you created the export report without the provided shell script, update the S3 location to match where your data export report delivers its data.
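The only part of that DDL you typically need to adjust is the `LOCATION` clause at the end. For reference, this is the clause as it appears in the provided statement (replace the bucket name, and the prefix if your export writes to a different path):

```sql
-- Tail of data-export-table.sql: replace S3-BUCKET-NAME (and the prefix, if needed)
-- with the S3 path your data export actually writes to.
LOCATION
  's3://S3-BUCKET-NAME/data/data-export/emr-containers-cost-reinvent/data/'
```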
27 |
28 | ### Create the Virtual cluster and namespace lookup table
29 |
30 | To create the lookup table you can use the following SQL statement.
31 |
32 | ```sql
33 | CREATE EXTERNAL TABLE `virtual_cluster_lookup`(
34 | `virtual_cluster_id` string,
35 | `namespace` string)
36 | ROW FORMAT DELIMITED
37 | FIELDS TERMINATED BY ','
38 | STORED AS INPUTFORMAT
39 | 'org.apache.hadoop.mapred.TextInputFormat'
40 | OUTPUTFORMAT
41 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
42 | LOCATION
43 | 's3://BUCKET-NAME/data/virtual_cluster_definition'
44 | ```
45 | Make sure to insert the lookup data; you can use the query below as an example.
46 |
47 | ```sql
48 | INSERT INTO virtual_cluster_lookup
49 | VALUES ('96nxs46332423542abnbd2iuv6049', 'myvc')
50 | ```
51 |
52 | ### Create the EMR on EKS uplift view
53 |
54 | To create the EMR on EKS uplift view you can use the following SQL statement.
55 | 
56 | > ***NOTE***: You may need to change the source data table name if you created the data export yourself. The query below uses a source data table called `data`.
57 |
58 | ```sql
59 | CREATE OR REPLACE VIEW "emr_uplift_per_vc_view" AS
60 | WITH
61 | emr_uplift_per_vc AS (
62 | SELECT
63 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month",
64 | split_part(line_item_resource_id, '/', 3) vc_id,
65 | sum(line_item_blended_cost) cost
66 | FROM
67 | data
68 | WHERE ((line_item_product_code = 'ElasticMapReduce') AND (line_item_operation = 'StartJobRun'))
69 | GROUP BY line_item_resource_id, 1
70 | )
71 | SELECT
72 | month,
73 | namespace,
74 | SUM(cost) cost
75 | FROM
76 | (emr_uplift_per_vc uplift
77 | INNER JOIN virtual_cluster_lookup lookup ON (uplift.vc_id = lookup.virtual_cluster_id))
78 | GROUP BY month, namespace
79 | ```
80 | ### Create the Compute cost view
81 |
82 | To create the compute cost view you can use the following SQL statement.
83 | 
84 | > ***NOTE***: You may need to change the source data table name if you created the data export yourself. The query below uses a source data table called `data`.
85 |
86 | ```sql
87 |
88 | CREATE OR REPLACE VIEW "compute_cost_per_namespace_view" AS
89 | SELECT
90 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month"
91 | , CONCAT(REPLACE(SPLIT_PART("line_item_resource_id", '/', 1), 'pod', 'cluster'), '/', SPLIT_PART("line_item_resource_id", '/', 2)) "cluster_arn"
92 | , SPLIT_PART("line_item_resource_id", '/', 3) "namespace"
93 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-vCPU-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "cpu_cost"
94 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-GB-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "ram_cost"
95 | , SUM(("split_line_item_split_cost" + "split_line_item_unused_cost")) "total_cost"
96 | FROM
97 | (data
98 | INNER JOIN virtual_cluster_lookup lookup ON (SPLIT_PART("line_item_resource_id", '/', 3) = lookup.namespace))
99 | WHERE ("line_item_operation" = 'EKSPod-EC2')
100 | GROUP BY 1, 2, 3
101 | ORDER BY "month" DESC, "cluster_arn" ASC, "namespace" ASC, "total_cost" DESC
102 |
103 | ```
104 |
105 | ### Create the over all cost view
106 |
107 | To create the overall cost view you can use the following SQL statement.
108 |
109 | ```sql
110 | CREATE OR REPLACE VIEW emr_eks_cost AS
111 |
112 | SELECT month, namespace, total_cost as cost FROM "reinventdemo"."compute_cost_per_namespace_view"
113 |
114 | UNION
115 |
116 | SELECT month, namespace, cost FROM "reinventdemo"."emr_uplift_per_vc_view"
117 | ```
118 |
119 | ## Query the data
120 |
121 | After creating the views you can now get insights into the total cost of running your EMR on EKS jobs at the virtual cluster level. The query below shows how you can get the overall cost.
122 | 
123 | ```sql
124 | SELECT month, namespace, sum(cost) as total_cost
125 | FROM "emr_eks_cost"
126 | GROUP BY namespace, month
127 | ```
128 | > ***NOTE***: In these views the granularity is at the month level. You can also run them at the day level by changing the date expressions in the SQL queries to include the day.
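For example, a day-level variant only needs the date expression in the views adjusted; a sketch of the changed column, following the same pattern as the month-level expression used above:

```sql
-- Day-level granularity (sketch): truncate to the day and add the day to the format string
DATE_FORMAT(DATE_TRUNC('day', "line_item_usage_start_date"), '%Y-%m-%d') "day"
```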
129 |
130 |
131 |
132 |
--------------------------------------------------------------------------------
/content/cost-optimization/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/index.md
--------------------------------------------------------------------------------
/content/cost-optimization/docs/node-decommission.md:
--------------------------------------------------------------------------------
1 | # **Node Decommission**
2 |
3 | This section shows how to use an [Apache Spark feature](https://issues.apache.org/jira/browse/SPARK-20629) that migrates the shuffle data and cached RDD blocks present on terminating executors to peer executors before a Spot node gets decommissioned. Consequently, your job does not need to recompute the shuffle and RDD blocks of the terminating executor that would otherwise be lost, minimizing the delay in job completion.
4 |
5 | This feature is supported for releases EMR 6.3.0+.
6 |
7 | ### How does it work?
8 |
9 | When `spark.decommission.enabled` is true, Spark will try its best to shut down the executor gracefully. `spark.storage.decommission.enabled` will enable migrating data stored on the executor. Spark will try to migrate all the cached RDD blocks (controlled by `spark.storage.decommission.rddBlocks.enabled`) and shuffle blocks (controlled by `spark.storage.decommission.shuffleBlocks.enabled`) from the decommissioning executor to all remote executors when Spark decommissioning is enabled. Relevant Spark configurations for using node decommissioning in jobs are:
10 |
11 | |Configuration|Description|Default Value|
12 | |-----|-----|-----|
13 | |spark.decommission.enabled|Whether to enable decommissioning|false|
14 | |spark.storage.decommission.enabled|Whether to decommission the block manager when decommissioning executor|false|
15 | |spark.storage.decommission.rddBlocks.enabled|Whether to transfer RDD blocks during block manager decommissioning.|false|
16 | |spark.storage.decommission.shuffleBlocks.enabled|Whether to transfer shuffle blocks during block manager decommissioning. Requires a migratable shuffle resolver (like sort based shuffle)|false|
17 | |spark.storage.decommission.maxReplicationFailuresPerBlock|Maximum number of failures which can be handled for migrating shuffle blocks when block manager is decommissioning and trying to move its existing blocks.|3|
18 | |spark.storage.decommission.shuffleBlocks.maxThreads|Maximum number of threads to use in migrating shuffle files.|8|
19 |
20 | This feature can currently be enabled through a temporary workaround on EMR 6.3.0+ releases. To enable it, Spark’s decom.sh file permission must be modified using a [custom image](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images.html). Once the code is fixed, the page will be updated.
21 |
22 | **Dockerfile for custom image:**
23 |
24 | ```
25 | FROM .dkr.ecr..amazonaws.com/spark/
26 | USER root
27 | WORKDIR /home/hadoop
28 | RUN chown hadoop:hadoop /usr/bin/decom.sh
29 | ```
30 |
31 | **Setting decommission timeout:**
32 |
33 | Each executor has to be decommissioned within a certain time limit controlled by the pod's terminationGracePeriodSeconds configuration. The default value is 30 seconds but can be modified using a [custom pod template](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/pod-templates.html). The pod template for this modification would look like:
34 | ```
35 | apiVersion: v1
36 | kind: Pod
37 | spec:
38 | terminationGracePeriodSeconds:
39 | ```
40 |
41 | **Note: The terminationGracePeriodSeconds timeout should be less than the Spot instance termination timeout, with a buffer of around 5 seconds kept aside for triggering the node termination.**
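For example, assuming the standard two-minute Spot interruption notice, a hypothetical value of 115 seconds leaves roughly a 5-second buffer; a sketch of the pod template:

```
apiVersion: v1
kind: Pod
spec:
  # Hypothetical value: ~2-minute Spot notice minus a ~5-second buffer; tune for your environment
  terminationGracePeriodSeconds: 115
```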
42 |
43 |
44 | **Request:**
45 |
46 | ```
47 | cat >spark-python-with-node-decommissioning.json << EOF
48 | {
49 | "name": "my-job-run-with-node-decommissioning",
50 | "virtualClusterId": "",
51 | "executionRoleArn": "",
52 | "releaseLabel": "emr-6.3.0-latest",
53 | "jobDriver": {
54 | "sparkSubmitJobDriver": {
55 | "entryPoint": "s3:///trip-count.py",
56 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6"
57 | }
58 | },
59 | "configurationOverrides": {
60 | "applicationConfiguration": [
61 | {
62 | "classification": "spark-defaults",
63 | "properties": {
64 | "spark.kubernetes.container.image": ".dkr.ecr..amazonaws.com/",
65 | "spark.executor.instances": "5",
66 | "spark.decommission.enabled": "true",
67 | "spark.storage.decommission.rddBlocks.enabled": "true",
68 | "spark.storage.decommission.shuffleBlocks.enabled" : "true",
69 | "spark.storage.decommission.enabled": "true"
70 | }
71 | }
72 | ],
73 | "monitoringConfiguration": {
74 | "cloudWatchMonitoringConfiguration": {
75 | "logGroupName": "",
76 | "logStreamNamePrefix": ""
77 | },
78 | "s3MonitoringConfiguration": {
79 | "logUri": ""
80 | }
81 | }
82 | }
83 | }
84 | EOF
85 | ```
86 |
87 | **Observed Behavior:**
88 |
89 | When an executor begins decommissioning, its shuffle data gets migrated to peer executors instead of the shuffle blocks being recalculated. If sending shuffle blocks to an executor fails, `spark.storage.decommission.maxReplicationFailuresPerBlock` gives the number of retries for the migration. The driver's stderr log will contain lines like `Updating map output for to BlockManagerId(, , , )` denoting details about the shuffle block's migration. This feature does not emit any other metrics for validation yet.
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/images/ca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/ca.png
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/images/karpenter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/karpenter.png
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/images/node_decom.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/node_decom.gif
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/images/pvc_reuse.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/pvc_reuse.gif
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/images/reuse.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/reuse.gif
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/scripts/create-bucket-data-export.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Check if all required parameters are provided
4 | if [ $# -ne 3 ]; then
5 | echo "Usage: $0 <s3-bucket-name> <account-id> <report-name>"
6 | echo "Example: $0 my-bucket-name 123456789012 eks-cost-tracking"
7 | exit 1
8 | fi
9 |
10 | # Store parameters in variables
11 | S3_BUCKET=$1
12 | ACCOUNT_ID=$2
13 | REPORT_NAME=$3
14 |
15 | # Create the S3 bucket
16 | echo "Creating S3 bucket: $S3_BUCKET"
17 | if aws s3api create-bucket \
18 | --bucket "$S3_BUCKET" \
19 | --region us-east-1; then
20 | echo "Successfully created bucket: $S3_BUCKET"
21 | else
22 | echo "Failed to create bucket"
23 | exit 1
24 | fi
25 |
26 | # Create temporary bucket policy file
27 | POLICY_FILE=$(mktemp)
28 | cat > "$POLICY_FILE" << EOF
29 | {
30 | "Version": "2012-10-17",
31 | "Statement": [
32 | {
33 | "Sid": "EnableAWSDataExportsToWriteToS3AndCheckPolicy",
34 | "Effect": "Allow",
35 | "Principal": {
36 | "Service": [
37 | "bcm-data-exports.amazonaws.com",
38 | "billingreports.amazonaws.com"
39 | ]
40 | },
41 | "Action": [
42 | "s3:PutObject",
43 | "s3:GetBucketPolicy"
44 | ],
45 | "Resource": [
46 | "arn:aws:s3:::${S3_BUCKET}",
47 | "arn:aws:s3:::${S3_BUCKET}/*"
48 | ],
49 | "Condition": {
50 | "StringLike": {
51 | "aws:SourceAccount": "${ACCOUNT_ID}",
52 | "aws:SourceArn": [
53 | "arn:aws:cur:us-east-1:${ACCOUNT_ID}:definition/*",
54 | "arn:aws:bcm-data-exports:us-east-1:${ACCOUNT_ID}:export/*"
55 | ]
56 | }
57 | }
58 | }
59 | ]
60 | }
61 | EOF
62 |
63 | # Attach the bucket policy
64 | echo "Attaching bucket policy..."
65 | if aws s3api put-bucket-policy \
66 | --bucket "$S3_BUCKET" \
67 | --policy "file://$POLICY_FILE"; then
68 | echo "Successfully attached bucket policy"
69 | else
70 | echo "Failed to attach bucket policy"
71 | rm "$POLICY_FILE"
72 | exit 1
73 | fi
74 |
75 | # Clean up the temporary policy file
76 | rm "$POLICY_FILE"
77 |
78 | # Execute the AWS CLI command for data export
79 | echo "Creating cost export report..."
80 | aws bcm-data-exports create-export \
81 | --export '{
82 | "DataQuery": {
83 | "QueryStatement": "SELECT bill_bill_type, bill_billing_entity, bill_billing_period_end_date, bill_billing_period_start_date, bill_invoice_id, bill_invoicing_entity, bill_payer_account_id, bill_payer_account_name, cost_category, discount, discount_bundled_discount, discount_total_discount, identity_line_item_id, identity_time_interval, line_item_availability_zone, line_item_blended_cost, line_item_blended_rate, line_item_currency_code, line_item_legal_entity, line_item_line_item_description, line_item_line_item_type, line_item_net_unblended_cost, line_item_net_unblended_rate, line_item_normalization_factor, line_item_normalized_usage_amount, line_item_operation, line_item_product_code, line_item_resource_id, line_item_tax_type, line_item_unblended_cost, line_item_unblended_rate, line_item_usage_account_id, line_item_usage_account_name, line_item_usage_amount, line_item_usage_end_date, line_item_usage_start_date, line_item_usage_type, pricing_currency, pricing_lease_contract_length, pricing_offering_class, pricing_public_on_demand_cost, pricing_public_on_demand_rate, pricing_purchase_option, pricing_rate_code, pricing_rate_id, pricing_term, pricing_unit, product, product_comment, product_fee_code, product_fee_description, product_from_location, product_from_location_type, product_from_region_code, product_instance_family, product_instance_type, product_instancesku, product_location, product_location_type, product_operation, product_pricing_unit, product_product_family, product_region_code, product_servicecode, product_sku, product_to_location, product_to_location_type, product_to_region_code, product_usagetype, reservation_amortized_upfront_cost_for_usage, reservation_amortized_upfront_fee_for_billing_period, reservation_availability_zone, reservation_effective_cost, reservation_end_time, reservation_modification_status, reservation_net_amortized_upfront_cost_for_usage, reservation_net_amortized_upfront_fee_for_billing_period, reservation_net_effective_cost, reservation_net_recurring_fee_for_usage, reservation_net_unused_amortized_upfront_fee_for_billing_period, reservation_net_unused_recurring_fee, reservation_net_upfront_value, reservation_normalized_units_per_reservation, reservation_number_of_reservations, reservation_recurring_fee_for_usage, reservation_reservation_a_r_n, reservation_start_time, reservation_subscription_id, reservation_total_reserved_normalized_units, reservation_total_reserved_units, reservation_units_per_reservation, reservation_unused_amortized_upfront_fee_for_billing_period, reservation_unused_normalized_unit_quantity, reservation_unused_quantity, reservation_unused_recurring_fee, reservation_upfront_value, resource_tags, savings_plan_amortized_upfront_commitment_for_billing_period, savings_plan_end_time, savings_plan_instance_type_family, savings_plan_net_amortized_upfront_commitment_for_billing_period, savings_plan_net_recurring_commitment_for_billing_period, savings_plan_net_savings_plan_effective_cost, savings_plan_offering_type, savings_plan_payment_option, savings_plan_purchase_term, savings_plan_recurring_commitment_for_billing_period, savings_plan_region, savings_plan_savings_plan_a_r_n, savings_plan_savings_plan_effective_cost, savings_plan_savings_plan_rate, savings_plan_start_time, savings_plan_total_commitment_to_date, savings_plan_used_commitment, split_line_item_actual_usage, split_line_item_net_split_cost, split_line_item_net_unused_cost, split_line_item_parent_resource_id, split_line_item_public_on_demand_split_cost, split_line_item_public_on_demand_unused_cost, split_line_item_reserved_usage, split_line_item_split_cost, split_line_item_split_usage, split_line_item_split_usage_ratio, split_line_item_unused_cost FROM COST_AND_USAGE_REPORT",
84 | "TableConfigurations": {
85 | "COST_AND_USAGE_REPORT": {
86 | "INCLUDE_MANUAL_DISCOUNT_COMPATIBILITY": "FALSE",
87 | "INCLUDE_RESOURCES": "TRUE",
88 | "INCLUDE_SPLIT_COST_ALLOCATION_DATA": "TRUE",
89 | "TIME_GRANULARITY": "HOURLY"
90 | }
91 | }
92 | },
93 | "DestinationConfigurations": {
94 | "S3Destination": {
95 | "S3Bucket": "'$S3_BUCKET'",
96 | "S3OutputConfigurations": {
97 | "Compression": "PARQUET",
98 | "Format": "PARQUET",
99 | "OutputType": "CUSTOM",
100 | "Overwrite": "OVERWRITE_REPORT"
101 | },
102 | "S3Prefix": "cost-data/data",
103 | "S3Region": "us-east-1"
104 | }
105 | },
106 | "Name": "'$REPORT_NAME'",
107 | "RefreshCadence": {
108 | "Frequency": "SYNCHRONOUS"
109 | }
110 | }'
111 |
112 | # Check if the command was successful
113 | if [ $? -eq 0 ]; then
114 | echo "Successfully created cost export report: $REPORT_NAME"
115 | echo "Data will be exported to s3://$S3_BUCKET/cost-data/"
116 | else
117 | echo "Failed to create cost export report"
118 | exit 1
119 | fi
120 |
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/compute-cost_view.sql:
--------------------------------------------------------------------------------
1 | CREATE OR REPLACE VIEW "compute_cost_per_namespace_view" AS
2 | SELECT
3 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month"
4 | , CONCAT(REPLACE(SPLIT_PART("line_item_resource_id", '/', 1), 'pod', 'cluster'), '/', SPLIT_PART("line_item_resource_id", '/', 2)) "cluster_arn"
5 | , SPLIT_PART("line_item_resource_id", '/', 3) "namespace"
6 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-vCPU-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "cpu_cost"
7 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-GB-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "ram_cost"
8 | , SUM(("split_line_item_split_cost" + "split_line_item_unused_cost")) "total_cost"
9 | FROM
10 | (data
11 | INNER JOIN virtual_cluster_lookup lookup ON (SPLIT_PART("line_item_resource_id", '/', 3) = lookup.namespace))
12 | WHERE ("line_item_operation" = 'EKSPod-EC2')
13 | GROUP BY 1, 2, 3
14 | ORDER BY "month" DESC, "cluster_arn" ASC, "namespace" ASC, "total_cost" DESC
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/data-export-table.sql:
--------------------------------------------------------------------------------
1 | CREATE EXTERNAL TABLE `data`(
2 | `bill_bill_type` string,
3 | `bill_billing_entity` string,
4 | `bill_billing_period_end_date` timestamp,
5 | `bill_billing_period_start_date` timestamp,
6 | `bill_invoice_id` string,
7 | `bill_invoicing_entity` string,
8 | `bill_payer_account_id` string,
9 | `bill_payer_account_name` string,
10 | `cost_category` map,
11 | `discount` map,
12 | `discount_bundled_discount` double,
13 | `discount_total_discount` double,
14 | `identity_line_item_id` string,
15 | `identity_time_interval` string,
16 | `line_item_availability_zone` string,
17 | `line_item_blended_cost` double,
18 | `line_item_blended_rate` string,
19 | `line_item_currency_code` string,
20 | `line_item_legal_entity` string,
21 | `line_item_line_item_description` string,
22 | `line_item_line_item_type` string,
23 | `line_item_net_unblended_cost` double,
24 | `line_item_net_unblended_rate` string,
25 | `line_item_normalization_factor` double,
26 | `line_item_normalized_usage_amount` double,
27 | `line_item_operation` string,
28 | `line_item_product_code` string,
29 | `line_item_resource_id` string,
30 | `line_item_tax_type` string,
31 | `line_item_unblended_cost` double,
32 | `line_item_unblended_rate` string,
33 | `line_item_usage_account_id` string,
34 | `line_item_usage_account_name` string,
35 | `line_item_usage_amount` double,
36 | `line_item_usage_end_date` timestamp,
37 | `line_item_usage_start_date` timestamp,
38 | `line_item_usage_type` string,
39 | `pricing_currency` string,
40 | `pricing_lease_contract_length` string,
41 | `pricing_offering_class` string,
42 | `pricing_public_on_demand_cost` double,
43 | `pricing_public_on_demand_rate` string,
44 | `pricing_purchase_option` string,
45 | `pricing_rate_code` string,
46 | `pricing_rate_id` string,
47 | `pricing_term` string,
48 | `pricing_unit` string,
49 | `product` map,
50 | `product_comment` string,
51 | `product_fee_code` string,
52 | `product_fee_description` string,
53 | `product_from_location` string,
54 | `product_from_location_type` string,
55 | `product_from_region_code` string,
56 | `product_instance_family` string,
57 | `product_instance_type` string,
58 | `product_instancesku` string,
59 | `product_location` string,
60 | `product_location_type` string,
61 | `product_operation` string,
62 | `product_pricing_unit` string,
63 | `product_product_family` string,
64 | `product_region_code` string,
65 | `product_servicecode` string,
66 | `product_sku` string,
67 | `product_to_location` string,
68 | `product_to_location_type` string,
69 | `product_to_region_code` string,
70 | `product_usagetype` string,
71 | `reservation_amortized_upfront_cost_for_usage` double,
72 | `reservation_amortized_upfront_fee_for_billing_period` double,
73 | `reservation_availability_zone` string,
74 | `reservation_effective_cost` double,
75 | `reservation_end_time` string,
76 | `reservation_modification_status` string,
77 | `reservation_net_amortized_upfront_cost_for_usage` double,
78 | `reservation_net_amortized_upfront_fee_for_billing_period` double,
79 | `reservation_net_effective_cost` double,
80 | `reservation_net_recurring_fee_for_usage` double,
81 | `reservation_net_unused_amortized_upfront_fee_for_billing_period` double,
82 | `reservation_net_unused_recurring_fee` double,
83 | `reservation_net_upfront_value` double,
84 | `reservation_normalized_units_per_reservation` string,
85 | `reservation_number_of_reservations` string,
86 | `reservation_recurring_fee_for_usage` double,
87 | `reservation_reservation_a_r_n` string,
88 | `reservation_start_time` string,
89 | `reservation_subscription_id` string,
90 | `reservation_total_reserved_normalized_units` string,
91 | `reservation_total_reserved_units` string,
92 | `reservation_units_per_reservation` string,
93 | `reservation_unused_amortized_upfront_fee_for_billing_period` double,
94 | `reservation_unused_normalized_unit_quantity` double,
95 | `reservation_unused_quantity` double,
96 | `reservation_unused_recurring_fee` double,
97 | `reservation_upfront_value` double,
98 | `resource_tags` map,
99 | `savings_plan_amortized_upfront_commitment_for_billing_period` double,
100 | `savings_plan_end_time` string,
101 | `savings_plan_instance_type_family` string,
102 | `savings_plan_net_amortized_upfront_commitment_for_billing_period` double,
103 | `savings_plan_net_recurring_commitment_for_billing_period` double,
104 | `savings_plan_net_savings_plan_effective_cost` double,
105 | `savings_plan_offering_type` string,
106 | `savings_plan_payment_option` string,
107 | `savings_plan_purchase_term` string,
108 | `savings_plan_recurring_commitment_for_billing_period` double,
109 | `savings_plan_region` string,
110 | `savings_plan_savings_plan_a_r_n` string,
111 | `savings_plan_savings_plan_effective_cost` double,
112 | `savings_plan_savings_plan_rate` double,
113 | `savings_plan_start_time` string,
114 | `savings_plan_total_commitment_to_date` double,
115 | `savings_plan_used_commitment` double,
116 | `split_line_item_actual_usage` double,
117 | `split_line_item_net_split_cost` double,
118 | `split_line_item_net_unused_cost` double,
119 | `split_line_item_parent_resource_id` string,
120 | `split_line_item_public_on_demand_split_cost` double,
121 | `split_line_item_public_on_demand_unused_cost` double,
122 | `split_line_item_reserved_usage` double,
123 | `split_line_item_split_cost` double,
124 | `split_line_item_split_usage` double,
125 | `split_line_item_split_usage_ratio` double,
126 | `split_line_item_unused_cost` double)
127 | PARTITIONED BY (
128 | `billing_period` string)
129 | ROW FORMAT SERDE
130 | 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
131 | STORED AS INPUTFORMAT
132 | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
133 | OUTPUTFORMAT
134 | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
135 | LOCATION
136 | 's3://S3-BUCKET-NAME/data/data-export/emr-containers-cost-reinvent/data/'
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/overall-cost_view.sql:
--------------------------------------------------------------------------------
1 | CREATE OR REPLACE VIEW emr_eks_cost AS
2 |
3 | SELECT month, namespace, total_cost as cost FROM "reinventdemo"."compute_cost_per_namespace_view"
4 |
5 | UNION
6 |
7 | SELECT month, namespace, cost FROM "reinventdemo"."emr_uplift_per_vc_view"
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/query-over-all_view.sql:
--------------------------------------------------------------------------------
1 | SELECT month, namespace, sum(cost) as total_cost
2 | FROM "emr_eks_cost"
3 | GROUP BY namespace, month
4 |
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/vc-cost_view.sql:
--------------------------------------------------------------------------------
1 | CREATE OR REPLACE VIEW "emr_uplift_per_vc_view" AS
2 | WITH
3 | emr_uplift_per_vc AS (
4 | SELECT
5 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month",
6 | split_part(line_item_resource_id, '/', 3) vc_id,
7 | sum(line_item_blended_cost) cost
8 | FROM
9 | data
10 | WHERE ((line_item_product_code = 'ElasticMapReduce') AND (line_item_operation = 'StartJobRun'))
11 | GROUP BY line_item_resource_id, 1
12 | )
13 | SELECT
14 | month,
15 | namespace,
16 | SUM(cost) cost
17 | FROM
18 | (emr_uplift_per_vc uplift
19 | INNER JOIN virtual_cluster_lookup lookup ON (uplift.vc_id = lookup.virtual_cluster_id))
20 | GROUP BY month, namespace
--------------------------------------------------------------------------------
/content/cost-optimization/docs/resources/sql-statements/vc-lookup.sql:
--------------------------------------------------------------------------------
1 | CREATE EXTERNAL TABLE `virtual_cluster_lookup`(
2 | `virtual_cluster_id` string,
3 | `namespace` string)
4 | ROW FORMAT DELIMITED
5 | FIELDS TERMINATED BY ','
6 | STORED AS INPUTFORMAT
7 | 'org.apache.hadoop.mapred.TextInputFormat'
8 | OUTPUTFORMAT
9 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
10 | LOCATION
11 | 's3://BUCKET-NAME/data/virtual_cluster_definition'
--------------------------------------------------------------------------------
/content/index.md:
--------------------------------------------------------------------------------
1 | Welcome to the EMR Containers Best Practices Guide. The primary goal of this project is to offer a set of best practices and templates to get started with [Amazon EMR on EKS](https://aws.amazon.com/emr/features/eks/). We publish this guide on GitHub so we can iterate on the content quickly, provide timely and effective recommendations for a variety of concerns, and easily incorporate suggestions from the broader community.
2 |
3 | ## Amazon EMR on EKS Workshop
4 | If you are interested in step-by-step tutorials that leverage the best practices contained in this guide, please visit the [Amazon EMR on EKS Workshop.](https://emr-on-eks.workshop.aws/)
5 | ## Contributing
6 |
7 | We encourage you to contribute to these guides. If you have implemented a practice that has proven to be effective, please share it with us by opening an issue or a pull request. Similarly, if you discover an error or flaw in the guide, please submit a pull request to correct it.
8 |
--------------------------------------------------------------------------------
/content/metastore-integrations/docs/aws-glue.md:
--------------------------------------------------------------------------------
1 | # **EMR Containers integration with AWS Glue**
2 |
3 | #### **AWS Glue catalog in same account as EKS**
4 | In the example below, a Spark application is configured to use the [AWS Glue Data Catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html) as its Hive metastore.
5 |
6 | **gluequery.py**
7 |
8 | ```
9 | cat > gluequery.py << EOF
10 | from pyspark.sql import SparkSession
11 | 
12 | spark = SparkSession \
13 |     .builder \
14 |     .appName("gluequery") \
15 |     .enableHiveSupport() \
16 |     .getOrCreate()
17 | 
18 | spark.sql("""
19 |     CREATE EXTERNAL TABLE IF NOT EXISTS sparkemrnyc (<column definitions>)
20 |     STORED AS PARQUET
21 |     LOCATION 's3://<s3 bucket>/trip-data.parquet/'""")
22 | spark.sql("SELECT count(*) FROM sparkemrnyc").show()
23 | spark.stop()
24 | EOF
25 | ```
26 |
27 | ```
28 | LOCATION 's3://<s3 bucket>/trip-data.parquet/'
29 | ```
30 |
31 | Configure the above property to point to the S3 location containing the data.
32 |
33 | **Request**
34 |
35 | ```
36 | cat > Spark-Python-in-s3-awsglue-log.json << EOF
37 | {
38 | "name": "spark-python-in-s3-awsglue-log",
39 | "virtualClusterId": "",
40 | "executionRoleArn": "",
41 | "releaseLabel": "emr-6.2.0-latest",
42 | "jobDriver": {
43 | "sparkSubmitJobDriver": {
44 | "entryPoint": "s3:///gluequery.py",
45 | "sparkSubmitParameters": "--conf spark.driver.cores=3 --conf spark.executor.memory=8G --conf spark.driver.memory=6G --conf spark.executor.cores=3"
46 | }
47 | },
48 | "configurationOverrides": {
49 | "applicationConfiguration": [
50 | {
51 | "classification": "spark-defaults",
52 | "properties": {
53 | "spark.hadoop.hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
54 | }
55 | }
56 | ],
57 | "monitoringConfiguration": {
58 | "cloudWatchMonitoringConfiguration": {
59 | "logGroupName": "/emr-containers/jobs",
60 | "logStreamNamePrefix": "demo"
61 | },
62 | "s3MonitoringConfiguration": {
63 | "logUri": "s3://joblogs"
64 | }
65 | }
66 | }
67 | }
68 | EOF
69 |
70 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-awsglue-log.json
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 | ```
80 |
81 | Output from driver logs - Displays the number of rows.
82 |
83 | ```
84 | +----------+
85 | | count(1)|
86 | +----------+
87 | |2716504499|
88 | +----------+
89 | ```
90 |
91 |
92 |
93 | #### **AWS Glue catalog in different account**
94 | The Spark application is submitted to an EMR virtual cluster in Account A and is configured to connect to the [AWS Glue catalog in Account B.](https://docs.aws.amazon.com/glue/latest/dg/cross-account-access.html) The IAM policy attached to the job execution role (`"executionRoleArn"`) in Account A is:
95 |
96 | ```
97 | {
98 | "Version": "2012-10-17",
99 | "Statement": [
100 | {
101 | "Effect": "Allow",
102 | "Action": [
103 | "glue:*"
104 | ],
105 | "Resource": [
106 | "arn:aws:glue:::catalog",
107 | "arn:aws:glue:::database/default",
108 | "arn:aws:glue:::table/default/sparkemrnyc"
109 | ]
110 | }
111 | ]
112 | }
113 | ```
114 |
115 |
116 | The resource policy attached to the AWS Glue Data Catalog in Account B:
117 |
118 | ```
119 | {
120 | "Version" : "2012-10-17",
121 | "Statement" : [ {
122 | "Effect" : "Allow",
123 | "Principal" : {
124 | "AWS" : ""
125 | },
126 | "Action" : "glue:*",
127 | "Resource" : [ "arn:aws:glue:::catalog", "arn:aws:glue:::database/default", "arn:aws:glue:::table/default/sparkemrnyc" ]
128 | } ]
129 | }
130 | ```
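
The Glue Data Catalog resource policy shown above can be attached from the AWS CLI in Account B. A minimal sketch, assuming the policy is saved locally; the file name and region are placeholders to substitute with your own values:

```
aws glue put-resource-policy \
    --region <region of the Glue catalog in Account B> \
    --policy-in-json file://glue-catalog-resource-policy.json
```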
131 |
132 |
133 | **Request**
134 |
135 | ```
136 | cat > Spark-Python-in-s3-awsglue-crossaccount.json << EOF
137 | {
138 | "name": "spark-python-in-s3-awsglue-crossaccount",
139 | "virtualClusterId": "",
140 | "executionRoleArn": "",
141 | "releaseLabel": "emr-6.2.0-latest",
142 | "jobDriver": {
143 | "sparkSubmitJobDriver": {
144 | "entryPoint": "s3:///gluequery.py",
145 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6 "
146 | }
147 | },
148 | "configurationOverrides": {
149 | "applicationConfiguration": [
150 | {
151 | "classification": "spark-defaults",
152 | "properties": {
153 | "spark.hadoop.hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
154 | "spark.hadoop.hive.metastore.glue.catalogid":"",
155 | }
156 | }
157 | ],
158 | "monitoringConfiguration": {
159 | "cloudWatchMonitoringConfiguration": {
160 | "logGroupName": "/emr-containers/jobs",
161 | "logStreamNamePrefix": "demo"
162 | },
163 | "s3MonitoringConfiguration": {
164 | "logUri": "s3://joblogs"
165 | }
166 | }
167 | }
168 | }
169 | EOF
170 |
171 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-awsglue-crossaccount.json
172 |
173 |
174 |
175 | ```
176 |
177 | **Configuration of interest**
178 | To specify the account ID where the AWS Glue catalog is defined, reference the following:
179 |
180 | [Spark-Glue integration](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-glue.html)
181 |
182 | ```
183 | "spark.hadoop.hive.metastore.glue.catalogid":"",
184 | ```
185 |
186 | Output from driver logs - displays the number of rows.
187 |
188 | ```
189 | +----------+
190 | | count(1)|
191 | +----------+
192 | |2716504499|
193 | +----------+
194 | ```
195 |
196 | #### **Sync Hudi table with AWS Glue catalog**
197 | In this example, a Spark application will be configured to use [AWS Glue data catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html) as the hive metastore.
198 |
199 | Starting with Hudi 0.9.0, we can synchronize a Hudi table's latest schema to the Glue catalog via the Hive Metastore Service (HMS) in hive sync mode. This example runs a Hudi ETL job with EMR on EKS and interacts with the AWS Glue metastore to create a Hudi table, giving you native and serverless capabilities to manage your technical metadata. You can also query Hudi tables in Athena straight away after the ETL job, which gives your end users easy data access and shortens the time to insight.
200 |
201 | **HudiEMRonEKS.py**
202 |
203 | ```
204 | cat > HudiEMRonEKS.py << EOF
--------------------------------------------------------------------------------
/content/node-placement/docs/fargate-node-placement.md:
--------------------------------------------------------------------------------
11 | Please note that you will need to replace the values in `<>` (including <>) with your own values. You're required to specify a namespace. The `--labels` option is not required to create your Fargate profile, but will be required if you want to only run Spark executors on Fargate.
12 |
13 | ```
14 | eksctl create fargateprofile \
15 | --cluster <cluster-name> \
16 | --name <fargate-profile-name> \
17 | --namespace <namespace> \
18 | --labels spark-node-placement=fargate
19 | ```
20 |
21 | ### 1- Place entire job including driver pod on Fargate
22 |
23 | When both Driver and Executors use the same labels as the Fargate Selector, the entire job including the driver pod will run on Fargate.
24 |
25 | **Request:**
26 | ```
27 | cat >spark-python-in-s3-nodeselector.json << EOF
28 | {
29 | "name": "spark-python-in-s3-fargate-nodeselector",
30 | "virtualClusterId": "",
31 | "executionRoleArn": "",
32 | "releaseLabel": "emr-6.3.0-latest",
33 | "jobDriver": {
34 | "sparkSubmitJobDriver": {
35 | "entryPoint": "s3:///trip-count.py",
36 | "sparkSubmitParameters": "--conf spark.driver.cores=4 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=4"
37 | }
38 | },
39 | "configurationOverrides": {
40 | "applicationConfiguration": [
41 | {
42 | "classification": "spark-defaults",
43 | "properties": {
44 | "spark.kubernetes.driver.label.spark-node-placement": "fargate",
45 | "spark.kubernetes.executor.label.spark-node-placement": "fargate"
46 | }
47 | }
48 | ],
49 | "monitoringConfiguration": {
50 | "cloudWatchMonitoringConfiguration": {
51 | "logGroupName": "/emr-containers/jobs",
52 | "logStreamNamePrefix": "demo"
53 | },
54 | "s3MonitoringConfiguration": {
55 | "logUri": "s3://joblogs"
56 | }
57 | }
58 | }
59 | }
60 | EOF
61 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json
62 | ```
63 |
64 | **Observed Behavior:**
65 | When the job starts, the driver pod and executor pods are scheduled only on Fargate, since both carry the `spark-node-placement: fargate` label. This is useful when you want to run the entire job on Fargate nodes. The maximum vCPU available to the driver pod is 4 vCPU.
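
To confirm the placement, you can list the pods in the job's namespace; pods scheduled by the Fargate profile run on nodes whose names start with `fargate-ip-`. A quick check (the namespace placeholder is yours to fill in):

```
# The NODE column shows fargate-ip-* for pods scheduled on Fargate
kubectl get pods -n <namespace> -o wide
```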
66 |
67 | ### 2- Place driver pod on EC2 and executor pod on Fargate
68 | Remove the label from the driver pod to schedule the driver pod on EC2 instances. This is especially helpful when the driver pod needs more resources (i.e., more than 4 vCPUs).
69 |
70 | **Request:**
71 | ```
72 | cat >spark-python-in-s3-nodeselector.json << EOF
73 | {
74 | "name": "spark-python-in-s3-fargate-nodeselector",
75 | "virtualClusterId": "",
76 | "executionRoleArn": "",
77 | "releaseLabel": "emr-6.3.0-latest",
78 | "jobDriver": {
79 | "sparkSubmitJobDriver": {
80 | "entryPoint": "s3:///trip-count.py",
81 | "sparkSubmitParameters": "--conf spark.driver.cores=6 --conf spark.executor.memory=20G --conf spark.driver.memory=30G --conf spark.executor.cores=4"
82 | }
83 | },
84 | "configurationOverrides": {
85 | "applicationConfiguration": [
86 | {
87 | "classification": "spark-defaults",
88 | "properties": {
89 | "spark.kubernetes.executor.label.spark-node-placement": "fargate"
90 | }
91 | }
92 | ],
93 | "monitoringConfiguration": {
94 | "cloudWatchMonitoringConfiguration": {
95 | "logGroupName": "/emr-containers/jobs",
96 | "logStreamNamePrefix": "demo"
97 | },
98 | "s3MonitoringConfiguration": {
99 | "logUri": "s3://joblogs"
100 | }
101 | }
102 | }
103 | }
104 | EOF
105 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json
106 | ```
107 |
108 | **Observed Behavior:**
109 | When the job starts, the driver pod is scheduled on an EC2 instance. EKS picks an instance from the first node group that has matching resources available for the driver pod.
110 |
111 | ### 3- Define a NodeSelector in Pod Templates
112 | Beginning with Amazon EMR versions 5.33.0 and 6.3.0, Amazon EMR on EKS supports Spark's pod template feature. Pod templates are specifications that determine how to run each pod. You can use pod template files to define driver or executor pod configurations that Spark configurations do not support. For example, Spark configurations do not support defining individual node selectors for the driver pod and the executor pods. Define a node selector **only** for the driver pod when you want to choose the pool of EC2 instances on which it should be scheduled, and let the Fargate profile schedule the executor pods.
113 |
114 | **Driver Pod Template**
115 |
116 | ```
117 | apiVersion: v1
118 | kind: Pod
119 | spec:
120 | volumes:
121 | - name: source-data-volume
122 | emptyDir: {}
123 | - name: metrics-files-volume
124 | emptyDir: {}
125 | nodeSelector:
126 | <node-label-key>: <node-label-value>
127 | containers:
128 | - name: spark-kubernetes-driver # This will be interpreted as Spark driver container
129 | ```
130 |
131 | Store the pod template file in an S3 location:
132 |
133 | ``` aws s3 cp <local path>/driver-pod-template.yaml s3://<s3 bucket>/driver-pod-template.yaml```
134 |
135 |
136 | **Request**
137 |
138 | ```
139 | cat >spark-python-in-s3-nodeselector.json << EOF
140 | {
141 | "name": "spark-python-in-s3-fargate-nodeselector",
142 | "virtualClusterId": "",
143 | "executionRoleArn": "",
144 | "releaseLabel": "emr-6.3.0-latest",
145 | "jobDriver": {
146 | "sparkSubmitJobDriver": {
147 | "entryPoint": "s3:///trip-count.py",
148 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=30G --conf spark.executor.cores=4"
149 | }
150 | },
151 | "configurationOverrides": {
152 | "applicationConfiguration": [
153 | {
154 | "classification": "spark-defaults",
155 | "properties": {
156 | "spark.kubernetes.executor.label.spark-node-placement": "fargate",
157 | "spark.kubernetes.driver.podTemplateFile": "s3:///driver-pod-template.yaml"
158 | }
159 | }
160 | ],
161 | "monitoringConfiguration": {
162 | "cloudWatchMonitoringConfiguration": {
163 | "logGroupName": "/emr-containers/jobs",
164 | "logStreamNamePrefix": "demo"
165 | },
166 | "s3MonitoringConfiguration": {
167 | "logUri": "s3://joblogs"
168 | }
169 | }
170 | }
171 | }
172 | EOF
173 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json
174 | ```
175 |
176 | **Observed Behavior:**
177 | The driver pod is scheduled on an EC2 instance that has enough capacity and a label key/value matching the node selector.
178 |
--------------------------------------------------------------------------------
/content/node-placement/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/node-placement/docs/index.md
--------------------------------------------------------------------------------
/content/outposts/emr-containers-on-outposts.md:
--------------------------------------------------------------------------------
1 | # Running EMR Containers on AWS Outposts
2 | ## Background
3 | You can now run Amazon EMR container jobs on EKS clusters that are running on AWS Outposts. AWS Outposts enables native AWS services, infrastructure, and operating models in on-premises facilities. In AWS Outposts environments, you can use the same AWS APIs, tools, and infrastructure that you use in the AWS Cloud. Amazon EKS nodes on AWS Outposts are ideal for low-latency workloads that need to run in close proximity to on-premises data and applications. For more information, see the Amazon EKS on Outposts [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/eks-on-outposts.html).
4 |
5 | This document provides the steps to set up EMR containers on AWS Outposts.
6 |
7 | ![Outposts EKS network setup](resources/outposts_eks_network.png)
8 |
9 | ## Key Considerations and Recommendations
10 | * The EKS cluster on an Outpost must be created with self-managed node groups.
11 | * Use the AWS Management Console and AWS CloudFormation to create a self-managed node group in Outposts.
12 | * For EMR workloads, we recommend creating EKS clusters where all the worker nodes reside in the self-managed node group of Outposts.
13 | * The Kubernetes client in the Spark driver pod creates and monitors executor pods by communicating with the EKS-managed Kubernetes API server residing in the parent AWS Region. For reliable monitoring of executor pods during a job run, we recommend a reliable, low-latency link between the Outpost and the parent Region.
14 | * AWS Fargate is not available on Outposts.
15 | * For more information about the supported Regions, prerequisites and considerations for Amazon EKS on AWS Outposts, see the EKS on Outposts [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/eks-on-outposts.html).
16 |
17 |
18 | ## Infrastructure Setup
19 | ### Setup EKS on Outposts
20 | **Network Setup**
21 |
22 |
23 | * Setup a VPC
24 | ```
25 | aws ec2 create-vpc \
26 | --region '<region>' \
27 | --cidr-block '<10.0.0.0/16>'
28 | ```
29 | In the output, take note of the VPC ID.
30 | ```
31 | {
32 | "Vpc": {
33 | "VpcId": "vpc-123vpc",
34 | ...
35 | }
36 | }
37 | ```
38 |
39 |
40 | * Create two subnets in the parent Region.
41 | ```
42 | aws ec2 create-subnet \
43 | --region '' \
44 | --availability-zone-id '' \
45 | --vpc-id '' \
46 | --cidr-block '<10.0.1.0/24>'
47 |
48 | aws ec2 create-subnet \
49 | --region '' \
50 | --availability-zone-id '' \
51 | --vpc-id '' \
52 | --cidr-block '<10.0.2.0/24>'
53 | ```
54 | In the output, take note of the Subnet ID.
55 | ```
56 | {
57 | "Subnet": {
58 | "SubnetId": "subnet-111",
59 | ...
60 | }
61 | }
62 | {
63 | "Subnet": {
64 | "SubnetId": "subnet-222",
65 | ...
66 | }
67 | }
68 | ```
69 |
70 |
71 | * Create a subnet in the Outpost Availability Zone. (This step is different for Outposts)
72 | ```
73 | aws ec2 create-subnet \
74 | --region '' \
75 | --availability-zone-id '' \
76 | --outpost-arn 'arn:aws:outposts:<region>:<123456789>:outpost/<outpost-id>' \
77 | --vpc-id '' \
78 | --cidr-block '<10.0.3.0/24>'
79 | ```
80 | In the output, take note of the Subnet ID.
81 | ```
82 | {
83 | "Subnet": {
84 | "SubnetId": "subnet-333outpost",
85 | "OutpostArn": "..."
86 | ...
87 | }
88 | }
89 | ```
90 |
91 |
92 |
93 | **EKS Cluster Creation**
94 |
95 |
96 | * Create an EKS cluster using the three subnet Ids created earlier.
97 | ```
98 | aws eks create-cluster \
99 | --region '' \
100 | --name '' \
101 | --role-arn 'arn:aws:iam::<123456789>:role/<eks-cluster-role>' \
102 | --resources-vpc-config subnetIds='<subnet-111>,<subnet-222>,<subnet-333outpost>'
103 | ```
104 |
105 |
106 | * Check until the cluster status becomes active.
107 | ```
108 | aws eks describe-cluster \
109 | --region '' \
110 | --name ''
111 | ```
112 | Note the values of resourcesVpcConfig.clusterSecurityGroupId and identity.oidc.issuer.
113 | ```
114 | {
115 | "cluster": {
116 | "name": "outposts-eks-cluster",
117 | ...
118 | "resourcesVpcConfig": {
119 | "clusterSecurityGroupId": "sg-123clustersg",
120 | },
121 | "identity": {
122 | "oidc": {
123 | "issuer": "https://oidc.eks.us-west-2.amazonaws.com/id/oidcid"
124 | }
125 | },
126 | "status": "ACTIVE",
127 | }
128 | }
129 | ```
130 |
131 | * Add the Outposts nodes to the EKS Cluster.
132 |
133 | At this point, eksctl cannot be used to launch self-managed node groups on Outposts. Please follow the steps listed in the self-managed nodes [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html#aws-management-console). In order to use the CloudFormation script listed in the AWS Management Console tab, make note of the following values created in the earlier steps:
134 | * ClusterName: `<outposts-eks-cluster>`
135 | * ClusterControlPlaneSecurityGroup: `<sg-123clustersg>`
136 | * Subnets: `<subnet-333outpost>`
137 |
138 | Apply the aws-auth-cm config map listed on the documentation page to allow the nodes to join the cluster.
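
For reference, the aws-auth config map applied in this step generally follows the sketch below; the role ARN placeholder is the node instance role created by the CloudFormation stack and must be replaced with your own value.

```
cat > aws-auth-cm.yaml << EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: aws-auth
  namespace: kube-system
data:
  mapRoles: |
    - rolearn: <ARN of the node instance role created by the CloudFormation stack>
      username: system:node:{{EC2PrivateDNSName}}
      groups:
        - system:bootstrappers
        - system:nodes
EOF

kubectl apply -f aws-auth-cm.yaml
```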
139 |
140 | ### Register cluster with EMR Containers
141 | Once the EKS cluster has been created and the nodes have been registered with the EKS control plane, take the [following steps](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/getting-started.html); a CLI sketch of the first and last steps follows the list:
142 |
143 | * Enable cluster access for Amazon EMR on EKS.
144 | * Enable IAM Roles for Service Accounts (IRSA) on the EKS cluster.
145 | * Create a job execution role.
146 | * Update the trust policy of the job execution role.
147 | * Grant users access to Amazon EMR on EKS.
148 | * Register the Amazon EKS cluster with Amazon EMR.
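
A minimal sketch of the first and last steps above, with cluster, namespace, and virtual cluster names as placeholders:

```
# Enable cluster access for Amazon EMR on EKS in the chosen namespace
eksctl create iamidentitymapping \
    --cluster '<outposts-eks-cluster>' \
    --namespace '<namespace>' \
    --service-name "emr-containers"

# Register the Amazon EKS cluster with Amazon EMR as a virtual cluster
aws emr-containers create-virtual-cluster \
    --name '<virtual-cluster-name>' \
    --container-provider '{
        "id": "<outposts-eks-cluster>",
        "type": "EKS",
        "info": { "eksInfo": { "namespace": "<namespace>" } }
    }'
```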
149 |
150 |
151 | ## Conclusion
152 | EMR on EKS on Outposts allows users to run their big data jobs in close proximity to on-premises data and applications.
153 |
--------------------------------------------------------------------------------
/content/outposts/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/outposts/index.md
--------------------------------------------------------------------------------
/content/outposts/resources/outposts_eks_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/outposts/resources/outposts_eks_network.png
--------------------------------------------------------------------------------
/content/performance/docs/dra.md:
--------------------------------------------------------------------------------
1 | # **Dynamic Resource Allocation**
2 |
3 | DRA is available in Spark 3 (EMR 6.x) without the need for an external shuffle service. Spark on Kubernetes doesn't support an external shuffle service as of Spark 3.1, but DRA can be achieved by enabling [shuffle tracking](https://spark.apache.org/docs/latest/configuration.html#dynamic-allocation).
4 |
5 | **Spark DRA with storage configuration:**
6 |
7 | When using [dynamic provisioning PVC/Volumes](../../storage/docs/spark/ebs.md#dynamic-provisioning) with Spark, you must disable PVC reuse to prevent multi-attach errors. The default configuration attempts to reuse PVCs, which causes EBS volumes to attach to multiple pods and leads to application failure. Set the following configurations:
8 | ```
9 | "spark.kubernetes.driver.ownPersistentVolumeClaim": "false"
10 | "spark.kubernetes.driver.reusePersistentVolumeClaim": "false"
11 | "spark.kubernetes.driver.waitToReusePersistentVolumeClaim": "false"
12 | ```
13 | For workloads requiring PVC reuse with DRA, use storage solutions supporting multi-attach like EFS / FSx for Lustre instead of EBS.
14 |
15 | **Spark DRA without external shuffle service:**
16 | With DRA, the Spark driver spawns the initial number of executors and then scales up to the specified maximum number of executors to process the pending tasks. An idle executor is terminated when there are no pending tasks, the executor idle time exceeds the idle timeout (`spark.dynamicAllocation.executorIdleTimeout`), and it doesn't hold any cached or shuffle data.
17 | 
18 | If the executor idle threshold is reached and the executor holds cached data, it must also exceed the cached-data idle timeout (`spark.dynamicAllocation.cachedExecutorIdleTimeout`); if it holds no shuffle data, the idle executor is then terminated.
19 | 
20 | If the executor idle threshold is reached and the executor holds shuffle data, then without an external shuffle service it will never be terminated; such executors are terminated only when the job completes. This behavior is enforced by `"spark.dynamicAllocation.shuffleTracking.enabled":"true"` and `"spark.dynamicAllocation.enabled":"true"`.
21 | 
22 | If `"spark.dynamicAllocation.shuffleTracking.enabled":"false"` and `"spark.dynamicAllocation.enabled":"true"`, then the Spark application will error out, since an external shuffle service is not available.
23 |
24 | **Request:**
25 |
26 | ```
27 | cat >spark-python-in-s3-dra.json << EOF
28 | {
29 | "name": "spark-python-in-s3-dra",
30 | "virtualClusterId": "",
31 | "executionRoleArn": "",
32 | "releaseLabel": "emr-6.2.0-latest",
33 | "jobDriver": {
34 | "sparkSubmitJobDriver": {
35 | "entryPoint": "s3:///trip-count.py",
36 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6"
37 | }
38 | },
39 | "configurationOverrides": {
40 | "applicationConfiguration": [
41 | {
42 | "classification": "spark-defaults",
43 | "properties": {
44 | "spark.dynamicAllocation.enabled":"true",
45 | "spark.dynamicAllocation.shuffleTracking.enabled":"true",
46 | "spark.dynamicAllocation.minExecutors":"5",
47 | "spark.dynamicAllocation.maxExecutors":"100",
48 | "spark.dynamicAllocation.initialExecutors":"10"
49 | }
50 | }
51 | ],
52 | "monitoringConfiguration": {
53 | "cloudWatchMonitoringConfiguration": {
54 | "logGroupName": "/emr-containers/jobs",
55 | "logStreamNamePrefix": "demo"
56 | },
57 | "s3MonitoringConfiguration": {
58 | "logUri": "s3://joblogs"
59 | }
60 | }
61 | }
62 | }
63 | EOF
64 | ```
65 |
66 | ```
67 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-dra.json
68 | ```
69 |
70 | **Observed Behavior:**
71 | When the job starts, the driver pod is created along with 10 initial executors (`"spark.dynamicAllocation.initialExecutors":"10"`). The number of executors can then scale up to a maximum of 100 (`"spark.dynamicAllocation.maxExecutors":"100"`).
72 | **Configurations to note:**
73 |
74 | `spark.dynamicAllocation.shuffleTracking.enabled` - **Experimental**. Enables shuffle file tracking for executors, which allows dynamic allocation without the need for an external shuffle service. This option will try to keep alive executors that are storing shuffle data for active jobs.
75 |
76 | `spark.dynamicAllocation.shuffleTracking.timeout` - When shuffle tracking is enabled, controls the timeout for executors that are holding shuffle data. The default value means that Spark will rely on the shuffles being garbage collected to be able to release executors. If for some reason garbage collection is not cleaning up shuffles quickly enough, this option can be used to control when to time out executors even when they are storing shuffle data.
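
If shuffle data is not being garbage collected quickly enough to release executors, the timeout can be set explicitly in the same `spark-defaults` classification as the properties above; the 300s value below is only illustrative:

```
"spark.dynamicAllocation.shuffleTracking.timeout": "300s"
```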
77 |
78 |
--------------------------------------------------------------------------------
/content/performance/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/index.md
--------------------------------------------------------------------------------
/content/performance/docs/resources/images/after-binpack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/after-binpack.png
--------------------------------------------------------------------------------
/content/performance/docs/resources/images/before-binpack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/before-binpack.png
--------------------------------------------------------------------------------
/content/performance/docs/resources/images/binpack.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/binpack.gif
--------------------------------------------------------------------------------
/content/performance/docs/resources/images/nonbinpack.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/nonbinpack.gif
--------------------------------------------------------------------------------
/content/scalability/docs/graphana-dashboard.md:
--------------------------------------------------------------------------------
1 | # Grafana Dashboards
2 |
3 | * [Spark Operator Dashboard Template](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/spark-operator-dashbord.json)
4 |
5 | ![Spark Operator Dashboard](resources/images/spark-operator-dashboard.png)
6 |
7 | * [EKS Control Plane & Etcd DB monitoring](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/eks-control-plane.json)
8 |
9 | ![EKS Control Plane Dashboard](resources/images/eks-control-plane.png)
10 |
11 | * [CNI usage dashboard](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/aws-cni-metrics.json)
12 | 
13 | ![AWS CNI Metrics Dashboard](resources/images/aws-cni-metrics.png)
14 |
15 | * [EMR on EKS Job dashboard](https://github.com/awslabs/data-on-eks/tree/main/analytics/terraform/emr-eks-karpenter/emr-grafana-dashboard)
16 | 
17 | ![EMR on EKS Job Dashboard](resources/images/emr-on-eks-job-dashboard.png)
18 |
--------------------------------------------------------------------------------
/content/scalability/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/index.md
--------------------------------------------------------------------------------
/content/scalability/docs/known-factors-spark-operator.md:
--------------------------------------------------------------------------------
1 |
2 | # Known factors that impact EMR on EKS Spark Operator submission rate
3 |
4 | |Category |Impact |
5 | |--- |--- |
6 | |Spark Operator Numbers |A single Spark Operator can sustain a submission rate of roughly 30 jobs per minute, and tuning a single operator yields limited gains in the current version. To handle a large volume of workload, the best solution is to scale horizontally with multiple Spark Operators. The operators do not impact each other on the EKS cluster side, but a higher number of operators increases the overhead on the API server/etcd side. |
7 | |Spark Operator's controllerThreads |controllerThreads (also called "workers") controls the number of concurrent threads used to process SparkApplication requests. Increasing this value can improve the Spark Operator's throughput when handling requests (see the sketch after this table). |
8 | |Binpacking |Binpacking efficiently allocates pods to available nodes within a Kubernetes cluster. Its primary goal is to optimize resource utilization by packing pods as tightly as possible onto nodes while still meeting resource requirements and constraints. This approach maximizes cluster efficiency, reduces costs, and improves overall performance by minimizing the number of active nodes required to run the workload. With binpacking enabled, the workload minimizes the resources spent on network traffic between physical nodes, as most pods are allocated to a single node at launch time. In addition, we use Karpenter's consolidation feature to maximize pod density when a node's utilization starts to drop. |
9 | |Spark Operator timeToLiveSeconds |TimeToLiveSeconds defines the Time-To-Live (TTL) duration in seconds for this SparkApplication after its termination. The SparkApplication object will be garbage collected if the current time is more than the TimeToLiveSeconds since its termination. |
10 | |Spark Job Run Time |Experimental observations indicate that Spark Operator performs better with longer-running jobs compared to shorter ones. This is likely due to the Operator's internal workqueue mechanism for managing job submissions and completions. The Spark Operator uses watchers to monitor SparkApplication status changes in the EKS cluster. Each status change triggers a task in the Operator's workqueue. Consequently, shorter jobs cause more frequent status changes, resulting in higher workqueue activity. This design suggests that a higher frequency of short-running jobs may impose greater processing overhead on the Spark Operator compared to fewer, longer-running jobs, even with equivalent total computation time. Understanding this behavior is important for optimizing Spark job scheduling in environments with varying job durations. |
11 | |Number of executors in EKS Spark Operator |With a higher number of executors per EMR on EKS job (10 vs 2), the number of objects created on the EKS cluster (pods, config maps, events, etc.) grows. This becomes a limiting factor for querying the etcd database, eventually causing EKS cluster performance degradation. |
12 | |Spark Job Config - InitContainers |initContainers have a big impact on both the Spark Operator and the API server/etcd side: with this setting enabled, jobs create more events than jobs without it. Jobs that need this setup can be spread across more Spark Operators, but etcd can still become a bottleneck when the workload is large. |
13 | |EKS Spark Operator submission Rate |The EMR on EKS job submission rate dictates the load placed on the API server. A larger submission rate creates more k8s objects in the etcd db, increasing etcd db size, etcd and API server request latency, and lowering EMR job submission throughput. |
14 | |EMR Image pull policy |The default image pull policy for job submitter, driver, and executor pods is set to Always, which adds latency to pod creation. Unless specifically required for your use case, set the image pull policy to `IfNotPresent` to shorten pod creation times. |
15 | |EKS K8s control plane scaling |EKS autoscales the K8s control plane, including API server and etcd db instances, for a customer's EKS cluster based on resource consumption. To successfully run larger numbers of concurrent EMR on EKS jobs, the API server needs to scale up to handle the extra load. However, if factors such as webhook latency inhibit the metrics needed by the EKS API server autoscaler, the control plane may not scale up properly. This impacts the health of the API server and etcd db and leads to lower throughput of successfully completed jobs. |
16 | |EKS etcd db size |As you submit more concurrently running EMR on EKS jobs, the number of k8s objects stored in the etcd db grows and in turn increases the etcd db size. Increased etcd db size causes latency in API server requests that require cluster-wide or namespace-wide etcd reads and reduces EMR job submission throughput. The upper bound on etcd db size is 8GB as specified by EKS, and reaching this capacity can put the EKS cluster into read-only mode. Customers should monitor their etcd db size and keep it within limits; we recommend keeping it below 7GB. In addition, because the Spark Operator does not store metadata for all running jobs, an unhealthy or crashed etcd/API server can cause job failures or loss of running state in the Spark Operator. |
17 | |EKS VPC Subnets IP pool |Available IP addresses in the VPC subnets configured for the EKS cluster also impact EMR on EKS job throughput. Each pod needs an IP address, so it is essential to have a large enough IP address pool available in the VPC subnets of the EKS cluster to achieve higher pod concurrency. Exhaustion of IP addresses causes new pod creation requests to fail. |
18 | |EKS Cluster version |EKS has made improvements to cluster versions higher than 1.28 resulting in higher job throughput for EMR on EKS jobs. These recommendations are based on using EKS cluster version 1.30. |
19 | |Pod template size|Having high pod template sizes, for example from a high number of sidecar containers, init containers, or numerous environment variables, results in increased usage of the etcd database. This increased usage can potentially limit the number of pods that can be stored in etcd and may impact the cluster's overall performance, including the rate at which EMR jobs can be submitted.|
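
As referenced in the controllerThreads row above, the sketch below raises the worker count when installing an additional operator instance with Helm. The release name, namespace, and chart reference are placeholders, and the exact value key (`controllerThreads` vs. `controller.workers`) depends on the chart version you use, so treat this as a hedged example rather than a definitive command:

```
# Illustrative only: one of several horizontally-scaled operator instances
helm upgrade --install spark-operator-1 <spark-operator-chart> \
    --namespace spark-operator-1 \
    --create-namespace \
    --set controllerThreads=30
```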
20 |
--------------------------------------------------------------------------------
/content/scalability/docs/known-factors-start-job-run.md:
--------------------------------------------------------------------------------
1 | # Known factors that impact EMR on EKS submission rate for StartJobRun
2 |
3 | |Category |Impact |
4 | |--- |--- |
5 | |Mutating Webhook Latency |Increased webhook latency leads to an increase in K8s API server latency (for example, a delay in pod creation if the webhook is set up for pod creation). Pod creation latency is in turn propagated to the K8s job controller, whose workers now experience delays in creating jobs, leading to a growing job worker queue depth. A larger queue depth leads to lower throughput in the number of concurrent EMR on EKS jobs. |
6 | |EMR on EKS Job driver retries |Driver retries create an extra K8s Job object, which essentially doubles the number of K8s Job objects in the etcd database. This places increased strain on the etcd database, makes its size grow faster, and hence increases etcd request latency. This in turn results in lower throughput in the number of concurrent EMR on EKS jobs (see the retry policy sketch after this table). |
7 | |EMR on EKS Job Start Timeout Setting |When the K8s job controller work queue depth is large, there can be a delay before the actual Spark driver pod is created. Meanwhile, the EMR on EKS control plane by default expects the job driver pod to be created within 15 minutes; if the driver is not created within that timeout period, the control plane marks the job as failed preemptively. Higher timeout values give the job more time to be scheduled and begin running. |
8 | |EMR on EKS Job run time |A longer EMR on EKS job run time means that we will essentially have more concurrent active jobs in the EKS cluster. If we keep a consistent job submission rate for long running EMR EKS jobs as compared to a job with a shorter duration we will end up with a larger amount of active concurrent jobs in the EKS cluster. This can lead to more objects in etcd db, increasing etcd db size, increased etcd request latency, and lower EMR job submission throughput. |
9 | |Number of executors in EMR on EKS Job |As we define a higher number of executors per EMR on EKS job, the number of objects created on the EKS cluster (pods, config maps, events, etc.) grows. This becomes a limiting factor for querying the etcd database, eventually causing EKS cluster performance degradation. |
10 | |EMR on EKS Job submission Rate |The EMR on EKS job submission rate dictates the load placed on the API server. A larger submission rate creates more k8s objects in the etcd db, increasing etcd db size, etcd and API server request latency, and lowering EMR job submission throughput. |
11 | |EMR Image pull policy |The default image pull policy for job submitter, driver, and executor pods is set to Always, which adds latency to pod creation. Unless specifically required for your use case, set the image pull policy to `IfNotPresent` to shorten pod creation times. |
12 | |EMR Job type |Job concurrency values are different for batch and streaming job types. Streaming jobs usually consume less resources resulting in higher job concurrency values compared to batch jobs. |
13 | |EKS K8s control plane scaling |EKS autoscales the K8s control plane, including API server and etcd db instances, for a customer's EKS cluster based on resource consumption. To successfully run larger numbers of concurrent EMR on EKS jobs, the API server needs to scale up to handle the extra load. However, if factors such as webhook latency inhibit the metrics needed by the EKS API server autoscaler, the control plane may not scale up properly. This impacts the health of the API server and etcd db and leads to lower throughput of successfully completed jobs. |
14 | |EKS etcd db size |As you submit more concurrently running EMR on EKS jobs, the number of k8s objects stored in the etcd db grows and in turn increases the etcd db size. Increased etcd db size causes latency in API server requests that require cluster-wide or namespace-wide etcd reads and reduces EMR job submission throughput. The upper bound on etcd db size is 8GB as specified by EKS, and reaching this capacity can put the EKS cluster into read-only mode. Customers should monitor their etcd db size and keep it within limits; we recommend keeping it below 7GB. |
15 | |EKS VPC Subnets IP pool |Available IP addresses in the VPC subnets configured for the EKS cluster also impact EMR on EKS job throughput. Each pod needs an IP address, so it is essential to have a large enough IP address pool available in the VPC subnets of the EKS cluster to achieve higher pod concurrency. Exhaustion of IP addresses causes new pod creation requests to fail. |
16 | |EKS Cluster version |EKS has made improvements to cluster versions higher than 1.28 resulting in higher job throughput for EMR on EKS jobs. These recommendations are based on using EKS cluster version 1.30. |
17 | |Pod template size|Having high pod template sizes, for example from a high number of sidecar containers, init containers, or numerous environment variables, results in increased usage of the etcd database. This increased usage can potentially limit the number of pods that can be stored in etcd and may impact the cluster's overall performance, including the rate at which EMR jobs can be submitted.|
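
As referenced in the driver retries row above, driver retries are controlled per job through the StartJobRun request. A hedged sketch, assuming the `retryPolicyConfiguration` field available in recent EMR on EKS releases; keeping `maxAttempts` low limits the number of extra K8s Job objects written to etcd:

```
"retryPolicyConfiguration": {
    "maxAttempts": 1
}
```

This fragment is added to the StartJobRun request JSON alongside the usual fields (name, virtualClusterId, executionRoleArn, releaseLabel, and jobDriver).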
18 |
--------------------------------------------------------------------------------
/content/scalability/docs/resources/images/EMR_Spark_Operator_Benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/EMR_Spark_Operator_Benchmark.png
--------------------------------------------------------------------------------
/content/scalability/docs/resources/images/aws-cni-metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/aws-cni-metrics.png
--------------------------------------------------------------------------------
/content/scalability/docs/resources/images/eks-control-plane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/eks-control-plane.png
--------------------------------------------------------------------------------
/content/scalability/docs/resources/images/emr-on-eks-job-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/emr-on-eks-job-dashboard.png
--------------------------------------------------------------------------------
/content/scalability/docs/resources/images/spark-operator-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/spark-operator-dashboard.png
--------------------------------------------------------------------------------
/content/scalability/docs/scalaiblity-glossary.md:
--------------------------------------------------------------------------------
1 | # EMR on EKS Glossary & Terms
2 |
3 | * **EMR on EKS Job:** The Spark Job being submitted and executed by the EMR on EKS Control plane
4 | * **EMR on EKS Job types:** Type of Spark job being submitted. It can be either a batch job (having a fixed job duration) or a streaming job (a continuously running job).
5 | * **Kubernetes (K8s) control plane:** A K8s cluster consists of a control plane and one or more worker nodes. The [control plane](https://kubernetes.io/docs/concepts/overview/components/) is responsible for managing the overall state of the cluster and includes components such as the API server, etcd database, scheduler, and controller manager.
6 | * **K8s API request:** The [K8s API](https://kubernetes.io/docs/reference/using-api/api-concepts/) is a resource-based (RESTful) programmatic interface provided via HTTP. It supports retrieving, creating, updating, and deleting resources in K8s cluster via the standard HTTP verbs (POST, PUT, PATCH, DELETE, GET).
7 | * **K8s pod:** [Pods](https://kubernetes.io/docs/concepts/workloads/pods/) are the smallest deployable units of computing that you can create and manage in Kubernetes.
8 | * **K8s event:** [Event](https://kubernetes.io/docs/reference/kubernetes-api/cluster-resources/event-v1/) is a report of an event somewhere in the K8s cluster. It generally denotes some state change in the system.
9 | * **K8s config map:** A [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) is an API object used to store non-confidential data in key-value pairs. [Pods](https://kubernetes.io/docs/concepts/workloads/pods/) can consume ConfigMaps as environment variables, command-line arguments, or as configuration files in a [volume](https://kubernetes.io/docs/concepts/storage/volumes/).
10 | * **K8s API Server:** The [API server](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/) is an internal K8s component responsible for serving and processing K8s API requests. EKS hosts this K8s control plane component on EKS-owned infrastructure that is separate from the customer’s EKS cluster.
11 | * **K8s Etcd database:** [Etcd](https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/) is K8s internal database that stores information about K8s objects such as pods, events, config maps etc. EKS hosts this K8s control plane component on EKS owned infrastructure.
12 | * **K8s Job**: A [K8s Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/) object creates and monitors a pod until it completes successfully, with a retry policy that helps ensure completion. This is different from the EMR on EKS job concept: an EMR on EKS job usually submits one or more K8s Jobs in the K8s cluster.
13 | * **K8s Job Controller**: The [K8s native controller](https://kubernetes.io/docs/concepts/architecture/controller/) is a component that interacts with the Kubernetes API server to create pods, update job status according to pod status, and create events. The job controller monitors and updates K8s Job objects.
14 | * **K8s Job Controller Work Queue (Depth)**: The backlog of K8s job object events accumulated, that need to be processed by job controller.
15 | * **EMR Spark Operator:** A job submission model for Amazon EMR on EKS with which users can deploy and manage Spark applications using the Amazon EMR release runtime on Amazon EKS clusters.
16 | * **Job types:** Type of Spark job being submitted. It can be either a batch job (having a fixed job duration) or a streaming job (a continuously running job).
17 | * **Spark Operator Workqueue:** The central component in the Spark Operator's control loop, managing the flow of SparkApplication resources that need to be processed, ensuring efficient, ordered, and reliable handling of these resources.
--------------------------------------------------------------------------------
/content/security/docs/index.md:
--------------------------------------------------------------------------------
1 | ****
--------------------------------------------------------------------------------
/content/security/docs/resources/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG EMR_VERSION=emr-6.15.0
2 | FROM public.ecr.aws/emr-on-eks/spark/${EMR_VERSION}
3 | USER root
4 |
5 | RUN mkdir -p /usr/lib/poc
6 | COPY custom-entrypoint.sh /usr/lib/poc/entrypoint.sh
7 | RUN chown -R hadoop:hadoop /usr/lib/poc
8 | RUN chmod -R a+x /usr/lib/poc
9 |
10 | USER hadoop:hadoop
11 | ENTRYPOINT ["/usr/lib/poc/entrypoint.sh"]
--------------------------------------------------------------------------------
/content/security/docs/resources/S3ListObjects_v1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/S3ListObjects_v1.jar
--------------------------------------------------------------------------------
/content/security/docs/resources/client-role-2-policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Action": [
7 | "s3:PutObject",
8 | "s3:GetObject"
9 | ],
10 | "Resource": [
11 | "arn:aws:s3:::datalake-${ACCOUNTB}-${REGION}/*"
12 | ]
13 | },
14 | {
15 | "Effect": "Allow",
16 | "Action": [
17 | "s3:ListBucket"
18 | ],
19 | "Resource": [
20 | "arn:aws:s3:::datalake-${ACCOUNTB}-${REGION}"
21 | ]
22 | },
23 | {
24 | "Effect": "Allow",
25 | "Action": "sqs:*",
26 | "Resource": "*"
27 | }
28 | ]
29 | }
--------------------------------------------------------------------------------
/content/security/docs/resources/client-role-2-trust-policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Principal": {
7 | "AWS": "arn:aws:iam::${ACCOUNTA}:role/job-execution-role-1"
8 | },
9 | "Action": "sts:AssumeRole"
10 | }
11 | ]
12 | }
--------------------------------------------------------------------------------
/content/security/docs/resources/custom-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -ex
4 |
5 | # Set up the chained role for the AWS boto3 SDK
6 | generate_aws_config() {
7 | # Create a .aws directory in the user's home directory
8 | mkdir -p $HOME/.aws
9 |
10 | # Generate the config file from environment variables
11 | cat > $HOME/.aws/config << EOF
12 | [default]
13 | region=${REGION}
14 | role_arn=${ROLE_2_ARN}
15 | role_session_name=client_role
16 | source_profile=irsa-role
17 |
18 | [profile irsa-role]
19 | region=${REGION}
20 | web_identity_token_file=/var/run/secrets/eks.amazonaws.com/serviceaccount/token
21 | role_arn=${ROLE_1_ARN}
22 | EOF
23 |
24 | # Set proper permissions
25 | chmod 600 $HOME/.aws/config
26 | }
27 |
28 | # Function to generate credentials
29 | generate_aws_credentials() {
30 | echo "Generating AWS credentials at $(date)"
31 |
32 | # Get credentials using web identity token
33 | credentials=$(aws sts assume-role-with-web-identity \
34 | --role-arn ${ROLE_1_ARN} \
35 | --role-session-name webidentity-session \
36 | --web-identity-token "$(cat /var/run/secrets/eks.amazonaws.com/serviceaccount/token)" \
37 | --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken,Expiration]' \
38 | --output text)
39 |
40 | # Create .aws directory
41 | mkdir -p $HOME/.aws
42 |
43 | # Generate the credentials file
44 | cat > $HOME/.aws/credentials << EOF
45 | [default]
46 | source_profile=irsa-role
47 | role_arn=${ROLE_2_ARN}
48 | role_session_name=client_role
49 |
50 | [irsa-role]
51 | aws_access_key_id=$(echo $credentials | awk '{print $1}')
52 | aws_secret_access_key=$(echo $credentials | awk '{print $2}')
53 | aws_session_token=$(echo $credentials | awk '{print $3}')
54 | EOF
55 |
56 | chmod 600 $HOME/.aws/credentials
57 |
58 | # Extract expiration time for next refresh
59 | expiration=$(echo $credentials | awk '{print $4}')
60 | echo "Credentials will expire at: $expiration"
61 | }
62 |
63 | # Function to start credential refresh daemon
64 | start_credential_refresh_daemon() {
65 | while true; do
66 | generate_aws_credentials
67 |
68 | # Sleep for 80% of the default 1-hour credential duration (refresh the token every 48 minutes)
69 | sleep 2880
70 | # # test 10mins for testing
71 | # sleep 600
72 |
73 | # Check if the token file still exists and is readable
74 | if [ ! -r "/var/run/secrets/eks.amazonaws.com/serviceaccount/token" ]; then
75 | echo "Token file not accessible. Stopping refresh daemon."
76 | exit 1
77 | fi
78 | done
79 | }
80 |
81 | generate_aws_config
82 | # NOTE: the IRSA env variable "AWS_ROLE_ARN" must be reset
83 | # To trigger the access deny 403 while evaluating WebIdentity credential
84 | # As a result of the RESET, it forces SDK applications to use the next profile provider in the AWS DefaultCredentialChain
85 | export AWS_ROLE_ARN=$ROLE_2_ARN
86 | export AWS_WEB_IDENTITY_TOKEN_FILE=/var/run/secrets/eks.amazonaws.com/serviceaccount/token
87 |
88 | # Start the refresh daemon in the background
89 | start_credential_refresh_daemon &
90 | DAEMON_PID=$!
91 | echo $DAEMON_PID > /tmp/credential-refresh.pid
92 | # Set up trap to clean up the daemon on script exit
93 | trap "kill $DAEMON_PID 2>/dev/null" EXIT
94 |
95 | /usr/bin/entrypoint.sh "$@"
--------------------------------------------------------------------------------
/content/security/docs/resources/driver-pod-template.yaml:
--------------------------------------------------------------------------------
1 | # // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # // SPDX-License-Identifier: MIT-0
3 | apiVersion: v1
4 | kind: Pod
5 | spec:
6 | containers:
7 | - name: spark-kubernetes-driver
8 | env:
9 | - name: AWS_ROLE_ARN
10 | value: "arn:aws:iam::ACCOUNTB:role/emr-on-eks-client-a-role"
11 | - name: AWS_WEB_IDENTITY_TOKEN_FILE
12 | value: "/var/run/secrets/eks.amazonaws.com/serviceaccount/token"
--------------------------------------------------------------------------------
/content/security/docs/resources/executor-pod-template.yaml:
--------------------------------------------------------------------------------
1 | # // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # // SPDX-License-Identifier: MIT-0
3 | apiVersion: v1
4 | kind: Pod
5 | spec:
6 | containers:
7 | - name: spark-kubernetes-executor
8 | env:
9 | - name: AWS_ROLE_ARN
10 | value: "arn:aws:iam::ACCOUNTB:role/emr-on-eks-client-a-role"
11 | - name: AWS_WEB_IDENTITY_TOKEN_FILE
12 | value: "/var/run/secrets/eks.amazonaws.com/serviceaccount/token"
--------------------------------------------------------------------------------
/content/security/docs/resources/images/emr-on-eks-fargate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-fargate.png
--------------------------------------------------------------------------------
/content/security/docs/resources/images/emr-on-eks-network-communication.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-network-communication.png
--------------------------------------------------------------------------------
/content/security/docs/resources/images/emr-on-eks-self-and-managed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-self-and-managed.png
--------------------------------------------------------------------------------
/content/security/docs/resources/images/role-chain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/role-chain.png
--------------------------------------------------------------------------------
/content/security/docs/resources/images/shared-responsibility-model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/shared-responsibility-model.png
--------------------------------------------------------------------------------
/content/security/docs/resources/job-exec-role-1-policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Action": "sts:AssumeRole",
7 | "Resource": "arn:aws:iam::${ACCOUNTB}:role/client-role-2"
8 | },
9 | {
10 | "Effect": "Allow",
11 | "Action": [
12 | "s3:PutObject",
13 | "s3:DeleteObject",
14 | "s3:GetObject"
15 | ],
16 | "Resource": [
17 | "arn:aws:s3:::emr-on-eks-test-${ACCOUNTA}-$REGION}/*"
18 | ]
19 | },
20 | {
21 | "Effect": "Allow",
22 | "Action": "s3:ListBucket",
23 | "Resource": [
24 | "arn:aws:s3:::emr-on-eks-test-${ACCOUNTA}-${REGION}"
25 | ]
26 | },
27 | {
28 | "Effect": "Allow",
29 | "Action": [
30 | "logs:PutLogEvents",
31 | "logs:CreateLogStream",
32 | "logs:DescribeLogGroups",
33 | "logs:DescribeLogStreams",
34 | "logs:CreateLogGroup"
35 | ],
36 | "Resource": [
37 | "arn:aws:logs:*:*:*"
38 | ]
39 | }
40 | ]
41 | }
--------------------------------------------------------------------------------
/content/security/docs/resources/job-exec-role-1-trust-policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Principal": {
7 | "Service": "eks.amazonaws.com"
8 | },
9 | "Action": "sts:AssumeRole"
10 | },
11 | {
12 | "Effect": "Allow",
13 | "Principal": {
14 | "Service": "pods.eks.amazonaws.com"
15 | },
16 | "Action": [
17 | "sts:AssumeRole",
18 | "sts:TagSession"
19 | ]
20 | },
21 | {
22 | "Effect": "Allow",
23 | "Principal": {
24 | "Federated": "arn:aws:iam::${ACCOUNTA}:oidc-provider/oidc.eks.us-east-1.amazonaws.com/id/YOUR_OIDC_ID"
25 | },
26 | "Action": "sts:AssumeRoleWithWebIdentity",
27 | "Condition": {
28 | "StringLike": {
29 | "oidc.eks.us-east-1.amazonaws.com/id/YOUR_OIDC_ID:sub": "system:serviceaccount:emr:emr-containers*"
30 | }
31 | }
32 | }
33 | ]
34 | }
--------------------------------------------------------------------------------
/content/security/docs/resources/mix-spark-boto3.py:
--------------------------------------------------------------------------------
1 | from pyspark.sql import SparkSession
2 | from pyspark.sql.functions import to_json, struct
3 | import boto3,sys
4 | from botocore.exceptions import ClientError
5 |
6 | def main():
7 | print("=== Starting Spark Session ===")
8 | S3_FILE=sys.argv[1]
9 | SQS_URL=sys.argv[2]
10 |
11 | spark = SparkSession.builder.appName("irsa-poc").getOrCreate()
12 |
13 | # 1. Read data from S3
14 | try:
15 | df = spark.read.parquet(S3_FILE)
16 | except Exception as e:
17 | print(f"Error reading S3 data: {e}")
18 | spark.stop()
19 | return
20 |
21 | # 2. Convert each row to JSON string
22 | print("Converting rows to JSON...")
23 | json_df = df.select(to_json(struct("*")).alias("value"))
24 |
25 | print("=== Sample JSON Output ===")
26 | json_df.show(5, truncate=False)
27 |
28 | # 3. Send to SQS
29 | def send_partition(partition):
30 | print(f"\nInitializing SQS client for partition...")
31 | try:
32 | sqs = boto3.client('sqs', region_name='us-east-1')
33 |
34 | results = []
35 | results.append(f"Caller Identity: {boto3.client('sts').get_caller_identity()}")
36 | for i, row in enumerate(partition, 1):
37 | try:
38 | response=sqs.send_message(
39 | QueueUrl=SQS_URL,
40 | MessageBody=row.value
41 | )
42 | results.append(f"Sent message {i} - MessageId: {response['MessageId']}")
43 |
44 | except ClientError as e:
45 | results.append(f"Failed: {e} | Message: {row.value}")
46 | return results
47 | except Exception as e:
48 | return [f"Partition failed: {str(e)}"]
49 |
50 | print("\n=== Starting boto3 connection ===")
51 | results = json_df.rdd.mapPartitions(send_partition).collect()
52 | for msg in results:
53 | print(msg)
54 | print("\n=== Job Completed ===")
55 |
56 |
57 | if __name__ == "__main__":
58 | print("Script started")
59 | main()
60 | print("Script finished")
--------------------------------------------------------------------------------
/content/security/docs/resources/only-boto3.py:
--------------------------------------------------------------------------------
1 | import boto3,json,sys
2 |
3 | def list_s3_bucket_contents(bucket_name, prefix):
4 | s3 = boto3.client('s3', region_name='us-west-2')
5 | objects = []
6 | try:
7 | response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
8 | if 'Contents' in response:
9 | print(f"Files in bucket '{bucket_name}':")
10 | for obj in response['Contents']:
11 | print(f"- {obj['Key']} (Size: {obj['Size']} bytes)")
12 | objects.append(obj['Key'])
13 | else:
14 | print(f"No objects found with prefix '{prefix}'")
15 | except Exception as e:
16 | print(f"Error accessing S3 bucket: {e}")
17 | return objects
18 |
19 | def send_s3_references_to_sqs(bucket_name, object_keys, sqs_queue_url):
20 | sqs = boto3.client('sqs', region_name='us-east-1')
21 | for key in object_keys:
22 | try:
23 | print(f"Sending S3 reference: {key}")
24 | message_body = json.dumps({
25 | 's3_bucket': bucket_name,
26 | 's3_key': key,
27 | 'message_type': 's3_reference'
28 | })
29 | response = sqs.send_message(
30 | QueueUrl=sqs_queue_url,
31 | MessageBody=message_body,
32 | MessageAttributes={
33 | 'Source': {'StringValue': 's3-reference-sender', 'DataType': 'String'},
34 | 'FileType': {'StringValue': 'parquet', 'DataType': 'String'}
35 | }
36 | )
37 | print(f"Message sent with ID: {response['MessageId']}")
38 | except Exception as e:
39 | print(f"ERROR processing {key}: {str(e)[:200]}...")
40 | continue
41 |
42 | if __name__ == "__main__":
43 | if len(sys.argv) != 4:
44 | print("Usage: python script.py ")
45 | sys.exit(1)
46 |
47 | BUCKET_NAME = sys.argv[1]
48 | PREFIX_FILE_PATH = sys.argv[2]
49 | SQS_QUEUE_URL = sys.argv[3]
50 |
51 | s3_objects = list_s3_bucket_contents(BUCKET_NAME, PREFIX_FILE_PATH)
52 | print(f"Found {len(s3_objects)} objects to process")
53 | if s3_objects:
54 | send_s3_references_to_sqs(BUCKET_NAME, s3_objects, SQS_QUEUE_URL)
55 | else:
56 | print("No objects to process")
--------------------------------------------------------------------------------
/content/security/docs/spark/data-encryption.md:
--------------------------------------------------------------------------------
1 | # **EMR Containers Spark - In transit and At Rest data encryption**
2 |
3 | ### **Encryption at Rest**
4 | #### Amazon S3 Client-Side Encryption
5 |
6 | To utilize [S3 Client side encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingClientSideEncryption.html), you will need to create a KMS key to be used to encrypt and decrypt data. If you do not have a KMS key, please follow this guide - [AWS KMS create keys](https://docs.aws.amazon.com/kms/latest/developerguide/create-keys.html). Note that the job execution role needs access to this key; see [Add to Key policy](https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html#key-policy-default-allow-users) for instructions on how to add these permissions.
7 |
8 | **trip-count-encrypt-write.py:**
9 |
10 | ```
11 | cat > trip-count-encrypt-write.py <<EOF
24 | df = spark.read.parquet('s3:///trip-data.parquet')
25 | print("Total trips: " + str(df.count()))
26 |
27 | df.write.parquet('s3:///write-encrypt-trip-data.parquet')
28 | print("Encrypt - KMS- CSE writew to s3 compeleted")
29 | spark.stop()
30 | EOF
31 |
32 | ```
33 |
34 | **Request:**
35 |
36 | ```
37 | cat > spark-python-in-s3-encrypt-cse-kms-write.json <<EOF
38 | {
39 | "name": "spark-python-in-s3-encrypt-cse-kms-write",
40 | "virtualClusterId": "",
41 | "executionRoleArn": "",
42 | "releaseLabel": "emr-6.2.0-latest",
43 | "jobDriver": {
44 | "sparkSubmitJobDriver": {
45 | "entryPoint": "s3://trip-count-encrypt-write.py",
46 | "sparkSubmitParameters": "--conf spark.executor.instances=10 --conf spark.driver.cores=2 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=2"
47 | }
48 | },
49 | "configurationOverrides": {
50 | "applicationConfiguration": [
51 | {
52 | "classification": "spark-defaults",
53 | "properties": {
54 | "spark.dynamicAllocation.enabled":"false"
55 | }
56 | },
57 | {
58 | "classification": "emrfs-site",
59 | "properties": {
60 | "fs.s3.cse.enabled":"true",
61 | "fs.s3.cse.encryptionMaterialsProvider":"com.amazon.ws.emr.hadoop.fs.cse.KMSEncryptionMaterialsProvider",
62 | "fs.s3.cse.kms.keyId":""
63 | }
64 | }
65 | ],
66 | "monitoringConfiguration": {
67 | "persistentAppUI": "ENABLED",
68 | "cloudWatchMonitoringConfiguration": {
69 | "logGroupName": "/emr-containers/jobs",
70 | "logStreamNamePrefix": "demo"
71 | },
72 | "s3MonitoringConfiguration": {
73 | "logUri": "s3://joblogs"
74 | }
75 | }
76 | }
77 | }
78 | EOF
79 |
80 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-encrypt-cse-kms-write.json
81 |
82 |
83 | ```
84 |
85 | In the above request, EMRFS encrypts the parquet file with the specified KMS key and the encrypted object is persisted to the specified s3 location.
86 |
87 | To verify the encryption, use the same KMS key to decrypt. The KMS key used is a symmetric key, so the same key can be used to both encrypt and decrypt.
88 |
89 | **trip-count-encrypt-read.py**
90 |
91 | ```
92 | cat > trip-count-encrypt-read.py <<EOF
105 | df = spark.read.parquet('s3:///trip-data.parquet')
106 | print("Total trips: " + str(df.count()))
107 |
108 | df_encrypt = spark.read.parquet('s3:///write-encrypt-trip-data.parquet')
109 | print("Encrypt data - Total trips: " + str(df_encrypt.count()))
110 | spark.stop()
111 | EOF
112 | ```
113 |
114 | **Request**
115 |
116 | ```
117 | cat > spark-python-in-s3-encrypt-cse-kms-read.json <<EOF
118 | {
119 | "name": "spark-python-in-s3-encrypt-cse-kms-read",
120 | "virtualClusterId": "",
121 | "executionRoleArn": "",
122 | "releaseLabel": "emr-6.2.0-latest",
123 | "jobDriver": {
124 | "sparkSubmitJobDriver": {
125 | "entryPoint": "s3://trip-count-encrypt-write.py",
126 | "sparkSubmitParameters": "--conf spark.executor.instances=10 --conf spark.driver.cores=2 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=2"
127 | }
128 | },
129 | "configurationOverrides": {
130 | "applicationConfiguration": [
131 | {
132 | "classification": "spark-defaults",
133 | "properties": {
134 | "spark.dynamicAllocation.enabled":"false"
135 | }
136 | },
137 | {
138 | "classification": "emrfs-site",
139 | "properties": {
140 | "fs.s3.cse.enabled":"true",
141 | "fs.s3.cse.encryptionMaterialsProvider":"com.amazon.ws.emr.hadoop.fs.cse.KMSEncryptionMaterialsProvider",
142 | "fs.s3.cse.kms.keyId":""
143 | }
144 | }
145 | ],
146 | "monitoringConfiguration": {
147 | "persistentAppUI": "ENABLED",
148 | "cloudWatchMonitoringConfiguration": {
149 | "logGroupName": "/emr-containers/jobs",
150 | "logStreamNamePrefix": "demo"
151 | },
152 | "s3MonitoringConfiguration": {
153 | "logUri": "s3://joblogs"
154 | }
155 | }
156 | }
157 | }
158 | EOF
159 |
160 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-encrypt-cse-kms-read.json
161 |
162 |
163 |
164 |
165 |
166 | ```
167 |
168 | **Validate encryption:** Try to read the encrypted data without specifying `"fs.s3.cse.enabled":"true"` - you will get an error message in the driver and executor logs, because the content is encrypted and cannot be read without decryption.
169 |
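170 | For example, re-running the read job above with the `emrfs-site` classification changed as sketched below (a minimal sketch; every other field is unchanged from the read request above) should fail while parsing the parquet file, confirming that the objects were written encrypted:
171 |
172 | ```
173 | {
174 | "classification": "emrfs-site",
175 | "properties": {
176 | "fs.s3.cse.enabled":"false"
177 | }
178 | }
179 | ```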
--------------------------------------------------------------------------------
/content/security/docs/spark/network-security.md:
--------------------------------------------------------------------------------
1 | # **Managing VPC for EMR on EKS**
2 |
3 | This section addresses network security at the VPC level. If you want to read more about network security for Spark on EMR on EKS, please refer to this [section](https://aws.github.io/aws-emr-containers-best-practices/security/docs/spark/encryption/#amazon-emr-on-eks).
4 |
5 | ## **Security Group**
6 |
7 | The applications running on your EMR on EKS cluster often need access to services that run outside the cluster,
8 | for example Amazon Redshift, Amazon Relational Database Service, or a service self-hosted on an EC2 instance. To access these resources you need to allow network traffic at the security group level. The default mechanism in EKS is to use security groups at the node level,
9 | which means all the pods running on the node inherit the rules of that security group.
10 | For security-conscious customers this is not the desired behavior, and you may want to use security groups at the pod level.
11 |
12 | This section addresses how you can use Security Groups with EMR on EKS.
13 |
14 | ### Configure EKS Cluster to use Security Groups for Pods
15 |
16 | In order to use Security Groups at the pod level, you need to configure the VPC CNI for EKS. The following [link](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) guides you through the prerequisites as well as the EKS cluster configuration.
17 |
18 | #### Define SecurityGroupPolicy
19 |
20 | Once you have configured the VPC CNI, you need to create a SecurityGroupPolicy object.
21 | This object defines which **security groups** (up to 5) to use, a **podSelector** that defines which pods the security groups apply to, and
22 | the **namespace** in which the SecurityGroupPolicy is evaluated. Below you find an example of a `SecurityGroupPolicy`.
23 |
24 | ```
25 | apiVersion: vpcresources.k8s.aws/v1beta1
26 | kind: SecurityGroupPolicy
27 | metadata:
28 | name: <>
29 | namespace:
30 | spec:
31 | podSelector:
32 | matchLabels:
33 | role: spark
34 | securityGroups:
35 | groupIds:
36 | - sg-xxxxx
37 | ```
38 |
39 | ### Define pod template to use Security Group for pod
40 |
41 | In order for the security group to be applied to the Spark driver and executors, you need to provide a pod template that adds label(s) to the pods.
42 | The labels should match the ones defined in the `podSelector` above; in our example it is `role: spark`.
43 | The snippet below defines the pod template that you can upload to S3 and then reference when launching your job.
44 |
45 | ```
46 | apiVersion: v1
47 | kind: Pod
48 | metadata:
49 | labels:
50 | role: spark
51 | ```
52 |
53 | ### Launch a job
54 |
55 | The command below can be used to run a job.
56 |
57 | ```
58 | aws emr-containers start-job-run --virtual-cluster-id --name spark-jdbc --execution-role-arn --release-label emr-6.7.0-latest --job-driver '{
59 | "sparkSubmitJobDriver": {
60 | "entryPoint": "",
61 | "sparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1"
62 | }
63 | }' --configuration-overrides '{
64 | "applicationConfiguration": [
65 | {
66 | "classification": "spark-defaults",
67 | "properties": {
68 | "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
69 | "spark.sql.catalogImplementation": "hive",
70 | "spark.dynamicAllocation.enabled":"true",
71 | "spark.dynamicAllocation.minExecutors": "8",
72 | "spark.dynamicAllocation.maxExecutors": "40",
73 | "spark.kubernetes.allocation.batch.size": "8",
74 | "spark.dynamicAllocation.executorAllocationRatio": "1",
75 | "spark.dynamicAllocation.shuffleTracking.enabled": "true",
76 | "spark.dynamicAllocation.shuffleTracking.timeout": "300s",
77 | "spark.kubernetes.driver.podTemplateFile":,
78 | "spark.kubernetes.executor.podTemplateFile":
79 | }
80 | }
81 | ],
82 | "monitoringConfiguration": {
83 | "persistentAppUI": "ENABLED",
84 | "cloudWatchMonitoringConfiguration": {
85 | "logGroupName": "/aws/emr-containers/",
86 | "logStreamNamePrefix": "default"
87 | }
88 | }
89 | }'
90 | ```
91 |
92 | #### Verify a security group attached to the Pod ENI
93 |
94 | To verify that the Spark driver and executor pods have the security group attached, run the first command to get the pod name, then the second command to see the pod annotation with the ENI associated to the pod, which carries the security group defined in the **SecurityGroupPolicy**.
95 |
96 | ```
97 | export POD_NAME=$(kubectl -n get pods -l role=spark -o jsonpath='{.items[].metadata.name}')
98 |
99 | kubectl -n describe pod $POD_NAME | head -11
100 | ```
101 |
102 | ```
103 | Annotations: kubernetes.io/psp: eks.privileged
104 | vpc.amazonaws.com/pod-eni:
105 | [{"eniId":"eni-xxxxxxx","ifAddress":"xx:xx:xx:xx:xx:xx","privateIp":"x.x.x.x","vlanId":1,"subnetCidr":"x.x.x.x/x"}]
106 | ```
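107 |
108 | You can also cross-check the security groups attached to that branch ENI with the AWS CLI. This is a minimal sketch; replace `eni-xxxxxxx` with the `eniId` reported in the pod annotation above:
109 |
110 | ```
111 | aws ec2 describe-network-interfaces \
112 | --network-interface-ids eni-xxxxxxx \
113 | --query 'NetworkInterfaces[0].Groups'
114 | ```
115 |
116 | The returned `GroupId` should match the security group referenced in your `SecurityGroupPolicy`.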
--------------------------------------------------------------------------------
/content/security/docs/spark/secrets.md:
--------------------------------------------------------------------------------
1 | # **Using Secrets in EMR on EKS**
2 |
3 | Secrets can be credentials to APIs, databases or other resources. There are various ways these secrets can be passed to your containers, such as pod environment variables or Kubernetes Secrets. These methods are not secure: with environment variables, secrets are stored in clear text, and any user who has sufficient access to the Kubernetes cluster can read them. Storing secrets using Kubernetes Secrets is also not secure because they are not encrypted and are only base64 encoded.
4 |
5 |
6 | There is a secure method to expose these secrets in EKS through the [Secrets Store CSI Driver](https://github.com/aws/secrets-store-csi-driver-provider-aws).
7 |
8 | The Secrets Store CSI Driver integrates with a secret store like [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/) and mounts the secrets as a volume that can be accessed from your application code. This document describes how to set up and use AWS Secrets Manager with EMR on EKS through the Secrets Store CSI Driver.
9 |
10 | ### Deploy Secrets Store CSI Drivers and AWS Secrets and Configuration Provider
11 |
12 |
13 | #### Secrets Store CSI Drivers
14 |
15 | Configure EKS Cluster with `Secrets Store CSI Driver`.
16 |
17 | To learn more about AWS Secrets Manager CSI Driver you can refer to this [link](https://docs.aws.amazon.com/secretsmanager/latest/userguide/integrating_csi_driver.html)
18 |
19 | ```
20 | helm repo add secrets-store-csi-driver \
21 | https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts
22 |
23 | helm install -n kube-system csi-secrets-store \
24 | --set syncSecret.enabled=true \
25 | --set enableSecretRotation=true \
26 | secrets-store-csi-driver/secrets-store-csi-driver
27 |
28 | ```
29 |
30 | Deploy the `AWS Secrets and Configuration Provider` to use AWS Secrets Manager
31 |
32 | #### AWS Secrets and Configuration Provider
33 |
34 | ```
35 | kubectl apply -f https://raw.githubusercontent.com/aws/secrets-store-csi-driver-provider-aws/main/deployment/aws-provider-installer.yaml
36 | ```
37 |
38 | ### Define the `SecretProviderClass`
39 |
40 | The `SecretProviderClass` is how you present your secret in Kubernetes; below you find a definition of a `SecretProviderClass`.
41 | There are a few parameters that are important:
42 |
43 | - The `provider` must be set to `aws`.
44 | - The `objectName` must be the name of the secret you want to use as defined in AWS.
45 | Here the secret is called `db-creds`.
46 | - The `objectType` must be set to `secretsmanager`.
47 |
48 | ```
49 | cat > db-cred.yaml << EOF
50 |
51 | apiVersion: secrets-store.csi.x-k8s.io/v1
52 | kind: SecretProviderClass
53 | metadata:
54 | name: mysql-spark-secret
55 | spec:
56 | provider: aws
57 | parameters:
58 | objects: |
59 | - objectName: "db-creds"
60 | objectType: "secretsmanager"
61 | EOF
62 | ```
63 |
64 | ```
65 | kubectl apply -f db-cred.yaml -n
66 | ```
67 | Apply the command above in a terminal to create the `SecretProviderClass`.
68 | The `kubectl` command must include the namespace where your job will be executed.
69 |
70 | ### Pod Template
71 |
72 | In the executor pod template you should define the volume as follows to mount the secret. The example below shows how you can define it.
73 | There are a few points that are important to mount the secret:
74 |
75 | - `secretProviderClass`: this should have the same name as the one defined above; in this case it is `mysql-spark-secret`.
76 | - `mountPath`: where the secret is going to be available to the pod. In this example it will be in `/var/secrets`.
77 | When defining the `mountPath`, make sure you do not specify one of the paths reserved by EMR on EKS as defined [here](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/pod-templates.html).
78 |
79 | ```
80 | apiVersion: v1
81 | kind: Pod
82 |
83 | spec:
84 | containers:
85 | - name: spark-kubernetes-executors
86 | volumeMounts:
87 | - mountPath: "/var/secrets"
88 | name: mysql-cred
89 | readOnly: true
90 | volumes:
91 | - name: mysql-cred
92 | csi:
93 | driver: secrets-store.csi.k8s.io
94 | readOnly: true
95 | volumeAttributes:
96 | secretProviderClass: mysql-spark-secret
97 | ```
98 |
99 | This podtemplate must be uploaded to S3 and referenced in the job submit command as shown below.
100 |
101 | **Note** You must make sure that the RDS instance or your database allows traffic from the instances where your driver and executor pods are running.
102 |
103 | ### PySpark code
104 |
105 | The example below shows PySpark code for connecting to a MySQL database. The example assumes the secret is stored in AWS Secrets Manager as defined above. The `username` key is used to retrieve the database `user` as stored in AWS Secrets Manager, and the `password` key is used to retrieve the database password.
106 |
107 |
108 | It shows how you can retrieve the credentials from the mount point `/var/secrets/`.
109 | The secret is stored in a file with the same name as defined in AWS; in this case it is `db-creds`.
110 | This has been set in the pod template above.
111 |
112 | ```
113 | from pyspark.sql import SparkSession
114 | import json
115 |
116 | secret_path = "/var/secrets/db-creds"
117 |
118 | f = open(secret_path, "r")
119 | mySecretDict = json.loads(f.read())
120 |
121 | spark = SparkSession.builder.getOrCreate()
122 |
123 | str_jdbc_url="jdbc:<your-jdbc-url>"
124 | str_Query="<your query>"
125 | str_username=mySecretDict['username']
126 | str_password=mySecretDict['password']
127 | driver = "com.mysql.jdbc.Driver"
128 |
129 | jdbcDF = spark.read \
130 | .format("jdbc") \
131 | .option("url", str_jdbc_url) \
132 | .option("driver", driver)\
133 | .option("query", str_Query) \
134 | .option("user", str_username) \
135 | .option("password", str_password) \
136 | .load()
137 |
138 | jdbcDF.show()
139 | ```
140 |
141 | ### Execute the job
142 |
143 | The command below can be used to run a job.
144 |
145 | **Note**: The supplied execution role **MUST** have an IAM policy attached that allows it to access the secret defined in the `SecretProviderClass` above.
146 | The IAM policy below shows the IAM actions that are needed.
147 |
148 | ```
149 | {
150 | "Version": "2012-10-17",
151 | "Statement": [ {
152 | "Effect": "Allow",
153 | "Action": ["secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret"],
154 | "Resource": []
155 | }]
156 | }
157 | ```
158 |
159 | ```
160 | aws emr-containers start-job-run --virtual-cluster-id --name spark-jdbc --execution-role-arn --release-label emr-6.7.0-latest --job-driver '{
161 | "sparkSubmitJobDriver": {
162 | "entryPoint": "",
163 | "sparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1 --conf spark.jars="
164 | }
165 | }' --configuration-overrides '{
166 | "applicationConfiguration": [
167 | {
168 | "classification": "spark-defaults",
169 | "properties": {
170 | "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
171 | "spark.sql.catalogImplementation": "hive",
172 | "spark.dynamicAllocation.enabled":"true",
173 | "spark.dynamicAllocation.minExecutors": "8",
174 | "spark.dynamicAllocation.maxExecutors": "40",
175 | "spark.kubernetes.allocation.batch.size": "8",
176 | "spark.dynamicAllocation.executorAllocationRatio": "1",
177 | "spark.dynamicAllocation.shuffleTracking.enabled": "true",
178 | "spark.dynamicAllocation.shuffleTracking.timeout": "300s",
179 | "spark.kubernetes.driver.podTemplateFile":,
180 | "spark.kubernetes.executor.podTemplateFile":
181 | }
182 | }
183 | ],
184 | "monitoringConfiguration": {
185 | "persistentAppUI": "ENABLED",
186 | "cloudWatchMonitoringConfiguration": {
187 | "logGroupName": "/aws/emr-containers/",
188 | "logStreamNamePrefix": "default"
189 | }
190 | }
191 | }'
192 | ```
193 |
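194 | To confirm that the secret is mounted as expected, you can read the file from one of the running executor pods. This is a minimal sketch; the namespace and pod name are placeholders, the label `spark-role=executor` is the standard label Spark sets on executor pods, and the mount path and file name come from the pod template and `SecretProviderClass` above:
195 |
196 | ```
197 | kubectl -n <namespace> get pods -l spark-role=executor
198 | kubectl -n <namespace> exec <executor-pod-name> -- cat /var/secrets/db-creds
199 | ```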
--------------------------------------------------------------------------------
/content/storage/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/storage/docs/index.md
--------------------------------------------------------------------------------
/content/storage/docs/spark/instance-store.md:
--------------------------------------------------------------------------------
1 | # **Instance Store Volumes**
2 |
3 | When working with Spark workloads, it might be useful to use instances powered by [SSD instance store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html) to improve the performance of your jobs. This storage is located on disks that are physically attached to the host computer and can provide better performance compared to traditional EBS volumes. In the context of Spark, this might be beneficial for wide transformations (e.g. JOIN, GROUP BY) that generate a significant amount of shuffle data that Spark persists on the local filesystem of the instances where the executors are running.
4 |
5 | In this document, we highlight two approaches to leverage NVMe disks in your workloads when using EMR on EKS. For a list of instances supporting NVMe disks, see [Instance store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#instance-store-volumes) in the Amazon EC2 documentation.
6 |
7 | ## **Mount kubelet pod directory on NVMe disks**
8 |
9 | The [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) service manages the lifecycle of pod containers that are created using Kubernetes. When a pod is launched on an instance, an ephemeral volume is automatically created for the pod, and this volume is mapped to a subdirectory within the path `/var/lib/kubelet` of the host node. This volume folder exists for the lifetime of the K8s pod, and it will be automatically deleted once the pod ceases to exist.
10 |
11 | In order to leverage the NVMe disks attached to an EC2 node in our Spark application, we should perform the following actions during node bootstrap:
12 |
13 | * Prepare the NVMe disks attached to the instance (format disks and create a partition)
14 | * Mount the `/var/lib/kubelet/pods` path on the NVMe
15 |
16 | By doing this, all local files generated by your Spark job (blockmanager data, shuffle data, etc.) will be automatically written to NVMe disks. This way, you don't have to configure Spark volume path when launching the pod (driver or executor). This approach is easier to adopt because it doesn’t require any additional configuration in your job. Besides, once the job is completed, all the data stored in ephemeral volumes will be automatically deleted when the EC2 instance is deleted.
17 |
18 | However, if you have multiple NVMe disks attached to the instance, you need to create a RAID0 configuration across all the disks before mounting the `/var/lib/kubelet/pods` directory on the RAID partition. Without a RAID setup, it will not be possible to leverage the full disk capacity available on the node.
19 |
20 | The following example shows how to create a node group in your cluster using this approach. In order to prepare our NVMe disks, we can use the [eksctl](https://eksctl.io/) **preBootstrapCommands** definition while creating the node group. The script will perform the following actions:
21 |
22 | * For instances with a single NVMe disk, format the disk and create a Linux filesystem (e.g. ext4, xfs)
23 | * For instances with multiple NVMe disks, create a RAID 0 configuration across all available volumes
24 |
25 | Once the disks are formatted and ready to use, we will mount the folder **/var/lib/kubelet/pods** on the new filesystem and set up the correct permissions. Below, you can find an example of an eksctl configuration to create a managed node group using this approach.
26 |
27 | **Example**
28 |
29 | ```
30 | apiVersion: eksctl.io/v1alpha5
31 | kind: ClusterConfig
32 |
33 | metadata:
34 | name: YOUR_CLUSTER_NAME
35 | region: YOUR_REGION
36 |
37 | managedNodeGroups:
38 | - name: ng-c5d-9xlarge
39 | instanceType: c5d.9xlarge
40 | desiredCapacity: 1
41 | privateNetworking: true
42 | subnets:
43 | - YOUR_NG_SUBNET
44 | preBootstrapCommands: # commands executed as root
45 | - yum install -y mdadm nvme-cli
46 | - nvme_disks=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | awk -F'[[:space:]][[:space:]]+' '{print $1}')) && [[ ${#nvme_disks[@]} -eq 1 ]] && mkfs.ext4 -F ${nvme_disks[*]} && systemctl stop docker && mkdir -p /var/lib/kubelet/pods && mount ${nvme_disks[*]} /var/lib/kubelet/pods && chmod 750 /var/lib/docker && systemctl start docker
47 | - nvme_disks=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | awk -F'[[:space:]][[:space:]]+' '{print $1}')) && [[ ${#nvme_disks[@]} -ge 2 ]] && mdadm --create --verbose /dev/md0 --level=0 --raid-devices=${#nvme_disks[@]} ${nvme_disks[*]} && mkfs.ext4 -F /dev/md0 && systemctl stop docker && mkdir -p /var/lib/kubelet/pods && mount /dev/md0 /var/lib/kubelet/pods && chmod 750 /var/lib/docker && systemctl start docker
48 | ```
49 |
50 |
51 | **Benefits**
52 |
53 | * No need to mount the disk using Spark configurations or pod templates
54 | * Data generated by the application is deleted immediately at pod termination. Data is also purged in case of pod failures.
55 | * One time configuration for the node group
56 |
57 | **Cons**
58 |
59 | * If multiple jobs are allocated on the same EC2 instance, contention of disk resources will occur because it is not possible to allocate instance store volume resources across jobs
60 |
61 |
62 |
63 | ## **Mount NVMe disks as data volumes**
64 |
65 | In this section, we're going to explicitly mount instance store volumes and reference them as local storage paths in the Spark configuration for drivers and executors.
66 |
67 | As in the previous example, this script will automatically format the instance store volumes and create an **xfs** partition. The disks are then mounted in local folders called **/spark_data_IDX** where IDX is an integer that corresponds to the disk mounted.
68 |
69 | **Example**
70 |
71 | ```
72 | apiVersion: eksctl.io/v1alpha5
73 | kind: ClusterConfig
74 |
75 | metadata:
76 | name: YOUR_CLUSTER_NAME
77 | region: YOUR_REGION
78 |
79 | managedNodeGroups:
80 | - name: ng-m5d-4xlarge
81 | instanceType: m5d.4xlarge
82 | desiredCapacity: 1
83 | privateNetworking: true
84 | subnets:
85 | - YOUR_NG_SUBNET
86 | preBootstrapCommands: # commands executed as root
87 | - "IDX=1;for DEV in /dev/nvme[1-9]n1;do mkfs.xfs ${DEV}; mkdir -p /spark_data_${IDX}; echo ${DEV} /spark_data_${IDX} xfs defaults,noatime 1 2 >> /etc/fstab; IDX=$((${IDX} + 1)); done"
88 | - "mount -a"
89 | - "chown 999:1000 /spark_data_*"
90 | ```
91 |
92 | In order to successfully use instance store volumes within Spark, you need to specify additional configurations, and the mounted volume name must start with `spark-local-dir-`.
93 |
94 | Below is an example configuration provided during the EMR on EKS job submission that shows how to configure Spark to use two volumes as local storage for the job.
95 |
96 | **Spark Configurations**
97 |
98 | ```
99 | {
100 | "name": ....,
101 | "virtualClusterId": ....,
102 | "executionRoleArn": ....,
103 | "releaseLabel": ....,
104 | "jobDriver": ....,
105 | "configurationOverrides": {
106 | "applicationConfiguration": [
107 | {
108 | "classification": "spark-defaults",
109 | "properties": {
110 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.mount.path": "/spark_data_1",
111 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.mount.readOnly": "false",
112 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.options.path": "/spark_data_1",
113 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.mount.path": "/spark_data_2",
114 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.mount.readOnly": "false",
115 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.options.path": "/spark_data_2"
116 | }
117 | }
118 | ]
119 | }
120 | }
121 | ```
122 |
123 |
124 | Please note that for this approach it is required to specify the following configurations for each volume that you want to use. (IDX is a label to identify the volume mounted)
125 |
126 | ```
127 | # Mount path on the host node
128 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.options.path
129 |
130 | # Mount path on the k8s pod
131 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.mount.path
132 |
133 | # (boolean) Should be defined as false to allow Spark to write in the path
134 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.mount.readOnly
135 | ```
136 |
137 |
138 | **Benefits**
139 |
140 | * You can allocate dedicated instance store volume resources across your Spark jobs (for example, in a scenario where an EC2 instance has two instance store volumes and you run two Spark jobs on this node, you can dedicate one volume per Spark job)
141 |
142 | **Cons**
143 |
144 | * Additional configurations are required for Spark jobs to use instance store volumes. This approach can be error-prone if you don't control the instance types being used (for example, multiple node groups with different instance types). You can mitigate this issue by using K8s node selectors to pin your job to a specific instance type via **spark.kubernetes.node.selector.node.kubernetes.io/instance-type**, as sketched at the end of this page
145 | * Data created on the volumes is automatically deleted once the job is completed and the instance is terminated. However, you need to take extra measures to delete the data on instance store volumes if the EC2 instance is re-used or not terminated.
146 |
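147 | As an example of the mitigation mentioned above, the sketch below (assuming an `m5d.4xlarge` node group like the one created earlier) pins the executors to a single instance type, so the `/spark_data_IDX` paths referenced in the Spark configuration are guaranteed to exist on the nodes the job lands on:
148 |
149 | ```
150 | {
151 | "classification": "spark-defaults",
152 | "properties": {
153 | "spark.kubernetes.node.selector.node.kubernetes.io/instance-type": "m5d.4xlarge"
154 | }
155 | }
156 | ```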
--------------------------------------------------------------------------------
/content/storage/resources/FSx_Lustre_SG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/storage/resources/FSx_Lustre_SG.png
--------------------------------------------------------------------------------
/content/submit-applications/docs/spark/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/index.md
--------------------------------------------------------------------------------
/content/submit-applications/docs/spark/java-and-scala.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/java-and-scala.md
--------------------------------------------------------------------------------
/content/submit-applications/docs/spark/multi-arch-image.md:
--------------------------------------------------------------------------------
1 | # Build a Multi-architecture Docker Image Supporting arm64 & amd64
2 |
3 | ## Pre-requisites
4 | We can complete all the steps either from a local desktop or using [AWS Cloud9](https://aws.amazon.com/cloud9/). If you’re using AWS Cloud9, follow the instructions in the "Setup AWS Cloud9" to create and configure the environment first, otherwise skip to the next section.
5 |
6 | ## Setup AWS Cloud9
7 | AWS Cloud9 is a cloud-based IDE that lets you write, run, and debug your code via just a browser. AWS Cloud9 comes preconfigured with some of the AWS dependencies we require to build our application, such as the AWS CLI tool.
8 |
9 | ### 1. Create a Cloud9 instance
10 |
11 | **Instance type** - Create an AWS Cloud9 environment from the [AWS Management Console](https://console.aws.amazon.com/cloud9) with an instance type of `t3.small or larger`. In our example, we used `m5.xlarge` for adequate memory and CPU to compile and build a large docker image.
12 |
13 | **VPC** - Follow the launch wizard and provide the required name. To interact with an existing EKS cluster in the same region later on, we recommend using the same VPC as your EKS cluster for the Cloud9 environment. Leave the remaining default values as they are.
14 |
15 | **Storage size** - You must increase Cloud9's EBS volume size (pre-attached to your AWS Cloud9 instance) to 30+ GB, because the default disk space (10 GB with ~72% used) is not enough for building a container image. Refer to the [Resize an Amazon EBS volume used by an environment](https://docs.aws.amazon.com/cloud9/latest/user-guide/move-environment.html#move-environment-resize) document and download the script `resize.sh` to your Cloud9 environment.
16 |
17 | ```bash
18 | touch resize.sh
19 | # Double click the file name in cloud9
20 | # Copy and paste the content from the official document to your file, save and close it
21 | ```
22 | Validate the disk size is 10GB currently:
23 | ```
24 | admin:~/environment $ df -h
25 | Filesystem Size Used Avail Use% Mounted on
26 | devtmpfs 4.0M 0 4.0M 0% /dev
27 | tmpfs 951M 0 951M 0% /dev/shm
28 | tmpfs 381M 5.3M 376M 2% /run
29 | /dev/nvme0n1p1 10G 7.2G 2.9G 72% /
30 | tmpfs 951M 12K 951M 1% /tmp
31 | /dev/nvme0n1p128 10M 1.3M 8.7M 13% /boot/efi
32 | tmpfs 191M 0 191M 0% /run/user/1000
33 | ```
34 | Increase the disk size:
35 | ```bash
36 | bash resize.sh 30
37 | ```
38 | ```
39 | admin:~/environment $ df -h
40 | Filesystem Size Used Avail Use% Mounted on
41 | devtmpfs 4.0M 0 4.0M 0% /dev
42 | tmpfs 951M 0 951M 0% /dev/shm
43 | tmpfs 381M 5.3M 376M 2% /run
44 | /dev/nvme0n1p1 30G 7.3G 23G 25% /
45 | tmpfs 951M 12K 951M 1% /tmp
46 | /dev/nvme0n1p128 10M 1.3M 8.7M 13% /boot/efi
47 | tmpfs 191M 0 191M 0% /run/user/1000
48 | ```
49 |
50 | ### 2. Install Docker and Buildx if required
51 |
52 | - **Installing Docker** - a Cloud9 EC2 instance comes with a Docker daemon pre-installed. Outside of Cloud9, your environment may or may not have Docker installed. If needed, follow the instructions on the [Docker Desktop page](https://docs.docker.com/desktop/#download-and-install) to install it.
53 |
54 |
55 | - **Installing Buildx** (pre-installed in Cloud9) - To build a single multi-arch Docker image (x86_64 and arm64), we may or may not need to [install an extra Buildx plugin](https://docs.docker.com/build/architecture/#install-buildx) that extends the Docker CLI to support the multi-architecture feature. Docker Buildx is installed by default with Docker Engine since **version 23.0+**. For an earlier version, you need to grab a binary from the GitHub repository and install it manually, or get it from a separate package. See the [docker/buildx README](https://github.com/docker/buildx#manual-download) for more information.
56 |
57 | Once the buildx CLI is available, we can create a builder instance, which gives access to the new multi-architecture features. You only have to perform this task once.
58 | ```bash
59 | # create a builder
60 | docker buildx create --name mybuilder --use
61 | # boot up the builder and inspect
62 | docker buildx inspect --bootstrap
63 |
64 |
65 | # list builder instances
66 | # the asterisk (*) next to a builder name indicates the selected builder.
67 | docker buildx ls
68 | ```
69 | If your builder doesn't support [QEMU](https://docs.docker.com/build/building/multi-platform/#qemu), only a limited set of platform types is supported, as shown below. For example, the current builder instance created in Cloud9 doesn't support QEMU, so we can't build the docker image for the arm64 CPU type yet.
70 | ```bash
71 | NAME/NODE DRIVER/ENDPOINT STATUS BUILDKIT PLATFORMS
72 | default docker
73 | default default running v0.11.6 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/386
74 | mybuilder * docker-container
75 | my_builder0 default running v0.11.6 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/386
76 | ```
77 |
78 | - **Installing QEMU for Cloud9** - Building multi-platform images under emulation with QEMU is the easiest way to get started if your builder already supports it. However, AWS Cloud9 isn't preconfigured with [binfmt_misc](https://en.wikipedia.org/wiki/Binfmt_misc) support, so we must install compiled QEMU binaries. The installation can easily be done via the docker run CLI:
79 | ```bash
80 | docker run --privileged --rm tonistiigi/binfmt --install all
81 | ```
82 | List the builder instances again. Now we see that the full list of platforms is supported, including Arm-based CPUs:
83 | ```bash
84 | docker buildx ls
85 |
86 | NAME/NODE DRIVER/ENDPOINT STATUS BUILDKIT PLATFORMS
87 | mybuilder * docker-container
88 | mybuilder20 unix:///var/run/docker.sock running v0.13.2 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/amd64/v4, linux/arm64, linux/riscv64, linux/ppc64le, linux/s390x, linux/386, linux/mips64le, linux/mips64, linux/arm/v7, linux/arm/v6
89 | default docker
90 | default default running v0.12.5 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/amd64/v4, linux/386, linux/arm64, linux/riscv64, linux/ppc64le, linux/s390x, linux/mips64le, linux/mips64, linux/arm/v7, linux/arm/v6
91 | ```
92 |
93 | ## Build a docker image supporting multi-arch
94 |
95 | In this example, we will create a [spark-benchmark-utility](https://github.com/aws-samples/emr-on-eks-benchmark) container image. We are going to reuse the source code from the [EMR on EKS benchmark Github repo](https://github.com/aws-samples/emr-on-eks-benchmark).
96 |
97 | ### 1. Download the source code from the Github:
98 | ```bash
99 | git clone https://github.com/aws-samples/emr-on-eks-benchmark.git
100 | cd emr-on-eks-benchmark
101 | ```
102 |
103 | ### 2. Setup required environment variables
104 |
105 | We will build an image to test EMR 6.15's performance. The equivalent versions are Spark 3.4.1 and Hadoop 3.3.6. Change them accordingly if needed.
106 | ```bash
107 | export SPARK_VERSION=3.4.1
108 | export HADOOP_VERSION=3.3.6
109 | ```
110 |
111 | Log in to your own Amazon ECR registry:
112 | ```bash
113 | export AWS_REGION=us-east-1
114 | export ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
115 | export ECR_URL=$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
116 |
117 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ECR_URL
118 | ```
119 |
120 | ### 3. Build OSS Spark base image if required
121 | If you want to test open-source Apache Spark's performance, build a base Spark image first. Otherwise skip this step.
122 | ```bash
123 | docker buildx build --platform linux/amd64,linux/arm64 \
124 | -t $ECR_URL/spark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \
125 | -f docker/hadoop-aws-3.3.1/Dockerfile \
126 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} --build-arg SPARK_VERSION=${SPARK_VERSION} --push .
127 | ```
128 |
129 | ### 4. Get EMR Spark base image from AWS
130 | ```bash
131 | export SRC_ECR_URL=755674844232.dkr.ecr.us-east-1.amazonaws.com
132 | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $SRC_ECR_URL
133 |
134 | docker pull $SRC_ECR_URL/spark/emr-6.15.0:latest
135 | ```
136 |
137 |
138 | ### 5. Build the Benchmark Utility image
139 |
140 | Build and push the docker image based on the OSS Spark engine built before (Step #3):
141 |
142 | ```bash
143 |
144 | docker buildx build --platform linux/amd64,linux/arm64 \
145 | -t $ECR_URL/eks-spark-benchmark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \
146 | -f docker/benchmark-util/Dockerfile \
147 | --build-arg SPARK_BASE_IMAGE=$ECR_URL/spark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \
148 | --push .
149 | ```
150 |
151 | Build and push the benchmark docker image based on EMR's Spark runtime (Step #4):
152 |
153 | ```bash
154 | docker buildx build --platform linux/amd64,linux/arm64 \
155 | -t $ECR_URL/eks-spark-benchmark:emr6.15 \
156 | -f docker/benchmark-util/Dockerfile \
157 | --build-arg SPARK_BASE_IMAGE=$SRC_ECR_URL/spark/emr-6.15.0:latest \
158 | --push .
159 |
160 | ```
161 |
162 | ## Benchmark application based on the docker images built
163 |
164 | Based on the multi-arch docker images built previously, you can now start to [run benchmark applications](https://github.com/aws-samples/emr-on-eks-benchmark/tree/delta?tab=readme-ov-file#run-benchmark) on both Intel- and Arm-based CPU nodes.
165 |
166 | In Cloud9, the following extra steps are required to configure the environment, before you can submit the applications.
167 |
168 | 1. Install the kubectl/helm/eksctl CLI tools; refer to this [sample script](https://github.com/aws-samples/stream-emr-on-eks/blob/workshop/deployment/app_code/post-deployment.sh)
169 |
170 | 2. Modify the IAM role attached to the Cloud9 EC2 instance so that it has enough privileges to assume the EKS cluster's admin role or has the permission to submit jobs against the EKS cluster.
171 |
172 | 3. Upgrade AWS CLI and turn off the AWS managed temporary credentials in Cloud9:
173 | ```bash
174 | curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
175 | unzip awscliv2.zip
176 | sudo ./aws/install --update
177 | /usr/local/bin/aws cloud9 update-environment --environment-id $C9_PID --managed-credentials-action DISABLE
178 | rm -vf ${HOME}/.aws/credentials
179 | ```
180 |
181 | 4. Connect to the EKS cluster
182 | ```bash
183 | # a sample connection string
184 | aws eks update-kubeconfig --name YOUR_EKS_CLUSTER_NAME --region us-east-1 --role-arn arn:aws:iam::ACCOUNTID:role/SparkOnEKS-iamrolesclusterAdmin-xxxxxx
185 |
186 | # validate the connection
187 | kubectl get svc
188 | ```
189 |
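190 | Optionally, you can confirm that a pushed image really contains both architectures with `docker buildx imagetools inspect`, which prints the platforms present in the image manifest (the tag below assumes the EMR-based benchmark image built earlier):
191 | ```bash
192 | docker buildx imagetools inspect $ECR_URL/eks-spark-benchmark:emr6.15
193 | ```
194 | The output should list both `linux/amd64` and `linux/arm64`.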
--------------------------------------------------------------------------------
/content/submit-applications/docs/spark/sparkr.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/sparkr.md
--------------------------------------------------------------------------------
/content/submit-applications/docs/spark/sparksql.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/sparksql.md
--------------------------------------------------------------------------------
/content/submit-applications/resources/images/pyspark-packaged-example-zip-folder-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/resources/images/pyspark-packaged-example-zip-folder-structure.png
--------------------------------------------------------------------------------
/content/submit-applications/resources/pyspark-packaged-dependency-src.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/resources/pyspark-packaged-dependency-src.zip
--------------------------------------------------------------------------------
/content/troubleshooting/docs/change-log-level.md:
--------------------------------------------------------------------------------
1 | # **Change Log level for Spark application on EMR on EKS**
2 |
3 | To obtain more detail about their application or job submission, Spark application developers can change the log level of their jobs to different levels depending on their requirements. Spark uses Apache Log4j for logging.
4 |
5 | ### Change log level to DEBUG
6 |
7 | #### **Using EMR classification**
8 | Log level of spark applications can be changed using the [EMR spark-log4j configuration classification.](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html)
9 |
10 | **Request**
11 | The `pi.py` application script is from the [spark examples](https://github.com/apache/spark/blob/master/examples/src/main/python/pi.py). EMR on EKS includes the example at `/usr/lib/spark/examples/src/main` for you to try.
12 |
13 | The `spark-log4j` classification can be used to configure values in [log4j.properties](https://github.com/apache/spark/blob/branch-3.2/conf/log4j.properties.template) for EMR releases 6.7.0 or lower, and in [log4j2.properties](https://github.com/apache/spark/blob/master/conf/log4j2.properties.template) for EMR releases 6.8.0+.
14 | ```
15 | cat > Spark-Python-in-s3-debug-log.json << EOF
16 | {
17 | "name": "spark-python-in-s3-debug-log-classification",
18 | "virtualClusterId": "",
19 | "executionRoleArn": "",
20 | "releaseLabel": "emr-6.2.0-latest",
21 | "jobDriver": {
22 | "sparkSubmitJobDriver": {
23 | "entryPoint": "local:///usr/lib/spark/examples/src/main/python/pi.py",
24 | "entryPointArguments": [ "200" ],
25 | "sparkSubmitParameters": "--conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.memory=2G --conf spark.executor.instances=2"
26 | }
27 | },
28 | "configurationOverrides": {
29 | "applicationConfiguration": [
30 | {
31 | "classification": "spark-defaults",
32 | "properties": {
33 | "spark.dynamicAllocation.enabled":"false"
34 | }
35 | },
36 | {
37 | "classification": "spark-log4j",
38 | "properties": {
39 | "log4j.rootCategory":"DEBUG, console"
40 | }
41 | }
42 | ],
43 | "monitoringConfiguration": {
44 | "cloudWatchMonitoringConfiguration": {
45 | "logGroupName": "/emr-containers/jobs",
46 | "logStreamNamePrefix": "demo"
47 | },
48 | "s3MonitoringConfiguration": {
49 | "logUri": "s3://joblogs"
50 | }
51 | }
52 | }
53 | }
54 | EOF
55 |
56 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-debug-log.json
57 |
58 |
59 | ```
60 |
61 | The above request will print DEBUG logs in the Spark driver and executor containers. The generated logs will be pushed to S3 and Amazon CloudWatch Logs as configured in the request.
62 |
63 | Starting from version 3.3.0, Spark has [migrated from log4j1 to log4j2](https://issues.apache.org/jira/browse/SPARK-37814). EMR on EKS still lets you write the log4j properties to the same `"classification": "spark-log4j"`; however, it now needs to be log4j2 properties, such as:
64 | ```
65 | {
66 | "classification": "spark-log4j",
67 | "properties": {
68 | "rootLogger.level" : "DEBUG"
69 | }
70 | }
71 |
72 | ```
73 |
74 | #### **Custom log4j properties**
75 | Download the log4j properties template from [here](https://github.com/apache/spark/blob/master/conf/log4j.properties.template). Edit log4j.properties with the log level as required. Save the edited log4j.properties in a mounted volume. In this example, log4j.properties is placed in an S3 bucket that is mapped to an [FSx for Lustre filesystem](https://docs.aws.amazon.com/fsx/latest/LustreGuide/what-is.html).
76 |
77 | **Request**
78 | The pi.py used in the request payload below is from the [spark examples](https://github.com/apache/spark/blob/master/examples/src/main/python/pi.py).
79 | ```
80 | cat > Spark-Python-in-s3-debug-log.json << EOF
81 | {
82 | "name": "spark-python-in-s3-debug-log",
83 | "virtualClusterId": "",
84 | "executionRoleArn": "",
85 | "releaseLabel": "emr-6.2.0-latest",
86 | "jobDriver": {
87 | "sparkSubmitJobDriver": {
88 | "entryPoint": "s3:///pi.py",
89 | "sparkSubmitParameters": "--conf spark.driver.cores=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=2"
90 | }
91 | },
92 | "configurationOverrides": {
93 | "applicationConfiguration": [
94 | {
95 | "classification": "spark-defaults",
96 | "properties": {
97 | "spark.driver.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties",
98 | "spark.executor.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties",
99 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.options.claimName":"fsx-claim",
100 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.mount.path":"/var/data/",
101 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.mount.readOnly":"false",
102 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.options.claimName":"fsx-claim",
103 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.mount.path":"/var/data/",
104 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.mount.readOnly":"false"
105 | }
106 | }
107 | ],
108 | "monitoringConfiguration": {
109 | "cloudWatchMonitoringConfiguration": {
110 | "logGroupName": "/emr-containers/jobs",
111 | "logStreamNamePrefix": "demo"
112 | },
113 | "s3MonitoringConfiguration": {
114 | "logUri": "s3://joblogs"
115 | }
116 | }
117 | }
118 | }
119 | EOF
120 |
121 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-debug-log.json
122 |
123 | ```
124 |
125 | **Configurations of interest:**
126 | The configuration below enables the Spark driver and executors to pick up the log4j configuration file from the ``/var/data/`` folder mounted into the driver and executor containers. For a guide to mounting FSx for Lustre into the driver and executor containers, refer to [EMR Containers integration with FSx for Lustre](../../storage/docs/spark/fsx-lustre.md)
127 |
128 | ```
129 | "spark.driver.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties",
130 | "spark.executor.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties",
131 |
132 |
133 | ```
134 |
135 |
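136 | To confirm that the DEBUG level took effect, you can filter the job's CloudWatch log group for DEBUG entries. This is a minimal sketch; the log group name comes from the `monitoringConfiguration` used in the requests above, and you may want to narrow it down with `--log-stream-name-prefix`:
137 |
138 | ```
139 | aws logs filter-log-events \
140 | --log-group-name /emr-containers/jobs \
141 | --filter-pattern DEBUG \
142 | --max-items 20
143 | ```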
--------------------------------------------------------------------------------
/content/troubleshooting/docs/connect-spark-ui.md:
--------------------------------------------------------------------------------
1 | # **Connect to Spark UI running on the Driver Pod**
2 |
3 | To obtain more detail about their application or monitor their job execution, Spark application developers can connect to Spark-UI running on the Driver Pod.
4 |
5 | The Spark UI (Spark history server) is packaged with EMR on EKS out of the box. Alternatively, if you want to see the Spark UI immediately after the driver is spun up, you can use the instructions on this page to connect.
6 |
7 | This page shows how to use `kubectl port-forward` to connect to the Job's Driver Pod running in a Kubernetes cluster. This type of connection is useful for debugging purposes.
8 |
9 | **Pre-Requisites**
10 |
11 | * The AWS CLI should be installed
12 | * `kubectl` should be installed
13 | * If this is the first time you are connecting to your EKS cluster from your machine, you should run `aws eks update-kubeconfig --name --region` to download the kubeconfig file and use the correct context to talk to the API server.
14 |
15 | ### Submitting the job to a virtual cluster
16 |
17 | **Request**
18 | ```
19 | cat >spark-python.json << EOF
20 | {
21 | "name": "spark-python-in-s3",
22 | "virtualClusterId": "",
23 | "executionRoleArn": "",
24 | "releaseLabel": "emr-6.3.0-latest",
25 | "jobDriver": {
26 | "sparkSubmitJobDriver": {
27 | "entryPoint": "s3:///trip-count.py",
28 | "sparkSubmitParameters": "--conf spark.driver.cores=4 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=4"
29 | }
30 | },
31 | "configurationOverrides": {
32 | "applicationConfiguration": [
33 | {
34 | "classification": "spark-defaults",
35 | "properties": {
36 |
37 | }
38 | }
39 | ],
40 | "monitoringConfiguration": {
41 | "cloudWatchMonitoringConfiguration": {
42 | "logGroupName": "/emr-containers/jobs",
43 | "logStreamNamePrefix": "demo"
44 | },
45 | "s3MonitoringConfiguration": {
46 | "logUri": "s3://joblogs"
47 | }
48 | }
49 | }
50 | }
51 | EOF
52 | aws emr-containers start-job-run --cli-input-json file:///spark-python.json
53 | ```
54 |
55 |
56 | Once the job is submitted successfully, run the `kubectl get pods -n -w` command to watch all the pods until you observe that the driver pod is in the "Running" state. The driver pod's name is usually in the `spark--driver` format.
57 |
58 |
59 | ### Connecting to the Driver Pod
60 |
61 | The Spark driver pod hosts the Spark UI on port `4040`. However, the pod runs within the internal Kubernetes network. To reach internal Kubernetes resources, `kubectl` provides a port-forwarding tool that allows access from your localhost. To get access to the driver pod in your cluster:
62 |
63 |
64 | 1- Run ```kubectl port-forward 4040:4040```
65 |
66 | The result should be the following:
67 |
68 |
69 | ```
70 | Forwarding from 127.0.0.1:4040 -> 4040
71 | Forwarding from [::1]:4040 -> 4040
72 | ```
73 |
74 | 2- Open a browser and type `http://localhost:4040` in the Address bar.
75 |
76 | You should be able to connect to the Spark UI:
77 |
78 | 
79 | ### Consideration
80 |
81 | Long-running Spark jobs, such as Spark streaming or large Spark SQL queries, can generate large event logs. Large event logs can quickly use up storage space on running pods, and you might see a blank UI or even OutOfMemory errors when you load Persistent UIs. To avoid these issues, we recommend that you either turn on the Spark event log [rolling and compaction feature](https://docs.aws.amazon.com/emr/latest/ManagementGuide/app-history-spark-UI.html#app-history-spark-UI-large-event-logs) (the default emr-container event log dir is /var/log/spark/apps), as sketched below, or use an S3 location and parse the logs using a [self-hosted Spark history server](https://aws.github.io/aws-emr-containers-best-practices/troubleshooting/docs/self-hosted-shs/).
82 |
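83 | A minimal sketch of the rolling settings, passed through the `spark-defaults` classification (the values are examples only):
84 |
85 | ```
86 | {
87 | "classification": "spark-defaults",
88 | "properties": {
89 | "spark.eventLog.rolling.enabled": "true",
90 | "spark.eventLog.rolling.maxFileSize": "128m"
91 | }
92 | }
93 | ```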
--------------------------------------------------------------------------------
/content/troubleshooting/docs/eks-cluster-auto-scaler.md:
--------------------------------------------------------------------------------
1 | # **EKS Cluster Auto-Scaler**
2 | Kubernetes provisions nodes using the Cluster Autoscaler (CAS). AWS EKS has its own implementation of the Kubernetes CAS, and EKS uses managed node groups to spin up nodes.
3 |
4 | ### Logs of the EKS Cluster Autoscaler
5 | 
6 | On AWS, the Cluster Autoscaler utilizes Amazon EC2 Auto Scaling groups to provision nodes. This section will help you identify the error message when the Autoscaler fails to provision nodes.
7 | 
8 | An example scenario where a node group fails because the requested instance type is not supported in certain Availability Zones:
9 | ```
10 | Could not launch On-Demand Instances. Unsupported - Your requested instance type (g4dn.xlarge) is not supported in your requested Availability Zone (ca-central-1d). Please retry your request by not specifying an Availability Zone or choosing ca-central-1a, ca-central-1b. Launching EC2 instance failed.
11 | ```
12 |
13 | The steps to find the logs for the Auto Scaling groups are:
14 | 
15 | Step 1: Log in to the AWS Console and select `Elastic Kubernetes Service`.
16 | 
17 | Step 2: Select the `Compute` tab, and select the `NodeGroup` that fails.
18 | 
19 | Step 3: Select the `Autoscaling group name` from the NodeGroup's section, which will direct you to the `EC2 --> AutoScaling Group` page.
20 | 
21 | Step 4: Click the `Activity` tab of the `AutoScaling Group`; the `Activity History` provides the details of the error:
22 | ```
23 | - Status
24 | - Description
25 | - Cause
26 | - Start Time
27 | - End Time
28 | ```
29 | Alternatively, the activities/logs can be retrieved via the CLI as well:
30 | ```bash
31 | aws autoscaling describe-scaling-activities \
32 | --region <region> \
33 | --auto-scaling-group-name <autoscaling-group-name>
34 | ```
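
If you only want the most recent activities and their causes, here is a hedged example using the CLI's built-in `--query` filtering (the Auto Scaling group name is a placeholder):

```bash
# Show the five most recent scaling activities with status, cause, and description
aws autoscaling describe-scaling-activities \
  --auto-scaling-group-name <autoscaling-group-name> \
  --max-items 5 \
  --query 'Activities[].{Status:StatusCode,Cause:Cause,Description:Description}'
```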
35 |
36 | In the above error scenario, the `ca-central-1d` Availability Zone doesn't support `g4dn.xlarge`. The solution is:
37 |
38 | Step 1: Identify the subnets of the Availability Zones that support the GPU node type. The NodeGroup section lists all the subnets, and you can click each subnet to see which AZ it is deployed to.
39 | 
40 | Step 2: Create a NodeGroup only in the subnets identified in the above step:
41 | ```bash
42 | aws eks create-nodegroup \
43 | --region <region> \
44 | --cluster-name <cluster-name> \
45 | --nodegroup-name <nodegroup-name> \
46 | --scaling-config minSize=10,maxSize=10,desiredSize=10 \
47 | --ami-type AL2_x86_64_GPU \
48 | --node-role <node-role-arn> \
49 | --subnets <subnet-id-1> <subnet-id-2> \
50 | --instance-types g4dn.xlarge \
51 | --disk-size <disk-size>
52 | ```
--------------------------------------------------------------------------------
/content/troubleshooting/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/troubleshooting/docs/index.md
--------------------------------------------------------------------------------
/content/troubleshooting/docs/karpenter.md:
--------------------------------------------------------------------------------
1 | # **Karpenter**
2 |
3 | Karpenter is an open-source cluster autoscaler for Kubernetes (EKS) that automatically provisions new nodes in response to unschedulable pods. Until Karpenter was introduced, EKS would use its implementation of the Cluster Autoscaler (CAS), which scales managed node groups to provision nodes.
4 | 
5 | The challenge with managed node groups is that a node group can only create nodes of a single instance type. In order to provision nodes with different instance types for different workloads, multiple node groups have to be created. Karpenter, on the other hand, can provision nodes of different types by working with the EC2 Fleet API.
6 | The best practices for configuring the provisioners are documented at https://aws.github.io/aws-eks-best-practices/karpenter/
7 |
8 | This guide helps the user troubleshoot common problems with Karpenter.
9 |
10 | ### Logs of Karpenter Controller
11 |
12 | Karpenter is a Custom Kubernetes Controller, and the following steps would help find Karpenter Logs.
13 |
14 | Step 1: Identify the namespace where Karpenter is running. In most cases, `helm` is used to deploy the Karpenter packages. The `helm ls` command lists the namespace where Karpenter is installed.
15 | ```
16 | # Example
17 |
18 | % helm ls --all-namespaces
19 | NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
20 | karpenter karpenter 1 2023-05-15 14:16:03.726908 -0500 CDT deployed karpenter-v0.27.3 0.27.3
21 | ```
22 |
23 | Step 2: Set up kubectl
24 | ```
25 | brew install kubectl
26 |
27 | aws --region <region> eks update-kubeconfig --name <cluster-name>
28 | ```
29 |
30 | Step 3: Check the status of the Karpenter pods
31 | ```
32 | # kubectl get pods -n <namespace>
33 |
34 | % kubectl get pods -n karpenter
35 | NAME READY STATUS RESTARTS AGE
36 | karpenter-7b455dccb8-prrzx 1/1 Running 0 7m18s
37 | karpenter-7b455dccb8-x8zv8 1/1 Running 0 7m18s
38 | ```
39 |
40 | Step 4: Use the `kubectl logs` command to read the Karpenter logs. In the example below, the Karpenter pod logs show that a `t3a.large` instance was launched.
41 | ```
42 | # kubectl logs <karpenter-pod-name> -n <namespace>
43 |
44 | % kubectl logs karpenter-7b455dccb8-prrzx -n karpenter
45 | ..
46 | ..
47 |
48 | 2023-05-15T19:16:20.546Z DEBUG controller discovered region {"commit": "***-dirty", "region": "us-west-2"}
49 | 2023-05-15T19:16:20.666Z DEBUG controller discovered cluster endpoint {"commit": "**-dirty", "cluster-endpoint": "https://******.**.us-west-2.eks.amazonaws.com"}
50 | ..
51 | ..
52 | 2023-05-15T19:16:20.786Z INFO controller.provisioner starting controller {"commit": "**-dirty"}
53 | 2023-05-15T19:16:20.787Z INFO controller.deprovisioning starting controller {"commit": "**-dirty"}
54 | ..
55 | 2023-05-15T19:16:20.788Z INFO controller Starting EventSource {"commit": "**-dirty", "controller": "node", "controllerGroup": "", "controllerKind": "Node", "source": "kind source: *v1.Pod"}
56 | ..
57 | 2023-05-15T20:34:56.718Z INFO controller.provisioner.cloudprovider launched instance {"commit": "d7e22b1-dirty", "provisioner": "default", "id": "i-03146cd4d4152a935", "hostname": "ip-*-*-*-*.us-west-2.compute.internal", "instance-type": "t3a.large", "zone": "us-west-2d", "capacity-type": "on-demand", "capacity": {"cpu":"2","ephemeral-storage":"20Gi","memory":"7577Mi","pods":"35"}}
58 | ```
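
To quickly surface provisioning failures, you can filter the controller logs; a minimal sketch, assuming Karpenter runs as a Deployment named `karpenter` in the `karpenter` namespace:

```bash
# Read the last hour of logs from one of the Karpenter controller pods and filter for problems
kubectl logs deployment/karpenter -n karpenter --since=1h --all-containers \
  | grep -iE "error|warn"
```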
59 |
60 | ### Error while decoding JSON: json: unknown field "iamIdentityMappings"
61 |
62 | **Problem**
63 | The create-cluster command from https://karpenter.sh/v0.27.3/getting-started/getting-started-with-karpenter/#3-create-a-cluster throws an error:
64 | ```
65 | Error: loading config file "karpenter.yaml": error unmarshaling JSON: while decoding JSON: json: unknown field "iamIdentityMappings"
66 | ```
67 |
68 | **Solution**
69 | The `eksctl` CLI was not able to understand the kind `iamIdentityMappings`. This is because the `eksctl` version is old and its schema doesn't support this kind.
70 | 
71 | The solution is to upgrade the `eksctl` CLI and re-run the cluster creation commands:
72 | ```bash
73 | brew upgrade eksctl
74 | ```
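
To verify the upgrade took effect before re-running the commands, you can print the installed version:

```bash
# Confirm the eksctl version after the upgrade
eksctl version

# Optionally, print eksctl, kubectl, and OS details in one shot
eksctl info
```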
--------------------------------------------------------------------------------
/content/troubleshooting/docs/rbac-permissions-errors.md:
--------------------------------------------------------------------------------
1 | # **RBAC Permission Errors**
2 |
3 | The following sections provide solutions to common RBAC authorization errors.
4 |
5 | ### PersistentVolumeClaims is forbidden
6 |
7 | **Error:**
8 | Spark jobs that require creating, listing, or deleting Persistent Volume Claims (PVCs) were not supported before EMR 6.8. Jobs that require these permissions will fail with the exception "persistentvolumeclaims is forbidden". Looking into the driver logs, you may see an error like this:
9 | ```
10 | persistentvolumeclaims is forbidden. User "system:serviceaccount:emr:emr-containers-sa-spark-client-93ztm12rnjz163mt3rgdb3bjqxqfz1cgvqh1e9be6yr81" cannot create resource "persistentvolumeclaims" in API group "" in namespace "emr".
11 | ```
12 | You may encounter this error because the default Kubernetes role `emr-containers` is missing the required RBAC permissions. As a result, the `emr-containers` primary role can’t dynamically create necessary permissions for additional roles such as Spark driver, Spark executor or Spark client when you submit a job.
13 |
14 | **Solution:**
15 | Add the required permissions to `emr-containers`.
16 |
17 | Here are the complete RBAC permissions for EMR on EKS:
18 |
19 | * [emr-containers.yaml](https://github.com/aws/aws-emr-containers-best-practices/blob/main/tools/k8s-rbac-policies/emr-containers.yaml)
20 |
21 | You can compare whether you have the complete RBAC permissions using the steps below:
22 | ```bash
23 | export NAMESPACE=YOUR_VALUE
24 | kubectl describe role emr-containers -n ${NAMESPACE}
25 | ```
26 |
27 | If the permissions don't match, proceed to apply the latest permissions:
28 |
29 | ```bash
30 | export NAMESPACE=YOUR_VALUE
31 | kubectl apply -f https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/main/tools/k8s-rbac-policies/emr-containers.yaml -n ${NAMESPACE}
32 | ```
33 | You can delete the Spark driver and client roles because they will be recreated dynamically the next time a job runs.
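
For example, a minimal sketch of deleting those roles so they are recreated on the next job run; the role names below match the ones used by `rbac_patch.py` in this repository, and the namespace is your virtual cluster's namespace:

```bash
export NAMESPACE=YOUR_VALUE
kubectl delete role emr-containers-role-spark-driver emr-containers-role-spark-client \
  -n ${NAMESPACE} --ignore-not-found
```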
34 |
35 |
--------------------------------------------------------------------------------
/content/troubleshooting/docs/self-hosted-shs.md:
--------------------------------------------------------------------------------
1 | # **Self Hosted Spark History Server**
2 |
3 | In this section, you will learn how to self host Spark History Server instead of using the Persistent App UI on the AWS Console.
4 |
5 | 1. In your StartJobRun call for EMR on EKS, set the following configurations, `spark.eventLog.enabled` and `spark.eventLog.dir`, to point to an S3 bucket where you would like your event logs to go, as shown:
6 |
7 |
8 | "configurationOverrides": {
9 | "applicationConfiguration": [{
10 | "classification": "spark-defaults",
11 | "properties": {
12 | "spark.eventLog.enabled": "true",
13 | "spark.eventLog.dir": "s3://your-bucket-here/some-directory"
14 | ...
15 |
16 |
17 | 2. Take note of the S3 bucket specified in #1 and use it in the instructions in step #3 wherever you are asked for `path_to_eventlog`, making sure it is prefixed with `s3a://`, not `s3://`. An example is `-Dspark.history.fs.logDirectory=s3a://path_to_eventlog`.
18 |
19 | 3. Follow instructions [here](https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-history.html#monitor-spark-ui-history-local) to launch Spark History Server using a Docker image.
20 |
21 | 4. After following the above steps, event logs should flow to the specified S3 bucket, and the Docker container should spin up the Spark History Server (which will be available at `127.0.0.1:18080`). This instance of the Spark History Server will pick up and parse event logs from the specified S3 bucket.
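
If you prefer not to use Docker, a minimal local alternative is to run the history server from a plain Apache Spark distribution; this sketch assumes `SPARK_HOME` points at a local Spark install with the `hadoop-aws` and AWS SDK bundle jars on its classpath, AWS credentials available in the environment, and a hypothetical bucket name:

```bash
# Point the history server at the event log location in S3 (note the s3a:// scheme)
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=s3a://your-bucket-here/some-directory"

# Start the Spark History Server; the UI is then available at http://localhost:18080
$SPARK_HOME/sbin/start-history-server.sh
```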
--------------------------------------------------------------------------------
/content/troubleshooting/docs/where-to-look-for-spark-logs.md:
--------------------------------------------------------------------------------
1 | # **Spark Driver and Executor Logs**
2 |
3 | The status of the spark jobs can be monitored via [EMR on EKS describe-job-run API](https://docs.aws.amazon.com/cli/latest/reference/emr-containers/describe-job-run.html).
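
For example, a quick status check from the CLI (the IDs are placeholders):

```bash
# Print the current state of a job run, e.g. PENDING, RUNNING, COMPLETED, FAILED
aws emr-containers describe-job-run \
  --virtual-cluster-id <virtual-cluster-id> \
  --id <job-run-id> \
  --query 'jobRun.state'
```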
4 |
5 | To monitor job progress and troubleshoot failures, you must configure your jobs to send log information to Amazon S3, Amazon CloudWatch Logs, or both.
6 |
7 | ### Send Spark Logs to S3
8 |
9 | ####**Update the IAM role with S3 write access**
10 | Configure the IAM Role passed in StartJobRun input `executionRoleArn` with access to S3 buckets.
11 | ```json
12 | {
13 | "Version": "2012-10-17",
14 | "Statement": [
15 | {
16 | "Effect": "Allow",
17 | "Action": [
18 | "s3:PutObject",
19 | "s3:GetObject",
20 | "s3:ListBucket"
21 | ],
22 | "Resource": [
23 | "arn:aws:s3:::my_s3_log_location",
24 | "arn:aws:s3:::my_s3_log_location/*"
25 | ]
26 | }
27 | ]
28 | }
29 | ```
30 |
31 | ####**Configure the StartJobRun API with S3 buckets**
32 | Configure the `monitoringConfiguration` with `s3MonitoringConfiguration`, and configure the S3 location where the logs would be synced.
33 |
34 | ```json
35 | {
36 | "name": "",
37 | "virtualClusterId": "",
38 | "executionRoleArn": "",
39 | "releaseLabel": "",
40 | "jobDriver": {
41 |
42 | },
43 | "configurationOverrides": {
44 | "monitoringConfiguration": {
45 | "persistentAppUI": "ENABLED",
46 | "s3MonitoringConfiguration": {
47 | "logUri": "s3://my_s3_log_location"
48 | }
49 | }
50 | }
51 | }
52 | ```
53 |
54 | ####**Log location of JobRunner, Driver, Executor in S3**
55 | The JobRunner (the pod that does spark-submit), Spark driver, and Spark executor logs can be found in the following S3 locations.
56 | ```text
57 | JobRunner/Spark-Submit/Controller Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${job-runner-pod-id}/(stderr.gz/stdout.gz)
58 |
59 | Driver Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-pod-name}/(stderr.gz/stdout.gz)
60 |
61 | Executor Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-executor-id}/(stderr.gz/stdout.gz)
62 | ```
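
For example, a quick way to browse these locations from the CLI (the bucket and IDs are placeholders):

```bash
# List every log object uploaded for a given job run
aws s3 ls s3://my_s3_log_location/<virtual-cluster-id>/jobs/<job-id>/containers/ --recursive
```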
63 |
64 |
65 | ### Send Spark Logs to CloudWatch
66 |
67 | ####**Update the IAM role with CloudWatch access**
68 | Configure the IAM Role passed in StartJobRun input `executionRoleArn` with access to CloudWatch Streams.
69 | ```json
70 | {
71 | "Version": "2012-10-17",
72 | "Statement": [
73 | {
74 | "Effect": "Allow",
75 | "Action": [
76 | "logs:CreateLogStream",
77 | "logs:DescribeLogGroups",
78 | "logs:DescribeLogStreams"
79 | ],
80 | "Resource": [
81 | "arn:aws:logs:*:*:*"
82 | ]
83 | },
84 | {
85 | "Effect": "Allow",
86 | "Action": [
87 | "logs:PutLogEvents"
88 | ],
89 | "Resource": [
90 | "arn:aws:logs:*:*:log-group:my_log_group_name:log-stream:my_log_stream_prefix/*"
91 | ]
92 | }
93 | ]
94 | }
95 | ```
96 |
97 | ####**Configure StartJobRun API with CloudWatch**
98 | Configure the `monitoringConfiguration` with `cloudWatchMonitoringConfiguration`, and configure the CloudWatch `logGroupName` and `logStreamNamePrefix` where the logs should be pushed.
99 |
100 | ```json
101 | {
102 | "name": "",
103 | "virtualClusterId": "",
104 | "executionRoleArn": "",
105 | "releaseLabel": "",
106 | "jobDriver": {
107 |
108 | },
109 | "configurationOverrides": {
110 | "monitoringConfiguration": {
111 | "persistentAppUI": "ENABLED",
112 | "cloudWatchMonitoringConfiguration": {
113 | "logGroupName": "my_log_group_name",
114 | "logStreamNamePrefix": "my_log_stream_prefix"
115 | }
116 | }
117 | }
118 | }
119 | ```
120 |
121 | ####**Log location of JobRunner, Driver, Executor**
122 | The JobRunner (the pod that does spark-submit), Spark driver, and Spark executor logs can be found in the following AWS CloudWatch locations.
123 |
124 | ```text
125 | JobRunner/Spark-Submit/Controller Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${job-runner-pod-id}/(stderr.gz/stdout.gz)
126 |
127 | Driver Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-pod-name}/(stderr.gz/stdout.gz)
128 |
129 | Executor Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-executor-id}/(stderr.gz/stdout.gz)
130 | ```
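
For example, a hedged way to follow these streams with the AWS CLI v2 (the log group, prefix, and IDs are placeholders):

```bash
# Tail all log streams of a job run as they are written
aws logs tail my_log_group_name \
  --log-stream-name-prefix my_log_stream_prefix/<virtual-cluster-id>/jobs/<job-id> \
  --follow
```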
131 |
132 |
--------------------------------------------------------------------------------
/content/troubleshooting/resources/screen-shot-spark-ui-driver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/troubleshooting/resources/screen-shot-spark-ui-driver.png
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: "EMR Containers Best Practices Guides"
2 | repo_name: "aws/aws-emr-containers-best-practices"
3 | repo_url: "https://github.com/aws/aws-emr-containers-best-practices"
4 | docs_dir: "content"
5 | theme:
6 | name: material
7 | features:
8 | - navigation.tabs
9 | nav:
10 | - Guides:
11 | - Introduction: 'index.md'
12 | - EMR on EKS(AWS Outposts): 'outposts/emr-containers-on-outposts.md'
13 | - Security:
14 | - Encryption: 'security/docs/spark/encryption.md'
15 | - Data Encryption: 'security/docs/spark/data-encryption.md'
16 | - Network Security: 'security/docs/spark/network-security.md'
17 | - Secrets: 'security/docs/spark/secrets.md'
18 | - Chain IAM Roles: 'security/docs/spark/chain-role.md'
19 | - Submit applications:
20 | - Pyspark: 'submit-applications/docs/spark/pyspark.md'
21 | - Build Multi-arch Docker Image: 'submit-applications/docs/spark/multi-arch-image.md'
22 | - Storage:
23 | - EBS: 'storage/docs/spark/ebs.md'
24 | - FSx for Lustre: 'storage/docs/spark/fsx-lustre.md'
25 | - Instance Store: 'storage/docs/spark/instance-store.md'
26 | - Metastore Integration:
27 | - Hive Metastore: 'metastore-integrations/docs/hive-metastore.md'
28 | - AWS Glue: 'metastore-integrations/docs/aws-glue.md'
29 | - Troubleshooting:
30 | - Spark Log Location in S3 and CloudWatch: 'troubleshooting/docs/where-to-look-for-spark-logs.md'
31 | - Change Log Level: 'troubleshooting/docs/change-log-level.md'
32 | - Connect to Spark UI: 'troubleshooting/docs/connect-spark-ui.md'
33 | - Connect to Spark UI via Reverse Proxy: 'troubleshooting/docs/reverse-proxy-sparkui.md'
34 | - Self Hosted SHS: 'troubleshooting/docs/self-hosted-shs.md'
35 | - RBAC Permissions error: 'troubleshooting/docs/rbac-permissions-errors.md'
36 | - EKS Cluster AutoScaler: 'troubleshooting/docs/eks-cluster-auto-scaler.md'
37 | - EKS Karpenter: 'troubleshooting/docs/karpenter.md'
38 | - Node Placement:
39 | - EKS Node placement: 'node-placement/docs/eks-node-placement.md'
40 | - EKS Fargate Node placement: 'node-placement/docs/fargate-node-placement.md'
41 | - Performance:
42 | - Dynamic Resource Allocation: 'performance/docs/dra.md'
43 | - BinPacking: 'performance/docs/binpack.md'
44 | - Karpenter: 'performance/docs/karpenter.md'
45 | - EKS Best Practices: 'best-practices-and-recommendations/eks-best-practices.md'
46 | - Cost Tracking and Optimization:
47 | - Cost Optimization using EC2 Spot Instances: 'cost-optimization/docs/cost-optimization.md'
48 | - Node Decommission: 'cost-optimization/docs/node-decommission.md'
49 | - Cost Tracking: 'cost-optimization/docs/cost-tracking.md'
50 | - Scalability:
51 | - Glossary and Terms: 'scalability/docs/scalaiblity-glossary.md'
52 | - Known Factors for Spark Operator: 'scalability/docs/known-factors-spark-operator.md'
53 | - Known Factors for StartJobRun API: 'scalability/docs/known-factors-start-job-run.md'
54 | - Recommendation for StartJobRun: 'scalability/docs/load-test-for-start-job-run-api.md'
55 | - Recommendation for Spark Operator: 'scalability/docs/load-test-for-spark-operator.md'
56 | - Grafana Dashboard: 'scalability/docs/graphana-dashboard.md'
57 | markdown_extensions:
58 | - toc:
59 | permalink: true
60 | - admonition
61 | - codehilite
62 | - tables
63 |
--------------------------------------------------------------------------------
/tools/emr-vertical-autoscaling/grafana-dashboard-model.json:
--------------------------------------------------------------------------------
1 | {
2 | "annotations": {
3 | "list": [
4 | {
5 | "builtIn": 1,
6 | "datasource": {
7 | "type": "datasource",
8 | "uid": "grafana"
9 | },
10 | "enable": true,
11 | "hide": true,
12 | "iconColor": "rgba(0, 211, 255, 1)",
13 | "name": "Annotations & Alerts",
14 | "target": {
15 | "limit": 100,
16 | "matchAny": false,
17 | "tags": [],
18 | "type": "dashboard"
19 | },
20 | "type": "dashboard"
21 | }
22 | ]
23 | },
24 | "description": "EMR Vertical Autoscaling",
25 | "editable": true,
26 | "fiscalYearStartMonth": 0,
27 | "gnetId": 14588,
28 | "graphTooltip": 0,
29 | "id": 1,
30 | "links": [],
31 | "liveNow": false,
32 | "panels": [
33 | {
34 | "aliasColors": {},
35 | "bars": false,
36 | "dashLength": 10,
37 | "dashes": false,
38 | "datasource": {
39 | "type": "prometheus",
40 | "uid": "PBFA97CFB590B2093"
41 | },
42 | "description": "Shows the amount of memory being provisioned to EMR spark applications as compared to the actual utilization and the recommendations computed by EMR vertical autoscaling",
43 | "fill": 0,
44 | "fillGradient": 0,
45 | "gridPos": {
46 | "h": 12,
47 | "w": 24,
48 | "x": 0,
49 | "y": 0
50 | },
51 | "hiddenSeries": false,
52 | "id": 3,
53 | "legend": {
54 | "avg": false,
55 | "current": false,
56 | "max": false,
57 | "min": false,
58 | "show": true,
59 | "total": false,
60 | "values": false
61 | },
62 | "lines": true,
63 | "linewidth": 1,
64 | "links": [],
65 | "nullPointMode": "null",
66 | "options": {
67 | "alertThreshold": true
68 | },
69 | "percentage": false,
70 | "pluginVersion": "9.4.3",
71 | "pointradius": 2,
72 | "points": false,
73 | "renderer": "flot",
74 | "seriesOverrides": [],
75 | "spaceLength": 10,
76 | "stack": false,
77 | "steppedLine": false,
78 | "targets": [
79 | {
80 | "datasource": {
81 | "type": "prometheus",
82 | "uid": "PBFA97CFB590B2093"
83 | },
84 | "editorMode": "code",
85 | "exemplar": true,
86 | "expr": "max(kube_customresource_vpa_spark_rec_memory_target{signature=\"$signature\", namespace=\"$namespace\"})",
87 | "format": "time_series",
88 | "interval": "",
89 | "intervalFactor": 1,
90 | "legendFormat": "Recommended Memory",
91 | "range": true,
92 | "refId": "D"
93 | },
94 | {
95 | "datasource": {
96 | "type": "prometheus",
97 | "uid": "PBFA97CFB590B2093"
98 | },
99 | "editorMode": "code",
100 | "exemplar": true,
101 | "expr": "max(container_memory_working_set_bytes{container=\"spark-kubernetes-executor\", namespace=\"$namespace\"} * on (namespace, pod) group_left() kube_pod_labels{label_emr_containers_amazonaws_com_dynamic_sizing_signature=\"$signature\", namespace=\"$namespace\"})",
102 | "format": "time_series",
103 | "interval": "",
104 | "intervalFactor": 1,
105 | "legendFormat": "Actual Utilization",
106 | "range": true,
107 | "refId": "C"
108 | },
109 | {
110 | "datasource": {
111 | "type": "prometheus",
112 | "uid": "PBFA97CFB590B2093"
113 | },
114 | "editorMode": "code",
115 | "expr": "max(kube_pod_container_resource_requests{container=\"spark-kubernetes-executor\", resource=\"memory\", namespace=\"$namespace\"} * on (namespace, pod) group_left() kube_pod_labels{label_emr_containers_amazonaws_com_dynamic_sizing_signature=\"$signature\", namespace=\"$namespace\"})",
116 | "hide": false,
117 | "legendFormat": "Provisioned Memory",
118 | "range": true,
119 | "refId": "A"
120 | }
121 | ],
122 | "thresholds": [],
123 | "timeRegions": [],
124 | "title": "EMR Vertical Autoscaling - Provisioned Memory, Utilization and Recommendation",
125 | "tooltip": {
126 | "shared": true,
127 | "sort": 0,
128 | "value_type": "individual"
129 | },
130 | "type": "graph",
131 | "xaxis": {
132 | "mode": "time",
133 | "show": true,
134 | "values": []
135 | },
136 | "yaxes": [
137 | {
138 | "$$hashKey": "object:420",
139 | "format": "bytes",
140 | "label": "Bytes",
141 | "logBase": 1,
142 | "show": true
143 | },
144 | {
145 | "$$hashKey": "object:421",
146 | "format": "short",
147 | "logBase": 1,
148 | "show": true
149 | }
150 | ],
151 | "yaxis": {
152 | "align": false
153 | }
154 | }
155 | ],
156 | "refresh": false,
157 | "revision": 1,
158 | "schemaVersion": 38,
159 | "style": "dark",
160 | "tags": [
161 | "Autoscaling",
162 | "VPA",
163 | "EMR"
164 | ],
165 | "templating": {
166 | "list": [
167 | {
168 | "current": {
169 | "selected": false,
170 | "text": "q1-v2.4",
171 | "value": "q1-v2.4"
172 | },
173 | "datasource": {
174 | "type": "prometheus",
175 | "uid": "PBFA97CFB590B2093"
176 | },
177 | "definition": "label_values(kube_customresource_vpa_spark_rec_memory_target, signature)",
178 | "hide": 0,
179 | "includeAll": false,
180 | "label": "Signature",
181 | "multi": false,
182 | "name": "signature",
183 | "options": [],
184 | "query": {
185 | "query": "label_values(kube_customresource_vpa_spark_rec_memory_target, signature)",
186 | "refId": "StandardVariableQuery"
187 | },
188 | "refresh": 2,
189 | "regex": "",
190 | "skipUrlSync": false,
191 | "sort": 1,
192 | "tagValuesQuery": "",
193 | "tagsQuery": "",
194 | "type": "query",
195 | "useTags": false
196 | },
197 | {
198 | "current": {
199 | "selected": false,
200 | "text": "emr",
201 | "value": "emr"
202 | },
203 | "datasource": {
204 | "type": "prometheus",
205 | "uid": "PBFA97CFB590B2093"
206 | },
207 | "definition": "label_values(kube_customresource_vpa_spark_rec_memory_target, namespace)",
208 | "description": "The K8s Namespace where the EMR job was submitted",
209 | "hide": 0,
210 | "includeAll": false,
211 | "label": "Namespace",
212 | "multi": false,
213 | "name": "namespace",
214 | "options": [],
215 | "query": {
216 | "query": "label_values(kube_customresource_vpa_spark_rec_memory_target, namespace)",
217 | "refId": "StandardVariableQuery"
218 | },
219 | "refresh": 2,
220 | "regex": "",
221 | "skipUrlSync": false,
222 | "sort": 1,
223 | "type": "query"
224 | }
225 | ]
226 | },
227 | "time": {
228 | "from": "2023-05-01T10:26:21.317Z",
229 | "to": "2023-05-01T10:38:18.180Z"
230 | },
231 | "timepicker": {
232 | "refresh_intervals": [
233 | "5s",
234 | "10s",
235 | "30s",
236 | "1m",
237 | "5m",
238 | "15m",
239 | "30m",
240 | "1h"
241 | ],
242 | "time_options": [
243 | "5m",
244 | "15m",
245 | "1h",
246 | "3h",
247 | "6h",
248 | "12h",
249 | "24h",
250 | "2d",
251 | "7d",
252 | "14d"
253 | ]
254 | },
255 | "timezone": "browser",
256 | "title": "EMR Vertical Autoscaling",
257 | "uid": "qQkYVZBVz",
258 | "version": 7,
259 | "weekStart": ""
260 | }
--------------------------------------------------------------------------------
/tools/emr-vertical-autoscaling/prometheus-helm-values.yaml:
--------------------------------------------------------------------------------
1 | kube-state-metrics:
2 | image:
3 | registry: public.ecr.aws
4 | repository: bitnami/kube-state-metrics
5 | tag: "2.8.1"
6 | sha: ""
7 | pullPolicy: Always
8 | extraArgs:
9 | - --custom-resource-state-config
10 | - |
11 | spec:
12 | resources:
13 | - groupVersionKind:
14 | group: autoscaling.k8s.io
15 | kind: "VerticalPodAutoscaler"
16 | version: "v1"
17 | labelsFromPath:
18 | verticalpodautoscaler: [metadata, name]
19 | namespace: [metadata, namespace]
20 | signature: [metadata, labels, "emr-containers.amazonaws.com/dynamic.sizing.signature"]
21 | metrics:
22 | - name: vpa_spark_rec_memory_lower
23 | help: "VPA recommended memory - lower bound"
24 | each:
25 | type: Gauge
26 | gauge:
27 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", lowerBound]
28 | valueFrom: [memory]
29 | - name: vpa_spark_rec_memory_upper
30 | help: "VPA recommended memory - upper bound"
31 | each:
32 | type: Gauge
33 | gauge:
34 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", upperBound]
35 | valueFrom: [memory]
36 | - name: vpa_spark_rec_memory_target
37 | help: "VPA recommended memory - target"
38 | each:
39 | type: Gauge
40 | gauge:
41 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", target]
42 | valueFrom: [memory]
43 | rbac:
44 | create: true
45 | useClusterRole: true
46 | extraRules:
47 | - apiGroups: ["autoscaling.k8s.io"]
48 | resources: ["verticalpodautoscalers"]
49 | verbs: ["list", "watch"]
50 | - apiGroups: ["apiextensions.k8s.io"]
51 | resources: ["customresourcedefinitions"]
52 | verbs: ["list", "watch"]
53 | metricLabelsAllowlist:
54 | - "pods=[emr-containers.amazonaws.com/dynamic.sizing.signature]"
55 |
--------------------------------------------------------------------------------
/tools/k8s-rbac-policies/emr-containers.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: Role
3 | metadata:
4 | name: emr-containers
5 | rules:
6 | - apiGroups:
7 | - ""
8 | resources:
9 | - namespaces
10 | verbs:
11 | - get
12 | - apiGroups:
13 | - ""
14 | resources:
15 | - serviceaccounts
16 | - services
17 | - configmaps
18 | - events
19 | - pods
20 | - pods/log
21 | verbs:
22 | - get
23 | - list
24 | - watch
25 | - describe
26 | - create
27 | - edit
28 | - delete
29 | - deletecollection
30 | - annotate
31 | - patch
32 | - label
33 | - apiGroups:
34 | - ""
35 | resources:
36 | - secrets
37 | verbs:
38 | - create
39 | - patch
40 | - delete
41 | - watch
42 | - apiGroups:
43 | - apps
44 | resources:
45 | - statefulsets
46 | - deployments
47 | verbs:
48 | - get
49 | - list
50 | - watch
51 | - describe
52 | - create
53 | - edit
54 | - delete
55 | - annotate
56 | - patch
57 | - label
58 | - apiGroups:
59 | - batch
60 | resources:
61 | - jobs
62 | verbs:
63 | - get
64 | - list
65 | - watch
66 | - describe
67 | - create
68 | - edit
69 | - delete
70 | - annotate
71 | - patch
72 | - label
73 | - apiGroups:
74 | - extensions
75 | - networking.k8s.io
76 | resources:
77 | - ingresses
78 | verbs:
79 | - get
80 | - list
81 | - watch
82 | - describe
83 | - create
84 | - edit
85 | - delete
86 | - annotate
87 | - patch
88 | - label
89 | - apiGroups:
90 | - rbac.authorization.k8s.io
91 | resources:
92 | - roles
93 | - rolebindings
94 | verbs:
95 | - get
96 | - list
97 | - watch
98 | - describe
99 | - create
100 | - edit
101 | - delete
102 | - deletecollection
103 | - annotate
104 | - patch
105 | - label
106 | - apiGroups:
107 | - ""
108 | resources:
109 | - persistentvolumeclaims
110 | verbs:
111 | - create
112 | - list
113 | - delete
114 | - patch
115 | - deletecollection
116 |
--------------------------------------------------------------------------------
/tools/k8s-rbac-policies/rbac_patch.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess as sp
3 | import tempfile as temp
4 | import json
5 | import argparse
6 | import uuid
7 |
8 |
9 | def delete_if_exists(dictionary: dict, key: str):
10 | if dictionary.get(key, None) is not None:
11 | del dictionary[key]
12 |
13 |
14 | def doTerminalCmd(cmd):
15 | with temp.TemporaryFile() as f:
16 | process = sp.Popen(cmd, stdout=f, stderr=f)
17 | process.wait()
18 | f.seek(0)
19 | msg = f.read().decode()
20 | return msg
21 |
22 |
23 | def patchRole(roleName, namespace, extraRules, skipConfirmation=False):
24 | cmd = f"kubectl get role {roleName} -n {namespace} --output json".split(" ")
25 | msg = doTerminalCmd(cmd)
26 | if "(NotFound)" in msg and "Error" in msg:
27 | print(msg)
28 | return False
29 | role = json.loads(msg)
30 | rules = role["rules"]
31 | rulesToAssign = extraRules[::]
32 | passedRules = []
33 | for rule in rules:
34 | apiGroups = set(rule["apiGroups"])
35 | resources = set(rule["resources"])
36 | verbs = set(rule["verbs"])
37 | for extraRule in extraRules:
38 | passes = 0
39 | apiGroupsExtra = set(extraRule["apiGroups"])
40 | resourcesExtra = set(extraRule["resources"])
41 | verbsExtra = set(extraRule["verbs"])
42 | passes += len(apiGroupsExtra.intersection(apiGroups)) >= len(apiGroupsExtra)
43 | passes += len(resourcesExtra.intersection(resources)) >= len(resourcesExtra)
44 | passes += len(verbsExtra.intersection(verbs)) >= len(verbsExtra)
45 | if passes >= 3:
46 | if extraRule not in passedRules:
47 | passedRules.append(extraRule)
48 | if extraRule in rulesToAssign:
49 | rulesToAssign.remove(extraRule)
50 | break
51 | prompt_text = "Apply Changes?"
52 | if len(rulesToAssign) == 0:
53 | print(f"The role {roleName} seems to already have the necessary permissions!")
54 | prompt_text = "Proceed anyways?"
55 | for ruleToAssign in rulesToAssign:
56 | role["rules"].append(ruleToAssign)
57 | delete_if_exists(role, "creationTimestamp")
58 | delete_if_exists(role, "resourceVersion")
59 | delete_if_exists(role, "uid")
60 | new_role = json.dumps(role, indent=3)
61 | uid = uuid.uuid4()
62 | filename = f"Role-{roleName}-New_Permissions-{uid}-TemporaryFile.json"
63 | try:
64 | with open(filename, "w+") as f:
65 | f.write(new_role)
66 | f.flush()
67 | prompt = "y"
68 | if not skipConfirmation:
69 | prompt = input(
70 | doTerminalCmd(f"kubectl diff -f {filename}".split(" ")) + f"\n{prompt_text} y/n: "
71 | ).lower().strip()
72 | while prompt != "y" and prompt != "n":
73 | prompt = input("Please make a valid selection. y/n: ").lower().strip()
74 | if prompt == "y":
75 | print(doTerminalCmd(f"kubectl apply -f {filename}".split(" ")))
76 | except Exception as e:
77 | print(e)
78 | os.remove(f"./{filename}")
79 |
80 |
81 | if __name__ == '__main__':
82 | parser = argparse.ArgumentParser()
83 | parser.add_argument("-n", "--namespace",
84 | help="Namespace of the Role. By default its the VirtualCluster's namespace",
85 | required=True,
86 | dest="namespace"
87 | )
88 |
89 | parser.add_argument("-p", "--no-prompt",
90 | help="Applies the patches without asking first",
91 | dest="no_prompt",
92 | default=False,
93 | action="store_true"
94 | )
95 | args = parser.parse_args()
96 |
97 | emrRoleRules = [
98 | {
99 | "apiGroups": [""],
100 | "resources": ["persistentvolumeclaims"],
101 | "verbs": ["list", "create", "delete"]
102 | }
103 |
104 | ]
105 |
106 | driverRoleRules = [
107 | {
108 | "apiGroups": [""],
109 | "resources": ["persistentvolumeclaims"],
110 | "verbs": ["list", "create", "delete"]
111 | },
112 | {
113 | "apiGroups": [""],
114 | "resources": ["services"],
115 | "verbs": ["get", "list", "describe", "create", "delete", "watch"]
116 | }
117 | ]
118 |
119 | clientRoleRules = [
120 | {
121 | "apiGroups": [""],
122 | "resources": ["persistentvolumeclaims"],
123 | "verbs": ["list", "create", "delete"]
124 | }
125 | ]
126 |
127 | patchRole("emr-containers", args.namespace, emrRoleRules, args.no_prompt)
128 | patchRole("emr-containers-role-spark-driver", args.namespace, driverRoleRules, args.no_prompt)
129 | patchRole("emr-containers-role-spark-client", args.namespace, clientRoleRules, args.no_prompt)
--------------------------------------------------------------------------------
/tools/start-job-run-converter/README.md:
--------------------------------------------------------------------------------
1 | # start-job-run converter
2 | This tool can be used to migrate spark-submit commands in a script to **aws emr-containers start-job-run**
3 | and save the result to a new file with a `_converted` suffix.
4 |
5 | Supported arguments:
6 | ```
7 | -h, --help show this help message and exit
8 | --file FILE the input spark-submit script file
9 | --name NAME The name of the job run
10 | --virtual-cluster-id VIRTUAL_CLUSTER_ID
11 | The virtual cluster ID for which the job run request is submitted
12 | --client-token CLIENT_TOKEN
13 | The client idempotency token of the job run request
14 | --execution-role-arn EXECUTION_ROLE_ARN
15 | The execution role ARN for the job run
16 | --release-label RELEASE_LABEL
17 | The Amazon EMR release version to use for the job run
18 | --configuration-overrides CONFIGURATION_OVERRIDES
19 | The configuration overrides for the job run
20 | --tags TAGS The tags assigned to job runs
21 | ```
22 |
23 | ## Run the tool
24 |
25 | ```
26 | startJobRunConverter.py \
27 | --file ./submit_script.sh \
28 | --virtual-cluster-id <virtual-cluster-id> \
29 | --name emreks-test-job \
30 | --execution-role-arn <execution-role-arn> \
31 | --release-label emr-6.4.0-latest \
32 | --tags KeyName1=string \
33 | --configuration-overrides '{
34 | "monitoringConfiguration": {
35 | "cloudWatchMonitoringConfiguration": {
36 | "logGroupName": "emrekstest",
37 | "logStreamNamePrefix": "emreks_log_stream"
38 | },
39 | "s3MonitoringConfiguration": {
40 | "logUri": "s3://"
41 | }
42 | }
43 | }'
44 | ```
45 |
46 | ### Example 1
47 | The below spark-submit command in `submit_script.sh`
48 | ```
49 | spark-submit --deploy-mode cluster \
50 | --conf spark.executor.instances=2 \
51 | --conf spark.executor.memory=2G \
52 | --conf spark.executor.cores=2 \
53 | --conf spark.driver.cores=1 \
54 | --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \
55 | --verbose \
56 | s3:///health_violations.py \
57 | --data_source s3:///food_establishment_data.csv \
58 | --output_uri s3:///myOutputFolder
59 | ```
60 |
61 | is converted to the following in `submit_script.sh_converted`:
62 |
63 | ```
64 | #spark-submit --deploy-mode cluster \
65 | #--conf spark.executor.instances=2 \
66 | #--conf spark.executor.memory=2G \
67 | #--conf spark.executor.cores=2 \
68 | #--conf spark.driver.cores=1 \
69 | #--conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \
70 | #--verbose \
71 | #s3:///health_violations.py \
72 | #--data_source s3:///food_establishment_data.csv \
73 | #--output_uri s3:///myOutputFolder
74 |
75 | # ----- Auto converted by startJobRunConverter.py -----
76 | aws emr-containers start-job-run \
77 | --name emreks-test-job \
78 | --virtual-cluster-id \
79 | --execution-role-arn \
80 | --release-label emr-6.4.0-latest \
81 | --configuration-overrides '{
82 | "monitoringConfiguration": {
83 | "cloudWatchMonitoringConfiguration": {
84 | "logGroupName": "emrekstest",
85 | "logStreamNamePrefix": "emreks_log_stream"
86 | },
87 | "s3MonitoringConfiguration": {
88 | "logUri": "s3://"
89 | }
90 | }
91 | }' \
92 | --tags KeyName1=string \
93 | --job-driver '{
94 | "sparkSubmitJobDriver": {
95 | "entryPoint": "s3:///health_violations.py",
96 | "entryPointArguments": [
97 | "--data_source",
98 | "s3:///food_establishment_data.csv",
99 | "--output_uri",
100 | "s3:///myOutputFolder"
101 | ],
102 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1 --conf \"spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps\" --verbose"
103 | }
104 | }'
105 | ```
106 | >As you can see in the example, the original spark-submit command is kept as comments
107 |
108 | ### Example 2
109 | ```
110 | EXECMEM=2G
111 | EXEC_INST=2
112 | REGION=us-east-2
113 | spark-submit --deploy-mode cluster \
114 | --conf spark.executor.instances=$EXEC_INST --conf spark.executor.memory=$EXECMEM --conf spark.executor.cores=2 --conf spark.driver.cores=1 \
115 | s3:///wordcount.py s3:///wordcount_output $REGION
116 | ```
117 |
118 | is converted to the following in `submit_script.sh_converted`:
119 |
120 | ```
121 |
122 | EXECMEM=2G
123 | EXEC_INST=2
124 | REGION=us-east-2
125 | #spark-submit --deploy-mode cluster \
126 | #--conf spark.executor.instances=$EXEC_INST --conf spark.executor.memory=$EXECMEM --conf spark.executor.cores=2 --conf spark.driver.cores=1 \
127 | #s3:///wordcount.py s3:///wordcount_output $REGION
128 |
129 | # ----- Auto converted by startJobRunConverter.py -----
130 | aws emr-containers start-job-run \
131 | --name emreks-test-job \
132 | --virtual-cluster-id \
133 | --execution-role-arn \
134 | --release-label emr-6.4.0-latest \
135 | --configuration-overrides '{
136 | "monitoringConfiguration": {
137 | "cloudWatchMonitoringConfiguration": {
138 | "logGroupName": "emrekstest",
139 | "logStreamNamePrefix": "emreks_log_stream"
140 | },
141 | "s3MonitoringConfiguration": {
142 | "logUri": "s3://"
143 | }
144 | }
145 | }' \
146 | --tags KeyName1=string \
147 | --job-driver '{
148 | "sparkSubmitJobDriver": {
149 | "entryPoint": "s3:///wordcount.py",
150 | "entryPointArguments": [
151 | "s3:///wordcount_output",
152 | "'"$REGION"'"
153 | ],
154 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances='"$EXEC_INST"' --conf spark.executor.memory='"$EXECMEM"' --conf spark.executor.cores=2 --conf spark.driver.cores=1"
155 | }
156 | }'
157 | ```
158 | >In a bash shell, single quotes won't expand variables. The tool correctly handles these variables by using double quotes.
159 |
160 | ## Wait for completion
161 | One difference between spark-submit and start-job-run is that spark-submit waits for the Spark job to complete,
162 | whereas start-job-run is asynchronous. A wait_for_completion() bash shell function can be manually appended to the converted
163 | command if needed.
164 |
165 | ```
166 | function wait_for_completion() {
167 | cat < /dev/stdin|jq -r '[.id, .virtualClusterId]|join(" ")'| { read id virtualClusterId; echo id=$id; echo virtualClusterId=$virtualClusterId; while [ true ]
168 | do
169 | sleep 10
170 | state=$(aws emr-containers describe-job-run --id $id --virtual-cluster-id $virtualClusterId|jq -r '.jobRun.state')
171 | echo "$(date) job run state: $state"
172 | if [ "$state" = "COMPLETED" ]; then
173 | echo "job run id: $id completed"
174 | break
175 | elif [ "$state" = "FAILED" ]; then
176 | echo "job run id: $id failed. Exiting..."
177 | exit 1
178 | fi
179 | done; }
180 | }
181 | ```
182 | >The jq tool is required for JSON parsing.
183 |
184 | To use it, append `|wait_for_completion` to the end of the command.
185 | ```
186 | # ----- Auto converted by startJobRunConverter.py -----
187 | aws emr-containers start-job-run \
188 | --name emreks-test-job \
189 | --virtual-cluster-id \
190 | --execution-role-arn \
191 | --release-label emr-6.4.0-latest \
192 | --configuration-overrides '{
193 | "monitoringConfiguration": {
194 | "cloudWatchMonitoringConfiguration": {
195 | "logGroupName": "emrekstest",
196 | "logStreamNamePrefix": "emreks_log_stream"
197 | },
198 | "s3MonitoringConfiguration": {
199 | "logUri": "s3://"
200 | }
201 | }
202 | }' \
203 | --tags KeyName1=string,k2=v2 \
204 | --job-driver '{
205 | "sparkSubmitJobDriver": {
206 | "entryPoint": "s3:///wordcount.py",
207 | "entryPointArguments": [
208 | "s3:///wordcount_output",
209 | "'"$REGION"'"
210 | ],
211 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances='"$EXEC_INST"' --conf spark.executor.memory='"$EXECMEM"' --conf spark.executor.cores=2 --conf spark.driver.cores=1"
212 | }
213 | }'|wait_for_completion
214 | ```
215 |
--------------------------------------------------------------------------------
/tools/start-job-run-converter/startJobRunConverter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import json
4 | import os
5 | import re
6 | import shlex
7 | import sys
8 |
9 | OUTPUT_FILE_SUFFIX = "_converted"
10 | AUTO_CONVERT_MSG = "\n# ----- Auto converted by {} -----\n"
11 |
12 | SPARK_SUBMIT = "spark-submit"
13 | SPARK_UNARY_ARGUMENTS = {"-v", "--verbose"}
14 | CONVERTER_ARGUMENTS = {"file"}
15 |
16 |
17 | def add_quote(data, quote="\"", guard=" "):
18 | if isinstance(data, list):
19 | data = [quote + d + quote if guard in d and not d.startswith(quote) else d for d in data]
20 | elif isinstance(data, str):
21 | data = quote + data + quote if guard in data and not data.startswith(quote) else data
22 | return data
23 |
24 |
25 | # In argparse, any internal - characters will be converted to _ characters to make sure the string
26 | # is a valid attribute name e.g. execution_role_arn.
27 | # This function changes it back, e.g. execution-role-arn
28 | def normalize_arg_key(arg):
29 | return arg.replace("_", "-")
30 |
31 |
32 | # In bash shell, single quote won't expand a variable. Need to close the single quotes,
33 | # insert variable, and then re-enter again.
34 | def convert_matched_var(match_obj):
35 | if match_obj.group() is not None:
36 | return "'\"" + match_obj.group() + "\"'"
37 | return ""
38 |
39 | # This function assumes a valid spark-submit command, otherwise it throws exception
40 | def generate_start_job_cmd(spark_cmd_line, start_job_args):
41 | start_job_cmd = "aws emr-containers start-job-run \\\n"
42 | start_idx, curr_idx = 0, 0
43 | while curr_idx < len(spark_cmd_line):
44 | curr_arg = spark_cmd_line[curr_idx].strip()
45 | if curr_arg:
46 | if SPARK_SUBMIT in curr_arg:
47 | start_idx = curr_idx + 1
48 | elif curr_arg.startswith("-"):
49 | if curr_arg not in SPARK_UNARY_ARGUMENTS:
50 | curr_idx += 1 # the argument is a pair e.g. --num-executors 50
51 | else:
52 | break
53 | curr_idx += 1
54 | spark_submit_parameters = add_quote(spark_cmd_line[start_idx: curr_idx])
55 | entrypoint_location = spark_cmd_line[curr_idx]
56 | entrypoint_arguments = add_quote(spark_cmd_line[curr_idx + 1:])
57 | job_driver = {"sparkSubmitJobDriver": {
58 | "entryPoint": entrypoint_location,
59 | "entryPointArguments": entrypoint_arguments,
60 | "sparkSubmitParameters": " ".join(spark_submit_parameters)
61 | }}
62 |
63 | res_str = add_quote(json.dumps(job_driver, indent=4), quote="'", guard="\n")
64 | res_str = re.sub(r"\${?[0-9a-zA-Z_]+}?", convert_matched_var, res_str)
65 | start_job_args["job_driver"] = res_str + "\n"
66 |
67 | for k, v in start_job_args.items():
68 | if k not in CONVERTER_ARGUMENTS and v:
69 | start_job_cmd += "--" + normalize_arg_key(k) + " " + add_quote(v, quote="'", guard="\n") + " \\\n"
70 | return start_job_cmd[:len(start_job_cmd) - 2] + "\n"
71 |
72 |
73 | def convert_file(input, output, extra_args, banner="\n"):
74 | with open(input, "r") as input_fp:
75 | with open(output, "w") as output_fp:
76 | in_cmd = False
77 | cmd_line = ""
78 | for line in input_fp:
79 | new_line = line.strip()
80 | if new_line and ((new_line[0] != "#" and SPARK_SUBMIT in new_line) or in_cmd):
81 | output_fp.write("#" + line) # Keep the original lines in comment
82 | in_cmd = True
83 | cmd_line += new_line
84 | if new_line[-1] != "\\":
85 | converted_cmd = generate_start_job_cmd(shlex.split(cmd_line), extra_args)
86 | output_fp.write(banner)
87 | output_fp.writelines(str(converted_cmd) + "\n")
88 | in_cmd = False
89 | cmd_line = ""
90 | else:
91 | output_fp.write(line)
92 |
93 |
94 | if __name__ == '__main__':
95 | # Create the parser
96 | cmd_parser = argparse.ArgumentParser(description='A tool for converting spark-submit command line to EMR on EKS '
97 | 'start-job-run.')
98 |
99 | # Add the arguments
100 | cmd_parser.add_argument('--file', help='the input spark-submit script file', required=True)
101 | cmd_parser.add_argument('--name', help='The name of the job run')
102 | cmd_parser.add_argument('--virtual-cluster-id', help='The virtual cluster ID for which the job run request is submitted', required=True)
103 | cmd_parser.add_argument('--client-token', help='The client idempotency token of the job run request')
104 | cmd_parser.add_argument('--execution-role-arn', help='The execution role ARN for the job run', required=True)
105 | cmd_parser.add_argument('--release-label', help='The Amazon EMR release version to use for the job run', required=True)
106 | cmd_parser.add_argument('--configuration-overrides', help='The configuration overrides for the job run')
107 | cmd_parser.add_argument('--tags', help='The tags assigned to job runs')
108 |
109 | args = cmd_parser.parse_args()
110 |
111 | input_file = args.file
112 | output_file = os.path.basename(input_file) + OUTPUT_FILE_SUFFIX
113 |
114 | if os.path.exists(output_file):
115 | print("Error: {} already exists.".format(output_file))
116 | sys.exit(1)
117 |
118 | convert_file(input_file, output_file, vars(args),
119 | AUTO_CONVERT_MSG.format(os.path.basename(sys.argv[0])))
120 |
--------------------------------------------------------------------------------