├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── LICENSE-SAMPLECODE ├── LICENSE-SUMMARY ├── README.md ├── chart └── kube-scheduler │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ ├── clusterrole.yaml │ ├── clusterrolebinding.yaml │ ├── configmap.yaml │ ├── deployment.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── content ├── best-practices-and-recommendations │ └── eks-best-practices.md ├── cost-optimization │ └── docs │ │ ├── cost-optimization.md │ │ ├── cost-tracking.md │ │ ├── index.md │ │ ├── node-decommission.md │ │ └── resources │ │ ├── images │ │ ├── ca.png │ │ ├── karpenter.png │ │ ├── node_decom.gif │ │ ├── pvc_reuse.gif │ │ └── reuse.gif │ │ ├── scripts │ │ └── create-bucket-data-export.sh │ │ └── sql-statements │ │ ├── compute-cost_view.sql │ │ ├── data-export-table.sql │ │ ├── overall-cost_view.sql │ │ ├── query-over-all_view.sql │ │ ├── vc-cost_view.sql │ │ └── vc-lookup.sql ├── index.md ├── metastore-integrations │ └── docs │ │ ├── aws-glue.md │ │ ├── hive-metastore.md │ │ └── index.md ├── node-placement │ └── docs │ │ ├── eks-node-placement.md │ │ ├── fargate-node-placement.md │ │ └── index.md ├── outposts │ ├── emr-containers-on-outposts.md │ ├── index.md │ └── resources │ │ └── outposts_eks_network.png ├── performance │ └── docs │ │ ├── binpack.md │ │ ├── dra.md │ │ ├── index.md │ │ ├── karpenter.md │ │ └── resources │ │ └── images │ │ ├── after-binpack.png │ │ ├── before-binpack.png │ │ ├── binpack.gif │ │ └── nonbinpack.gif ├── scalability │ └── docs │ │ ├── graphana-dashboard.md │ │ ├── index.md │ │ ├── known-factors-spark-operator.md │ │ ├── known-factors-start-job-run.md │ │ ├── load-test-for-spark-operator.md │ │ ├── load-test-for-start-job-run-api.md │ │ ├── resources │ │ └── images │ │ │ ├── EMR_Spark_Operator_Benchmark.png │ │ │ ├── aws-cni-metrics.png │ │ │ ├── eks-control-plane.png │ │ │ ├── emr-on-eks-job-dashboard.png │ │ │ └── spark-operator-dashboard.png │ │ └── scalaiblity-glossary.md ├── security │ └── docs │ │ ├── index.md │ │ ├── resources │ │ ├── Dockerfile │ │ ├── S3ListObjects_v1.jar │ │ ├── client-role-2-policy.json │ │ ├── client-role-2-trust-policy.json │ │ ├── custom-entrypoint.sh │ │ ├── driver-pod-template.yaml │ │ ├── executor-pod-template.yaml │ │ ├── images │ │ │ ├── emr-on-eks-fargate.png │ │ │ ├── emr-on-eks-network-communication.png │ │ │ ├── emr-on-eks-self-and-managed.png │ │ │ ├── role-chain.png │ │ │ └── shared-responsibility-model.png │ │ ├── job-exec-role-1-policy.json │ │ ├── job-exec-role-1-trust-policy.json │ │ ├── mix-spark-boto3.py │ │ └── only-boto3.py │ │ └── spark │ │ ├── chain-role.md │ │ ├── data-encryption.md │ │ ├── encryption.md │ │ ├── network-security.md │ │ └── secrets.md ├── storage │ ├── docs │ │ ├── index.md │ │ └── spark │ │ │ ├── ebs.md │ │ │ ├── fsx-lustre.md │ │ │ └── instance-store.md │ └── resources │ │ └── FSx_Lustre_SG.png ├── submit-applications │ ├── docs │ │ └── spark │ │ │ ├── index.md │ │ │ ├── java-and-scala.md │ │ │ ├── multi-arch-image.md │ │ │ ├── pyspark.md │ │ │ ├── sparkr.md │ │ │ └── sparksql.md │ └── resources │ │ ├── images │ │ └── pyspark-packaged-example-zip-folder-structure.png │ │ └── pyspark-packaged-dependency-src.zip └── troubleshooting │ ├── docs │ ├── change-log-level.md │ ├── connect-spark-ui.md │ ├── eks-cluster-auto-scaler.md │ ├── index.md │ ├── karpenter.md │ ├── rbac-permissions-errors.md │ ├── reverse-proxy-sparkui.md │ ├── self-hosted-shs.md │ └── where-to-look-for-spark-logs.md │ └── resources │ └── screen-shot-spark-ui-driver.png ├── 
mkdocs.yml └── tools ├── emr-vertical-autoscaling ├── grafana-dashboard-model.json └── prometheus-helm-values.yaml ├── k8s-rbac-policies ├── emr-containers.yaml └── rbac_patch.py └── start-job-run-converter ├── README.md └── startJobRunConverter.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | #IDE 34 | .idea/ 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Scrapy stuff: 56 | .scrapy 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # IPython Notebook 62 | .ipynb_checkpoints 63 | 64 | # pyenv 65 | .python-version 66 | 67 | # virtualenv 68 | venv/ 69 | ENV/ 70 | 71 | # MkDocs documentation 72 | site/ 73 | .DS_Store -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE-SAMPLECODE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /LICENSE-SUMMARY: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 2 | 3 | The documentation is made available under the Creative Commons Attribution-ShareAlike 4.0 International License. See the LICENSE file. 4 | 5 | The sample code within this documentation is made available under the MIT-0 license. See the LICENSE-SAMPLECODE file. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Amazon EMR on Amazon EKS Best Practices 2 | 3 | A best practices guide for submitting spark applications, integration with hive metastore, security, storage options, debugging options and performance considerations. 4 | 5 | Return to [Live Docs](https://aws.github.io/aws-emr-containers-best-practices/). 6 | 7 | ## License Summary 8 | 9 | The documentation is made available under the Creative Commons Attribution-ShareAlike 4.0 International License. See the LICENSE file. 10 | 11 | The sample code within this documentation is made available under the MIT-0 license. See the LICENSE-SAMPLECODE file. 12 | 13 | ## How to make a change 14 | 1. [Fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository) 15 | 2. Make your change and double-check the [mkdocs.yml](./mkdocs.yml) is updated accordingly. 16 | 3. Install the MkDocs command tool and material theme if needed: 17 | ```bash 18 | pip install mkdocs 19 | pip install mkdocs-material # material theme 20 | ``` 21 | 4. MkDocs comes with a built-in dev-server that lets you preview your documentation as you work on it. Make sure you're in the same directory as the `mkdocs.yml` configuration file, then run the command: 22 | ```bash 23 | mkdocs serve 24 | ``` 25 | 5. Open up http://127.0.0.1:8000/ in your browser, and you'll see the best practice website being displayed locally. 26 | 6. Adjust your document changes in real time. 27 | 7. When everything looks good and you're ready to deploy the change, run the command to build/compile the website content: 28 | ```bash 29 | mkdocs build 30 | ``` 31 | 8. This will refresh the directory `./site`. Take a look inside the directory and make sure your changes are included. 32 | ```bash 33 | ls site 34 | ``` 35 | 9. Commit change to github and send us a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). 36 | 37 | 10. With a repo admin permission, we can merge the pull request into the main branch. 38 | 39 | 11. Most importantly, as a repo admin, we must run the deploy command to copy the './site' content to 'gh-pages' branch and pushing to GitHub. Without this step, the website content won't be refreshed. 40 | ```bash 41 | mkdocs gh-deploy 42 | ``` -------------------------------------------------------------------------------- /chart/kube-scheduler/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: kube-scheduler 3 | description: A Helm chart for deploying a custom Kubernetes scheduler 4 | version: 0.1.0 5 | appVersion: "1.0.0" -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 
3 | */}} 4 | {{- define "kube-scheduler.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "kube-scheduler.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "kube-scheduler.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "kube-scheduler.labels" -}} 37 | helm.sh/chart: {{ include "kube-scheduler.chart" . }} 38 | {{ include "kube-scheduler.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "kube-scheduler.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "kube-scheduler.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "kube-scheduler.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "kube-scheduler.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: {{ include "kube-scheduler.fullname" . }} 5 | labels: 6 | {{- include "kube-scheduler.labels" . 
| nindent 4 }} 7 | rules: 8 | - apiGroups: 9 | - "" 10 | - events.k8s.io 11 | resources: 12 | - events 13 | verbs: 14 | - create 15 | - patch 16 | - update 17 | - apiGroups: 18 | - "" 19 | resources: 20 | - configmaps 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | - apiGroups: 26 | - coordination.k8s.io 27 | resources: 28 | - leases 29 | verbs: 30 | - create 31 | - get 32 | - list 33 | - update 34 | - apiGroups: 35 | - coordination.k8s.io 36 | resourceNames: 37 | - kube-scheduler 38 | resources: 39 | - leases 40 | verbs: 41 | - get 42 | - update 43 | - apiGroups: 44 | - "" 45 | resources: 46 | - endpoints 47 | verbs: 48 | - create 49 | - apiGroups: 50 | - "" 51 | resourceNames: 52 | - kube-scheduler 53 | resources: 54 | - endpoints 55 | verbs: 56 | - get 57 | - update 58 | - apiGroups: 59 | - "" 60 | resources: 61 | - nodes 62 | verbs: 63 | - get 64 | - list 65 | - watch 66 | - apiGroups: 67 | - "" 68 | resources: 69 | - pods 70 | verbs: 71 | - delete 72 | - get 73 | - list 74 | - watch 75 | - apiGroups: 76 | - "" 77 | resources: 78 | - bindings 79 | - pods/binding 80 | verbs: 81 | - create 82 | - apiGroups: 83 | - "" 84 | resources: 85 | - pods/status 86 | verbs: 87 | - patch 88 | - update 89 | - apiGroups: 90 | - "" 91 | resources: 92 | - replicationcontrollers 93 | - services 94 | verbs: 95 | - get 96 | - list 97 | - watch 98 | - apiGroups: 99 | - apps 100 | - extensions 101 | resources: 102 | - replicasets 103 | verbs: 104 | - get 105 | - list 106 | - watch 107 | - apiGroups: 108 | - apps 109 | resources: 110 | - statefulsets 111 | verbs: 112 | - get 113 | - list 114 | - watch 115 | - apiGroups: 116 | - policy 117 | resources: 118 | - poddisruptionbudgets 119 | verbs: 120 | - get 121 | - list 122 | - watch 123 | - apiGroups: 124 | - "" 125 | resources: 126 | - persistentvolumeclaims 127 | - persistentvolumes 128 | verbs: 129 | - get 130 | - list 131 | - watch 132 | - apiGroups: 133 | - authentication.k8s.io 134 | resources: 135 | - tokenreviews 136 | verbs: 137 | - create 138 | - apiGroups: 139 | - authorization.k8s.io 140 | resources: 141 | - subjectaccessreviews 142 | verbs: 143 | - create 144 | - apiGroups: 145 | - storage.k8s.io 146 | resources: 147 | - csinodes 148 | verbs: 149 | - get 150 | - list 151 | - watch 152 | - apiGroups: 153 | - "" 154 | resources: 155 | - namespaces 156 | verbs: 157 | - get 158 | - list 159 | - watch 160 | - apiGroups: 161 | - storage.k8s.io 162 | resources: 163 | - csidrivers 164 | verbs: 165 | - get 166 | - list 167 | - watch 168 | - apiGroups: 169 | - storage.k8s.io 170 | resources: 171 | - csistoragecapacities 172 | verbs: 173 | - get 174 | - list 175 | - watch -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: {{ include "kube-scheduler.fullname" . }}-as-kube-scheduler 5 | labels: 6 | {{- include "kube-scheduler.labels" . | nindent 4 }} 7 | subjects: 8 | - kind: ServiceAccount 9 | name: {{ include "kube-scheduler.serviceAccountName" . }} 10 | namespace: {{ .Release.Namespace }} 11 | roleRef: 12 | kind: ClusterRole 13 | name: {{ include "kube-scheduler.fullname" . }} 14 | apiGroup: rbac.authorization.k8s.io 15 | 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1 18 | kind: ClusterRoleBinding 19 | metadata: 20 | name: {{ include "kube-scheduler.fullname" . 
}}-as-volume-scheduler 21 | labels: 22 | {{- include "kube-scheduler.labels" . | nindent 4 }} 23 | subjects: 24 | - kind: ServiceAccount 25 | name: {{ include "kube-scheduler.serviceAccountName" . }} 26 | namespace: {{ .Release.Namespace }} 27 | roleRef: 28 | kind: ClusterRole 29 | name: system:volume-scheduler 30 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "kube-scheduler.fullname" . }}-config 5 | labels: 6 | {{- include "kube-scheduler.labels" . | nindent 4 }} 7 | data: 8 | kube-scheduler-config.yaml: | 9 | apiVersion: kubescheduler.config.k8s.io/v1 10 | kind: KubeSchedulerConfiguration 11 | profiles: 12 | - pluginConfig: 13 | - args: 14 | apiVersion: kubescheduler.config.k8s.io/v1 15 | kind: NodeResourcesFitArgs 16 | scoringStrategy: 17 | resources: 18 | - name: cpu 19 | weight: 1 20 | - name: memory 21 | weight: 1 22 | type: MostAllocated 23 | name: NodeResourcesFit 24 | plugins: 25 | score: 26 | enabled: 27 | - name: NodeResourcesFit 28 | weight: 1 29 | disabled: 30 | - name: "*" 31 | multiPoint: 32 | enabled: 33 | - name: NodeResourcesFit 34 | weight: 1 35 | schedulerName: {{ include "kube-scheduler.fullname" . }} 36 | leaderElection: 37 | leaderElect: true 38 | resourceNamespace: {{ .Release.Namespace }} 39 | resourceName: {{ include "kube-scheduler.fullname" . }} -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "kube-scheduler.fullname" . }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "kube-scheduler.labels" . | nindent 4 }} 8 | spec: 9 | replicas: {{ .Values.replicaCount }} 10 | selector: 11 | matchLabels: 12 | {{- include "kube-scheduler.selectorLabels" . | nindent 6 }} 13 | template: 14 | metadata: 15 | labels: 16 | {{- include "kube-scheduler.selectorLabels" . | nindent 8 }} 17 | spec: 18 | serviceAccountName: {{ include "kube-scheduler.serviceAccountName" . }} 19 | {{- with .Values.affinity }} 20 | affinity: 21 | {{- toYaml . | nindent 8 }} 22 | {{- end }} 23 | {{- with .Values.nodeSelector }} 24 | nodeSelector: 25 | {{- toYaml . | nindent 8 }} 26 | {{- end }} 27 | {{- with .Values.tolerations }} 28 | tolerations: 29 | {{- toYaml . 
| nindent 8 }} 30 | {{- end }} 31 | containers: 32 | - name: {{ .Chart.Name }} 33 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 34 | imagePullPolicy: {{ .Values.image.pullPolicy }} 35 | command: 36 | - /usr/local/bin/kube-scheduler 37 | - --bind-address=0.0.0.0 38 | - --config=/etc/kubernetes/kube-scheduler/kube-scheduler-config.yaml 39 | - --v=5 40 | livenessProbe: 41 | httpGet: 42 | path: /healthz 43 | port: 10259 44 | scheme: HTTPS 45 | initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} 46 | periodSeconds: {{ .Values.livenessProbe.periodSeconds }} 47 | readinessProbe: 48 | httpGet: 49 | path: /healthz 50 | port: 10259 51 | scheme: HTTPS 52 | initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} 53 | periodSeconds: {{ .Values.readinessProbe.periodSeconds }} 54 | resources: 55 | {{- toYaml .Values.resources | nindent 12 }} 56 | securityContext: 57 | privileged: false 58 | volumeMounts: 59 | - name: config-volume 60 | mountPath: /etc/kubernetes/kube-scheduler 61 | volumes: 62 | - name: config-volume 63 | configMap: 64 | name: {{ include "kube-scheduler.fullname" . }}-config -------------------------------------------------------------------------------- /chart/kube-scheduler/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "kube-scheduler.serviceAccountName" . }} 6 | labels: 7 | {{- include "kube-scheduler.labels" . | nindent 4 }} 8 | {{- end }} -------------------------------------------------------------------------------- /chart/kube-scheduler/values.yaml: -------------------------------------------------------------------------------- 1 | nameOverride: "" 2 | fullnameOverride: "" 3 | 4 | replicaCount: 2 5 | 6 | image: 7 | repository: public.ecr.aws/eks-distro/kubernetes/kube-scheduler 8 | tag: v1.31.0-eks-1-31-latest 9 | pullPolicy: IfNotPresent 10 | 11 | serviceAccount: 12 | create: true 13 | name: "" 14 | 15 | resources: 16 | requests: 17 | cpu: '1' 18 | 19 | livenessProbe: 20 | initialDelaySeconds: 15 21 | periodSeconds: 10 22 | readinessProbe: 23 | initialDelaySeconds: 15 24 | periodSeconds: 10 25 | 26 | nodeSelector: {} 27 | tolerations: [] 28 | affinity: {} -------------------------------------------------------------------------------- /content/cost-optimization/docs/cost-tracking.md: -------------------------------------------------------------------------------- 1 | # Cost Tracking 2 | 3 | In AWS users can gain a detailed insight about the cost of their usage by leveraging [Data Exports](https://docs.aws.amazon.com/cur/latest/userguide/what-is-data-exports.html). It allows organizations to create customized exports of the AWS Cost and Usage Report (CUR) 2.0, offering daily or hourly usage insights along with rates, costs, and usage attributes across all chargeable AWS services. The standard data export option delivers customized cost data to Amazon S3 on a recurring basis. With Data Exports users can also track the cost incurred by their pods running in their EKS cluster. 4 | 5 | In this section we will show you how you can use the Data Exports data to track cost at the Virtual Cluster level, for both the compute and the Amazon EMR on EKS uplift, this would allow you to have a comprehensive view on the cost incured by your jobs. 
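Since every cost line references an EMR on EKS virtual cluster ID and an EKS namespace, it helps to know up front which virtual cluster maps to which namespace; that mapping feeds the lookup table created later in this section. A minimal AWS CLI sketch, assuming credentials for the account that owns the virtual clusters (the `RUNNING` state filter is just an example):

```sh
# Print "virtual-cluster-id  namespace" pairs to seed the lookup table used below.
aws emr-containers list-virtual-clusters \
  --states RUNNING \
  --query "virtualClusters[].[id, containerProvider.info.eksInfo.namespace]" \
  --output text
```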
6 | 7 | 8 | ## Create Data Exports 9 | 10 | To create a Data Export report you can execute the following [shell script](https://github.com/aws/aws-emr-containers-best-practices/blob/main/content/cost-optimization/docs/resources/scripts/create-bucket-data-export.sh), or you can create one by following the [AWS documentation](https://docs.aws.amazon.com/cur/latest/userguide/dataexports-create-standard.html). 11 | 12 | 13 | ```sh 14 | sh create-bucket-data-export.sh NAME-OF-S3-BUCKET-TO-CREATE ACCOUNT-ID REPORT-NAME 15 | ``` 16 | 17 | > ***NOTE***: if you create it by following the AWS documentation, make sure to select `split cost allocation` and `resource-id` to be included in the Data Export. 18 | 19 | ## Create the cost views 20 | 21 | To get the total cost we will use Amazon Athena to query the cost data from the Data Export report. Using Athena, we will first create a table over the data exported by the Data Export report, then a mapping table that maps an Amazon EMR on EKS Virtual Cluster to a namespace. Afterward we will create two views: one that represents the compute cost and a second that contains the EMR on EKS uplift. Last, we will create a view that combines the EMR on EKS uplift cost with the compute cost; this view is a union of the two views created earlier. 22 | 23 | 24 | ### Create the data exports report table 25 | 26 | You can use the following [query](https://github.com/aws/aws-emr-containers-best-practices/tree/main/content/cost-optimization/resources/sql-statements/data-export-table.sql) to create the data export table. If you used the provided script, you only need to replace the S3 bucket name. If you created the export report without the provided shell script, update the S3 location to match the one where your data export report delivers its data. 27 | 28 | ### Create the Virtual cluster and namespace lookup table 29 | 30 | To create the lookup table you can use the following SQL statement. 31 | 32 | ```sql 33 | CREATE EXTERNAL TABLE `virtual_cluster_lookup`( 34 | `virtual_cluster_id` string, 35 | `namespace` string) 36 | ROW FORMAT DELIMITED 37 | FIELDS TERMINATED BY ',' 38 | STORED AS INPUTFORMAT 39 | 'org.apache.hadoop.mapred.TextInputFormat' 40 | OUTPUTFORMAT 41 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 42 | LOCATION 43 | 's3://BUCKET-NAME/data/virtual_cluster_definition' 44 | ``` 45 | Make sure to insert the lookup data; you can use the query below as an example. 46 | 47 | ```sql 48 | INSERT INTO virtual_cluster_lookup 49 | VALUES ('96nxs46332423542abnbd2iuv6049', 'myvc') 50 | ``` 51 | 52 | ### Create the EMR on EKS uplift view 53 | 54 | To create the EMR on EKS uplift view you can use the following SQL statement. 55 | 56 | > ***NOTE***: You may need to change the source data table if you created the data export yourself. The query below assumes the source data table is called `data`. 57 | 58 | ```sql 59 | CREATE OR REPLACE VIEW "emr_uplift_per_vc_view" AS 60 | WITH 61 | emr_uplift_per_vc AS ( 62 | SELECT 63 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month", 64 | split_part(line_item_resource_id, '/', 3) vc_id, 65 | sum(line_item_blended_cost) cost 66 | FROM 67 | data 68 | WHERE ((line_item_product_code = 'ElasticMapReduce') AND (line_item_operation = 'StartJobRun')) 69 | GROUP BY line_item_resource_id, 1 70 | ) 71 | SELECT 72 | month, 73 | namespace, 74 | SUM(cost) cost 75 | FROM 76 | (emr_uplift_per_vc uplift 77 | INNER JOIN virtual_cluster_lookup lookup ON (uplift.vc_id = lookup.virtual_cluster_id)) 78 | GROUP BY month, namespace 79 | ``` 80 | ### Create the Compute cost view 81 | 82 | To create the compute cost view you can use the following SQL statement. 83 | 84 | > ***NOTE***: You may need to change the source data table if you created the data export yourself. The query below assumes the source data table is called `data`. 85 | 86 | ```sql 87 | 88 | CREATE OR REPLACE VIEW "compute_cost_per_namespace_view" AS 89 | SELECT 90 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month" 91 | , CONCAT(REPLACE(SPLIT_PART("line_item_resource_id", '/', 1), 'pod', 'cluster'), '/', SPLIT_PART("line_item_resource_id", '/', 2)) "cluster_arn" 92 | , SPLIT_PART("line_item_resource_id", '/', 3) "namespace" 93 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-vCPU-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "cpu_cost" 94 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-GB-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "ram_cost" 95 | , SUM(("split_line_item_split_cost" + "split_line_item_unused_cost")) "total_cost" 96 | FROM 97 | (data 98 | INNER JOIN virtual_cluster_lookup lookup ON (SPLIT_PART("line_item_resource_id", '/', 3) = lookup.namespace)) 99 | WHERE ("line_item_operation" = 'EKSPod-EC2') 100 | GROUP BY 1, 2, 3 101 | ORDER BY "month" DESC, "cluster_arn" ASC, "namespace" ASC, "total_cost" DESC 102 | 103 | ``` 104 | 105 | ### Create the overall cost view 106 | 107 | To create the overall cost view you can use the following SQL statement. 108 | 109 | ```sql 110 | CREATE OR REPLACE VIEW emr_eks_cost AS 111 | 112 | SELECT month, namespace, total_cost as cost FROM "reinventdemo"."compute_cost_per_namespace_view" 113 | 114 | UNION 115 | 116 | SELECT month, namespace, cost FROM "reinventdemo"."emr_uplift_per_vc_view" 117 | ``` 118 | 119 | ## Query the data 120 | 121 | After creating the views you can now get insights into the total cost of running your EMR on EKS jobs at the virtual cluster level. The query below shows how you can get the overall cost.
122 | 123 | ``` 124 | SELECT month, namespace, sum(cost) as total_cost 125 | FROM "emr_eks_cost" 126 | GROUP BY namespace, month 127 | ``` 128 | > ***NOTE***: In these views the granularity is at the month level, you can also run it at the day level, you can achieve it by changing the date in the SQL queries to include also the day, 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /content/cost-optimization/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/index.md -------------------------------------------------------------------------------- /content/cost-optimization/docs/node-decommission.md: -------------------------------------------------------------------------------- 1 | # **Node Decommission** 2 | 3 | This section shows how to use an [Apache Spark feature](https://issues.apache.org/jira/browse/SPARK-20629) that allows you to store the shuffle data and cached RDD blocks present on the terminating executors to peer executors before a Spot node gets decommissioned. Consequently, your job does not need to recalculate the shuffle and RDD blocks of the terminating executor that would otherwise be lost, thus allowing the job to have minimal delay in completion. 4 | 5 | This feature is supported for releases EMR 6.3.0+. 6 | 7 | ### How does it work? 8 | 9 | When spark.decommission.enabled is true, Spark will try its best to shut down the executor gracefully. spark.storage.decommission.enabled will enable migrating data stored on the executor. Spark will try to migrate all the cached RDD blocks (controlled by spark.storage.decommission.rddBlocks.enabled) and shuffle blocks (controlled by spark.storage.decommission.shuffleBlocks.enabled) from the decommissioning executor to all remote executors when spark decommission is enabled. Relevant Spark configurations for using node decommissioning in the jobs are 10 | 11 | |Configuration|Description|Default Value| 12 | |-----|-----|-----| 13 | |spark.decommission.enabled|Whether to enable decommissioning|false| 14 | |spark.storage.decommission.enabled|Whether to decommission the block manager when decommissioning executor|false| 15 | |spark.storage.decommission.rddBlocks.enabled|Whether to transfer RDD blocks during block manager decommissioning.|false| 16 | |spark.storage.decommission.shuffleBlocks.enabled|Whether to transfer shuffle blocks during block manager decommissioning. Requires a migratable shuffle resolver (like sort based shuffle)|false| 17 | |spark.storage.decommission.maxReplicationFailuresPerBlock|Maximum number of failures which can be handled for migrating shuffle blocks when block manager is decommissioning and trying to move its existing blocks.|3| 18 | |spark.storage.decommission.shuffleBlocks.maxThreads|Maximum number of threads to use in migrating shuffle files.|8| 19 | 20 | This feature can currently be enabled through a temporary workaround on EMR 6.3.0+ releases. To enable it, Spark’s decom.sh file permission must be modified using a [custom image](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images.html). Once the code is fixed, the page will be updated. 
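After the image is built from the Dockerfile below, it must be pushed to a repository that your job can pull from, typically Amazon ECR in your own account, and referenced through `spark.kubernetes.container.image` in the job request. A rough sketch of that build-and-push flow, where every account ID, Region, repository, and tag is a placeholder (see the EMR on EKS custom image documentation for the registry that hosts the base images in your Region):

```
# Log in to the registry hosting the EMR on EKS base image (placeholder account ID)
aws ecr get-login-password --region <region> | \
  docker login --username AWS --password-stdin <base-image-account-id>.dkr.ecr.<region>.amazonaws.com

# Build the custom image from the Dockerfile shown below
docker build -t <your-account-id>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag> .

# Log in to your own ECR registry and push the image
aws ecr get-login-password --region <region> | \
  docker login --username AWS --password-stdin <your-account-id>.dkr.ecr.<region>.amazonaws.com
docker push <your-account-id>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag>
```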
21 | 22 | **Dockerfile for custom image:** 23 | 24 | ``` 25 | FROM .dkr.ecr..amazonaws.com/spark/ 26 | USER root 27 | WORKDIR /home/hadoop 28 | RUN chown hadoop:hadoop /usr/bin/decom.sh 29 | ``` 30 | 31 | **Setting decommission timeout:** 32 | 33 | Each executor has to be decommissioned within a certain time limit controlled by the pod’s terminationGracePeriodSeconds configuration. The default value is 30 secs but can be modified using a [custom pod template](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/pod-templates.html). The pod template for this modification would look like 34 | ``` 35 | apiVersion: v1 36 | kind: Pod 37 | spec: 38 | terminationGracePeriodSeconds: 39 | ``` 40 | 41 | **Note: terminationGracePeriodSeconds timeout should be lesser than spot instance timeout with around 5 seconds buffer kept aside for triggering the node termination** 42 | 43 | 44 | **Request:** 45 | 46 | ``` 47 | cat >spark-python-with-node-decommissioning.json << EOF 48 | { 49 | "name": "my-job-run-with-node-decommissioning", 50 | "virtualClusterId": "", 51 | "executionRoleArn": "", 52 | "releaseLabel": "emr-6.3.0-latest", 53 | "jobDriver": { 54 | "sparkSubmitJobDriver": { 55 | "entryPoint": "s3:///trip-count.py", 56 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6" 57 | } 58 | }, 59 | "configurationOverrides": { 60 | "applicationConfiguration": [ 61 | { 62 | "classification": "spark-defaults", 63 | "properties": { 64 | "spark.kubernetes.container.image": ".dkr.ecr..amazonaws.com/", 65 | "spark.executor.instances": "5", 66 | "spark.decommission.enabled": "true", 67 | "spark.storage.decommission.rddBlocks.enabled": "true", 68 | "spark.storage.decommission.shuffleBlocks.enabled" : "true", 69 | "spark.storage.decommission.enabled": "true" 70 | } 71 | } 72 | ], 73 | "monitoringConfiguration": { 74 | "cloudWatchMonitoringConfiguration": { 75 | "logGroupName": "", 76 | "logStreamNamePrefix": "" 77 | }, 78 | "s3MonitoringConfiguration": { 79 | "logUri": "" 80 | } 81 | } 82 | } 83 | } 84 | EOF 85 | ``` 86 | 87 | **Observed Behavior:** 88 | 89 | When executors begin decommissioning, its shuffle data gets migrated to peer executors instead of recalculating the shuffle blocks again. If sending shuffle blocks to an executor fails, spark.storage.decommission.maxReplicationFailuresPerBlock will give the number of retries for migration. The driver log’s stderr will see log lines `Updating map output for to BlockManagerId(, , , )` denoting details about shuffle block ‘s migration. This feature does not emit any other metrics for validation yet. 
-------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/images/ca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/ca.png -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/images/karpenter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/karpenter.png -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/images/node_decom.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/node_decom.gif -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/images/pvc_reuse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/pvc_reuse.gif -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/images/reuse.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/cost-optimization/docs/resources/images/reuse.gif -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/scripts/create-bucket-data-export.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if all required parameters are provided 4 | if [ $# -ne 3 ]; then 5 | echo "Usage: $0 " 6 | echo "Example: $0 my-bucket-name 123456789012 eks-cost-tracking" 7 | exit 1 8 | fi 9 | 10 | # Store parameters in variables 11 | S3_BUCKET=$1 12 | ACCOUNT_ID=$2 13 | REPORT_NAME=$3 14 | 15 | # Create the S3 bucket 16 | echo "Creating S3 bucket: $S3_BUCKET" 17 | if aws s3api create-bucket \ 18 | --bucket "$S3_BUCKET" \ 19 | --region us-east-1; then 20 | echo "Successfully created bucket: $S3_BUCKET" 21 | else 22 | echo "Failed to create bucket" 23 | exit 1 24 | fi 25 | 26 | # Create temporary bucket policy file 27 | POLICY_FILE=$(mktemp) 28 | cat > "$POLICY_FILE" << EOF 29 | { 30 | "Version": "2012-10-17", 31 | "Statement": [ 32 | { 33 | "Sid": "EnableAWSDataExportsToWriteToS3AndCheckPolicy", 34 | "Effect": "Allow", 35 | "Principal": { 36 | "Service": [ 37 | "bcm-data-exports.amazonaws.com", 38 | "billingreports.amazonaws.com" 39 | ] 40 | }, 41 | "Action": [ 42 | "s3:PutObject", 43 | "s3:GetBucketPolicy" 44 | ], 45 | "Resource": [ 46 | "arn:aws:s3:::${S3_BUCKET}", 47 | "arn:aws:s3:::${S3_BUCKET}/*" 48 | ], 49 | "Condition": { 50 | "StringLike": { 51 | "aws:SourceAccount": "${ACCOUNT_ID}", 52 | "aws:SourceArn": [ 53 | "arn:aws:cur:us-east-1:${ACCOUNT_ID}:definition/*", 54 | 
"arn:aws:bcm-data-exports:us-east-1:${ACCOUNT_ID}:export/*" 55 | ] 56 | } 57 | } 58 | } 59 | ] 60 | } 61 | EOF 62 | 63 | # Attach the bucket policy 64 | echo "Attaching bucket policy..." 65 | if aws s3api put-bucket-policy \ 66 | --bucket "$S3_BUCKET" \ 67 | --policy "file://$POLICY_FILE"; then 68 | echo "Successfully attached bucket policy" 69 | else 70 | echo "Failed to attach bucket policy" 71 | rm "$POLICY_FILE" 72 | exit 1 73 | fi 74 | 75 | # Clean up the temporary policy file 76 | rm "$POLICY_FILE" 77 | 78 | # Execute the AWS CLI command for data export 79 | echo "Creating cost export report..." 80 | aws bcm-data-exports create-export \ 81 | --export '{ 82 | "DataQuery": { 83 | "QueryStatement": "SELECT bill_bill_type, bill_billing_entity, bill_billing_period_end_date, bill_billing_period_start_date, bill_invoice_id, bill_invoicing_entity, bill_payer_account_id, bill_payer_account_name, cost_category, discount, discount_bundled_discount, discount_total_discount, identity_line_item_id, identity_time_interval, line_item_availability_zone, line_item_blended_cost, line_item_blended_rate, line_item_currency_code, line_item_legal_entity, line_item_line_item_description, line_item_line_item_type, line_item_net_unblended_cost, line_item_net_unblended_rate, line_item_normalization_factor, line_item_normalized_usage_amount, line_item_operation, line_item_product_code, line_item_resource_id, line_item_tax_type, line_item_unblended_cost, line_item_unblended_rate, line_item_usage_account_id, line_item_usage_account_name, line_item_usage_amount, line_item_usage_end_date, line_item_usage_start_date, line_item_usage_type, pricing_currency, pricing_lease_contract_length, pricing_offering_class, pricing_public_on_demand_cost, pricing_public_on_demand_rate, pricing_purchase_option, pricing_rate_code, pricing_rate_id, pricing_term, pricing_unit, product, product_comment, product_fee_code, product_fee_description, product_from_location, product_from_location_type, product_from_region_code, product_instance_family, product_instance_type, product_instancesku, product_location, product_location_type, product_operation, product_pricing_unit, product_product_family, product_region_code, product_servicecode, product_sku, product_to_location, product_to_location_type, product_to_region_code, product_usagetype, reservation_amortized_upfront_cost_for_usage, reservation_amortized_upfront_fee_for_billing_period, reservation_availability_zone, reservation_effective_cost, reservation_end_time, reservation_modification_status, reservation_net_amortized_upfront_cost_for_usage, reservation_net_amortized_upfront_fee_for_billing_period, reservation_net_effective_cost, reservation_net_recurring_fee_for_usage, reservation_net_unused_amortized_upfront_fee_for_billing_period, reservation_net_unused_recurring_fee, reservation_net_upfront_value, reservation_normalized_units_per_reservation, reservation_number_of_reservations, reservation_recurring_fee_for_usage, reservation_reservation_a_r_n, reservation_start_time, reservation_subscription_id, reservation_total_reserved_normalized_units, reservation_total_reserved_units, reservation_units_per_reservation, reservation_unused_amortized_upfront_fee_for_billing_period, reservation_unused_normalized_unit_quantity, reservation_unused_quantity, reservation_unused_recurring_fee, reservation_upfront_value, resource_tags, savings_plan_amortized_upfront_commitment_for_billing_period, savings_plan_end_time, savings_plan_instance_type_family, 
savings_plan_net_amortized_upfront_commitment_for_billing_period, savings_plan_net_recurring_commitment_for_billing_period, savings_plan_net_savings_plan_effective_cost, savings_plan_offering_type, savings_plan_payment_option, savings_plan_purchase_term, savings_plan_recurring_commitment_for_billing_period, savings_plan_region, savings_plan_savings_plan_a_r_n, savings_plan_savings_plan_effective_cost, savings_plan_savings_plan_rate, savings_plan_start_time, savings_plan_total_commitment_to_date, savings_plan_used_commitment, split_line_item_actual_usage, split_line_item_net_split_cost, split_line_item_net_unused_cost, split_line_item_parent_resource_id, split_line_item_public_on_demand_split_cost, split_line_item_public_on_demand_unused_cost, split_line_item_reserved_usage, split_line_item_split_cost, split_line_item_split_usage, split_line_item_split_usage_ratio, split_line_item_unused_cost FROM COST_AND_USAGE_REPORT", 84 | "TableConfigurations": { 85 | "COST_AND_USAGE_REPORT": { 86 | "INCLUDE_MANUAL_DISCOUNT_COMPATIBILITY": "FALSE", 87 | "INCLUDE_RESOURCES": "TRUE", 88 | "INCLUDE_SPLIT_COST_ALLOCATION_DATA": "TRUE", 89 | "TIME_GRANULARITY": "HOURLY" 90 | } 91 | } 92 | }, 93 | "DestinationConfigurations": { 94 | "S3Destination": { 95 | "S3Bucket": "'$S3_BUCKET'", 96 | "S3OutputConfigurations": { 97 | "Compression": "PARQUET", 98 | "Format": "PARQUET", 99 | "OutputType": "CUSTOM", 100 | "Overwrite": "OVERWRITE_REPORT" 101 | }, 102 | "S3Prefix": "cost-data/data", 103 | "S3Region": "us-east-1" 104 | } 105 | }, 106 | "Name": "'$REPORT_NAME'", 107 | "RefreshCadence": { 108 | "Frequency": "SYNCHRONOUS" 109 | } 110 | }' 111 | 112 | # Check if the command was successful 113 | if [ $? -eq 0 ]; then 114 | echo "Successfully created cost export report: $REPORT_NAME" 115 | echo "Data will be exported to s3://$S3_BUCKET/cost-data/" 116 | else 117 | echo "Failed to create cost export report" 118 | exit 1 119 | fi 120 | -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/compute-cost_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW "compute_cost_per_namespace_view" AS 2 | SELECT 3 | DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month" 4 | , CONCAT(REPLACE(SPLIT_PART("line_item_resource_id", '/', 1), 'pod', 'cluster'), '/', SPLIT_PART("line_item_resource_id", '/', 2)) "cluster_arn" 5 | , SPLIT_PART("line_item_resource_id", '/', 3) "namespace" 6 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-vCPU-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "cpu_cost" 7 | , SUM((CASE WHEN ("line_item_usage_type" LIKE '%EKS-EC2-GB-Hours') THEN ("split_line_item_split_cost" + "split_line_item_unused_cost") ELSE 0E0 END)) "ram_cost" 8 | , SUM(("split_line_item_split_cost" + "split_line_item_unused_cost")) "total_cost" 9 | FROM 10 | (data 11 | INNER JOIN virtual_cluster_lookup lookup ON (SPLIT_PART("line_item_resource_id", '/', 3) = lookup.namespace)) 12 | WHERE ("line_item_operation" = 'EKSPod-EC2') 13 | GROUP BY 1, 2, 3 14 | ORDER BY "month" DESC, "cluster_arn" ASC, "namespace" ASC, "total_cost" DESC -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/data-export-table.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE `data`( 2 | 
`bill_bill_type` string, 3 | `bill_billing_entity` string, 4 | `bill_billing_period_end_date` timestamp, 5 | `bill_billing_period_start_date` timestamp, 6 | `bill_invoice_id` string, 7 | `bill_invoicing_entity` string, 8 | `bill_payer_account_id` string, 9 | `bill_payer_account_name` string, 10 | `cost_category` map, 11 | `discount` map, 12 | `discount_bundled_discount` double, 13 | `discount_total_discount` double, 14 | `identity_line_item_id` string, 15 | `identity_time_interval` string, 16 | `line_item_availability_zone` string, 17 | `line_item_blended_cost` double, 18 | `line_item_blended_rate` string, 19 | `line_item_currency_code` string, 20 | `line_item_legal_entity` string, 21 | `line_item_line_item_description` string, 22 | `line_item_line_item_type` string, 23 | `line_item_net_unblended_cost` double, 24 | `line_item_net_unblended_rate` string, 25 | `line_item_normalization_factor` double, 26 | `line_item_normalized_usage_amount` double, 27 | `line_item_operation` string, 28 | `line_item_product_code` string, 29 | `line_item_resource_id` string, 30 | `line_item_tax_type` string, 31 | `line_item_unblended_cost` double, 32 | `line_item_unblended_rate` string, 33 | `line_item_usage_account_id` string, 34 | `line_item_usage_account_name` string, 35 | `line_item_usage_amount` double, 36 | `line_item_usage_end_date` timestamp, 37 | `line_item_usage_start_date` timestamp, 38 | `line_item_usage_type` string, 39 | `pricing_currency` string, 40 | `pricing_lease_contract_length` string, 41 | `pricing_offering_class` string, 42 | `pricing_public_on_demand_cost` double, 43 | `pricing_public_on_demand_rate` string, 44 | `pricing_purchase_option` string, 45 | `pricing_rate_code` string, 46 | `pricing_rate_id` string, 47 | `pricing_term` string, 48 | `pricing_unit` string, 49 | `product` map, 50 | `product_comment` string, 51 | `product_fee_code` string, 52 | `product_fee_description` string, 53 | `product_from_location` string, 54 | `product_from_location_type` string, 55 | `product_from_region_code` string, 56 | `product_instance_family` string, 57 | `product_instance_type` string, 58 | `product_instancesku` string, 59 | `product_location` string, 60 | `product_location_type` string, 61 | `product_operation` string, 62 | `product_pricing_unit` string, 63 | `product_product_family` string, 64 | `product_region_code` string, 65 | `product_servicecode` string, 66 | `product_sku` string, 67 | `product_to_location` string, 68 | `product_to_location_type` string, 69 | `product_to_region_code` string, 70 | `product_usagetype` string, 71 | `reservation_amortized_upfront_cost_for_usage` double, 72 | `reservation_amortized_upfront_fee_for_billing_period` double, 73 | `reservation_availability_zone` string, 74 | `reservation_effective_cost` double, 75 | `reservation_end_time` string, 76 | `reservation_modification_status` string, 77 | `reservation_net_amortized_upfront_cost_for_usage` double, 78 | `reservation_net_amortized_upfront_fee_for_billing_period` double, 79 | `reservation_net_effective_cost` double, 80 | `reservation_net_recurring_fee_for_usage` double, 81 | `reservation_net_unused_amortized_upfront_fee_for_billing_period` double, 82 | `reservation_net_unused_recurring_fee` double, 83 | `reservation_net_upfront_value` double, 84 | `reservation_normalized_units_per_reservation` string, 85 | `reservation_number_of_reservations` string, 86 | `reservation_recurring_fee_for_usage` double, 87 | `reservation_reservation_a_r_n` string, 88 | `reservation_start_time` string, 89 | 
`reservation_subscription_id` string, 90 | `reservation_total_reserved_normalized_units` string, 91 | `reservation_total_reserved_units` string, 92 | `reservation_units_per_reservation` string, 93 | `reservation_unused_amortized_upfront_fee_for_billing_period` double, 94 | `reservation_unused_normalized_unit_quantity` double, 95 | `reservation_unused_quantity` double, 96 | `reservation_unused_recurring_fee` double, 97 | `reservation_upfront_value` double, 98 | `resource_tags` map, 99 | `savings_plan_amortized_upfront_commitment_for_billing_period` double, 100 | `savings_plan_end_time` string, 101 | `savings_plan_instance_type_family` string, 102 | `savings_plan_net_amortized_upfront_commitment_for_billing_period` double, 103 | `savings_plan_net_recurring_commitment_for_billing_period` double, 104 | `savings_plan_net_savings_plan_effective_cost` double, 105 | `savings_plan_offering_type` string, 106 | `savings_plan_payment_option` string, 107 | `savings_plan_purchase_term` string, 108 | `savings_plan_recurring_commitment_for_billing_period` double, 109 | `savings_plan_region` string, 110 | `savings_plan_savings_plan_a_r_n` string, 111 | `savings_plan_savings_plan_effective_cost` double, 112 | `savings_plan_savings_plan_rate` double, 113 | `savings_plan_start_time` string, 114 | `savings_plan_total_commitment_to_date` double, 115 | `savings_plan_used_commitment` double, 116 | `split_line_item_actual_usage` double, 117 | `split_line_item_net_split_cost` double, 118 | `split_line_item_net_unused_cost` double, 119 | `split_line_item_parent_resource_id` string, 120 | `split_line_item_public_on_demand_split_cost` double, 121 | `split_line_item_public_on_demand_unused_cost` double, 122 | `split_line_item_reserved_usage` double, 123 | `split_line_item_split_cost` double, 124 | `split_line_item_split_usage` double, 125 | `split_line_item_split_usage_ratio` double, 126 | `split_line_item_unused_cost` double) 127 | PARTITIONED BY ( 128 | `billing_period` string) 129 | ROW FORMAT SERDE 130 | 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 131 | STORED AS INPUTFORMAT 132 | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 133 | OUTPUTFORMAT 134 | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' 135 | LOCATION 136 | 's3://S3-BUCKET-NAME/data/data-export/emr-containers-cost-reinvent/data/' -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/overall-cost_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW emr_eks_cost AS 2 | 3 | SELECT month, namespace, total_cost as cost FROM "reinventdemo"."compute_cost_per_namespace_view" 4 | 5 | UNION 6 | 7 | SELECT month, namespace, cost FROM "reinventdemo"."emr_uplift_per_vc_view" -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/query-over-all_view.sql: -------------------------------------------------------------------------------- 1 | SELECT month, namespace, sum(cost) as total_cost 2 | FROM "emr_eks_cost" 3 | GROUP BY namespace, month 4 | -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/vc-cost_view.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE VIEW "emr_uplift_per_vc_view" AS 2 | WITH 3 | emr_uplift_per_vc AS ( 4 | SELECT 5 | 
DATE_FORMAT(DATE_TRUNC('month', "line_item_usage_start_date"), '%Y-%m') "month", 6 | split_part(line_item_resource_id, '/', 3) vc_id, 7 | sum(line_item_blended_cost) cost 8 | FROM 9 | data 10 | WHERE ((line_item_product_code = 'ElasticMapReduce') AND (line_item_operation = 'StartJobRun')) 11 | GROUP BY line_item_resource_id, 1 12 | ) 13 | SELECT 14 | month, 15 | namespace, 16 | SUM(cost) cost 17 | FROM 18 | (emr_uplift_per_vc uplift 19 | INNER JOIN virtual_cluster_lookup lookup ON (uplift.vc_id = lookup.virtual_cluster_id)) 20 | GROUP BY month, namespace -------------------------------------------------------------------------------- /content/cost-optimization/docs/resources/sql-statements/vc-lookup.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE `virtual_cluster_lookup`( 2 | `virtual_cluster_id` string, 3 | `namespace` string) 4 | ROW FORMAT DELIMITED 5 | FIELDS TERMINATED BY ',' 6 | STORED AS INPUTFORMAT 7 | 'org.apache.hadoop.mapred.TextInputFormat' 8 | OUTPUTFORMAT 9 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 10 | LOCATION 11 | 's3://BUCKET-NAME/data/virtual_cluster_definition' -------------------------------------------------------------------------------- /content/index.md: -------------------------------------------------------------------------------- 1 | Welcome to the EMR Containers Best Practices Guide. The primary goal of this project is to offer a set of best practices and templates to get started with [Amazon EMR on EKS](https://aws.amazon.com/emr/features/eks/). We publish this guide on GitHub so we could iterate the content quickly, provide timely and effective recommendations for variety of concerns, and easily incorporate suggestions from the broader community. 2 | 3 | ## Amazon EMR on EKS Workshop 4 | If you are interested in step-by-step tutorials that leverage the best practices contained in this guide, please visit the [Amazon EMR on EKS Workshop.](https://emr-on-eks.workshop.aws/) 5 | ## Contributing 6 | 7 | We encourage you to contribute to these guides. If you have implemented a practice that has proven to be effective, please share it with us by opening an issue or a pull request. Similarly, if you discover an error or flaw in the guide, please submit a pull request to correct it. 8 | -------------------------------------------------------------------------------- /content/metastore-integrations/docs/aws-glue.md: -------------------------------------------------------------------------------- 1 | # **EMR Containers integration with AWS Glue** 2 | 3 | #### **AWS Glue catalog in same account as EKS** 4 | In the below example a Spark application will be configured to use [AWS Glue data catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html) as the hive metastore. 5 | 6 | **gluequery.py** 7 | 8 | ``` 9 | cat > gluequery.py </trip-data.parquet/'") 22 | spark.sql("SELECT count(*) FROM sparkemrnyc").show() 23 | spark.stop() 24 | EOF 25 | ``` 26 | 27 | ``` 28 | LOCATION 's3:///trip-data.parquet/' 29 | ``` 30 | 31 | Configure the above property to point to the S3 location containing the data. 
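The job request below points its `entryPoint` at a copy of `gluequery.py` in S3, so upload the script first. A minimal sketch with a placeholder bucket name:

```
aws s3 cp gluequery.py s3://<your-bucket>/gluequery.py
```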
32 | 33 | **Request** 34 | 35 | ``` 36 | cat > Spark-Python-in-s3-awsglue-log.json << EOF 37 | { 38 | "name": "spark-python-in-s3-awsglue-log", 39 | "virtualClusterId": "", 40 | "executionRoleArn": "", 41 | "releaseLabel": "emr-6.2.0-latest", 42 | "jobDriver": { 43 | "sparkSubmitJobDriver": { 44 | "entryPoint": "s3:///gluequery.py", 45 | "sparkSubmitParameters": "--conf spark.driver.cores=3 --conf spark.executor.memory=8G --conf spark.driver.memory=6G --conf spark.executor.cores=3" 46 | } 47 | }, 48 | "configurationOverrides": { 49 | "applicationConfiguration": [ 50 | { 51 | "classification": "spark-defaults", 52 | "properties": { 53 | "spark.hadoop.hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", 54 | } 55 | } 56 | ], 57 | "monitoringConfiguration": { 58 | "cloudWatchMonitoringConfiguration": { 59 | "logGroupName": "/emr-containers/jobs", 60 | "logStreamNamePrefix": "demo" 61 | }, 62 | "s3MonitoringConfiguration": { 63 | "logUri": "s3://joblogs" 64 | } 65 | } 66 | } 67 | } 68 | EOF 69 | 70 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-awsglue-log.json 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | ``` 80 | 81 | Output from driver logs - Displays the number of rows. 82 | 83 | ``` 84 | +----------+ 85 | | count(1)| 86 | +----------+ 87 | |2716504499| 88 | +----------+ 89 | ``` 90 | 91 | 92 | 93 | #### **AWS Glue catalog in different account** 94 | The Spark application is submitted to EMR Virtual cluster in Account A and is configured to connect to [AWS Glue catalog in Account B.](https://docs.aws.amazon.com/glue/latest/dg/cross-account-access.html) The IAM policy attached to the job execution role `("executionRoleArn": "") `is in Account A 95 | 96 | ``` 97 | { 98 | "Version": "2012-10-17", 99 | "Statement": [ 100 | { 101 | "Effect": "Allow", 102 | "Action": [ 103 | "glue:*" 104 | ], 105 | "Resource": [ 106 | "arn:aws:glue:::catalog", 107 | "arn:aws:glue:::database/default", 108 | "arn:aws:glue:::table/default/sparkemrnyc" 109 | ] 110 | } 111 | ] 112 | } 113 | ``` 114 | 115 | 116 | IAM policy attached to the AWS Glue catalog in Account B 117 | 118 | ``` 119 | { 120 | "Version" : "2012-10-17", 121 | "Statement" : [ { 122 | "Effect" : "Allow", 123 | "Principal" : { 124 | "AWS" : "" 125 | }, 126 | "Action" : "glue:*", 127 | "Resource" : [ "arn:aws:glue:::catalog", "arn:aws:glue:::database/default", "arn:aws:glue:::table/default/sparkemrnyc" ] 128 | } ] 129 | } 130 | ``` 131 | 132 | 133 | **Request** 134 | 135 | ``` 136 | cat > Spark-Python-in-s3-awsglue-crossaccount.json << EOF 137 | { 138 | "name": "spark-python-in-s3-awsglue-crossaccount", 139 | "virtualClusterId": "", 140 | "executionRoleArn": "", 141 | "releaseLabel": "emr-6.2.0-latest", 142 | "jobDriver": { 143 | "sparkSubmitJobDriver": { 144 | "entryPoint": "s3:///gluequery.py", 145 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6 " 146 | } 147 | }, 148 | "configurationOverrides": { 149 | "applicationConfiguration": [ 150 | { 151 | "classification": "spark-defaults", 152 | "properties": { 153 | "spark.hadoop.hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", 154 | "spark.hadoop.hive.metastore.glue.catalogid":"", 155 | } 156 | } 157 | ], 158 | "monitoringConfiguration": { 159 | "cloudWatchMonitoringConfiguration": { 160 | "logGroupName": "/emr-containers/jobs", 161 | 
"logStreamNamePrefix": "demo" 162 | }, 163 | "s3MonitoringConfiguration": { 164 | "logUri": "s3://joblogs" 165 | } 166 | } 167 | } 168 | } 169 | EOF 170 | 171 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-awsglue-crossaccount.json 172 | 173 | 174 | 175 | ``` 176 | 177 | **Configuration of interest** 178 | To specify the accountID where the AWS Glue catalog is defined reference the following: 179 | 180 | [Spark-Glue integration](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-glue.html) 181 | 182 | ``` 183 | "spark.hadoop.hive.metastore.glue.catalogid":"", 184 | ``` 185 | 186 | Output from driver logs - displays the number of rows. 187 | 188 | ``` 189 | +----------+ 190 | | count(1)| 191 | +----------+ 192 | |2716504499| 193 | +----------+ 194 | ``` 195 | 196 | #### **Sync Hudi table with AWS Glue catalog** 197 | In this example, a Spark application will be configured to use [AWS Glue data catalog](https://docs.aws.amazon.com/glue/latest/dg/components-overview.html) as the hive metastore. 198 | 199 | Starting from Hudi 0.9.0, we can synchronize Hudi table's latest schema to Glue catalog via the Hive Metastore Service (HMS) in hive sync mode. This example runs a Hudi ETL job with EMR on EKS, and interact with AWS Glue metaStore to create a Hudi table. It provides you the native and serverless capabilities to manage your technical metadata. Also you can query Hudi tables in Athena straigt away after the ETL job, which provides your end user an easy data access and shortens the time to insight. 200 | 201 | **HudiEMRonEKS.py** 202 | 203 | ``` 204 | cat > HudiEMRonEKS.py <` (including <>) with your own values. You're required to specify a namespace. The `--labels` option is not required to create your Fargate profile, but will be required if you want to only run Spark executors on Fargate. 12 | 13 | ``` 14 | eksctl create fargateprofile \ 15 | --cluster \ 16 | --name \ 17 | --namespace \ 18 | --labels spark-node-placement=fargate 19 | ``` 20 | 21 | ### 1- Place entire job including driver pod on Fargate 22 | 23 | When both Driver and Executors use the same labels as the Fargate Selector, the entire job including the driver pod will run on Fargate. 
24 | 25 | **Request:** 26 | ``` 27 | cat >spark-python-in-s3-nodeselector.json << EOF 28 | { 29 | "name": "spark-python-in-s3-fargate-nodeselector", 30 | "virtualClusterId": "", 31 | "executionRoleArn": "", 32 | "releaseLabel": "emr-6.3.0-latest", 33 | "jobDriver": { 34 | "sparkSubmitJobDriver": { 35 | "entryPoint": "s3:///trip-count.py", 36 | "sparkSubmitParameters": "--conf spark.driver.cores=4 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=4" 37 | } 38 | }, 39 | "configurationOverrides": { 40 | "applicationConfiguration": [ 41 | { 42 | "classification": "spark-defaults", 43 | "properties": { 44 | "spark.kubernetes.driver.label.spark-node-placement": "fargate", 45 | "spark.kubernetes.executor.label.spark-node-placement": "fargate" 46 | } 47 | } 48 | ], 49 | "monitoringConfiguration": { 50 | "cloudWatchMonitoringConfiguration": { 51 | "logGroupName": "/emr-containers/jobs", 52 | "logStreamNamePrefix": "demo" 53 | }, 54 | "s3MonitoringConfiguration": { 55 | "logUri": "s3://joblogs" 56 | } 57 | } 58 | } 59 | } 60 | EOF 61 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json 62 | ``` 63 | 64 | **Observed Behavior:** 65 | When the job starts, the driver pod and executor pods are scheduled only on Fargate since both are labeled with the `spark-node-placement: fargate`. This is useful when we want to run the entire job on Fargate nodes. The maximum vCPU available for the driver pod is 4vCPU. 66 | 67 | ### 2- Place driver pod on EC2 and executor pod on Fargate 68 | Remove the label from the driver pod to schedule the driver pod on EC2 instances. This is especially helpful when driver pod needs more resources (i.e. > 4 vCPU). 69 | 70 | **Request:** 71 | ``` 72 | cat >spark-python-in-s3-nodeselector.json << EOF 73 | { 74 | "name": "spark-python-in-s3-fargate-nodeselector", 75 | "virtualClusterId": "", 76 | "executionRoleArn": "", 77 | "releaseLabel": "emr-6.3.0-latest", 78 | "jobDriver": { 79 | "sparkSubmitJobDriver": { 80 | "entryPoint": "s3:///trip-count.py", 81 | "sparkSubmitParameters": "--conf spark.driver.cores=6 --conf spark.executor.memory=20G --conf spark.driver.memory=30G --conf spark.executor.cores=4" 82 | } 83 | }, 84 | "configurationOverrides": { 85 | "applicationConfiguration": [ 86 | { 87 | "classification": "spark-defaults", 88 | "properties": { 89 | "spark.kubernetes.executor.label.spark-node-placement": "fargate" 90 | } 91 | } 92 | ], 93 | "monitoringConfiguration": { 94 | "cloudWatchMonitoringConfiguration": { 95 | "logGroupName": "/emr-containers/jobs", 96 | "logStreamNamePrefix": "demo" 97 | }, 98 | "s3MonitoringConfiguration": { 99 | "logUri": "s3://joblogs" 100 | } 101 | } 102 | } 103 | } 104 | EOF 105 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json 106 | ``` 107 | 108 | **Observed Behavior:** 109 | When the job starts, the driver pod schedules on an EC2 instance. EKS picks an instance from the first Node Group that has the matching resources available to the driver pod. 110 | 111 | ### 3- Define a NodeSelector in Pod Templates 112 | Beginning with Amazon EMR versions 5.33.0 or 6.3.0, Amazon EMR on EKS supports Spark’s pod template feature. Pod templates are specifications that determine how to run each pod. You can use pod template files to define the driver or executor pod’s configurations that Spark configurations do not support. 
For example Spark configurations do not support defining individual node selectors for the driver pod and the executor pods. Define a node selector **only** for the driver pod when you want to choose on which pool of EC2 instance it should schedule. Let the Fargate Profile schedule the executor pods. 113 | 114 | **Driver Pod Template** 115 | 116 | ``` 117 | apiVersion: v1 118 | kind: Pod 119 | spec: 120 | volumes: 121 | - name: source-data-volume 122 | emptyDir: {} 123 | - name: metrics-files-volume 124 | emptyDir: {} 125 | nodeSelector: 126 | : 127 | containers: 128 | - name: spark-kubernetes-driver # This will be interpreted as Spark driver container 129 | ``` 130 | 131 | Store the pod template file onto a S3 location: 132 | 133 | ``` aws s3 cp /driver-pod-template.yaml s3:///driver-pod-template.yaml``` 134 | 135 | 136 | **Request** 137 | 138 | ``` 139 | cat >spark-python-in-s3-nodeselector.json << EOF 140 | { 141 | "name": "spark-python-in-s3-fargate-nodeselector", 142 | "virtualClusterId": "", 143 | "executionRoleArn": "", 144 | "releaseLabel": "emr-6.3.0-latest", 145 | "jobDriver": { 146 | "sparkSubmitJobDriver": { 147 | "entryPoint": "s3:///trip-count.py", 148 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=30G --conf spark.executor.cores=4" 149 | } 150 | }, 151 | "configurationOverrides": { 152 | "applicationConfiguration": [ 153 | { 154 | "classification": "spark-defaults", 155 | "properties": { 156 | "spark.kubernetes.executor.label.spark-node-placement": "fargate", 157 | "spark.kubernetes.driver.podTemplateFile": "s3:///driver-pod-template.yaml" 158 | } 159 | } 160 | ], 161 | "monitoringConfiguration": { 162 | "cloudWatchMonitoringConfiguration": { 163 | "logGroupName": "/emr-containers/jobs", 164 | "logStreamNamePrefix": "demo" 165 | }, 166 | "s3MonitoringConfiguration": { 167 | "logUri": "s3://joblogs" 168 | } 169 | } 170 | } 171 | } 172 | EOF 173 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-nodeselector.json 174 | ``` 175 | 176 | **Observed Behavior:** 177 | The driver pod schedules on an EC2 instance with enough capacity and matching label key / value with the node selector. 178 | -------------------------------------------------------------------------------- /content/node-placement/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/node-placement/docs/index.md -------------------------------------------------------------------------------- /content/outposts/emr-containers-on-outposts.md: -------------------------------------------------------------------------------- 1 | # Running EMR Containers on AWS Outposts 2 | ## Background 3 | You can now run Amazon EMR container jobs on EKS clusters that are running on AWS Outposts. AWS Outposts enables native AWS services, infrastructure, and operating models in on-premises facilities. In AWS Outposts environments, you can use the same AWS APIs, tools, and infrastructure that you use in the AWS Cloud. Amazon EKS nodes on AWS Outposts is ideal for low-latency workloads that need to be run in close proximity to on-premises data and applications. For more information, see the Amazon EKS on Outposts [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/eks-on-outposts.html). 4 | 5 | This document provides the steps to set up EMR containers on AWS Outposts. 
6 | 7 | ![](resources/outposts_eks_network.png) 8 | 9 | ## Key Considerations and Recommendations 10 | * The EKS cluster on an Outpost must be created with self-managed node groups. 11 | * Use the AWS Management Console and AWS CloudFormation to create a self-managed node group in Outposts. 12 | * For EMR workloads, we recommend creating EKS clusters where all the worker nodes reside in the self-managed node group of Outposts. 13 | * The Kubernetes client in the Spark driver pod creates and monitor executor pods by communicating with the EKS managed Kubernetes API server residing in the parent AWS Region. For reliable monitoring of executor pods during a job run, we also recommend having a reliable low latency link between the Outpost and the parent Region. 14 | * AWS Fargate is not available on Outposts. 15 | * For more information about the supported Regions, prerequisites and considerations for Amazon EKS on AWS Outposts, see the EKS on Outposts [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/eks-on-outposts.html). 16 | 17 | 18 | ## Infrastructure Setup 19 | ### Setup EKS on Outposts 20 | **Network Setup** 21 | 22 | 23 | * Setup a VPC 24 | ``` 25 | aws ec2 create-vpc \ 26 | --region \ 27 | --cidr-block '<10.0.0.0/16>' 28 | ``` 29 | In the output, take note of the VPC ID. 30 | ``` 31 | { 32 | "Vpc": { 33 | "VpcId": "vpc-123vpc", 34 | ... 35 | } 36 | } 37 | ``` 38 | 39 | 40 | * Create two subnets in the parent Region. 41 | ``` 42 | aws ec2 create-subnet \ 43 | --region '' \ 44 | --availability-zone-id '' \ 45 | --vpc-id '' \ 46 | --cidr-block '<10.0.1.0/24>' 47 | 48 | aws ec2 create-subnet \ 49 | --region '' \ 50 | --availability-zone-id '' \ 51 | --vpc-id '' \ 52 | --cidr-block '<10.0.2.0/24>' 53 | ``` 54 | In the output, take note of the Subnet ID. 55 | ``` 56 | { 57 | "Subnet": { 58 | "SubnetId": "subnet-111", 59 | ... 60 | } 61 | } 62 | { 63 | "Subnet": { 64 | "SubnetId": "subnet-222", 65 | ... 66 | } 67 | } 68 | ``` 69 | 70 | 71 | * Create a subnet in the Outpost Availability Zone. (This step is different for Outposts) 72 | ``` 73 | aws ec2 create-subnet \ 74 | --region '' \ 75 | --availability-zone-id '' \ 76 | --outpost-arn 'arn:aws:outposts::<123456789>:outpost/' \ 77 | --vpc-id '' \ 78 | --cidr-block '<10.0.3.0/24>' 79 | ``` 80 | In the output, take note of the Subnet ID. 81 | ``` 82 | { 83 | "Subnet": { 84 | "SubnetId": "subnet-333outpost", 85 | "OutpostArn": "..." 86 | ... 87 | } 88 | } 89 | ``` 90 | 91 | 92 | 93 | **EKS Cluster Creation** 94 | 95 | 96 | * Create an EKS cluster using the three subnet Ids created earlier. 97 | ``` 98 | aws eks create-cluster \ 99 | --region '' \ 100 | --name '' \ 101 | --role-arn 'arn:aws:iam::<123456789>:role/' \ 102 | --resources-vpc-config subnetIds=',,' 103 | ``` 104 | 105 | 106 | * Check until the cluster status becomes active. 107 | ``` 108 | aws eks describe-cluster \ 109 | --region '' \ 110 | --name '' 111 | ``` 112 | Note the values of resourcesVpcConfig.clusterSecurityGroupId and identity.oidc.issuer. 113 | ``` 114 | { 115 | "cluster": { 116 | "name": "outposts-eks-cluster", 117 | ... 118 | "resourcesVpcConfig": { 119 | "clusterSecurityGroupId": "sg-123clustersg", 120 | }, 121 | "identity": { 122 | "oidc": { 123 | "issuer": "https://oidc.eks.us-west-2.amazonaws.com/id/oidcid" 124 | } 125 | }, 126 | "status": "ACTIVE", 127 | } 128 | } 129 | ``` 130 | 131 | * Add the Outposts nodes to the EKS Cluster. 132 | 133 | At this point, eksctl cannot be used to launch self-managed node groups in Outposts. 
Please follow the steps listed in the self-managed nodes [documentation page](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html#aws-management-console). In order to use the cloudformation script lised in the AWS Management Console tab, make note of the following values created in the earlier steps: 134 | * ClusterName: `````` 135 | * ClusterControlPlaneSecurityGroup: `````` 136 | * Subnets: `````` 137 | 138 | Apply the aws-auth-cm config map listed on the documentation page to allow the nodes to join the cluster. 139 | 140 | ### Register cluster with EMR Containers 141 | Once the EKS cluster has been created and the nodes have been registered with the EKS control plane, take the [following steps](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/getting-started.html): 142 | 143 | * Enable cluster access for Amazon EMR on EKS. 144 | * Enable IAM Roles for Service Accounts (IRSA) on the EKS cluster. 145 | * Create a job execution role. 146 | * Update the trust policy of the job execution role. 147 | * Grant users access to Amazon EMR on EKS. 148 | * Register the Amazon EKS cluster with Amazon EMR. 149 | 150 | 151 | ## Conclusion 152 | EMR-EKS on Outposts allows users to run their big data jobs in close proximity to on-premises data and applications. 153 | -------------------------------------------------------------------------------- /content/outposts/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/outposts/index.md -------------------------------------------------------------------------------- /content/outposts/resources/outposts_eks_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/outposts/resources/outposts_eks_network.png -------------------------------------------------------------------------------- /content/performance/docs/dra.md: -------------------------------------------------------------------------------- 1 | # **Dynamic Resource Allocation** 2 | 3 | DRA is available in Spark 3 (EMR 6.x) without the need for an external shuffle service. Spark on Kubernetes doesn't support external shuffle service as of spark 3.1, but DRA can be achieved by enabling [shuffle tracking](https://spark.apache.org/docs/latest/configuration.html#dynamic-allocation). 4 | 5 | **Spark DRA with storage configuration:** 6 | 7 | When using [dynamic provisioning PVC/Volumes](../../storage/docs/spark/ebs.md#dynamic-provisioning) with Spark, you must disable PVC reuse to prevent multi-attach errors. The default configuration attempts to reuse PVCs, which causes EBS volumes to attach to multiple pods and leads to application failure. Set the following configurations: 8 | ``` 9 | "spark.kubernetes.driver.ownPersistentVolumeClaim": "false" 10 | "spark.kubernetes.driver.reusePersistentVolumeClaim": "false" 11 | "spark.kubernetes.driver.waitToReusePersistentVolumeClaim": "false" 12 | ``` 13 | For workloads requiring PVC reuse with DRA, use storage solutions supporting multi-attach like EFS / FSx for Lustre instead of EBS. 
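The snippet below is a minimal boto3 sketch of how the three settings above might be combined with dynamically provisioned executor volumes in a single StartJobRun request. The virtual cluster ID, execution role, S3 paths, storage class, and sizes are placeholders; the volume properties follow the upstream Spark on Kubernetes naming (`spark.kubernetes.executor.volumes.persistentVolumeClaim.<name>.*`).

```python
import boto3

# Hypothetical sketch: DRA with shuffle tracking plus one dynamically
# provisioned EBS-backed PVC per executor, with PVC reuse disabled as
# recommended above. All <...> values are placeholders.
spark_defaults = {
    "spark.dynamicAllocation.enabled": "true",
    "spark.dynamicAllocation.shuffleTracking.enabled": "true",
    # Disable PVC reuse to avoid EBS multi-attach errors with DRA.
    "spark.kubernetes.driver.ownPersistentVolumeClaim": "false",
    "spark.kubernetes.driver.reusePersistentVolumeClaim": "false",
    "spark.kubernetes.driver.waitToReusePersistentVolumeClaim": "false",
    # Dynamically provision one volume per executor (upstream Spark property names).
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName": "OnDemand",
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass": "gp2",
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit": "50Gi",
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path": "/data",
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly": "false",
}

emr = boto3.client("emr-containers")
emr.start_job_run(
    name="spark-dra-with-dynamic-pvc",
    virtualClusterId="<virtual-cluster-id>",
    executionRoleArn="<execution-role-arn>",
    releaseLabel="emr-6.2.0-latest",
    jobDriver={
        "sparkSubmitJobDriver": {
            "entryPoint": "s3://<s3-bucket>/trip-count.py",
            "sparkSubmitParameters": "--conf spark.executor.cores=4 --conf spark.executor.memory=8G",
        }
    },
    configurationOverrides={
        "applicationConfiguration": [
            {"classification": "spark-defaults", "properties": spark_defaults}
        ]
    },
)
```

Keeping the reuse flags set to `false` trades a small amount of volume-provisioning latency for predictable behavior when executors churn under DRA.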
14 | 15 | **Spark DRA without external shuffle service:** 16 | With DRA, the spark driver spawns the initial number of executors and then scales up the number until the specified maximum number of executors is met to process the pending tasks. Idle executors are terminated when there are no pending tasks, the executor idle time exceeds the idle timeout(`spark.dynamicAllocation.executorIdleTimeout)`and it doesn't have any cached or shuffle data. 17 | 18 | If the executor idle threshold is reached and it has cached data, then it has to exceed the cache data idle timeout(`spark.dynamicAllocation.cachedExecutorIdleTimeout) ` and if the executor doesn't have shuffle data, then the idle executor is terminated. 19 | 20 | If the executor idle threshold is reached and it has shuffle data, then without external shuffle service the executor will never be terminated. These executors will be terminated when the job is completed. This behavior is enforced by `"spark.dynamicAllocation.shuffleTracking.enabled":"true" and "spark.dynamicAllocation.enabled":"true"` 21 | 22 | If `"spark.dynamicAllocation.shuffleTracking.enabled":"false"and "spark.dynamicAllocation.enabled":"true"` then the spark application will error out since external shuffle service is not available. 23 | 24 | **Request:** 25 | 26 | ``` 27 | cat >spark-python-in-s3-dra.json << EOF 28 | { 29 | "name": "spark-python-in-s3-dra", 30 | "virtualClusterId": "", 31 | "executionRoleArn": "", 32 | "releaseLabel": "emr-6.2.0-latest", 33 | "jobDriver": { 34 | "sparkSubmitJobDriver": { 35 | "entryPoint": "s3:///trip-count.py", 36 | "sparkSubmitParameters": "--conf spark.driver.cores=5 --conf spark.executor.memory=20G --conf spark.driver.memory=15G --conf spark.executor.cores=6" 37 | } 38 | }, 39 | "configurationOverrides": { 40 | "applicationConfiguration": [ 41 | { 42 | "classification": "spark-defaults", 43 | "properties": { 44 | "spark.dynamicAllocation.enabled":"true", 45 | "spark.dynamicAllocation.shuffleTracking.enabled":"true", 46 | "spark.dynamicAllocation.minExecutors":"5", 47 | "spark.dynamicAllocation.maxExecutors":"100", 48 | "spark.dynamicAllocation.initialExecutors":"10" 49 | } 50 | } 51 | ], 52 | "monitoringConfiguration": { 53 | "cloudWatchMonitoringConfiguration": { 54 | "logGroupName": "/emr-containers/jobs", 55 | "logStreamNamePrefix": "demo" 56 | }, 57 | "s3MonitoringConfiguration": { 58 | "logUri": "s3://joblogs" 59 | } 60 | } 61 | } 62 | } 63 | EOF 64 | ``` 65 | 66 | ``` 67 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-dra.json 68 | ``` 69 | 70 | **Observed Behavior:** 71 | When the job gets started, the driver pod gets created and 10 executors are initially created. (`"spark.dynamicAllocation.initialExecutors":"10"`) Then the number of executors can scale up to a maximum of 100 (`"spark.dynamicAllocation.maxExecutors":"100"`). 72 | **Configurations to note:** 73 | 74 | `spark.dynamicAllocation.shuffleTracking.enabled` - `**`Experimental`**`. Enables shuffle file tracking for executors, which allows dynamic allocation without the need for an external shuffle service. This option will try to keep alive executors that are storing shuffle data for active jobs. 75 | 76 | `spark.dynamicAllocation.shuffleTracking.timeout` - When shuffle tracking is enabled, controls the timeout for executors that are holding shuffle data. The default value means that Spark will rely on the shuffles being garbage collected to be able to release executors. 
If for some reason garbage collection is not cleaning up shuffles quickly enough, this option can be used to control when to time out executors even when they are storing shuffle data. 77 | 78 | -------------------------------------------------------------------------------- /content/performance/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/index.md -------------------------------------------------------------------------------- /content/performance/docs/resources/images/after-binpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/after-binpack.png -------------------------------------------------------------------------------- /content/performance/docs/resources/images/before-binpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/before-binpack.png -------------------------------------------------------------------------------- /content/performance/docs/resources/images/binpack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/binpack.gif -------------------------------------------------------------------------------- /content/performance/docs/resources/images/nonbinpack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/performance/docs/resources/images/nonbinpack.gif -------------------------------------------------------------------------------- /content/scalability/docs/graphana-dashboard.md: -------------------------------------------------------------------------------- 1 | # Grafana Dashboards 2 | 3 | * [Spark Operator Dashboard Template](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/spark-operator-dashbord.json) 4 | 5 | ![](resources/images/spark-operator-dashboard.png) 6 | 7 | * [EKS Control Plane & Etcd DB monitoring](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/eks-control-plane.json) 8 | 9 | ![](resources/images/eks-control-plane.png) 10 | 11 | * [CNI usage dashboard](https://github.com/aws-samples/load-test-for-emr-on-eks/blob/main/grafana/dashboard-template/aws-cni-metrics.json) 12 | 13 | ![](resources/images/aws-cni-metrics.png) 14 | 15 | * [EMR on EKS Job dashboard](https://github.com/awslabs/data-on-eks/tree/main/analytics/terraform/emr-eks-karpenter/emr-grafana-dashboard) 16 | 17 | ![](resources/images/emr-on-eks-job-dashboard.png) 18 | -------------------------------------------------------------------------------- /content/scalability/docs/index.md: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/index.md -------------------------------------------------------------------------------- /content/scalability/docs/known-factors-spark-operator.md: -------------------------------------------------------------------------------- 1 | 2 | # Known factors that impact EMR on EKS Spark Operator submission rate 3 | 4 | |Category |Impact | 5 | |--- |--- | 6 | |Spark Operator Numbers |For the single spark operator, the max performance for submission rate would be around 30 jobs per min, and the performance tune on a single operator is very limited in the current version. Thus, to handle the large volume of workload, to horizontally scale up, with multiple Spark Operator would be the best solution. The operators will be not impacted from each other on eks cluster side, but higher number of operators will increase the overhead on apiserver/etcd side. | 7 | |Spark Operator's controllerThreads |controllerThreads is also named as "workers", which controls the number of concurrent threads used to process Spark application requests. Increasing this value can increase the performance of spark operator to handle the requests. | 8 | |Binpacking |Binpacking could efficiently allocate pods to available nodes within a Kubernetes cluster. Its primary goal is to optimize resource utilization by packing pods as tightly as possible onto nodes, while still meeting resource requirements and constraints. This approach aims to maximize cluster efficiency, reduce costs, and improve overall performance by minimizing the number of active nodes required to run the workload. With Binpacking enabled, the overall workload can minimise the resources used on network traffic between phsical nodes, as most of pods will be allocated in a single node at its launch time. However, we use Karpenter's consolidation feature to maximize pods tensity when node's utilization starts to drop. | 9 | |Spark Operator timeToLiveSeconds |TimeToLiveSeconds defines the Time-To-Live (TTL) duration in seconds for this SparkApplication after its termination. The SparkApplication object will be garbage collected if the current time is more than the TimeToLiveSeconds since its termination. | 10 | |Spark Job Run Time |Experimental observations indicate that Spark Operator performs better with longer-running jobs compared to shorter ones. This is likely due to the Operator's internal workqueue mechanism for managing job submissions and completions. The Spark Operator uses watchers to monitor SparkApplication status changes in the EKS cluster. Each status change triggers a task in the Operator's workqueue. Consequently, shorter jobs cause more frequent status changes, resulting in higher workqueue activity. This design suggests that a higher frequency of short-running jobs may impose greater processing overhead on the Spark Operator compared to fewer, longer-running jobs, even with equivalent total computation time. Understanding this behavior is important for optimizing Spark job scheduling in environments with varying job durations. | 11 | |Number of executors in EKS Spark Operator |The higher number of executors (10 vs 2) per single EMR on EKS job, number of objects created on EKS cluster grow such as pods, config maps, events etc. This becomes a limiting factor for querying etcd database eventually causing EKS cluster performance degradation. 
| 12 | |Spark Job Config - InitContainers |initContainer has a big impacted on the both Spark Operator and API server / etcd side. As with this setting enbaled, which will creat more events than jobs without this enabled. To utilize more Spark Operators for the job needs this set up, but for etcd size, it still be a bottleneck when the workload is large. | 13 | |EKS Spark Operator submission Rate |The EMR on EKS job submission rate will dictate the load placed on the API Server. A larger EMR EKS job submission rate can cause the more k8s objects in etcd db, increasing etcd db size, increased etcd request and api server request latency, and lower EMR job submission throughput. | 14 | |EMR Image pull policy |Default Image pull policy for job submitter, driver, and executor pods is set as Always. This adds latency in pod creation times. Unless specifically required for customer usecase, we can set Image pull policy to `IfNotPresent` resulting in lesser pod creation times. | 15 | |EKS K8s control plane scaling |EKS will autoscale the K8s control plane including API server and etcd db instances for customer's EKS cluster based on resource consumption. To be able to successfully run larger amounts of concurrent EMR on EKS jobs on EKS the API Server needs to be scaled up to handle the extra load. However if factors like webhook latency impact the metrics needed by the EKS API Server autoscaler are inhibited this can lead to not properly scaling up. This will impact the health of the API Server and etcd db and lead to a lower throughput on successfully completed jobs. | 16 | |EKS etcd db size |As you submit more concurrent running EMR on EKS jobs, the number of k8s objects stored in etcd db grow and in turn increase etcd db size as well. Increased etcd db size causes lantecy in some api server requests requiring cluster-wide/namespace-wide etcd read calls and will reduce EMR job submission throughput. Upper bound on etcd db size is 8GB as specified by EKS and reaching this capacity can make EKS cluster in read-only mode. Customers should monitor and keep their etcd db size within limits. We recommend keeping it below 7GB. In addition, as Spark Operator does not store the metadata of all the running jobs, so if there is any unhealthy or crash happened in etcd/API server, then could cause some job failed or running state lost with Spark Operator. | 17 | |EKS VPC Subnets IP pool |Available IP addresses in VPC subnets that are configured for EKS cluster also impact the EMR on EKS job throughput. Each pod needs to be assigned an IP address, thus it is essential to have large enough IP address pool available in VPC subnets of the EKS cluster to achieve higher pod concurrency. Exhaustion of IP adresses causes new pod creation requests to fail. | 18 | |EKS Cluster version |EKS has made improvements to cluster versions higher than 1.28 resulting in higher job throughput for EMR on EKS jobs. These recommendations are based on using EKS cluster version 1.30. | 19 | |Pod template size|Having high pod template sizes, for example from a high number of sidecar containers, init containers, or numerous environment variables, results in increased usage of the etcd database. 
This increased usage can potentially limit the number of pods that can be stored in etcd and may impact the cluster's overall performance, including the rate at which EMR jobs can be submitted.| 20 | -------------------------------------------------------------------------------- /content/scalability/docs/known-factors-start-job-run.md: -------------------------------------------------------------------------------- 1 | # Known factors that impact EMR on EKS submission rate for StartJobRun 2 | 3 | |Category |Impact | 4 | |--- |--- | 5 | |Mutating Webhook Latency |Increased webhook latency leads to an increase in the K8s API Server latency (delay in pod creation for example, if webhook is setup for pod creation). Pod creation latency in turn is propogated to the K8s job controller whose workers are now experiencing delays in creating jobs and leads to growing Job Worker Queue depth. Larger queue depth leads to a lower thoughput in the number of concurrent EMR on EKS jobs | 6 | |EMR on EKS Job driver retries |Driver retries create an extra K8s Job object which essentially doubles the amount of K8s Job objects in etcd database. This leads to increased strain in etcd database and also database size to grow faster and hence leads to increase in etcd request latency. This in turn results in a lower throughput in the number of concurrent EMR on EKS jobs. | 7 | |EMR on EKS Job Start Timeout Setting |When the K8s job controller work queue depth is larger, that means there could be a delay in the actual Spark driver pod to get created. In the meantime EMR on EKS's control plane by default expects the EMR EKS job driver pod to be created within 15 mins. If the driver is not created within that timeout period, the EMR on EKS control plane will mark the job as failed preemptively. Higher timeout values will ensure the job gets longer time for getting the job scheduled and begin running | 8 | |EMR on EKS Job run time |A longer EMR on EKS job run time means that we will essentially have more concurrent active jobs in the EKS cluster. If we keep a consistent job submission rate for long running EMR EKS jobs as compared to a job with a shorter duration we will end up with a larger amount of active concurrent jobs in the EKS cluster. This can lead to more objects in etcd db, increasing etcd db size, increased etcd request latency, and lower EMR job submission throughput. | 9 | |Number of executors in EMR on EKS Job |As we define higher number of executors per single EMR on EKS job, number of objects created on EKS cluster grow such as pods, config maps, events etc. This becomes a limiting factor for querying etcd database eventually causing EKS cluster performance degradation. | 10 | |EMR on EK Job submission Rate |The EMR on EKS job submission rate will dictate the load placed on the API Server. A larger EMR EKS job submission rate can cause the more k8s objects in etcd db, increasing etcd db size, increased etcd request and api server request latency, and lower EMR job submission throughput. | 11 | |EMR Image pull policy |Default Image pull policy for job submitter, driver, and executor pods is set as Always. This adds latency in pod creation times. Unless specifically required for customer usecase, we can set Image pull policy to `IfNotPresent` resulting in lesser pod creation times. | 12 | |EMR Job type |Job concurrency values are different for batch and streaming job types. Streaming jobs usually consume less resources resulting in higher job concurrency values compared to batch jobs. 
| 13 | |EKS K8s control plane scaling |EKS will autoscale the K8s control plane including API server and etcd db instances for customer's EKS cluster based on resource consumption. To be able to successfully run larger amounts of concurrent EMR on EKS jobs on EKS the API Server needs to be scaled up to handle the extra load. However if factors like webhook latency impact the metrics needed by the EKS API Server autoscaler are inhibited this can lead to not properly scaling up. This will impact the health of the API Server and etcd db and lead to a lower throughput on successfully completed jobs. | 14 | |EKS etcd db size |As you submit more concurrent running EMR on EKS jobs, the number of k8s objects stored in etcd db grow and in turn increase etcd db size as well. Increased etcd db size causes lantecy in some api server requests requiring cluster-wide/namespace-wide etcd read calls and will reduce EMR job submission throughput. Upper bound on etcd db size is 8GB as specified by EKS and reaching this capacity can make EKS cluster in read-only mode. Customers should monitor and keep their etcd db size within limits. We recommend keeping it below 7GB. | 15 | |EKS VPC Subnets IP pool |Available IP addresses in VPC subnets that are configured for EKS cluster also impact the EMR on EKS job throughput. Each pod needs to be assigned an IP address, thus it is essential to have large enough IP address pool available in VPC subnets of the EKS cluster to achieve higher pod concurrency. Exhaustion of IP adresses causes new pod creation requests to fail. | 16 | |EKS Cluster version |EKS has made improvements to cluster versions higher than 1.28 resulting in higher job throughput for EMR on EKS jobs. These recommendations are based on using EKS cluster version 1.30. | 17 | |Pod template size|Having high pod template sizes, for example from a high number of sidecar containers, init containers, or numerous environment variables, results in increased usage of the etcd database. 
This increased usage can potentially limit the number of pods that can be stored in etcd and may impact the cluster's overall performance, including the rate at which EMR jobs can be submitted.| 18 | -------------------------------------------------------------------------------- /content/scalability/docs/resources/images/EMR_Spark_Operator_Benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/EMR_Spark_Operator_Benchmark.png -------------------------------------------------------------------------------- /content/scalability/docs/resources/images/aws-cni-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/aws-cni-metrics.png -------------------------------------------------------------------------------- /content/scalability/docs/resources/images/eks-control-plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/eks-control-plane.png -------------------------------------------------------------------------------- /content/scalability/docs/resources/images/emr-on-eks-job-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/emr-on-eks-job-dashboard.png -------------------------------------------------------------------------------- /content/scalability/docs/resources/images/spark-operator-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/scalability/docs/resources/images/spark-operator-dashboard.png -------------------------------------------------------------------------------- /content/scalability/docs/scalaiblity-glossary.md: -------------------------------------------------------------------------------- 1 | # EMR on EKS Glossary & Terms 2 | 3 | * **EMR on EKS Job:** The Spark Job being submitted and executed by the EMR on EKS Control plane 4 | * **EMR on EKS Job types:** Type of Spark job being submitted. It can be either batch job (having fixed job duration) or streaming job (continuously running job). 5 | * **Kubernetes (K8s) control plane:** A K8s cluster consists of a control plane and one or more worker nodes. [Control plane](https://kubernetes.io/docs/concepts/overview/components/) is responsible managing overall state of the cluster and includes components such as API server, etcd database, scheduler, and controller manager. 6 | * **K8s API request:** The [K8s API](https://kubernetes.io/docs/reference/using-api/api-concepts/) is a resource-based (RESTful) programmatic interface provided via HTTP. It supports retrieving, creating, updating, and deleting resources in K8s cluster via the standard HTTP verbs (POST, PUT, PATCH, DELETE, GET). 
7 | * **K8s pod:** [Pods](https://kubernetes.io/docs/concepts/workloads/pods/) are the smallest deployable units of computing that you can create and manage in Kubernetes. 8 | * **K8s event:** [Event](https://kubernetes.io/docs/reference/kubernetes-api/cluster-resources/event-v1/) is a report of an event somewhere in the K8s cluster. It generally denotes some state change in the system. 9 | * **K8s config map:** A [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) is an API object used to store non-confidential data in key-value pairs. [Pods](https://kubernetes.io/docs/concepts/workloads/pods/) can consume ConfigMaps as environment variables, command-line arguments, or as configuration files in a [volume](https://kubernetes.io/docs/concepts/storage/volumes/). 10 | * **K8s API Server:** [API server](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/) **** is an internal K8s component responsible to serve and process K8s API requests. EKS hosts this K8s control plane component on EKS owned infrastructure that is different from customer’s EKS cluster. 11 | * **K8s Etcd database:** [Etcd](https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/) is K8s internal database that stores information about K8s objects such as pods, events, config maps etc. EKS hosts this K8s control plane component on EKS owned infrastructure. 12 | * **K8s Job**: A [K8s Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/) object creates and monitors a pod until they complete successfully. It has a retry policy that helps ensuring completion. This is different from EMR on EKS job concept. An EMR on EKS job usually submits one or more K8s jobs in K8s cluster. 13 | * **K8s Job Controller**: The [K8s native controller](https://kubernetes.io/docs/concepts/architecture/controller/) is a component that interacts with the Kubernetes API server to create pods, update job status according to pod status, create events. Job controller monitors and updates K8s job objects. 14 | * **K8s Job Controller Work Queue (Depth)**: The backlog of K8s job object events accumulated, that need to be processed by job controller. 15 | * **EMR Spark Operator:** A job submission model for Amazon EMR on EKS, which users can deploy and manage Spark applications with the Amazon EMR release runtime on the Amazon EKS clusters 16 | * **Job types: **** **Type of Spark job being submitted. It can be either batch job (having fixed job duration) or streaming job (continuously running job). 17 | * **Spark Operator Workqueue:** The central component in the Spark Operator's control loop, managing the flow of SparkApplication resources that need to be processed, ensuring efficient, ordered, and reliable handling of these resources. 
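
To make the relationship between an EMR on EKS job, the K8s Job objects it submits, and the pods the job controller creates easier to see, here is a small sketch using the `kubernetes` Python client. It assumes cluster access via your kubeconfig and that the virtual cluster is registered against an `emr` namespace; adjust the namespace to your setup.

```python
from kubernetes import client, config

# Assumes kubectl access to the EKS cluster and a virtual cluster mapped to
# the "emr" namespace (placeholder).
config.load_kube_config()
batch = client.BatchV1Api()
core = client.CoreV1Api()

NAMESPACE = "emr"

# Each EMR on EKS job run typically materializes as one or more K8s Job objects...
for job in batch.list_namespaced_job(NAMESPACE).items:
    print(f"K8s Job: {job.metadata.name} "
          f"(active={job.status.active}, succeeded={job.status.succeeded})")

# ...which in turn own pods (job submitter, Spark driver, executors).
for pod in core.list_namespaced_pod(NAMESPACE).items:
    print(f"Pod: {pod.metadata.name} phase={pod.status.phase} node={pod.spec.node_name}")
```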
-------------------------------------------------------------------------------- /content/security/docs/index.md: -------------------------------------------------------------------------------- 1 | **** -------------------------------------------------------------------------------- /content/security/docs/resources/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG EMR_VERSION=emr-6.15.0 2 | FROM public.ecr.aws/emr-on-eks/spark/${EMR_VERSION} 3 | USER root 4 | 5 | RUN mkdir -p /usr/lib/poc 6 | COPY custom-entrypoint.sh /usr/lib/poc/entrypoint.sh 7 | RUN chown -R hadoop:hadoop /usr/lib/poc 8 | RUN chmod -R a+x /usr/lib/poc 9 | 10 | USER hadoop:hadoop 11 | ENTRYPOINT ["/usr/lib/poc/entrypoint.sh"] -------------------------------------------------------------------------------- /content/security/docs/resources/S3ListObjects_v1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/S3ListObjects_v1.jar -------------------------------------------------------------------------------- /content/security/docs/resources/client-role-2-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:PutObject", 8 | "s3:GetObject" 9 | ], 10 | "Resource": [ 11 | "arn:aws:s3:::datalake-${ACCOUNTB}-${REGION}/*" 12 | ] 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": [ 17 | "s3:ListBucket" 18 | ], 19 | "Resource": [ 20 | "arn:aws:s3:::datalake-${ACCOUNTB}-${REGION}" 21 | ] 22 | }, 23 | { 24 | "Effect": "Allow", 25 | "Action": "sqs:*", 26 | "Resource": "*" 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /content/security/docs/resources/client-role-2-trust-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "AWS": "arn:aws:iam::${ACCOUNTA}:role/job-execution-role-1" 8 | }, 9 | "Action": "sts:AssumeRole" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /content/security/docs/resources/custom-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | #Set up the chained roled for aws boto3 sdk 6 | generate_aws_config() { 7 | # Create a .aws directory in the user's home directory 8 | mkdir -p $HOME/.aws 9 | 10 | # Generate the config file from environment variables 11 | cat > $HOME/.aws/config << EOF 12 | [default] 13 | region=${REGION} 14 | role_arn=${ROLE_2_ARN} 15 | role_session_name=client_role 16 | source_profile=irsa-role 17 | 18 | [profile irsa-role] 19 | region=${REGION} 20 | web_identity_token_file=/var/run/secrets/eks.amazonaws.com/serviceaccount/token 21 | role_arn=${ROLE_1_ARN} 22 | EOF 23 | 24 | # Set proper permissions 25 | chmod 600 $HOME/.aws/config 26 | } 27 | 28 | # Function to generate credentials 29 | generate_aws_credentials() { 30 | echo "Generating AWS credentials at $(date)" 31 | 32 | # Get credentials using web identity token 33 | credentials=$(aws sts assume-role-with-web-identity \ 34 | --role-arn ${ROLE_1_ARN} \ 35 | --role-session-name webidentity-session \ 36 | --web-identity-token "$(cat 
/var/run/secrets/eks.amazonaws.com/serviceaccount/token)" \ 37 | --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken,Expiration]' \ 38 | --output text) 39 | 40 | # Create .aws directory 41 | mkdir -p $HOME/.aws 42 | 43 | # Generate the credentials file 44 | cat > $HOME/.aws/credentials << EOF 45 | [default] 46 | source_profile=irsa-role 47 | role_arn=${ROLE_2_ARN} 48 | role_session_name=client_role 49 | 50 | [irsa-role] 51 | aws_access_key_id=$(echo $credentials | awk '{print $1}') 52 | aws_secret_access_key=$(echo $credentials | awk '{print $2}') 53 | aws_session_token=$(echo $credentials | awk '{print $3}') 54 | EOF 55 | 56 | chmod 600 $HOME/.aws/credentials 57 | 58 | # Extract expiration time for next refresh 59 | expiration=$(echo $credentials | awk '{print $4}') 60 | echo "Credentials will expire at: $expiration" 61 | } 62 | 63 | # Function to start credential refresh daemon 64 | start_credential_refresh_daemon() { 65 | while true; do 66 | generate_aws_credentials 67 | 68 | # Sleep for 80% of the default 1-hour credential duration (refresh the token every 48 minutes) 69 | sleep 2880 70 | # # test 10mins for testing 71 | # sleep 600 72 | 73 | # Check if the token file still exists and is readable 74 | if [ ! -r "/var/run/secrets/eks.amazonaws.com/serviceaccount/token" ]; then 75 | echo "Token file not accessible. Stopping refresh daemon." 76 | exit 1 77 | fi 78 | done 79 | } 80 | 81 | generate_aws_config 82 | # NOTE: the IRSA env variable "AWS_ROLE_ARN" must be reset 83 | # To trigger the access deny 403 while evaluating WebIdentity credential 84 | # As a result of the RESET, it forces SDK applications to use the next Profile provider () in the AWS DefaultCredentialChain 85 | export AWS_ROLE_ARN=$ROLE_2_ARN 86 | export AWS_WEB_IDENTITY_TOKEN_FILE=/var/run/secrets/eks.amazonaws.com/serviceaccount/token 87 | 88 | # Start the refresh daemon in the background 89 | start_credential_refresh_daemon & 90 | DAEMON_PID=$! 91 | echo $DAEMON_PID > /tmp/credential-refresh.pid 92 | # Set up trap to clean up the daemon on script exit 93 | trap "kill $DAEMON_PID 2>/dev/null" EXIT 94 | 95 | /usr/bin/entrypoint.sh "$@" -------------------------------------------------------------------------------- /content/security/docs/resources/driver-pod-template.yaml: -------------------------------------------------------------------------------- 1 | # // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # // SPDX-License-Identifier: MIT-0 3 | apiVersion: v1 4 | kind: Pod 5 | spec: 6 | containers: 7 | - name: spark-kubernetes-driver 8 | env: 9 | - name: AWS_ROLE_ARN 10 | value: "arn:aws:iam::ACCOUNTB:role/emr-on-eks-client-a-role" 11 | - name: AWS_WEB_IDENTITY_TOKEN_FILE 12 | value: "/var/run/secrets/eks.amazonaws.com/serviceaccount/token" -------------------------------------------------------------------------------- /content/security/docs/resources/executor-pod-template.yaml: -------------------------------------------------------------------------------- 1 | # // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # // SPDX-License-Identifier: MIT-0 3 | apiVersion: v1 4 | kind: Pod 5 | spec: 6 | containers: 7 | - name: spark-kubernetes-executor 8 | env: 9 | - name: AWS_ROLE_ARN 10 | value: "arn:aws:iam::ACCOUNTB:role/emr-on-eks-client-a-role" 11 | - name: AWS_WEB_IDENTITY_TOKEN_FILE 12 | value: "/var/run/secrets/eks.amazonaws.com/serviceaccount/token" -------------------------------------------------------------------------------- /content/security/docs/resources/images/emr-on-eks-fargate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-fargate.png -------------------------------------------------------------------------------- /content/security/docs/resources/images/emr-on-eks-network-communication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-network-communication.png -------------------------------------------------------------------------------- /content/security/docs/resources/images/emr-on-eks-self-and-managed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/emr-on-eks-self-and-managed.png -------------------------------------------------------------------------------- /content/security/docs/resources/images/role-chain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/role-chain.png -------------------------------------------------------------------------------- /content/security/docs/resources/images/shared-responsibility-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/security/docs/resources/images/shared-responsibility-model.png -------------------------------------------------------------------------------- /content/security/docs/resources/job-exec-role-1-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": "sts:AssumeRole", 7 | "Resource": "arn:aws:iam::${ACCOUNTB}:role/client-role-2" 8 | }, 9 | { 10 | "Effect": "Allow", 11 | "Action": [ 12 | "s3:PutObject", 13 | "s3:DeleteObject", 14 | "s3:GetObject" 15 | ], 16 | "Resource": [ 17 | "arn:aws:s3:::emr-on-eks-test-${ACCOUNTA}-$REGION}/*" 18 | ] 19 | }, 20 | { 21 | "Effect": "Allow", 22 | "Action": "s3:ListBucket", 23 | "Resource": [ 24 | "arn:aws:s3:::emr-on-eks-test-${ACCOUNTA}-${REGION}" 25 | ] 26 | }, 27 | { 28 | "Effect": "Allow", 29 | "Action": [ 30 | "logs:PutLogEvents", 31 | "logs:CreateLogStream", 32 | "logs:DescribeLogGroups", 33 | "logs:DescribeLogStreams", 34 | "logs:CreateLogGroup" 35 | ], 36 | "Resource": [ 37 | "arn:aws:logs:*:*:*" 38 | ] 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- 
/content/security/docs/resources/job-exec-role-1-trust-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "Service": "eks.amazonaws.com" 8 | }, 9 | "Action": "sts:AssumeRole" 10 | }, 11 | { 12 | "Effect": "Allow", 13 | "Principal": { 14 | "Service": "pods.eks.amazonaws.com" 15 | }, 16 | "Action": [ 17 | "sts:AssumeRole", 18 | "sts:TagSession" 19 | ] 20 | }, 21 | { 22 | "Effect": "Allow", 23 | "Principal": { 24 | "Federated": "arn:aws:iam::${ACCOUNTA}:oidc-provider/oidc.eks.us-east-1.amazonaws.com/id/YOUR_OIDC_ID" 25 | }, 26 | "Action": "sts:AssumeRoleWithWebIdentity", 27 | "Condition": { 28 | "StringLike": { 29 | "oidc.eks.us-east-1.amazonaws.com/id/YOUR_OIDC_ID:sub": "system:serviceaccount:emr:emr-containers*" 30 | } 31 | } 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /content/security/docs/resources/mix-spark-boto3.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | from pyspark.sql.functions import to_json, struct 3 | import boto3,sys 4 | from botocore.exceptions import ClientError 5 | 6 | def main(): 7 | print("=== Starting Spark Session ===") 8 | S3_FILE=sys.argv[1] 9 | SQS_URL=sys.argv[2] 10 | 11 | spark = SparkSession.builder.appName("irsa-poc").getOrCreate() 12 | 13 | # 1. Read data from S3 14 | try: 15 | df = spark.read.parquet(S3_FILE) 16 | except Exception as e: 17 | print(f"Error reading S3 data: {e}") 18 | spark.stop() 19 | return 20 | 21 | # 2. Convert each row to JSON string 22 | print("Converting rows to JSON...") 23 | json_df = df.select(to_json(struct("*")).alias("value")) 24 | 25 | print("=== Sample JSON Output ===") 26 | json_df.show(5, truncate=False) 27 | 28 | # 3. 
Send to SQS 29 | def send_partition(partition): 30 | print(f"\nInitializing SQS client for partition...") 31 | try: 32 | sqs = boto3.client('sqs', region_name='us-east-1') 33 | 34 | results = [] 35 | results.append(f"Caller Identity: {boto3.client('sts').get_caller_identity()}") 36 | for i, row in enumerate(partition, 1): 37 | try: 38 | response=sqs.send_message( 39 | QueueUrl=SQS_URL, 40 | MessageBody=row.value 41 | ) 42 | results.append(f"Sent message {i} - MessageId: {response['MessageId']}") 43 | 44 | except ClientError as e: 45 | results.append(f"Failed: {e} | Message: {row.value}") 46 | return results 47 | except Exception as e: 48 | return [f"Partition failed: {str(e)}"] 49 | 50 | print("\n=== Starting boto3 connection ===") 51 | results = json_df.rdd.mapPartitions(send_partition).collect() 52 | for msg in results: 53 | print(msg) 54 | print("\n=== Job Completed ===") 55 | 56 | 57 | if __name__ == "__main__": 58 | print("Script started") 59 | main() 60 | print("Script finished") -------------------------------------------------------------------------------- /content/security/docs/resources/only-boto3.py: -------------------------------------------------------------------------------- 1 | import boto3,json,sys 2 | 3 | def list_s3_bucket_contents(bucket_name, prefix): 4 | s3 = boto3.client('s3', region_name='us-west-2') 5 | objects = [] 6 | try: 7 | response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix) 8 | if 'Contents' in response: 9 | print(f"Files in bucket '{bucket_name}':") 10 | for obj in response['Contents']: 11 | print(f"- {obj['Key']} (Size: {obj['Size']} bytes)") 12 | objects.append(obj['Key']) 13 | else: 14 | print(f"No objects found with prefix '{prefix}'") 15 | except Exception as e: 16 | print(f"Error accessing S3 bucket: {e}") 17 | return objects 18 | 19 | def send_s3_references_to_sqs(bucket_name, object_keys, sqs_queue_url): 20 | sqs = boto3.client('sqs', region_name='us-east-1') 21 | for key in object_keys: 22 | try: 23 | print(f"Sending S3 reference: {key}") 24 | message_body = json.dumps({ 25 | 's3_bucket': bucket_name, 26 | 's3_key': key, 27 | 'message_type': 's3_reference' 28 | }) 29 | response = sqs.send_message( 30 | QueueUrl=sqs_queue_url, 31 | MessageBody=message_body, 32 | MessageAttributes={ 33 | 'Source': {'StringValue': 's3-reference-sender', 'DataType': 'String'}, 34 | 'FileType': {'StringValue': 'parquet', 'DataType': 'String'} 35 | } 36 | ) 37 | print(f"Message sent with ID: {response['MessageId']}") 38 | except Exception as e: 39 | print(f"ERROR processing {key}: {str(e)[:200]}...") 40 | continue 41 | 42 | if __name__ == "__main__": 43 | if len(sys.argv) != 4: 44 | print("Usage: python script.py ") 45 | sys.exit(1) 46 | 47 | BUCKET_NAME = sys.argv[1] 48 | PREFIX_FILE_PATH = sys.argv[2] 49 | SQS_QUEUE_URL = sys.argv[3] 50 | 51 | s3_objects = list_s3_bucket_contents(BUCKET_NAME, PREFIX_FILE_PATH) 52 | print(f"Found {len(s3_objects)} objects to process") 53 | if s3_objects: 54 | send_s3_references_to_sqs(BUCKET_NAME, s3_objects, SQS_QUEUE_URL) 55 | else: 56 | print("No objects to process") -------------------------------------------------------------------------------- /content/security/docs/spark/data-encryption.md: -------------------------------------------------------------------------------- 1 | # **EMR Containers Spark - In transit and At Rest data encryption** 2 | 3 | ### **Encryption at Rest** 4 | ####Amazon S3 Client-Side Encryption 5 | 6 | To utilize [S3 Client side 
encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingClientSideEncryption.html), you will need to create a KMS Key to be used to encrypt and decrypt data. If you do not have an KMS key, please follow this guide - [AWS KMS create keys](https://docs.aws.amazon.com/kms/latest/developerguide/create-keys.html). Also please note the job execution role needs access to this key, please see [Add to Key policy](https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html#key-policy-default-allow-users) for instructions on how to add these permissions. 7 | 8 | **trip-count-encrypt-write.py:** 9 | 10 | ``` 11 | cat> trip-count-encrypt-write.py</trip-data.parquet') 25 | print("Total trips: " + str(df.count())) 26 | 27 | df.write.parquet('s3:///write-encrypt-trip-data.parquet') 28 | print("Encrypt - KMS- CSE writew to s3 compeleted") 29 | spark.stop() 30 | EOF 31 | 32 | ``` 33 | 34 | **Request:** 35 | 36 | ``` 37 | cat > spark-python-in-s3-encrypt-cse-kms-write.json <", 41 | "executionRoleArn": "", 42 | "releaseLabel": "emr-6.2.0-latest", 43 | "jobDriver": { 44 | "sparkSubmitJobDriver": { 45 | "entryPoint": "s3://trip-count-encrypt-write.py", 46 | "sparkSubmitParameters": "--conf spark.executor.instances=10 --conf spark.driver.cores=2 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=2" 47 | } 48 | }, 49 | "configurationOverrides": { 50 | "applicationConfiguration": [ 51 | { 52 | "classification": "spark-defaults", 53 | "properties": { 54 | "spark.dynamicAllocation.enabled":"false" 55 | } 56 | }, 57 | { 58 | "classification": "emrfs-site", 59 | "properties": { 60 | "fs.s3.cse.enabled":"true", 61 | "fs.s3.cse.encryptionMaterialsProvider":"com.amazon.ws.emr.hadoop.fs.cse.KMSEncryptionMaterialsProvider", 62 | "fs.s3.cse.kms.keyId":"" 63 | } 64 | } 65 | ], 66 | "monitoringConfiguration": { 67 | "persistentAppUI": "ENABLED", 68 | "cloudWatchMonitoringConfiguration": { 69 | "logGroupName": "/emr-containers/jobs", 70 | "logStreamNamePrefix": "demo" 71 | }, 72 | "s3MonitoringConfiguration": { 73 | "logUri": "s3://joblogs" 74 | } 75 | } 76 | } 77 | } 78 | EOF 79 | 80 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-encrypt-cse-kms-write.json 81 | 82 | 83 | ``` 84 | 85 | In the above request, EMRFS encrypts the parquet file with the specified KMS key and the encrypted object is persisted to the specified s3 location. 
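One quick, read-only way to confirm that client-side encryption was applied is to inspect the object's user metadata: the S3 encryption client used by EMRFS stores the wrapped data key and encryption material description alongside the object. A minimal sketch, assuming the bucket and output prefix from the job above (the part file name is a placeholder, and the exact metadata keys can vary by EMRFS version):

```bash
# CSE-KMS encrypted objects typically carry x-amz-key-v2, x-amz-iv,
# x-amz-wrap-alg and x-amz-matdesc entries in their user metadata.
aws s3api head-object \
  --bucket <s3 bucket> \
  --key write-encrypt-trip-data.parquet/<part-file-name> \
  --query 'Metadata'
```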
86 | 87 | To verify the encryption - use the same KMS key to decrypt - the KMS key used is a symmetric key ( the same key can be used to both encrypt and decrypt) 88 | 89 | **trip-count-encrypt-read.py** 90 | 91 | ``` 92 | cat > trip-count-encrypt-read.py</trip-data.parquet') 106 | print("Total trips: " + str(df.count())) 107 | 108 | df_encrypt = spark.read.parquet('s3:///write-encrypt-trip-data.parquet') 109 | print("Encrypt data - Total trips: " + str(df_encrypt.count())) 110 | spark.stop() 111 | EOF 112 | ``` 113 | 114 | **Request** 115 | 116 | ``` 117 | cat > spark-python-in-s3-encrypt-cse-kms-read.json<", 121 | "executionRoleArn": "", 122 | "releaseLabel": "emr-6.2.0-latest", 123 | "jobDriver": { 124 | "sparkSubmitJobDriver": { 125 | "entryPoint": "s3://trip-count-encrypt-write.py", 126 | "sparkSubmitParameters": "--conf spark.executor.instances=10 --conf spark.driver.cores=2 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=2" 127 | } 128 | }, 129 | "configurationOverrides": { 130 | "applicationConfiguration": [ 131 | { 132 | "classification": "spark-defaults", 133 | "properties": { 134 | "spark.dynamicAllocation.enabled":"false" 135 | } 136 | }, 137 | { 138 | "classification": "emrfs-site", 139 | "properties": { 140 | "fs.s3.cse.enabled":"true", 141 | "fs.s3.cse.encryptionMaterialsProvider":"com.amazon.ws.emr.hadoop.fs.cse.KMSEncryptionMaterialsProvider", 142 | "fs.s3.cse.kms.keyId":"" 143 | } 144 | } 145 | ], 146 | "monitoringConfiguration": { 147 | "persistentAppUI": "ENABLED", 148 | "cloudWatchMonitoringConfiguration": { 149 | "logGroupName": "/emr-containers/jobs", 150 | "logStreamNamePrefix": "demo" 151 | }, 152 | "s3MonitoringConfiguration": { 153 | "logUri": "s3://joblogs" 154 | } 155 | } 156 | } 157 | } 158 | EOF 159 | 160 | aws emr-containers start-job-run --cli-input-json file:///spark-python-in-s3-encrypt-cse-kms-read.json 161 | 162 | 163 | 164 | 165 | 166 | ``` 167 | 168 | **Validate encryption:** Try to read the encrypted data without specifying `"fs.s3.cse.enabled":"true"` - will get an error message in the driver and executor logs because the content is encrypted and cannot be read without decryption. 169 | -------------------------------------------------------------------------------- /content/security/docs/spark/network-security.md: -------------------------------------------------------------------------------- 1 | # ** Managing VPC for EMR on EKS** 2 | 3 | This section address network security at VPC level. If you want to read more on network security for Spark in EMR on EKS please refer to this [section](https://aws.github.io/aws-emr-containers-best-practices/security/docs/spark/encryption/#amazon-emr-on-eks). 4 | 5 | ## **Security Group** 6 | 7 | The applications running on your EMR on EKS cluster often would need access to services that are running outside the cluster, 8 | for example, these can Amazon Redshift, Amazon Relational Database Service, a service self hosted on an EC2 instance. To access these resource you need to allow network traffic at the security group level. The default mechanism in EKS is using security groups at the node level, 9 | this means all the pods running on the node will inherit the rules on the security group. 10 | For security conscious customers, this is not a desired behavior and you would want to use security groups at the pod level. 11 | 12 | This section address how you can use Security Groups with EMR on EKS. 
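As a concrete illustration of allowing traffic at the security group level, the snippet below opens the database port of an Amazon RDS instance's security group to the security group that will be attached to the Spark pods. This is a sketch only: the group IDs are placeholders in the style used elsewhere in this guide, and your database engine may use a different port.

```bash
# Allow inbound PostgreSQL traffic (port 5432) from the pod security group (sg-yyyyy)
# to the security group attached to the RDS instance (sg-xxxxx).
aws ec2 authorize-security-group-ingress \
  --group-id sg-xxxxx \
  --protocol tcp \
  --port 5432 \
  --source-group sg-yyyyy
```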
13 | 14 | ### Configure EKS Cluster to use Security Groups for Pods 15 | 16 | In order to use Security Groups at the pod level, you need to configure the VPC CNI for EKS. The following [link](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) guide through the prerequisites as well as configuring the EKS Cluster. 17 | 18 | #### Define SecurityGroupPolicy 19 | 20 | Once you have configured the VPC CNI, you need to create a SecurityGroupPolicy object. 21 | This object define which **security group** (up to 5) to use, **podselector** to define which pod to apply the security group to and 22 | the **namespace** in which the Security Group should be evaluated. Below you find an example of `SecurityGroupPolicy`. 23 | 24 | ``` 25 | apiVersion: vpcresources.k8s.aws/v1beta1 26 | kind: SecurityGroupPolicy 27 | metadata: 28 | name: <> 29 | namespace: 30 | spec: 31 | podSelector: 32 | matchLabels: 33 | role: spark 34 | securityGroups: 35 | groupIds: 36 | - sg-xxxxx 37 | ``` 38 | 39 | ### Define pod template to use Security Group for pod 40 | 41 | In order for the security group to be applied to the Spark driver and executors, you need to provide a podtemplate which add label(s) to the pods. 42 | The labels should match the one defined above in the `podSelector` in our example it is `role: spark`. 43 | The snippet below define the pod template that you can upload in S3 and then reference when launching your job. 44 | 45 | ``` 46 | apiVersion: v1 47 | kind: Pod 48 | metadata: 49 | labels: 50 | role: spark 51 | ``` 52 | 53 | ### Launch a job 54 | 55 | The command below can be used to run a job. 56 | 57 | ``` 58 | aws emr-containers start-job-run --virtual-cluster-id --name spark-jdbc --execution-role-arn --release-label emr-6.7.0-latest --job-driver '{ 59 | "sparkSubmitJobDriver": { 60 | "entryPoint": "", 61 | "sparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1" 62 | } 63 | }' --configuration-overrides '{ 64 | "applicationConfiguration": [ 65 | { 66 | "classification": "spark-defaults", 67 | "properties": { 68 | "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", 69 | "spark.sql.catalogImplementation": "hive", 70 | "spark.dynamicAllocation.enabled":"true", 71 | "spark.dynamicAllocation.minExecutors": "8", 72 | "spark.dynamicAllocation.maxExecutors": "40", 73 | "spark.kubernetes.allocation.batch.size": "8", 74 | "spark.dynamicAllocation.executorAllocationRatio": "1", 75 | "spark.dynamicAllocation.shuffleTracking.enabled": "true", 76 | "spark.dynamicAllocation.shuffleTracking.timeout": "300s", 77 | "spark.kubernetes.driver.podTemplateFile":, 78 | "spark.kubernetes.executor.podTemplateFile": 79 | } 80 | } 81 | ], 82 | "monitoringConfiguration": { 83 | "persistentAppUI": "ENABLED", 84 | "cloudWatchMonitoringConfiguration": { 85 | "logGroupName": "/aws/emr-containers/", 86 | "logStreamNamePrefix": "default" 87 | } 88 | } 89 | }' 90 | ``` 91 | 92 | #### Verify a security group attached to the Pod ENI 93 | 94 | To verify that spark driver and executor driver have the security group attached to, apply the first command to get the podname then the second one to see the annotation in pod with the ENI associated to the pod which has the secuity group defined in the **SecurityGroupPolicy**. 
95 |
96 | ```
97 | export POD_NAME=$(kubectl -n <namespace> get pods -l role=spark -o jsonpath='{.items[].metadata.name}')
98 |
99 | kubectl -n <namespace> describe pod $POD_NAME | head -11
100 | ```
101 |
102 | ```
103 | Annotations: kubernetes.io/psp: eks.privileged
104 | vpc.amazonaws.com/pod-eni:
105 | [{"eniId":"eni-xxxxxxx","ifAddress":"xx:xx:xx:xx:xx:xx","privateIp":"x.x.x.x","vlanId":1,"subnetCidr":"x.x.x.x/x"}]
106 | ```
--------------------------------------------------------------------------------
/content/security/docs/spark/secrets.md:
--------------------------------------------------------------------------------
1 | # **Using Secrets in EMR on EKS**
2 |
3 | Secrets can be credentials for APIs, databases, or other resources. There are various ways these secrets can be passed to your containers, such as pod environment variables or Kubernetes Secrets. These methods are not secure: with environment variables, secrets are stored in clear text, and any user with admin access to the Kubernetes cluster can read them. Storing secrets as Kubernetes Secrets is also not secure, because they are not encrypted, only base64 encoded.
4 |
5 |
6 | There is a secure method to expose these secrets in EKS through the [Secrets Store CSI Driver](https://github.com/aws/secrets-store-csi-driver-provider-aws).
7 |
8 | The Secrets Store CSI Driver integrates with a secret store such as [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/) and mounts the secrets as a volume that your application code can access. This document describes how to set up and use AWS Secrets Manager with EMR on EKS through the Secrets Store CSI Driver.
9 |
10 | ### Deploy Secrets Store CSI Drivers and AWS Secrets and Configuration Provider
11 |
12 |
13 | #### Secrets Store CSI Drivers
14 |
15 | Configure the EKS cluster with the `Secrets Store CSI Driver`.
16 |
17 | To learn more about the AWS Secrets Manager CSI Driver, refer to this [link](https://docs.aws.amazon.com/secretsmanager/latest/userguide/integrating_csi_driver.html).
18 |
19 | ```
20 | helm repo add secrets-store-csi-driver \
21 | https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts
22 |
23 | helm install -n kube-system csi-secrets-store \
24 | --set syncSecret.enabled=true \
25 | --set enableSecretRotation=true \
26 | secrets-store-csi-driver/secrets-store-csi-driver
27 |
28 | ```
29 |
30 | Deploy the `AWS Secrets and Configuration Provider` to use AWS Secrets Manager.
31 |
32 | #### AWS Secrets and Configuration Provider
33 |
34 | ```
35 | kubectl apply -f https://raw.githubusercontent.com/aws/secrets-store-csi-driver-provider-aws/main/deployment/aws-provider-installer.yaml
36 | ```
37 |
38 | ### Define the `SecretProviderClass`
39 |
40 | The `SecretProviderClass` is how you present your secret in Kubernetes; below you find a definition of a `SecretProviderClass`.
41 | A few parameters are important:
42 |
43 | - The `provider` must be set to `aws`.
44 | - The `objectName` must be the name of the secret you want to use, as defined in AWS.
45 | Here the secret is called `db-creds`.
46 | - The `objectType` must be set to `secretsmanager`.

47 | 48 | ``` 49 | cat > db-cred.yaml << EOF 50 | 51 | apiVersion: secrets-store.csi.x-k8s.io/v1 52 | kind: SecretProviderClass 53 | metadata: 54 | name: mysql-spark-secret 55 | spec: 56 | provider: aws 57 | parameters: 58 | objects: | 59 | - objectName: "db-creds" 60 | objectType: "secretsmanager" 61 | EOF 62 | ``` 63 | 64 | ``` 65 | kubectl apply -f db-cred.yaml -n 66 | ``` 67 | In the terminal apply the above command to create `SecretProviderClass`, 68 | The `kubectl` command must include the namespace where your job will be executed. 69 | 70 | ### Pod Template 71 | 72 | In the executor podtemplate you should define it as follows to mount the secret. The example below show how you can define it. 73 | There are few points that are important to mount the secret: 74 | 75 | - `secretProviderClass`: this should have the same name as the one define above. In this case it is `mysql-spark-secret`. 76 | - `mountPath`: Is where the secret is going to be available to the pod. In this example it will be in `/var/secrets` 77 | When defining the `mountPath` make sure you do not specify the ones reserved by EMR on EKS as defined [here](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/pod-templates.html). 78 | 79 | ``` 80 | apiVersion: v1 81 | kind: Pod 82 | 83 | spec: 84 | containers: 85 | - name: spark-kubernetes-executors 86 | volumeMounts: 87 | - mountPath: "/var/secrets" 88 | name: mysql-cred 89 | readOnly: true 90 | volumes: 91 | - name: mysql-cred 92 | csi: 93 | driver: secrets-store.csi.k8s.io 94 | readOnly: true 95 | volumeAttributes: 96 | secretProviderClass: mysql-spark-secret 97 | ``` 98 | 99 | This podtemplate must be uploaded to S3 and referenced in the job submit command as shown below. 100 | 101 | **Note** You must make sure that the RDS instance or your Database allow traffic from the instances where your driver and executors pods are running. 102 | 103 | ### PySpark code 104 | 105 | The example below shows pyspark code for connecting with a MySQL DB. The example assume the secret is stored in AWS secrets manager as defined above. The `username` is the `key` to retrieve the database `user` as stored in AWS Secrets Manager, and `password` is the `key` to retrieve the database password. 106 | 107 | 108 | It shows how you can retrieve the credentials from the mount point `/var/secrets/`. 109 | The secret is stored in a file with the same name as it is defined in AWS in this case it is `db-creds`. 110 | This has been set in the podTemplate above. 111 | 112 | ``` 113 | from pyspark.sql import SparkSession 114 | import json 115 | 116 | secret_path = "/var/secrets/db-creds" 117 | 118 | f = open(secret_path, "r") 119 | mySecretDict = json.loads(f.read()) 120 | 121 | spark = SparkSession.builder.getOrCreate() 122 | 123 | str_jdbc_url="jdbc:" 124 | str_Query= 125 | str_username=mySecretDict['username'] 126 | str_password=mySecretDict['password'] 127 | driver = "com.mysql.jdbc.Driver" 128 | 129 | jdbcDF = spark.read \ 130 | .format("jdbc") \ 131 | .option("url", str_jdbc_url) \ 132 | .option("driver", driver)\ 133 | .option("query", str_Query) \ 134 | .option("user", str_username) \ 135 | .option("password", str_password) \ 136 | .load() 137 | 138 | jdbcDF.show() 139 | ``` 140 | 141 | ### Execute the job 142 | 143 | The command below can be used to run a job. 144 | 145 | **Note**: The supplied execution role **MUST** have access an IAM policy that allow it to access to the secret defined in `SecretProviderClass` above. 
146 | The IAM policy below shows the IAM actions that are needed. 147 | 148 | ``` 149 | { 150 | "Version": "2012-10-17", 151 | "Statement": [ { 152 | "Effect": "Allow", 153 | "Action": ["secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret"], 154 | "Resource": [] 155 | }] 156 | } 157 | ``` 158 | 159 | ``` 160 | aws emr-containers start-job-run --virtual-cluster-id --name spark-jdbc --execution-role-arn --release-label emr-6.7.0-latest --job-driver '{ 161 | "sparkSubmitJobDriver": { 162 | "entryPoint": "", 163 | "sparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1 --conf spark.jars=" 164 | } 165 | }' --configuration-overrides '{ 166 | "applicationConfiguration": [ 167 | { 168 | "classification": "spark-defaults", 169 | "properties": { 170 | "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", 171 | "spark.sql.catalogImplementation": "hive", 172 | "spark.dynamicAllocation.enabled":"true", 173 | "spark.dynamicAllocation.minExecutors": "8", 174 | "spark.dynamicAllocation.maxExecutors": "40", 175 | "spark.kubernetes.allocation.batch.size": "8", 176 | "spark.dynamicAllocation.executorAllocationRatio": "1", 177 | "spark.dynamicAllocation.shuffleTracking.enabled": "true", 178 | "spark.dynamicAllocation.shuffleTracking.timeout": "300s", 179 | "spark.kubernetes.driver.podTemplateFile":, 180 | "spark.kubernetes.executor.podTemplateFile": 181 | } 182 | } 183 | ], 184 | "monitoringConfiguration": { 185 | "persistentAppUI": "ENABLED", 186 | "cloudWatchMonitoringConfiguration": { 187 | "logGroupName": "/aws/emr-containers/", 188 | "logStreamNamePrefix": "default" 189 | } 190 | } 191 | }' 192 | ``` 193 | -------------------------------------------------------------------------------- /content/storage/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/storage/docs/index.md -------------------------------------------------------------------------------- /content/storage/docs/spark/instance-store.md: -------------------------------------------------------------------------------- 1 | # **Instance Store Volumes** 2 | 3 | When working with Spark workloads, it might be useful to use instances powered by [SSD instance store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html) to improve the performance of your jobs. This storage is located on disks that are physically attached to the host computer and can provide better performance compared to traditional EBS volumes. In the context of Spark, this might be beneficial for wide transformations (e.g. JOIN, GROUP BY) that generate a significant amount of shuffle data that Spark persists on the local filesystem of the instances where the executors are running. 4 | 5 | In this document, we highlight two approaches to leverage NVMe disks in your workloads when using EMR on EKS. For a list of instances supporting NVMe disks, see [Instance store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#instance-store-volumes) in the Amazon EC2 documentation. 
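You can also query this programmatically instead of browsing the documentation. A small sketch with the AWS CLI that lists the instance types in your current Region that ship with instance store volumes, along with their total local storage:

```bash
# Instance types with instance store volumes and their total local storage (GB).
aws ec2 describe-instance-types \
  --filters "Name=instance-storage-supported,Values=true" \
  --query "InstanceTypes[].[InstanceType, InstanceStorageInfo.TotalSizeInGB]" \
  --output table
```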
6 |
7 | ## **Mount kubelet pod directory on NVMe disks**
8 |
9 | The [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) service manages the lifecycle of pod containers that are created using Kubernetes. When a pod is launched on an instance, an ephemeral volume is automatically created for the pod, and this volume is mapped to a subdirectory within the path `/var/lib/kubelet` of the host node. This volume folder exists for the lifetime of the K8s pod, and it is automatically deleted once the pod ceases to exist.
10 |
11 | To leverage the NVMe disks attached to an EC2 node in your Spark application, perform the following actions during node bootstrap:
12 |
13 | * Prepare the NVMe disks attached to the instance (format the disks and create a partition)
14 | * Mount the `/var/lib/kubelet/pods` path on the NVMe storage
15 |
16 | By doing this, all local files generated by your Spark job (block manager data, shuffle data, etc.) are automatically written to the NVMe disks, so you don't have to configure a Spark volume path when launching the driver or executor pods. This approach is easier to adopt because it doesn't require any additional configuration in your job. In addition, once the job is completed, all the data stored in these ephemeral volumes is automatically deleted when the pods terminate.
17 |
18 | However, if you have multiple NVMe disks attached to the instance, you need to create a RAID 0 configuration across all the disks before mounting the `/var/lib/kubelet/pods` directory on the RAID partition. Without a RAID setup, it is not possible to leverage the full disk capacity available on the node.
19 |
20 | The following example shows how to create a node group in your cluster using this approach. To prepare the NVMe disks, we can use the [eksctl](https://eksctl.io/) **preBootstrapCommands** definition while creating the node group. The script performs the following actions:
21 |
22 | * For instances with a single NVMe disk, format the disk and create a Linux filesystem (e.g. ext4 or xfs)
23 | * For instances with multiple NVMe disks, create a RAID 0 configuration across all available volumes
24 |
25 | Once the disks are formatted and ready to use, we mount the folder **/var/lib/kubelet/pods** on the new filesystem and set up the correct permissions. Below, you can find an example of an eksctl configuration to create a managed node group using this approach.
26 | 27 | **Example** 28 | 29 | ``` 30 | apiVersion: eksctl.io/v1alpha5 31 | kind: ClusterConfig 32 | 33 | metadata: 34 | name: YOUR_CLUSTER_NAME 35 | region: YOUR_REGION 36 | 37 | managedNodeGroups: 38 | - name: ng-c5d-9xlarge 39 | instanceType: c5d.9xlarge 40 | desiredCapacity: 1 41 | privateNetworking: true 42 | subnets: 43 | - YOUR_NG_SUBNET 44 | preBootstrapCommands: # commands executed as root 45 | - yum install -y mdadm nvme-cli 46 | - nvme_disks=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | awk -F'[[:space:]][[:space:]]+' '{print $1}')) && [[ ${#nvme_disks[@]} -eq 1 ]] && mkfs.ext4 -F ${nvme_disks[*]} && systemctl stop docker && mkdir -p /var/lib/kubelet/pods && mount ${nvme_disks[*]} /var/lib/kubelet/pods && chmod 750 /var/lib/docker && systemctl start docker 47 | - nvme_disks=($(nvme list | grep "Amazon EC2 NVMe Instance Storage" | awk -F'[[:space:]][[:space:]]+' '{print $1}')) && [[ ${#nvme_disks[@]} -ge 2 ]] && mdadm --create --verbose /dev/md0 --level=0 --raid-devices=${#nvme_disks[@]} ${nvme_disks[*]} && mkfs.ext4 -F /dev/md0 && systemctl stop docker && mkdir -p /var/lib/kubelet/pods && mount /dev/md0 /var/lib/kubelet/pods && chmod 750 /var/lib/docker && systemctl start docker 48 | ``` 49 | 50 | 51 | **Benefits** 52 | 53 | * No need to mount the disk using Spark configurations or pod templates 54 | * Data generated by the application, will immediately be deleted at the pod termination. Data will be also purged in case of pod failures. 55 | * One time configuration for the node group 56 | 57 | **Cons** 58 | 59 | * If multiple jobs are allocated on the same EC2 instance, contention of disk resources will occur because it is not possible to allocate instance store volume resources across jobs 60 | 61 | 62 | 63 | ## **Mount NVMe disks as data volumes** 64 | 65 | In this section, we’re going to explicitly mount instance store volumes as the mount path in Spark configuration for drivers and executors 66 | 67 | As in the previous example, this script will automatically format the instance store volumes and create an **xfs** partition. The disks are then mounted in local folders called **/spark_data_IDX** where IDX is an integer that corresponds to the disk mounted. 68 | 69 | **Example** 70 | 71 | ``` 72 | apiVersion: eksctl.io/v1alpha5 73 | kind: ClusterConfig 74 | 75 | metadata: 76 | name: YOUR_CLUSTER_NAME 77 | region: YOUR_REGION 78 | 79 | managedNodeGroups: 80 | - name: ng-m5d-4xlarge 81 | instanceType: m5d.4xlarge 82 | desiredCapacity: 1 83 | privateNetworking: true 84 | subnets: 85 | - YOUR_NG_SUBNET 86 | preBootstrapCommands: # commands executed as root 87 | - "IDX=1;for DEV in /dev/nvme[1-9]n1;do mkfs.xfs ${DEV}; mkdir -p /spark_data_${IDX}; echo ${DEV} /spark_data_${IDX} xfs defaults,noatime 1 2 >> /etc/fstab; IDX=$((${IDX} + 1)); done" 88 | - "mount -a" 89 | - "chown 999:1000 /spark_data_*" 90 | ``` 91 | 92 | In order to successfully use ephemeral volumes within Spark, you need to specify additional configurations. In addition to spark configuration, the mounted volume name should start with `spark-local-dir-`. 93 | 94 | Below an example configuration provided during the EMR on EKS job submission, that shows how to configure Spark to use 2 volumes as local storage for the job. 
95 | 96 | **Spark Configurations** 97 | 98 | ``` 99 | { 100 | "name": ...., 101 | "virtualClusterId": ...., 102 | "executionRoleArn": ...., 103 | "releaseLabel": ...., 104 | "jobDriver": ...., 105 | "configurationOverrides": { 106 | "applicationConfiguration": [ 107 | { 108 | "classification": "spark-defaults", 109 | "properties": { 110 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.mount.path": "/spark_data_1", 111 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.mount.readOnly": "false", 112 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-1.options.path": "/spark_data_1", 113 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.mount.path": "/spark_data_2", 114 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.mount.readOnly": "false", 115 | "spark.kubernetes.executor.volumes.hostPath.spark-local-dir-2.options.path": "/spark_data_2" 116 | } 117 | } 118 | ] 119 | } 120 | } 121 | ``` 122 | 123 | 124 | Please note that for this approach it is required to specify the following configurations for each volume that you want to use. (IDX is a label to identify the volume mounted) 125 | 126 | ``` 127 | # Mount path on the host node 128 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.options.path 129 | 130 | # Mount path on the k8s pod 131 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.mount.path 132 | 133 | # (boolean) Should be defined as false to allow Spark to write in the path 134 | spark.kubernetes.executor.volumes.hostPath.spark-local-dir-IDX.mount.readOnly 135 | ``` 136 | 137 | 138 | **Benefits** 139 | 140 | * You can allocate dedicated resources of instance store volumes across your Spark jobs (For example, lets take a scenario where an EC2 instance has two instance store volumes. If you run two spark jobs on this node, you can dedicate one volume per Spark job) 141 | 142 | **Cons** 143 | 144 | * Additional configurations are required for Spark jobs to use instance store volumes. This approach can be error-prone if you don’t control the instance types being used (for example, multiple node groups with different instance types). You can mitigate this issue by using k8s node selectors and specify instance type in your spark configuraiton: **[spark.kubernetes.node.selector.node.kubernetes.io/instance-type](http://spark.kubernetes.node.selector.node.kubernetes.io/instance-type)** 145 | * Data created on the volumes is automatically deleted once the job is completed and instance is terminated. However, you need to extra measures to delete the data on instance store volumes if EC2 instance is re-used or is not terminated. 
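The node selector mitigation mentioned in the first point above can be expressed directly in the job's `spark-defaults` classification, so that drivers and executors are only scheduled on the instance type whose volume layout matches your `spark-local-dir-IDX` settings. A short sketch (the instance type is just an example):

```
{
  "classification": "spark-defaults",
  "properties": {
    "spark.kubernetes.node.selector.node.kubernetes.io/instance-type": "m5d.4xlarge"
  }
}
```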
146 | -------------------------------------------------------------------------------- /content/storage/resources/FSx_Lustre_SG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/storage/resources/FSx_Lustre_SG.png -------------------------------------------------------------------------------- /content/submit-applications/docs/spark/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/index.md -------------------------------------------------------------------------------- /content/submit-applications/docs/spark/java-and-scala.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/java-and-scala.md -------------------------------------------------------------------------------- /content/submit-applications/docs/spark/multi-arch-image.md: -------------------------------------------------------------------------------- 1 | # Build a Multi-architecture Docker Image Supporting arm64 & amd64 2 | 3 | ## Pre-requisites 4 | We can complete all the steps either from a local desktop or using [AWS Cloud9](https://aws.amazon.com/cloud9/). If you’re using AWS Cloud9, follow the instructions in the "Setup AWS Cloud9" to create and configure the environment first, otherwise skip to the next section. 5 | 6 | ## Setup AWS Cloud9 7 | AWS Cloud9 is a cloud-based IDE that lets you write, run, and debug your code via just a browser. AWS Cloud9 comes preconfigured with some of AWS dependencies we require to build our application, such ash the AWS CLI tool. 8 | 9 | ### 1. Create a Cloud9 instance 10 | 11 | **Instance type** - Create an AWS Cloud9 environment from the [AWS Management Console](https://console.aws.amazon.com/cloud9) with an instance type of `t3.small or larger`. In our example, we used `m5.xlarge` for adequate memory and CPU to compile and build a large docker image. 12 | 13 | **VPC** - Follow the launch wizard and provide the required name. To interact with an existing EKS cluster in the same region later on, recommend to use the same VPC to your EKS cluster in the Cloud9 environment. Leave the remaining default values as they are. 14 | 15 | **Storage size** - You must increase the Cloud9's EBS volume size (pre-attached to your AWS Cloud9 instance) to 30+ GB, because the default disk space ( 10 GB with ~72% used) is not enough for building a container image. Refer to [Resize an Amazon EBS volume used by an environment](https://docs.aws.amazon.com/cloud9/latest/user-guide/move-environment.html#move-environment-resize) document, download the script `resize.sh` to your cloud9 environment. 
16 | 17 | ```bash 18 | touch resize.sh 19 | # Double click the file name in cloud9 20 | # Copy and paste the content from the official document to your file, save and close it 21 | ``` 22 | Validate the disk size is 10GB currently: 23 | ``` 24 | admin:~/environment $ df -h 25 | Filesystem Size Used Avail Use% Mounted on 26 | devtmpfs 4.0M 0 4.0M 0% /dev 27 | tmpfs 951M 0 951M 0% /dev/shm 28 | tmpfs 381M 5.3M 376M 2% /run 29 | /dev/nvme0n1p1 10G 7.2G 2.9G 72% / 30 | tmpfs 951M 12K 951M 1% /tmp 31 | /dev/nvme0n1p128 10M 1.3M 8.7M 13% /boot/efi 32 | tmpfs 191M 0 191M 0% /run/user/1000 33 | ``` 34 | Increase the disk size: 35 | ```bash 36 | bash resize.sh 30 37 | ``` 38 | ``` 39 | admin:~/environment $ df -h 40 | Filesystem Size Used Avail Use% Mounted on 41 | devtmpfs 4.0M 0 4.0M 0% /dev 42 | tmpfs 951M 0 951M 0% /dev/shm 43 | tmpfs 381M 5.3M 376M 2% /run 44 | /dev/nvme0n1p1 30G 7.3G 23G 25% / 45 | tmpfs 951M 12K 951M 1% /tmp 46 | /dev/nvme0n1p128 10M 1.3M 8.7M 13% /boot/efi 47 | tmpfs 191M 0 191M 0% /run/user/1000 48 | ``` 49 | 50 | ### 2. Install Docker and Buildx if required 51 | 52 | - **Installing Docker** - a Cloud9 EC2 instance comes with a Docker daemon pre-installed. Outside of the Cloud9, your environment may or may not need to install Docker. If needed, follow the instructions in the [Docker Desktop page](https://docs.docker.com/desktop/#download-and-install) to install. 53 | 54 | 55 | - **Installing Buildx** (pre-installed in Cloud9) - To build a single multi-arch Docker image (x86_64 and arm64), we may or may not need to [install an extra Buildx plugin](https://docs.docker.com/build/architecture/#install-buildx) that extends the Docker CLI to support the multi-architecture feature. Docker Buildx is installed by default with a Docker Engine since **version 23.0+**. For an earlier version, it requires you grab a binary from GitHub repository and install it manually, or get it from a separate package. See [docker/buildx README](https://github.com/docker/buildx#manual-download) for more information. 56 | 57 | Once the buildx CLI is available, we can create a builder instance which gives access to the new multi-architecture features.You only have to perform this task once. 58 | ```bash 59 | # create a builder 60 | docker buildx create --name mybuilder --use 61 | # boot up the builder and inspect 62 | docker buildx inspect --bootstrap 63 | 64 | 65 | # list builder instances 66 | # the asterisk (*) next to a builder name indicates the selected builder. 67 | docker buildx ls 68 | ``` 69 | If your builder doesn't support [QEMU](https://docs.docker.com/build/building/multi-platform/#qemu), only limited platform types are supported as below. For example, the current builder instance created in Cloud9 doesn't support QEMU, so we can't build the docker image for the arm64 CPU type yet. 70 | ```bash 71 | NAME/NODE DRIVER/ENDPOINT STATUS BUILDKIT PLATFORMS 72 | default docker 73 | default default running v0.11.6 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/386 74 | mybuilder * docker-container 75 | my_builder0 default running v0.11.6 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/386 76 | ``` 77 | 78 | - **Installing QEMU for Cloud9** - Building multi-platform images under emulation with QEMU is the easiest way to get started if your builder already supports it. However, AWS Cloud9 isn't preconfigured with the [binfmt_misc](https://en.wikipedia.org/wiki/Binfmt_misc) support. We must install compiled QEMU binaries. 
The installations can be easily done via the docker run CLI: 79 | ```bash 80 | docker run --privileged --rm tonistiigi/binfmt --install all 81 | ``` 82 | List the builder instance again. Now we see the full list of platforms are supported,including arm-based CPU: 83 | ```bash 84 | docker buildx ls 85 | 86 | NAME/NODE DRIVER/ENDPOINT STATUS BUILDKIT PLATFORMS 87 | mybuilder * docker-container 88 | mybuilder20 unix:///var/run/docker.sock running v0.13.2 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/amd64/v4, linux/arm64, linux/riscv64, linux/ppc64le, linux/s390x, linux/386, linux/mips64le, linux/mips64, linux/arm/v7, linux/arm/v6 89 | default docker 90 | default default running v0.12.5 linux/amd64, linux/amd64/v2, linux/amd64/v3, linux/amd64/v4, linux/386, linux/arm64, linux/riscv64, linux/ppc64le, linux/s390x, linux/mips64le, linux/mips64, linux/arm/v7, linux/arm/v6 91 | ``` 92 | 93 | ## Build a docker image supporting multi-arch 94 | 95 | In this example, we will create a [spark-benchmark-utility](https://github.com/aws-samples/emr-on-eks-benchmark) container image. We are going to reuse the source code from the [EMR on EKS benchmark Github repo](https://github.com/aws-samples/emr-on-eks-benchmark). 96 | 97 | ### 1. Download the source code from the Github: 98 | ```bash 99 | git clone https://github.com/aws-samples/emr-on-eks-benchmark.git 100 | cd emr-on-eks-benchmark 101 | ``` 102 | 103 | ### 2. Setup required environment variables 104 | 105 | We will build an image to test EMR 6.15's performance. The equivalent versions are Spark 3.4.1 and Hadoop 3.3.4. Change them accordingly if needed. 106 | ```bash 107 | export SPARK_VERSION=3.4.1 108 | export HADOOP_VERSION=3.3.6 109 | ``` 110 | 111 | Log in to your own Amazon ECR registry: 112 | ```bash 113 | export AWS_REGION=us-east-1 114 | export ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) 115 | export ECR_URL=$ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com 116 | 117 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ECR_URL 118 | ``` 119 | 120 | ### 3. Build OSS Spark base image if required 121 | If you want to test open-source Apache Spark's performance, build a base Spark image first. Otherwise skip this step. 122 | ```bash 123 | docker buildx build --platform linux/amd64,linux/arm64 \ 124 | -t $ECR_URL/spark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \ 125 | -f docker/hadoop-aws-3.3.1/Dockerfile \ 126 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} --build-arg SPARK_VERSION=${SPARK_VERSION} --push . 127 | ``` 128 | 129 | ### 4. Get EMR Spark base image from AWS 130 | ```bash 131 | export SRC_ECR_URL=755674844232.dkr.ecr.us-east-1.amazonaws.com 132 | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $SRC_ECR_URL 133 | 134 | docker pull $SRC_ECR_URL/spark/emr-6.15.0:latest 135 | ``` 136 | 137 | 138 | ### 5. Build the Benchmark Utility image 139 | 140 | Build and push the docker image based the OSS Spark engine built before (Step #3): 141 | 142 | ```bash 143 | 144 | docker buildx build --platform linux/amd64,linux/arm64 \ 145 | -t $ECR_URL/eks-spark-benchmark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \ 146 | -f docker/benchmark-util/Dockerfile \ 147 | --build-arg SPARK_BASE_IMAGE=$ECR_URL/spark:${SPARK_VERSION}_hadoop_${HADOOP_VERSION} \ 148 | --push . 
149 | ``` 150 | 151 | Build and push the benchmark docker image based EMR's Spark runtime (Step #4): 152 | 153 | ```bash 154 | docker buildx build --platform linux/amd64,linux/arm64 \ 155 | -t $ECR_URL/eks-spark-benchmark:emr6.15 \ 156 | -f docker/benchmark-util/Dockerfile \ 157 | --build-arg SPARK_BASE_IMAGE=$SRC_ECR_URL/spark/emr-6.15.0:latest \ 158 | --push . 159 | 160 | ``` 161 | 162 | ## Benchmark application based on the docker images built 163 | 164 | Based on the mutli-arch docker images built previously, now you can start to [run benchmark applications](https://github.com/aws-samples/emr-on-eks-benchmark/tree/delta?tab=readme-ov-file#run-benchmark) on both intel and arm-based CPU nodes. 165 | 166 | In Cloud9, the following extra steps are required to configure the environment, before you can submit the applications. 167 | 168 | 1. Install kkubectl/helm/eksctl CLI tools. refer to this [sample scirpt](https://github.com/aws-samples/stream-emr-on-eks/blob/workshop/deployment/app_code/post-deployment.sh) 169 | 170 | 2. Modify the IAM role attached to the Cloud9 EC2 instance, allowing it has enough privilege to assume an EKS cluster's admin role or has the permission to submit jobs against the EKS cluster. 171 | 172 | 3. Upgrade AWS CLI and turn off the AWS managed temporary credentials in Cloud9: 173 | ```bash 174 | curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" 175 | unzip awscliv2.zip 176 | sudo ./aws/install --update 177 | /usr/local/bin/aws cloud9 update-environment --environment-id $C9_PID --managed-credentials-action DISABLE 178 | rm -vf ${HOME}/.aws/credentials 179 | ``` 180 | 181 | 4. Connect to the EKS cluster 182 | ```bash 183 | # a sample connection string 184 | aws eks update-kubeconfig --name YOUR_EKS_CLUSTER_NAME --region us-east-1 --role-arn arn:aws:iam::ACCOUNTID:role/SparkOnEKS-iamrolesclusterAdmin-xxxxxx 185 | 186 | # validate the connection 187 | kubectl get svc 188 | ``` 189 | -------------------------------------------------------------------------------- /content/submit-applications/docs/spark/sparkr.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/sparkr.md -------------------------------------------------------------------------------- /content/submit-applications/docs/spark/sparksql.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/docs/spark/sparksql.md -------------------------------------------------------------------------------- /content/submit-applications/resources/images/pyspark-packaged-example-zip-folder-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/resources/images/pyspark-packaged-example-zip-folder-structure.png -------------------------------------------------------------------------------- /content/submit-applications/resources/pyspark-packaged-dependency-src.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/submit-applications/resources/pyspark-packaged-dependency-src.zip -------------------------------------------------------------------------------- /content/troubleshooting/docs/change-log-level.md: -------------------------------------------------------------------------------- 1 | # **Change Log level for Spark application on EMR on EKS** 2 | 3 | To obtain more detail about their application or job submission, Spark application developers can change the log level of their job to different levels depending on their requirements. Spark uses apache log4j for logging. 4 | 5 | ### Change log level to DEBUG 6 | 7 | ####**Using EMR classification** 8 | Log level of spark applications can be changed using the [EMR spark-log4j configuration classification.](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html) 9 | 10 | **Request** 11 | The `pi.py` application script is from the [spark examples](https://github.com/apache/spark/blob/master/examples/src/main/python/pi.py). EMR on EKS has included the example located at`/usr/lib/spark/examples/src/main` for you to try. 12 | 13 | `spark-log4j` classification can be used to configure values in [log4j.properties](https://github.com/apache/spark/blob/branch-3.2/conf/log4j.properties.template) for EMR releases 6.7.0 or lower , and [log4j2.properties](https://github.com/apache/spark/blob/master/conf/log4j2.properties.template) for EMR releases 6.8.0+ . 14 | ``` 15 | cat > Spark-Python-in-s3-debug-log.json << EOF 16 | { 17 | "name": "spark-python-in-s3-debug-log-classification", 18 | "virtualClusterId": "", 19 | "executionRoleArn": "", 20 | "releaseLabel": "emr-6.2.0-latest", 21 | "jobDriver": { 22 | "sparkSubmitJobDriver": { 23 | "entryPoint": "local:///usr/lib/spark/examples/src/main/python/pi.py", 24 | "entryPointArguments": [ "200" ], 25 | "sparkSubmitParameters": "--conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.memory=2G --conf spark.executor.instances=2" 26 | } 27 | }, 28 | "configurationOverrides": { 29 | "applicationConfiguration": [ 30 | { 31 | "classification": "spark-defaults", 32 | "properties": { 33 | "spark.dynamicAllocation.enabled":"false" 34 | } 35 | }, 36 | { 37 | "classification": "spark-log4j", 38 | "properties": { 39 | "log4j.rootCategory":"DEBUG, console" 40 | } 41 | } 42 | ], 43 | "monitoringConfiguration": { 44 | "cloudWatchMonitoringConfiguration": { 45 | "logGroupName": "/emr-containers/jobs", 46 | "logStreamNamePrefix": "demo" 47 | }, 48 | "s3MonitoringConfiguration": { 49 | "logUri": "s3://joblogs" 50 | } 51 | } 52 | } 53 | } 54 | EOF 55 | 56 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-debug-log.json 57 | 58 | 59 | ``` 60 | 61 | The above request will print DEBUG logs in the spark driver and executor containers. The generated logs will be pushed to S3 and AWS Cloudwatch logs as configured in the request. 62 | 63 | Starting from the version 3.3.0, Spark has been [migrated from log4j1 to log4j2](https://issues.apache.org/jira/browse/SPARK-37814). 
EMR on EKS allows you still write the log4j properties to the same `"classification": "spark-log4j"`, however it now needs to be log4j2.properties, such as 64 | ``` 65 | { 66 | "classification": "spark-log4j", 67 | "properties": { 68 | "rootLogger.level" : "DEBUG" 69 | } 70 | } 71 | 72 | ``` 73 | 74 | ####**Custom log4j properties** 75 | Download log4j properties from [here](https://github.com/apache/spark/blob/master/conf/log4j.properties.template). Edit log4j.properties with log level as required. Save the edited log4j.properties in a mounted volume. In this example log4j.properties is placed in a s3 bucket that is mapped to a [FSx for Lustre filesystem](https://docs.aws.amazon.com/fsx/latest/LustreGuide/what-is.html). 76 | 77 | **Request** 78 | pi.py used in the below request payload is from [spark examples](https://github.com/apache/spark/blob/master/examples/src/main/python/pi.py) 79 | ``` 80 | cat > Spark-Python-in-s3-debug-log.json << EOF 81 | { 82 | "name": "spark-python-in-s3-debug-log", 83 | "virtualClusterId": "", 84 | "executionRoleArn": "", 85 | "releaseLabel": "emr-6.2.0-latest", 86 | "jobDriver": { 87 | "sparkSubmitJobDriver": { 88 | "entryPoint": "s3:///pi.py", 89 | "sparkSubmitParameters": "--conf spark.driver.cores=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=2" 90 | } 91 | }, 92 | "configurationOverrides": { 93 | "applicationConfiguration": [ 94 | { 95 | "classification": "spark-defaults", 96 | "properties": { 97 | "spark.driver.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties", 98 | "spark.executor.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties", 99 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.options.claimName":"fsx-claim", 100 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.mount.path":"/var/data/", 101 | "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkdata.mount.readOnly":"false", 102 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.options.claimName":"fsx-claim", 103 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.mount.path":"/var/data/", 104 | "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkdata.mount.readOnly":"false" 105 | } 106 | } 107 | ], 108 | "monitoringConfiguration": { 109 | "cloudWatchMonitoringConfiguration": { 110 | "logGroupName": "/emr-containers/jobs", 111 | "logStreamNamePrefix": "demo" 112 | }, 113 | "s3MonitoringConfiguration": { 114 | "logUri": "s3://joblogs" 115 | } 116 | } 117 | } 118 | } 119 | EOF 120 | 121 | aws emr-containers start-job-run --cli-input-json file:///Spark-Python-in-s3-debug-log.json 122 | 123 | ``` 124 | 125 | **Configurations of interest:** 126 | Below configuration enables spark driver and executor to pick up the log4j configuration file from ``/var/data/`` folder mounted to the driver and executor containers. 
For guide to mount FSx for Lustre to driver and executor containers - refer to [EMR Containers integration with FSx for Lustre](../../storage/docs/spark/fsx-lustre.md) 127 | 128 | ``` 129 | "spark.driver.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties", 130 | "spark.executor.extraJavaOptions":"-Dlog4j.configuration=file:///var/data/log4j-debug.properties", 131 | 132 | 133 | ``` 134 | 135 | -------------------------------------------------------------------------------- /content/troubleshooting/docs/connect-spark-ui.md: -------------------------------------------------------------------------------- 1 | # **Connect to Spark UI running on the Driver Pod** 2 | 3 | To obtain more detail about their application or monitor their job execution, Spark application developers can connect to Spark-UI running on the Driver Pod. 4 | 5 | Spark UI (Spark history server) is packaged with EMR on EKS out of the box. Alternatively, if you want to see Spark UI immediately after the driver is spun up, you can use the instructions in this page to connect. 6 | 7 | This page shows how to use `kubectl port-forward` to connect to the Job's Driver Pod running in a Kubernetes cluster. This type of connection is useful for debugging purposes. 8 | 9 | **Pre-Requisites** 10 | 11 | * AWS cli should be installed 12 | * "kubectl" should be installed 13 | * If this is the first time you are connecting to your EKS cluster from your machine, you should run `aws eks update-kubeconfig --name --region` to download kubeconfig file and use correct context to talk to API server. 14 | 15 | ### Submitting the job to a virtual cluster 16 | 17 | **Request** 18 | ``` 19 | cat >spark-python.json << EOF 20 | { 21 | "name": "spark-python-in-s3", 22 | "virtualClusterId": "", 23 | "executionRoleArn": "", 24 | "releaseLabel": "emr-6.3.0-latest", 25 | "jobDriver": { 26 | "sparkSubmitJobDriver": { 27 | "entryPoint": "s3:///trip-count.py", 28 | "sparkSubmitParameters": "--conf spark.driver.cores=4 --conf spark.executor.memory=20G --conf spark.driver.memory=20G --conf spark.executor.cores=4" 29 | } 30 | }, 31 | "configurationOverrides": { 32 | "applicationConfiguration": [ 33 | { 34 | "classification": "spark-defaults", 35 | "properties": { 36 | 37 | } 38 | } 39 | ], 40 | "monitoringConfiguration": { 41 | "cloudWatchMonitoringConfiguration": { 42 | "logGroupName": "/emr-containers/jobs", 43 | "logStreamNamePrefix": "demo" 44 | }, 45 | "s3MonitoringConfiguration": { 46 | "logUri": "s3://joblogs" 47 | } 48 | } 49 | } 50 | } 51 | EOF 52 | aws emr-containers start-job-run --cli-input-json file:///spark-python.json 53 | ``` 54 | 55 | 56 | Once the job is submitted successfully, run `kubectl get pods -n -w ` command to watch all the pods, until you observe the driver pod is in the "Running" state. The Driver pod's name usually is in `spark--driver` format. 57 | 58 | 59 | ### Connecting to the Driver Pod 60 | 61 | Spark Driver Pod hosts Spark-UI on port `4040`. However the pod runs within the internal Kubernetes network. To get access to the internal Kubernetes resources, `kubectl` provides a tool ("Port Forwarding") that allows access from your localhost. To get access to the driver pod in your cluster: 62 | 63 | 64 | 1- Run ```kubectl port-forward 4040:4040``` 65 | 66 | The result should be the following: 67 | 68 | 69 | ``` 70 | Forwarding from 127.0.0.1:28015 -> 27017 71 | Forwarding from [::1]:28015 -> 27017 72 | ``` 73 | 74 | 2- Open a browser and type `http://localhost:4040` in the Address bar. 
75 |
76 | You should be able to connect to the Spark UI:
77 |
78 | ![](../resources/screen-shot-spark-ui-driver.png)
79 | ### Consideration
80 |
81 | Long-running Spark jobs, such as Spark streaming applications or large Spark SQL queries, can generate large event logs. Large event logs can quickly use up storage space on running pods, and you may see a blank UI or even OutOfMemory errors when you load persistent UIs. To avoid these issues, we recommend that you either turn on the Spark event log [rolling and compaction feature](https://docs.aws.amazon.com/emr/latest/ManagementGuide/app-history-spark-UI.html#app-history-spark-UI-large-event-logs) (the default emr-container-event-log-dir is /var/log/spark/apps) or write the event logs to an S3 location and parse them with a [self-hosted Spark history server](https://aws.github.io/aws-emr-containers-best-practices/troubleshooting/docs/self-hosted-shs/).
82 |
--------------------------------------------------------------------------------
/content/troubleshooting/docs/eks-cluster-auto-scaler.md:
--------------------------------------------------------------------------------
1 | # **EKS Cluster Auto-Scaler**
2 | Kubernetes scales nodes with the Cluster Autoscaler (CAS). Amazon EKS provides its own implementation of CAS and uses managed node groups to provision nodes.
3 |
4 | ### Logs of EKS Cluster Auto-Scaler
5 |
6 | On AWS, Cluster Autoscaler utilizes Amazon EC2 Auto Scaling groups to provision nodes. This section helps you identify the error message when the autoscaler fails to provision nodes.
7 |
8 | An example scenario where a node group fails because the requested instance type is not supported in certain Availability Zones:
9 | ```
10 | Could not launch On-Demand Instances. Unsupported - Your requested instance type (g4dn.xlarge) is not supported in your requested Availability Zone (ca-central-1d). Please retry your request by not specifying an Availability Zone or choosing ca-central-1a, ca-central-1b. Launching EC2 instance failed.
11 | ```
12 |
13 | The steps to find the logs for the Auto Scaling group are:
14 |
15 | Step 1: Log in to the AWS Console and select `Elastic Kubernetes Service`.
16 |
17 | Step 2: Select the `Compute` tab and select the `NodeGroup` that failed.
18 |
19 | Step 3: Select the `Autoscaling group name` from the NodeGroup's section, which directs you to the `EC2 --> AutoScaling Group` page.
20 |
21 | Step 4: Click the `Activity` tab of the `AutoScaling Group`; the `Activity History` provides the details of the error:
22 | ```
23 | - Status
24 | - Description
25 | - Cause
26 | - Start Time
27 | - End Time
28 | ```
29 | Alternatively, the activities/logs can be retrieved via the CLI as well:
30 | ```bash
31 | aws autoscaling describe-scaling-activities \
32 | --region <region> \
33 | --auto-scaling-group-name <autoscaling-group-name>
34 | ```
35 |
36 | In the above error scenario, the `ca-central-1d` Availability Zone doesn't support `g4dn.xlarge`. The solution is:
37 |
38 | Step 1: Identify the subnets in the Availability Zones that support the GPU instance type. The NodeGroup section lists all the subnets, and you can click each subnet to see which AZ it is deployed in. Alternatively, you can query the supported AZs from the CLI, as shown in the sketch below.
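A sketch of that CLI check, using the Region and instance type from the example error above (`describe-instance-type-offerings` lists the Availability Zones that offer a given instance type):

```bash
# List the Availability Zones in ca-central-1 that offer g4dn.xlarge,
# then pick subnets that live in those zones for the new node group.
aws ec2 describe-instance-type-offerings \
  --region ca-central-1 \
  --location-type availability-zone \
  --filters "Name=instance-type,Values=g4dn.xlarge" \
  --query "InstanceTypeOfferings[].Location" \
  --output text
```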
39 | 40 | Step 2: Create a NodeGroup only in the subnets identified in the above step: 41 | ```bash 42 | aws eks create-nodegroup \ 43 | --region \ 44 | --cluster-name \ 45 | --nodegroup-name \ 46 | --scaling-config minSize=10,maxSize=10,desiredSize=10 \ 47 | --ami-type AL2_x86_64_GPU \ 48 | --node-role \ 49 | --subnets \ 50 | --instance-types g4dn.xlarge \ 51 | --disk-size 52 | ``` -------------------------------------------------------------------------------- /content/troubleshooting/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/troubleshooting/docs/index.md -------------------------------------------------------------------------------- /content/troubleshooting/docs/karpenter.md: -------------------------------------------------------------------------------- 1 | # **Karpenter** 2 | 3 | Karpenter is an open-source cluster autoscaler for Kubernetes (EKS) that automatically provisions new nodes in response to unschedulable pods. Before Karpenter was introduced, EKS would use its implementation of the Cluster Autoscaler (CAS), which relies on managed node groups to provision nodes. 4 | 5 | The challenge with managed node groups is that a node group can only create nodes of a single instance type. In order to provision nodes with different instance types for different workloads, multiple node groups have to be created. Karpenter, on the other hand, can provision nodes of different types by working with the EC2 Fleet API. 6 | The best practices for configuring the Provisioners are documented at https://aws.github.io/aws-eks-best-practices/karpenter/ 7 | 8 | This guide helps you troubleshoot common problems with Karpenter. 9 | 10 | ### Logs of Karpenter Controller 11 | 12 | Karpenter is a custom Kubernetes controller, and the following steps help you find the Karpenter logs. 13 | 14 | Step 1: Identify the namespace where Karpenter is running. In most cases, `helm` is used to deploy the Karpenter packages. The `helm ls` command lists the namespace where Karpenter is installed. 15 | ``` 16 | # Example 17 | 18 | % helm ls --all-namespaces 19 | NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION 20 | karpenter karpenter 1 2023-05-15 14:16:03.726908 -0500 CDT deployed karpenter-v0.27.3 0.27.3 21 | ``` 22 | 23 | Step 2: Set up kubectl 24 | ``` 25 | brew install kubectl 26 | 27 | aws --region eks update-kubeconfig --name 28 | ``` 29 | 30 | Step 3: Check the status of the Karpenter pods 31 | ``` 32 | # kubectl get pods -n 33 | 34 | % kubectl get pods -n karpenter 35 | NAME READY STATUS RESTARTS AGE 36 | karpenter-7b455dccb8-prrzx 1/1 Running 0 7m18s 37 | karpenter-7b455dccb8-x8zv8 1/1 Running 0 7m18s 38 | ``` 39 | 40 | Step 4: The `kubectl logs` command helps you read the Karpenter logs. In the example below, the Karpenter pod logs show that a `t3a.large` instance was launched. 41 | ``` 42 | # kubectl logs -n 43 | 44 | % kubectl logs karpenter-7b455dccb8-prrzx -n karpenter 45 | .. 46 | .. 47 | 48 | 2023-05-15T19:16:20.546Z DEBUG controller discovered region {"commit": "***-dirty", "region": "us-west-2"} 49 | 2023-05-15T19:16:20.666Z DEBUG controller discovered cluster endpoint {"commit": "**-dirty", "cluster-endpoint": "https://******.**.us-west-2.eks.amazonaws.com"} 50 | .. 51 | ..
52 | 2023-05-15T19:16:20.786Z INFO controller.provisioner starting controller {"commit": "**-dirty"} 53 | 2023-05-15T19:16:20.787Z INFO controller.deprovisioning starting controller {"commit": "**-dirty"} 54 | .. 55 | 2023-05-15T19:16:20.788Z INFO controller Starting EventSource {"commit": "**-dirty", "controller": "node", "controllerGroup": "", "controllerKind": "Node", "source": "kind source: *v1.Pod"} 56 | .. 57 | 2023-05-15T20:34:56.718Z INFO controller.provisioner.cloudprovider launched instance {"commit": "d7e22b1-dirty", "provisioner": "default", "id": "i-03146cd4d4152a935", "hostname": "ip-*-*-*-*.us-west-2.compute.internal", "instance-type": "t3a.large", "zone": "us-west-2d", "capacity-type": "on-demand", "capacity": {"cpu":"2","ephemeral-storage":"20Gi","memory":"7577Mi","pods":"35"}} 58 | ``` 59 | 60 | ### Error while decoding JSON: json: unknown field "iamIdentityMappings" 61 | 62 | **Problem** 63 | The cluster creation command from https://karpenter.sh/v0.27.3/getting-started/getting-started-with-karpenter/#3-create-a-cluster throws an error: 64 | ``` 65 | Error: loading config file "karpenter.yaml": error unmarshaling JSON: while decoding JSON: json: unknown field "iamIdentityMappings" 66 | ``` 67 | 68 | **Solution** 69 | The `eksctl` CLI was not able to understand the kind `iamIdentityMappings`. This is because the `eksctl` version is old and its schema doesn't support this kind. 70 | 71 | The solution is to upgrade the `eksctl` CLI and re-run the cluster creation commands: 72 | ```bash 73 | brew upgrade eksctl 74 | ``` -------------------------------------------------------------------------------- /content/troubleshooting/docs/rbac-permissions-errors.md: -------------------------------------------------------------------------------- 1 | # **RBAC Permission Errors** 2 | 3 | The following sections provide solutions to common RBAC authorization errors. 4 | 5 | ### PersistentVolumeClaims is forbidden 6 | 7 | **Error:** 8 | Spark jobs that require creating, listing, or deleting Persistent Volume Claims (PVCs) were not supported before EMR 6.8. Jobs that require these permissions will fail with the exception "persistentvolumeclaims is forbidden". Looking into the driver logs, you may see an error like this: 9 | ``` 10 | persistentvolumeclaims is forbidden. User "system:serviceaccount:emr:emr-containers-sa-spark-client-93ztm12rnjz163mt3rgdb3bjqxqfz1cgvqh1e9be6yr81" cannot create resource "persistentvolumeclaims" in API group "" in namespace "emr". 11 | ``` 12 | You may encounter this error because the default Kubernetes role `emr-containers` is missing the required RBAC permissions. As a result, the `emr-containers` primary role can't dynamically create the necessary permissions for additional roles such as the Spark driver, Spark executor, or Spark client when you submit a job. 13 | 14 | **Solution:** 15 | Add the required permissions to `emr-containers`.
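The rule that grants the `emr-containers` role access to persistent volume claims is shown below as an excerpt from the full `emr-containers.yaml` policy linked underneath; the Spark driver and client roles need a similar rule, and the `tools/k8s-rbac-policies/rbac_patch.py` script in this repository can patch all three roles for you.

```yaml
# Excerpt from tools/k8s-rbac-policies/emr-containers.yaml:
# the PVC permissions that must be present in the emr-containers role.
- apiGroups:
  - ""
  resources:
  - persistentvolumeclaims
  verbs:
  - create
  - list
  - delete
  - patch
  - deletecollection
```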
16 | 17 | Here are the complete RBAC permissions for EMR on EKS: 18 | 19 | * [emr-containers.yaml](https://github.com/aws/aws-emr-containers-best-practices/blob/main/tools/k8s-rbac-policies/emr-containers.yaml) 20 | 21 | You can check whether you have the complete RBAC permissions using the steps below: 22 | ```bash 23 | export NAMESPACE=YOUR_VALUE 24 | kubectl describe role emr-containers -n ${NAMESPACE} 25 | ``` 26 | 27 | If the permissions don't match, proceed to apply the latest permissions: 28 | 29 | ```bash 30 | export NAMESPACE=YOUR_VALUE 31 | kubectl apply -f https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/main/tools/k8s-rbac-policies/emr-containers.yaml -n ${NAMESPACE} 32 | ``` 33 | You can delete the Spark driver and client roles because they will be dynamically created the next time the job is run. 34 | 35 | -------------------------------------------------------------------------------- /content/troubleshooting/docs/self-hosted-shs.md: -------------------------------------------------------------------------------- 1 | # **Self Hosted Spark History Server** 2 | 3 | In this section, you will learn how to self-host the Spark History Server instead of using the Persistent App UI on the AWS Console. 4 | 5 | 1. In your StartJobRun call for EMR on EKS, set the following configurations to point to an S3 bucket where you would like your event logs to go: `spark.eventLog.dir` and `spark.eventLog.enabled`, as such: 6 | 7 | 8 | "configurationOverrides": { 9 | "applicationConfiguration": [{ 10 | "classification": "spark-defaults", 11 | "properties": { 12 | "spark.eventLog.enabled": "true", 13 | "spark.eventLog.dir": "s3://your-bucket-here/some-directory" 14 | ... 15 | 16 | 17 | 2. Take note of the S3 bucket specified in #1, and use it in the instructions in step #3 wherever you are asked for `path_to_eventlog`, making sure it is prefixed with `s3a://`, not `s3://`. An example is `-Dspark.history.fs.logDirectory=s3a://path_to_eventlog`. 18 | 19 | 3. Follow the instructions [here](https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-history.html#monitor-spark-ui-history-local) to launch the Spark History Server using a Docker image. 20 | 21 | 4. After following the above steps, event logs should flow to the specified S3 bucket, and the Docker container should spin up the Spark History Server (which will be available at `127.0.0.1:18080`). This instance of the Spark History Server will pick up and parse event logs from the specified S3 bucket. -------------------------------------------------------------------------------- /content/troubleshooting/docs/where-to-look-for-spark-logs.md: -------------------------------------------------------------------------------- 1 | # **Spark Driver and Executor Logs** 2 | 3 | The status of Spark jobs can be monitored via the [EMR on EKS describe-job-run API](https://docs.aws.amazon.com/cli/latest/reference/emr-containers/describe-job-run.html). 4 | 5 | To monitor job progress and troubleshoot failures, you must configure your jobs to send log information to Amazon S3, Amazon CloudWatch Logs, or both. 6 | 7 | ### Send Spark Logs to S3 8 | 9 | ####**Update the IAM role with S3 write access** 10 | Configure the IAM role passed in the StartJobRun input `executionRoleArn` with access to the S3 buckets.
11 | ```json 12 | { 13 | "Version": "2012-10-17", 14 | "Statement": [ 15 | { 16 | "Effect": "Allow", 17 | "Action": [ 18 | "s3:PutObject", 19 | "s3:GetObject", 20 | "s3:ListBucket" 21 | ], 22 | "Resource": [ 23 | "arn:aws:s3:::my_s3_log_location", 24 | "arn:aws:s3:::my_s3_log_location/*" 25 | ] 26 | } 27 | ] 28 | } 29 | ``` 30 | 31 | ####**Configure the StartJobRun API with S3 buckets** 32 | Configure the `monitoringConfiguration` with `s3MonitoringConfiguration`, and configure the S3 location where the logs should be synced. 33 | 34 | ```json 35 | { 36 | "name": "", 37 | "virtualClusterId": "", 38 | "executionRoleArn": "", 39 | "releaseLabel": "", 40 | "jobDriver": { 41 | 42 | }, 43 | "configurationOverrides": { 44 | "monitoringConfiguration": { 45 | "persistentAppUI": "ENABLED", 46 | "s3MonitoringConfiguration": { 47 | "logUri": "s3://my_s3_log_location" 48 | } 49 | } 50 | } 51 | } 52 | ``` 53 | 54 | ####**Log location of JobRunner, Driver, Executor in S3** 55 | The JobRunner (the pod that does spark-submit), Spark driver, and Spark executor logs can be found in the following S3 locations. 56 | ```text 57 | JobRunner/Spark-Submit/Controller Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${job-runner-pod-id}/(stderr.gz/stdout.gz) 58 | 59 | Driver Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-pod-name}/(stderr.gz/stdout.gz) 60 | 61 | Executor Logs - s3://my_s3_log_location/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-executor-id}/(stderr.gz/stdout.gz) 62 | ``` 63 | 64 | 65 | ### Send Spark Logs to CloudWatch 66 | 67 | ####**Update the IAM role with CloudWatch access** 68 | Configure the IAM role passed in the StartJobRun input `executionRoleArn` with access to CloudWatch Logs streams. 69 | ```json 70 | { 71 | "Version": "2012-10-17", 72 | "Statement": [ 73 | { 74 | "Effect": "Allow", 75 | "Action": [ 76 | "logs:CreateLogStream", 77 | "logs:DescribeLogGroups", 78 | "logs:DescribeLogStreams" 79 | ], 80 | "Resource": [ 81 | "arn:aws:logs:*:*:*" 82 | ] 83 | }, 84 | { 85 | "Effect": "Allow", 86 | "Action": [ 87 | "logs:PutLogEvents" 88 | ], 89 | "Resource": [ 90 | "arn:aws:logs:*:*:log-group:my_log_group_name:log-stream:my_log_stream_prefix/*" 91 | ] 92 | } 93 | ] 94 | } 95 | ``` 96 | 97 | ####**Configure StartJobRun API with CloudWatch** 98 | Configure the `monitoringConfiguration` with `cloudWatchMonitoringConfiguration`, and configure the CloudWatch `logGroupName` and `logStreamNamePrefix` where the logs should be pushed. 99 | 100 | ```json 101 | { 102 | "name": "", 103 | "virtualClusterId": "", 104 | "executionRoleArn": "", 105 | "releaseLabel": "", 106 | "jobDriver": { 107 | 108 | }, 109 | "configurationOverrides": { 110 | "monitoringConfiguration": { 111 | "persistentAppUI": "ENABLED", 112 | "cloudWatchMonitoringConfiguration": { 113 | "logGroupName": "my_log_group_name", 114 | "logStreamNamePrefix": "my_log_stream_prefix" 115 | } 116 | } 117 | } 118 | } 119 | ``` 120 | 121 | ####**Log location of JobRunner, Driver, Executor** 122 | The JobRunner (the pod that does spark-submit), Spark driver, and Spark executor logs can be found in the following AWS CloudWatch locations.
123 | 124 | ```text 125 | JobRunner/Spark-Submit/Controller Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${job-runner-pod-id}/(stderr.gz/stdout.gz) 126 | 127 | Driver Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-pod-name}/(stderr.gz/stdout.gz) 128 | 129 | Executor Logs - ${my_log_group_name}/${my_log_stream_prefix}/${virtual-cluster-id}/jobs/${job-id}/containers/${spark-application-id}/${spark-job-id-driver-executor-id}/(stderr.gz/stdout.gz) 130 | ``` 131 | 132 | -------------------------------------------------------------------------------- /content/troubleshooting/resources/screen-shot-spark-ui-driver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/aws-emr-containers-best-practices/c92bd642f323736d90dc4c85eaee2840f87d37e9/content/troubleshooting/resources/screen-shot-spark-ui-driver.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "EMR Containers Best Practices Guides" 2 | repo_name: "aws/aws-emr-containers-best-practices" 3 | repo_url: "https://github.com/aws/aws-emr-containers-best-practices" 4 | docs_dir: "content" 5 | theme: 6 | name: material 7 | features: 8 | - navigation.tabs 9 | nav: 10 | - Guides: 11 | - Introduction: 'index.md' 12 | - EMR on EKS(AWS Outposts): 'outposts/emr-containers-on-outposts.md' 13 | - Security: 14 | - Encryption: 'security/docs/spark/encryption.md' 15 | - Data Encryption: 'security/docs/spark/data-encryption.md' 16 | - Network Security: 'security/docs/spark/network-security.md' 17 | - Secrets: 'security/docs/spark/secrets.md' 18 | - Chain IAM Roles: 'security/docs/spark/chain-role.md' 19 | - Submit applications: 20 | - Pyspark: 'submit-applications/docs/spark/pyspark.md' 21 | - Build Multi-arch Docker Image: 'submit-applications/docs/spark/multi-arch-image.md' 22 | - Storage: 23 | - EBS: 'storage/docs/spark/ebs.md' 24 | - FSx for Lustre: 'storage/docs/spark/fsx-lustre.md' 25 | - Instance Store: 'storage/docs/spark/instance-store.md' 26 | - Metastore Integration: 27 | - Hive Metastore: 'metastore-integrations/docs/hive-metastore.md' 28 | - AWS Glue: 'metastore-integrations/docs/aws-glue.md' 29 | - Troubleshooting: 30 | - Spark Log Location in S3 and CloudWatch: 'troubleshooting/docs/where-to-look-for-spark-logs.md' 31 | - Change Log Level: 'troubleshooting/docs/change-log-level.md' 32 | - Connect to Spark UI: 'troubleshooting/docs/connect-spark-ui.md' 33 | - Connect to Spark UI via Reverse Proxy: 'troubleshooting/docs/reverse-proxy-sparkui.md' 34 | - Self Hosted SHS: 'troubleshooting/docs/self-hosted-shs.md' 35 | - RBAC Permissions error: 'troubleshooting/docs/rbac-permissions-errors.md' 36 | - EKS Cluster AutoScaler: 'troubleshooting/docs/eks-cluster-auto-scaler.md' 37 | - EKS Karpenter: 'troubleshooting/docs/karpenter.md' 38 | - Node Placement: 39 | - EKS Node placement: 'node-placement/docs/eks-node-placement.md' 40 | - EKS Fargate Node placement: 'node-placement/docs/fargate-node-placement.md' 41 | - Performance: 42 | - Dynamic Resource Allocation: 'performance/docs/dra.md' 43 | - BinPacking: 'performance/docs/binpack.md' 44 | - Karpenter: 'performance/docs/karpenter.md' 45 | - EKS Best Practices: 'best-practices-and-recommendations/eks-best-practices.md' 46 | - Cost Tracking and 
Optimization: 47 | - Cost Optimization using EC2 Spot Instances: 'cost-optimization/docs/cost-optimization.md' 48 | - Node Decommission: 'cost-optimization/docs/node-decommission.md' 49 | - Cost Tracking: 'cost-optimization/docs/cost-tracking.md' 50 | - Scalability: 51 | - Glossary and Terms: 'scalability/docs/scalaiblity-glossary.md' 52 | - Known Factors for Spark Operator: 'scalability/docs/known-factors-spark-operator.md' 53 | - Known Factors for StartJobRun API: 'scalability/docs/known-factors-start-job-run.md' 54 | - Recommendation for StartJobRun: 'scalability/docs/load-test-for-start-job-run-api.md' 55 | - Recommendation for Spark Operator: 'scalability/docs/load-test-for-spark-operator.md' 56 | - Grafana Dashboard: 'scalability/docs/graphana-dashboard.md' 57 | markdown_extensions: 58 | - toc: 59 | permalink: true 60 | - admonition 61 | - codehilite 62 | - tables 63 | -------------------------------------------------------------------------------- /tools/emr-vertical-autoscaling/grafana-dashboard-model.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "datasource", 8 | "uid": "grafana" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "description": "EMR Vertical Autoscaling", 25 | "editable": true, 26 | "fiscalYearStartMonth": 0, 27 | "gnetId": 14588, 28 | "graphTooltip": 0, 29 | "id": 1, 30 | "links": [], 31 | "liveNow": false, 32 | "panels": [ 33 | { 34 | "aliasColors": {}, 35 | "bars": false, 36 | "dashLength": 10, 37 | "dashes": false, 38 | "datasource": { 39 | "type": "prometheus", 40 | "uid": "PBFA97CFB590B2093" 41 | }, 42 | "description": "Shows the amount of memory being provisioned to EMR spark applications as compared to the actual utilization and the recommendations computed by EMR vertical autoscaling", 43 | "fill": 0, 44 | "fillGradient": 0, 45 | "gridPos": { 46 | "h": 12, 47 | "w": 24, 48 | "x": 0, 49 | "y": 0 50 | }, 51 | "hiddenSeries": false, 52 | "id": 3, 53 | "legend": { 54 | "avg": false, 55 | "current": false, 56 | "max": false, 57 | "min": false, 58 | "show": true, 59 | "total": false, 60 | "values": false 61 | }, 62 | "lines": true, 63 | "linewidth": 1, 64 | "links": [], 65 | "nullPointMode": "null", 66 | "options": { 67 | "alertThreshold": true 68 | }, 69 | "percentage": false, 70 | "pluginVersion": "9.4.3", 71 | "pointradius": 2, 72 | "points": false, 73 | "renderer": "flot", 74 | "seriesOverrides": [], 75 | "spaceLength": 10, 76 | "stack": false, 77 | "steppedLine": false, 78 | "targets": [ 79 | { 80 | "datasource": { 81 | "type": "prometheus", 82 | "uid": "PBFA97CFB590B2093" 83 | }, 84 | "editorMode": "code", 85 | "exemplar": true, 86 | "expr": "max(kube_customresource_vpa_spark_rec_memory_target{signature=\"$signature\", namespace=\"$namespace\"})", 87 | "format": "time_series", 88 | "interval": "", 89 | "intervalFactor": 1, 90 | "legendFormat": "Recommended Memory", 91 | "range": true, 92 | "refId": "D" 93 | }, 94 | { 95 | "datasource": { 96 | "type": "prometheus", 97 | "uid": "PBFA97CFB590B2093" 98 | }, 99 | "editorMode": "code", 100 | "exemplar": true, 101 | "expr": "max(container_memory_working_set_bytes{container=\"spark-kubernetes-executor\", namespace=\"$namespace\"} * on 
(namespace, pod) group_left() kube_pod_labels{label_emr_containers_amazonaws_com_dynamic_sizing_signature=\"$signature\", namespace=\"$namespace\"})", 102 | "format": "time_series", 103 | "interval": "", 104 | "intervalFactor": 1, 105 | "legendFormat": "Actual Utilization", 106 | "range": true, 107 | "refId": "C" 108 | }, 109 | { 110 | "datasource": { 111 | "type": "prometheus", 112 | "uid": "PBFA97CFB590B2093" 113 | }, 114 | "editorMode": "code", 115 | "expr": "max(kube_pod_container_resource_requests{container=\"spark-kubernetes-executor\", resource=\"memory\", namespace=\"$namespace\"} * on (namespace, pod) group_left() kube_pod_labels{label_emr_containers_amazonaws_com_dynamic_sizing_signature=\"$signature\", namespace=\"$namespace\"})", 116 | "hide": false, 117 | "legendFormat": "Provisioned Memory", 118 | "range": true, 119 | "refId": "A" 120 | } 121 | ], 122 | "thresholds": [], 123 | "timeRegions": [], 124 | "title": "EMR Vertical Autoscaling - Provisioned Memory, Utilization and Recommendation", 125 | "tooltip": { 126 | "shared": true, 127 | "sort": 0, 128 | "value_type": "individual" 129 | }, 130 | "type": "graph", 131 | "xaxis": { 132 | "mode": "time", 133 | "show": true, 134 | "values": [] 135 | }, 136 | "yaxes": [ 137 | { 138 | "$$hashKey": "object:420", 139 | "format": "bytes", 140 | "label": "Bytes", 141 | "logBase": 1, 142 | "show": true 143 | }, 144 | { 145 | "$$hashKey": "object:421", 146 | "format": "short", 147 | "logBase": 1, 148 | "show": true 149 | } 150 | ], 151 | "yaxis": { 152 | "align": false 153 | } 154 | } 155 | ], 156 | "refresh": false, 157 | "revision": 1, 158 | "schemaVersion": 38, 159 | "style": "dark", 160 | "tags": [ 161 | "Autoscaling", 162 | "VPA", 163 | "EMR" 164 | ], 165 | "templating": { 166 | "list": [ 167 | { 168 | "current": { 169 | "selected": false, 170 | "text": "q1-v2.4", 171 | "value": "q1-v2.4" 172 | }, 173 | "datasource": { 174 | "type": "prometheus", 175 | "uid": "PBFA97CFB590B2093" 176 | }, 177 | "definition": "label_values(kube_customresource_vpa_spark_rec_memory_target, signature)", 178 | "hide": 0, 179 | "includeAll": false, 180 | "label": "Signature", 181 | "multi": false, 182 | "name": "signature", 183 | "options": [], 184 | "query": { 185 | "query": "label_values(kube_customresource_vpa_spark_rec_memory_target, signature)", 186 | "refId": "StandardVariableQuery" 187 | }, 188 | "refresh": 2, 189 | "regex": "", 190 | "skipUrlSync": false, 191 | "sort": 1, 192 | "tagValuesQuery": "", 193 | "tagsQuery": "", 194 | "type": "query", 195 | "useTags": false 196 | }, 197 | { 198 | "current": { 199 | "selected": false, 200 | "text": "emr", 201 | "value": "emr" 202 | }, 203 | "datasource": { 204 | "type": "prometheus", 205 | "uid": "PBFA97CFB590B2093" 206 | }, 207 | "definition": "label_values(kube_customresource_vpa_spark_rec_memory_target, namespace)", 208 | "description": "The K8s Namespace where the EMR job was submitted", 209 | "hide": 0, 210 | "includeAll": false, 211 | "label": "Namespace", 212 | "multi": false, 213 | "name": "namespace", 214 | "options": [], 215 | "query": { 216 | "query": "label_values(kube_customresource_vpa_spark_rec_memory_target, namespace)", 217 | "refId": "StandardVariableQuery" 218 | }, 219 | "refresh": 2, 220 | "regex": "", 221 | "skipUrlSync": false, 222 | "sort": 1, 223 | "type": "query" 224 | } 225 | ] 226 | }, 227 | "time": { 228 | "from": "2023-05-01T10:26:21.317Z", 229 | "to": "2023-05-01T10:38:18.180Z" 230 | }, 231 | "timepicker": { 232 | "refresh_intervals": [ 233 | "5s", 234 | "10s", 235 | "30s", 236 | 
"1m", 237 | "5m", 238 | "15m", 239 | "30m", 240 | "1h" 241 | ], 242 | "time_options": [ 243 | "5m", 244 | "15m", 245 | "1h", 246 | "3h", 247 | "6h", 248 | "12h", 249 | "24h", 250 | "2d", 251 | "7d", 252 | "14d" 253 | ] 254 | }, 255 | "timezone": "browser", 256 | "title": "EMR Vertical Autoscaling", 257 | "uid": "qQkYVZBVz", 258 | "version": 7, 259 | "weekStart": "" 260 | } -------------------------------------------------------------------------------- /tools/emr-vertical-autoscaling/prometheus-helm-values.yaml: -------------------------------------------------------------------------------- 1 | kube-state-metrics: 2 | image: 3 | registry: public.ecr.aws 4 | repository: bitnami/kube-state-metrics 5 | tag: "2.8.1" 6 | sha: "" 7 | pullPolicy: Always 8 | extraArgs: 9 | - --custom-resource-state-config 10 | - | 11 | spec: 12 | resources: 13 | - groupVersionKind: 14 | group: autoscaling.k8s.io 15 | kind: "VerticalPodAutoscaler" 16 | version: "v1" 17 | labelsFromPath: 18 | verticalpodautoscaler: [metadata, name] 19 | namespace: [metadata, namespace] 20 | signature: [metadata, labels, "emr-containers.amazonaws.com/dynamic.sizing.signature"] 21 | metrics: 22 | - name: vpa_spark_rec_memory_lower 23 | help: "VPA recommended memory - lower bound" 24 | each: 25 | type: Gauge 26 | gauge: 27 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", lowerBound] 28 | valueFrom: [memory] 29 | - name: vpa_spark_rec_memory_upper 30 | help: "VPA recommended memory - upper bound" 31 | each: 32 | type: Gauge 33 | gauge: 34 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", upperBound] 35 | valueFrom: [memory] 36 | - name: vpa_spark_rec_memory_target 37 | help: "VPA recommended memory - target" 38 | each: 39 | type: Gauge 40 | gauge: 41 | path: [status, recommendation, containerRecommendations, "[containerName=spark-kubernetes-executor]", target] 42 | valueFrom: [memory] 43 | rbac: 44 | create: true 45 | useClusterRole: true 46 | extraRules: 47 | - apiGroups: ["autoscaling.k8s.io"] 48 | resources: ["verticalpodautoscalers"] 49 | verbs: ["list", "watch"] 50 | - apiGroups: ["apiextensions.k8s.io"] 51 | resources: ["customresourcedefinitions"] 52 | verbs: ["list", "watch"] 53 | metricLabelsAllowlist: 54 | - "pods=[emr-containers.amazonaws.com/dynamic.sizing.signature]" 55 | -------------------------------------------------------------------------------- /tools/k8s-rbac-policies/emr-containers.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: emr-containers 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - namespaces 10 | verbs: 11 | - get 12 | - apiGroups: 13 | - "" 14 | resources: 15 | - serviceaccounts 16 | - services 17 | - configmaps 18 | - events 19 | - pods 20 | - pods/log 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | - describe 26 | - create 27 | - edit 28 | - delete 29 | - deletecollection 30 | - annotate 31 | - patch 32 | - label 33 | - apiGroups: 34 | - "" 35 | resources: 36 | - secrets 37 | verbs: 38 | - create 39 | - patch 40 | - delete 41 | - watch 42 | - apiGroups: 43 | - apps 44 | resources: 45 | - statefulsets 46 | - deployments 47 | verbs: 48 | - get 49 | - list 50 | - watch 51 | - describe 52 | - create 53 | - edit 54 | - delete 55 | - annotate 56 | - patch 57 | - label 58 | - apiGroups: 59 | - batch 60 | resources: 61 | - jobs 62 | verbs: 63 | - get 64 | - list 65 | 
- watch 66 | - describe 67 | - create 68 | - edit 69 | - delete 70 | - annotate 71 | - patch 72 | - label 73 | - apiGroups: 74 | - extensions 75 | - networking.k8s.io 76 | resources: 77 | - ingresses 78 | verbs: 79 | - get 80 | - list 81 | - watch 82 | - describe 83 | - create 84 | - edit 85 | - delete 86 | - annotate 87 | - patch 88 | - label 89 | - apiGroups: 90 | - rbac.authorization.k8s.io 91 | resources: 92 | - roles 93 | - rolebindings 94 | verbs: 95 | - get 96 | - list 97 | - watch 98 | - describe 99 | - create 100 | - edit 101 | - delete 102 | - deletecollection 103 | - annotate 104 | - patch 105 | - label 106 | - apiGroups: 107 | - "" 108 | resources: 109 | - persistentvolumeclaims 110 | verbs: 111 | - create 112 | - list 113 | - delete 114 | - patch 115 | - deletecollection 116 | -------------------------------------------------------------------------------- /tools/k8s-rbac-policies/rbac_patch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess as sp 3 | import tempfile as temp 4 | import json 5 | import argparse 6 | import uuid 7 | 8 | 9 | def delete_if_exists(dictionary: dict, key: str): 10 | if dictionary.get(key, None) is not None: 11 | del dictionary[key] 12 | 13 | 14 | def doTerminalCmd(cmd): 15 | with temp.TemporaryFile() as f: 16 | process = sp.Popen(cmd, stdout=f, stderr=f) 17 | process.wait() 18 | f.seek(0) 19 | msg = f.read().decode() 20 | return msg 21 | 22 | 23 | def patchRole(roleName, namespace, extraRules, skipConfirmation=False): 24 | cmd = f"kubectl get role {roleName} -n {namespace} --output json".split(" ") 25 | msg = doTerminalCmd(cmd) 26 | if "(NotFound)" in msg and "Error" in msg: 27 | print(msg) 28 | return False 29 | role = json.loads(msg) 30 | rules = role["rules"] 31 | rulesToAssign = extraRules[::] 32 | passedRules = [] 33 | for rule in rules: 34 | apiGroups = set(rule["apiGroups"]) 35 | resources = set(rule["resources"]) 36 | verbs = set(rule["verbs"]) 37 | for extraRule in extraRules: 38 | passes = 0 39 | apiGroupsExtra = set(extraRule["apiGroups"]) 40 | resourcesExtra = set(extraRule["resources"]) 41 | verbsExtra = set(extraRule["verbs"]) 42 | passes += len(apiGroupsExtra.intersection(apiGroups)) >= len(apiGroupsExtra) 43 | passes += len(resourcesExtra.intersection(resources)) >= len(resourcesExtra) 44 | passes += len(verbsExtra.intersection(verbs)) >= len(verbsExtra) 45 | if passes >= 3: 46 | if extraRule not in passedRules: 47 | passedRules.append(extraRule) 48 | if extraRule in rulesToAssign: 49 | rulesToAssign.remove(extraRule) 50 | break 51 | prompt_text = "Apply Changes?" 52 | if len(rulesToAssign) == 0: 53 | print(f"The role {roleName} seems to already have the necessary permissions!") 54 | prompt_text = "Proceed anyways?" 55 | for ruleToAssign in rulesToAssign: 56 | role["rules"].append(ruleToAssign) 57 | delete_if_exists(role, "creationTimestamp") 58 | delete_if_exists(role, "resourceVersion") 59 | delete_if_exists(role, "uid") 60 | new_role = json.dumps(role, indent=3) 61 | uid = uuid.uuid4() 62 | filename = f"Role-{roleName}-New_Permissions-{uid}-TemporaryFile.json" 63 | try: 64 | with open(filename, "w+") as f: 65 | f.write(new_role) 66 | f.flush() 67 | prompt = "y" 68 | if not skipConfirmation: 69 | prompt = input( 70 | doTerminalCmd(f"kubectl diff -f {filename}".split(" ")) + f"\n{prompt_text} y/n: " 71 | ).lower().strip() 72 | while prompt != "y" and prompt != "n": 73 | prompt = input("Please make a valid selection. 
y/n: ").lower().strip() 74 | if prompt == "y": 75 | print(doTerminalCmd(f"kubectl apply -f {filename}".split(" "))) 76 | except Exception as e: 77 | print(e) 78 | os.remove(f"./{filename}") 79 | 80 | 81 | if __name__ == '__main__': 82 | parser = argparse.ArgumentParser() 83 | parser.add_argument("-n", "--namespace", 84 | help="Namespace of the Role. By default its the VirtualCluster's namespace", 85 | required=True, 86 | dest="namespace" 87 | ) 88 | 89 | parser.add_argument("-p", "--no-prompt", 90 | help="Applies the patches without asking first", 91 | dest="no_prompt", 92 | default=False, 93 | action="store_true" 94 | ) 95 | args = parser.parse_args() 96 | 97 | emrRoleRules = [ 98 | { 99 | "apiGroups": [""], 100 | "resources": ["persistentvolumeclaims"], 101 | "verbs": ["list", "create", "delete"] 102 | } 103 | 104 | ] 105 | 106 | driverRoleRules = [ 107 | { 108 | "apiGroups": [""], 109 | "resources": ["persistentvolumeclaims"], 110 | "verbs": ["list", "create", "delete"] 111 | }, 112 | { 113 | "apiGroups": [""], 114 | "resources": ["services"], 115 | "verbs": ["get", "list", "describe", "create", "delete", "watch"] 116 | } 117 | ] 118 | 119 | clientRoleRules = [ 120 | { 121 | "apiGroups": [""], 122 | "resources": ["persistentvolumeclaims"], 123 | "verbs": ["list", "create", "delete"] 124 | } 125 | ] 126 | 127 | patchRole("emr-containers", args.namespace, emrRoleRules, args.no_prompt) 128 | patchRole("emr-containers-role-spark-driver", args.namespace, driverRoleRules, args.no_prompt) 129 | patchRole("emr-containers-role-spark-client", args.namespace, clientRoleRules, args.no_prompt) -------------------------------------------------------------------------------- /tools/start-job-run-converter/README.md: -------------------------------------------------------------------------------- 1 | # start-job-run converter 2 | This tool can be used to migrate spark-submit commands in a script to **aws emr-containers start-job-run** 3 | and save the result to a new file with _converted suffix. 
4 | 5 | Supported arguments: 6 | ``` 7 | -h, --help show this help message and exit 8 | --file FILE the input spark-submit script file 9 | --name NAME The name of the job run 10 | --virtual-cluster-id VIRTUAL_CLUSTER_ID 11 | The virtual cluster ID for which the job run request is submitted 12 | --client-token CLIENT_TOKEN 13 | The client idempotency token of the job run request 14 | --execution-role-arn EXECUTION_ROLE_ARN 15 | The execution role ARN for the job run 16 | --release-label RELEASE_LABEL 17 | The Amazon EMR release version to use for the job run 18 | --configuration-overrides CONFIGURATION_OVERRIDES 19 | The configuration overrides for the job run 20 | --tags TAGS The tags assigned to job runs 21 | ``` 22 | 23 | ##Run the tool 24 | 25 | ``` 26 | startJobRunConverter.py \ 27 | --file ./submit_script.sh \ 28 | --virtual-cluster-id \ 29 | --name emreks-test-job \ 30 | --execution-role-arn \ 31 | --release-label emr-6.4.0-latest \ 32 | --tags KeyName1=string \ 33 | --configuration-overrides '{ 34 | "monitoringConfiguration": { 35 | "cloudWatchMonitoringConfiguration": { 36 | "logGroupName": "emrekstest", 37 | "logStreamNamePrefix": "emreks_log_stream" 38 | }, 39 | "s3MonitoringConfiguration": { 40 | "logUri": "s3://" 41 | } 42 | } 43 | }' 44 | ``` 45 | 46 | ###Example 1 47 | Below spark-submit command in submit_script.sh 48 | ``` 49 | spark-submit --deploy-mode cluster \ 50 | --conf spark.executor.instances=2 \ 51 | --conf spark.executor.memory=2G \ 52 | --conf spark.executor.cores=2 \ 53 | --conf spark.driver.cores=1 \ 54 | --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \ 55 | --verbose \ 56 | s3:///health_violations.py \ 57 | --data_source s3:///food_establishment_data.csv \ 58 | --output_uri s3:///myOutputFolder 59 | ``` 60 | 61 | is converted to below in submit_script.sh_converted 62 | 63 | ``` 64 | #spark-submit --deploy-mode cluster \ 65 | #--conf spark.executor.instances=2 \ 66 | #--conf spark.executor.memory=2G \ 67 | #--conf spark.executor.cores=2 \ 68 | #--conf spark.driver.cores=1 \ 69 | #--conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \ 70 | #--verbose \ 71 | #s3:///health_violations.py \ 72 | #--data_source s3:///food_establishment_data.csv \ 73 | #--output_uri s3:///myOutputFolder 74 | 75 | # ----- Auto converted by startJobRunConverter.py ----- 76 | aws emr-containers start-job-run \ 77 | --name emreks-test-job \ 78 | --virtual-cluster-id \ 79 | --execution-role-arn \ 80 | --release-label emr-6.4.0-latest \ 81 | --configuration-overrides '{ 82 | "monitoringConfiguration": { 83 | "cloudWatchMonitoringConfiguration": { 84 | "logGroupName": "emrekstest", 85 | "logStreamNamePrefix": "emreks_log_stream" 86 | }, 87 | "s3MonitoringConfiguration": { 88 | "logUri": "s3://" 89 | } 90 | } 91 | }' \ 92 | --tags KeyName1=string \ 93 | --job-driver '{ 94 | "sparkSubmitJobDriver": { 95 | "entryPoint": "s3:///health_violations.py", 96 | "entryPointArguments": [ 97 | "--data_source", 98 | "s3:///food_establishment_data.csv", 99 | "--output_uri", 100 | "s3:///myOutputFolder" 101 | ], 102 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1 --conf \"spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps\" --verbose" 103 | } 104 | }' 105 | ``` 106 | >As you can see in the example, the original spark-submit command is kept as comments 107 | 108 | ###Example 2 109 | ``` 110 | 
EXECMEM=2G 111 | EXEC_INST=2 112 | REGION=us-east-2 113 | spark-submit --deploy-mode cluster \ 114 | --conf spark.executor.instances=$EXEC_INST --conf spark.executor.memory=$EXECMEM --conf spark.executor.cores=2 --conf spark.driver.cores=1 \ 115 | s3:///wordcount.py s3:///wordcount_output $REGION 116 | ``` 117 | 118 | is converted to below in submit_script.sh_converted 119 | 120 | ``` 121 | 122 | EXECMEM=2G 123 | EXEC_INST=2 124 | REGION=us-east-2 125 | #spark-submit --deploy-mode cluster \ 126 | #--conf spark.executor.instances=$EXEC_INST --conf spark.executor.memory=$EXECMEM --conf spark.executor.cores=2 --conf spark.driver.cores=1 \ 127 | #s3:///wordcount.py s3:///wordcount_output $REGION 128 | 129 | # ----- Auto converted by startJobRunConverter.py ----- 130 | aws emr-containers start-job-run \ 131 | --name emreks-test-job \ 132 | --virtual-cluster-id \ 133 | --execution-role-arn \ 134 | --release-label emr-6.4.0-latest \ 135 | --configuration-overrides '{ 136 | "monitoringConfiguration": { 137 | "cloudWatchMonitoringConfiguration": { 138 | "logGroupName": "emrekstest", 139 | "logStreamNamePrefix": "emreks_log_stream" 140 | }, 141 | "s3MonitoringConfiguration": { 142 | "logUri": "s3://" 143 | } 144 | } 145 | }' \ 146 | --tags KeyName1=string \ 147 | --job-driver '{ 148 | "sparkSubmitJobDriver": { 149 | "entryPoint": "s3:///wordcount.py", 150 | "entryPointArguments": [ 151 | "s3:///wordcount_output", 152 | "'"$REGION"'" 153 | ], 154 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances='"$EXEC_INST"' --conf spark.executor.memory='"$EXECMEM"' --conf spark.executor.cores=2 --conf spark.driver.cores=1" 155 | } 156 | }' 157 | ``` 158 | >In bash shell, single quote won't expand variables. The tool can correctly handle the variables using double quote. 159 | 160 | ##Wait for completion 161 | One difference between spark-submit and start-job-run is that spark-submit is waiting for the spark job to complete 162 | but start-job-run is async. A wait_for_completion() bash shell function can be manually appended to the converted 163 | command if needed. 164 | 165 | ``` 166 | function wait_for_completion() { 167 | cat < /dev/stdin|jq -r '[.id, .virtualClusterId]|join(" ")'| { read id virtualClusterId; echo id=$id; echo virtualClusterId=$virtualClusterId; while [ true ] 168 | do 169 | sleep 10 170 | state=$(aws emr-containers describe-job-run --id $id --virtual-cluster-id $virtualClusterId|jq -r '.jobRun.state') 171 | echo "$(date) job run state: $state" 172 | if [ "$state" = "COMPLETED" ]; then 173 | echo "job run id: $id completed" 174 | break 175 | elif [ "$state" = "FAILED" ]; then 176 | echo "job run id: $id failed. Exiting..." 177 | exit 1 178 | fi 179 | done; } 180 | } 181 | ``` 182 | >jq tool is required for json parsing. 183 | 184 | To use it, append wait_for_completion to the end of the command. 
185 | ``` 186 | # ----- Auto converted by startJobRunConverter.py ----- 187 | aws emr-containers start-job-run \ 188 | --name emreks-test-job \ 189 | --virtual-cluster-id \ 190 | --execution-role-arn \ 191 | --release-label emr-6.4.0-latest \ 192 | --configuration-overrides '{ 193 | "monitoringConfiguration": { 194 | "cloudWatchMonitoringConfiguration": { 195 | "logGroupName": "emrekstest", 196 | "logStreamNamePrefix": "emreks_log_stream" 197 | }, 198 | "s3MonitoringConfiguration": { 199 | "logUri": "s3://" 200 | } 201 | } 202 | }' \ 203 | --tags KeyName1=string,k2=v2 \ 204 | --job-driver '{ 205 | "sparkSubmitJobDriver": { 206 | "entryPoint": "s3:///wordcount.py", 207 | "entryPointArguments": [ 208 | "s3:///wordcount_output", 209 | "'"$REGION"'" 210 | ], 211 | "sparkSubmitParameters": "--deploy-mode cluster --conf spark.executor.instances='"$EXEC_INST"' --conf spark.executor.memory='"$EXECMEM"' --conf spark.executor.cores=2 --conf spark.driver.cores=1" 212 | } 213 | }'|wait_for_completion 214 | ``` 215 | -------------------------------------------------------------------------------- /tools/start-job-run-converter/startJobRunConverter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import json 4 | import os 5 | import re 6 | import shlex 7 | import sys 8 | 9 | OUTPUT_FILE_SUFFIX = "_converted" 10 | AUTO_CONVERT_MSG = "\n# ----- Auto converted by {} -----\n" 11 | 12 | SPARK_SUBMIT = "spark-submit" 13 | SPARK_UNARY_ARGUMENTS = {"-v", "--verbose"} 14 | CONVERTER_ARGUMENTS = {"file"} 15 | 16 | 17 | def add_quote(data, quote="\"", guard=" "): 18 | if isinstance(data, list): 19 | data = [quote + d + quote if guard in d and not d.startswith(quote) else d for d in data] 20 | elif isinstance(data, str): 21 | data = quote + data + quote if guard in data and not data.startswith(quote) else data 22 | return data 23 | 24 | 25 | # In argparse, any internal - characters will be converted to _ characters to make sure the string 26 | # is a valid attribute name e.g. execution_role_arn. 27 | # This function change it back, e.g. execution-role-arn 28 | def normalize_arg_key(arg): 29 | return arg.replace("_", "-") 30 | 31 | 32 | # In bash shell, single quote won't expand a variable. Need to close the single quotes, 33 | # insert variable, and then re-enter again. 34 | def convert_matched_var(match_obj): 35 | if match_obj.group() is not None: 36 | return "'\"" + match_obj.group() + "\"'" 37 | return "" 38 | 39 | # This function assumes a valid spark-submit command, otherwise it throws exception 40 | def generate_start_job_cmd(spark_cmd_line, start_job_args): 41 | start_job_cmd = "aws emr-containers start-job-run \\\n" 42 | start_idx, curr_idx = 0, 0 43 | while curr_idx < len(spark_cmd_line): 44 | curr_arg = spark_cmd_line[curr_idx].strip() 45 | if curr_arg: 46 | if SPARK_SUBMIT in curr_arg: 47 | start_idx = curr_idx + 1 48 | elif curr_arg.startswith("-"): 49 | if curr_arg not in SPARK_UNARY_ARGUMENTS: 50 | curr_idx += 1 # the argument is a pair e.g. 
--num-executors 50 51 | else: 52 | break 53 | curr_idx += 1 54 | spark_submit_parameters = add_quote(spark_cmd_line[start_idx: curr_idx]) 55 | entrypoint_location = spark_cmd_line[curr_idx] 56 | entrypoint_arguments = add_quote(spark_cmd_line[curr_idx + 1:]) 57 | job_driver = {"sparkSubmitJobDriver": { 58 | "entryPoint": entrypoint_location, 59 | "entryPointArguments": entrypoint_arguments, 60 | "sparkSubmitParameters": " ".join(spark_submit_parameters) 61 | }} 62 | 63 | res_str = add_quote(json.dumps(job_driver, indent=4), quote="'", guard="\n") 64 | res_str = re.sub(r"\${?[0-9a-zA-Z_]+}?", convert_matched_var, res_str) 65 | start_job_args["job_driver"] = res_str + "\n" 66 | 67 | for k, v in start_job_args.items(): 68 | if k not in CONVERTER_ARGUMENTS and v: 69 | start_job_cmd += "--" + normalize_arg_key(k) + " " + add_quote(v, quote="'", guard="\n") + " \\\n" 70 | return start_job_cmd[:len(start_job_cmd) - 2] + "\n" 71 | 72 | 73 | def convert_file(input, output, extra_args, banner="\n"): 74 | with open(input, "r") as input_fp: 75 | with open(output, "w") as output_fp: 76 | in_cmd = False 77 | cmd_line = "" 78 | for line in input_fp: 79 | new_line = line.strip() 80 | if new_line and ((new_line[0] != "#" and SPARK_SUBMIT in new_line) or in_cmd): 81 | output_fp.write("#" + line) # Keep the original lines in comment 82 | in_cmd = True 83 | cmd_line += new_line 84 | if new_line[-1] != "\\": 85 | converted_cmd = generate_start_job_cmd(shlex.split(cmd_line), extra_args) 86 | output_fp.write(banner) 87 | output_fp.writelines(str(converted_cmd) + "\n") 88 | in_cmd = False 89 | cmd_line = "" 90 | else: 91 | output_fp.write(line) 92 | 93 | 94 | if __name__ == '__main__': 95 | # Create the parser 96 | cmd_parser = argparse.ArgumentParser(description='A tool for converting spark-sumbit command line to EMR on EKS ' 97 | 'start-job-run.') 98 | 99 | # Add the arguments 100 | cmd_parser.add_argument('--file', help='the input spark-submit script file', required=True) 101 | cmd_parser.add_argument('--name', help='The name of the job run') 102 | cmd_parser.add_argument('--virtual-cluster-id', help='The virtual cluster ID for which the job run request is submitted', required=True) 103 | cmd_parser.add_argument('--client-token', help='The client idempotency token of the job run request') 104 | cmd_parser.add_argument('--execution-role-arn', help='The execution role ARN for the job run', required=True) 105 | cmd_parser.add_argument('--release-label', help='The Amazon EMR release version to use for the job run', required=True) 106 | cmd_parser.add_argument('--configuration-overrides', help='The configuration overrides for the job run') 107 | cmd_parser.add_argument('--tags', help='The tags assigned to job runs') 108 | 109 | args = cmd_parser.parse_args() 110 | 111 | input_file = args.file 112 | output_file = os.path.basename(input_file) + OUTPUT_FILE_SUFFIX 113 | 114 | if os.path.exists(output_file): 115 | print("Error: {} already exists.".format(output_file)) 116 | sys.exit(1) 117 | 118 | convert_file(input_file, output_file, vars(args), 119 | AUTO_CONVERT_MSG.format(os.path.basename(sys.argv[0]))) 120 | --------------------------------------------------------------------------------