├── .github
│   └── workflows
│       ├── pypi-publish.yaml
│       └── unit-tests.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── NOTICE
├── README.md
├── SECURITY.md
├── poetry.lock
├── pyproject.toml
├── requirements.txt
├── src
│   └── emr_cli
│       ├── config.py
│       ├── deployments
│       │   ├── __init__.py
│       │   ├── emr_ec2.py
│       │   ├── emr_eks.py
│       │   └── emr_serverless.py
│       ├── emr_cli.py
│       ├── packaging
│       │   ├── detector.py
│       │   ├── python_files_project.py
│       │   ├── python_poetry_project.py
│       │   ├── python_project.py
│       │   └── simple_project.py
│       ├── templates
│       │   ├── poetry
│       │   │   ├── README.md
│       │   │   └── pyproject.toml
│       │   └── pyspark
│       │       ├── .dockerignore
│       │       ├── .gitignore
│       │       ├── Dockerfile
│       │       ├── entrypoint.py
│       │       ├── jobs
│       │       │   └── extreme_weather.py
│       │       └── pyproject.toml
│       └── utils
│           └── __init__.py
└── tests
    ├── deployments
    │   └── test_emr_ec2.py
    ├── packaging
    │   ├── test_python_files_project.py
    │   └── test_python_poetry_project.py
    ├── test_cli.py
    ├── test_detector.py
    └── test_init.py

/.github/workflows/pypi-publish.yaml:
--------------------------------------------------------------------------------
name: Publish to PyPI Repository
on:
  push:
    tags:
      - "v*.*.*"

env:
  POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }}

jobs:
  deploy:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.9
      - name: cache poetry install
        uses: actions/cache@v3
        with:
          path: ~/.local
          key: poetry-1.3.2-0
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Build and publish
        run: |
          poetry version ${{github.ref_name}}
          poetry publish --build

--------------------------------------------------------------------------------
/.github/workflows/unit-tests.yaml:
--------------------------------------------------------------------------------
name: Spark Job Unit Tests
on: [push]
env:
  AWS_DEFAULT_REGION: us-east-1
jobs:
  pytest:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.9", "3.10"]
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: cache poetry install
        uses: actions/cache@v3
        with:
          path: ~/.local
          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-poetry-1.5.1-0
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          # Something changed in 1.6.0, but unsure what, so pin to 1.5.1
          version: 1.5.1
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: |
          poetry install --no-interaction --no-root
      - name: Install project
        run: |
          poetry install --no-interaction
      - name: Run tests
        run: |
          source $VENV
          poetry run pytest --cov emr_cli tests/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Python
__pycache__
*.egg-info/
build/
dist/

# Dev
conf/deployment-sample.yaml
.venv/
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass (see the example after this list).
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
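
For a quick local check that mirrors the `unit-tests.yaml` workflow above, something like the following should work (a minimal sketch that assumes you already have [Poetry](https://python-poetry.org/) installed):

```bash
# Install the project and its dependencies into a local virtualenv,
# then run the same pytest invocation the CI workflow uses.
poetry install --no-interaction
poetry run pytest --cov emr_cli tests/
```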
GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to work on. Our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), so looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EMR CLI

So we're all working on data pipelines every day, but wouldn't it be nice to just hit a button and have our code automatically deployed to staging or test accounts? I thought so, too. That's why I created the EMR CLI (`emr`), which can help you package and deploy your EMR jobs so you don't have to.

The EMR CLI supports a wide variety of configuration options to adapt to _your_ data pipeline, not the other way around.

1. Packaging - Ensure a consistent approach to packaging your production Spark jobs.
2. Deployment - Easily deploy your Spark jobs across multiple EMR environments or deployment frameworks like EC2, EKS, and Serverless.
3. CI/CD - Easily test each iteration of your code without resorting to messy shell scripts. :)

The initial use cases are:

1. Consistent packaging for PySpark projects.
2. Use in CI/CD pipelines for packaging, deployment of artifacts, and integration testing.

> **Warning**: This tool is still under active development, so commands may change until a stable 1.0 release is made.

## Quick Start

You can use the EMR CLI to take a project from nothing to running on EMR Serverless in two steps.

First, let's install the `emr` command.

```bash
python3 -m pip install -U emr-cli
```

> **Note**: This tutorial assumes you have already [set up EMR Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/setting-up.html) and have an EMR Serverless application, job role, and S3 bucket you can use. If not, you can use the `emr bootstrap` command.

1. Create a sample project

```bash
emr init scratch
```

> 📔 Tip: Use `--project-type poetry` to create a [Poetry](https://python-poetry.org/) project!

You should now have a sample PySpark project in your scratch directory.

```
scratch
├── Dockerfile
├── entrypoint.py
├── jobs
│   └── extreme_weather.py
└── pyproject.toml

1 directory, 4 files
```

2. Now deploy and run on an EMR Serverless application!

```bash
emr run \
  --entry-point entrypoint.py \
  --application-id ${APPLICATION_ID} \
  --job-role ${JOB_ROLE_ARN} \
  --s3-code-uri s3://${S3_BUCKET}/tmp/emr-cli-demo/ \
  --s3-logs-uri s3://${S3_BUCKET}/logs/emr-cli-demo/ \
  --build \
  --show-stdout
```

This command performs the following actions:

- Packages your project dependencies into a Python virtual environment
- Uploads the Spark entrypoint and packaged dependencies to S3
- Starts an EMR Serverless job
- Waits for the job to run to completion and shows the `stdout` of the Spark driver when finished!

And you're done. Feel free to modify the project to experiment with different things. You can simply re-run the command above to re-package and re-deploy your job.

## EMR CLI Sub-commands

The EMR CLI has several subcommands that you can see by running `emr --help`:

```
Commands:
  bootstrap  Bootstrap an EMR Serverless environment.
  deploy     Copy a local project to S3.
  init       Initialize a local PySpark project.
  package    Package a project and dependencies into dist/
  run        Run a project on EMR, optionally build and deploy
  status
```

### bootstrap

`emr bootstrap` allows you to create a sample EMR Serverless or EMR on EC2 environment for testing. It assumes you have admin access and creates various resources for you using AWS APIs.

#### EMR Serverless

To create a bootstrap EMR Serverless environment, use the following command:

```shell
emr bootstrap \
  --target emr-serverless \
  --code-bucket <bucket-name> \
  --job-role-name <role-name>
```

When you do this, the CLI creates a new EMR CLI config file at `.emr/config.yaml` that will set default locations for your `emr run` command.
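
For reference, `.emr/config.yaml` is a plain YAML file holding default values for `emr run`. The exact schema depends on the options you save, so treat the snippet below as a hypothetical sketch rather than real output:

```yaml
# .emr/config.yaml - hypothetical example; bootstrap fills in your real values
run:
  application-id: 00f5ab1cdef2gh34                            # made-up application ID
  job-role: arn:aws:iam::123456789012:role/emr-cli-demo-role  # made-up role ARN
  s3-code-uri: s3://my-emr-cli-bucket/code/
```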

### init

The `init` command creates a new `pyproject.toml` or `poetry` project for you with a sample PySpark application.

`init` is required to create those project types, since it also initializes a `Dockerfile` used to package your dependencies. Single-file PySpark jobs and simple Python modules do not require the `init` command.

### package

The `package` command bundles your PySpark code and dependencies in preparation for deployment. Often you'll either use `package` and `deploy` to push new artifacts to S3, or you'll use the `--build` flag in the `emr run` command to handle both of those tasks for you.

The EMR CLI automatically detects what type of project you have and builds the necessary dependency packages.
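
As a concrete illustration, packaging the Quick Start project could look like this. The artifact names in the `dist/` listing are illustrative and vary by project type:

```bash
# Bundle the entrypoint and its dependencies; the CLI detects the project type.
emr package --entry-point entrypoint.py

# For a pyproject.toml-based project, you'd expect the entrypoint plus a
# dependency archive in dist/, along the lines of:
ls dist/
# entrypoint.py  pyspark_deps.tar.gz
```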

### deploy

The `deploy` command copies the packaged artifacts from the `dist/` folder to your specified S3 location.

### run

The `run` command is intended to help you package, deploy, and run your PySpark code across EMR on EC2, EMR on EKS, or EMR Serverless.

You must provide one of `--cluster-id`, `--virtual-cluster-id`, or `--application-id` to specify which environment to run your code on.

`emr run --help` shows all the available options:

```
Usage: emr run [OPTIONS]

  Run a project on EMR, optionally build and deploy

Options:
  --application-id TEXT         EMR Serverless Application ID
  --cluster-id TEXT             EMR on EC2 Cluster ID
  --virtual-cluster-id TEXT     EMR on EKS Virtual Cluster ID
  --entry-point FILE            Python or Jar file for the main entrypoint
  --job-role TEXT               IAM Role ARN to use for the job execution
  --wait                        Wait for job to finish
  --s3-code-uri TEXT            Where to copy/run code artifacts to/from
  --s3-logs-uri TEXT            Where to send EMR Serverless logs to
  --job-name TEXT               The name of the job
  --job-args TEXT               Comma-delimited string of arguments to be
                                passed to Spark job
  --spark-submit-opts TEXT      String of spark-submit options
  --build                       Package and deploy job artifacts
  --show-stdout                 Show the stdout of the job after it's finished
  --save-config                 Update the config file with the provided
                                options
  --emr-eks-release-label TEXT  EMR on EKS release label (emr-6.15.0) -
                                defaults to latest release
```
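
The sample commands below don't exercise `--job-args` or `--spark-submit-opts`, so here's a quick sketch of both. All values are made up, and the Spark setting is just one you might plausibly tune:

```bash
# Pass two comma-delimited arguments to the job and raise executor memory.
emr run \
  --entry-point entrypoint.py \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --s3-code-uri s3://<bucket>/code/ \
  --job-args "2023-01-01,2023-01-31" \
  --spark-submit-opts "--conf spark.executor.memory=4g" \
  --build --wait
```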

## Supported PySpark configurations

- Single-file project - Projects that have a single `.py` entrypoint file.
- Multi-file project - A more typical PySpark project, but without dependencies, that has multiple Python files or modules.
- Python module - A project with dependencies defined in a `pyproject.toml` file.
- Poetry project - A project using [Poetry](https://python-poetry.org/) for dependency management.

## Sample Commands

- Create a new PySpark project (other frameworks TBD)

```bash
emr init project-dir
```

- Package your project into a virtual environment archive

```bash
emr package --entry-point main.py
```

The EMR CLI auto-detects the project type and changes the packaging method appropriately.

If you have additional `.py` files, those will be included in the archive.

- Deploy an existing package artifact to S3.

```bash
emr deploy --entry-point main.py --s3-code-uri s3://<bucket>/code/
```

- Deploy a PySpark package to S3 and trigger an EMR Serverless job

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --application-id <application-id> \
  --job-role <job-role-arn>
```

- Build, deploy, and run an EMR Serverless job and wait for it to finish.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --build \
  --wait
```

- Re-run an already deployed job and show the `stdout` of the driver.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --show-stdout
```

> **Note**: If the job fails, the command will exit with an error code.

- Re-run your jobs with 7 characters.

If you provide the `--save-config` flag to `emr run`, it will save a configuration file for you in `.emr/config.yaml`, and next time you can use `emr run` with no parameters to re-run your job.

```bash
emr run --entry-point main.py \
  ... \
  --save-config

[emr-cli]: Config file saved to .emr/config.yaml. Use `emr run` to re-use your configuration.
```

```bash
❯ emr run
[emr-cli]: Using config file: .emr/config.yaml
```

🥳

- Run the same job against an EMR on EC2 cluster

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --cluster-id <cluster-id> \
  --show-stdout
```

- Or an EMR on EKS virtual cluster.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --virtual-cluster-id <virtual-cluster-id> \
  --job-role <job-role-arn> \
  --show-stdout
```

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

## License

This project is licensed under the Apache-2.0 License.
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
## Reporting a Vulnerability

If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security
via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to aws-security@amazon.com.

Please do **not** create a public GitHub issue.
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.

[[package]]
name = "attrs"
version = "23.1.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.7"
files = [
    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
]

[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}

[package.extras]
cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
dev = ["attrs[docs,tests]", "pre-commit"]
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
tests = ["attrs[tests-no-zope]", "zope-interface"]
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]

[[package]]
name = "boto3"
version = "1.28.70"
description = "The AWS SDK for Python"
optional = false
python-versions = ">= 3.7"
files = [
    {file = "boto3-1.28.70-py3-none-any.whl", hash = "sha256:22ec3b54801c81746657827c7b1c4a3b2e4cfa7c21be3b96218d32e9390ee5eb"},
    {file = "boto3-1.28.70.tar.gz", hash = "sha256:89002e1d8411c7c54110f9f8fc4a11d57d6d7977c0cb4ba064887ca5d4c788f7"},
]

[package.dependencies]
botocore = ">=1.31.70,<1.32.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.7.0,<0.8.0"

[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

[[package]]
name = "boto3-stubs"
version = "1.28.70"
description = "Type annotations for boto3 1.28.70 generated with mypy-boto3-builder 7.19.0"
optional = false
python-versions = ">=3.7"
files = [
    {file = "boto3-stubs-1.28.70.tar.gz", hash = "sha256:0c67fd217c9fd0d5688504aa64c5b760bccca9886ce05d675bbc613b568cba28"},
    {file = "boto3_stubs-1.28.70-py3-none-any.whl", hash = "sha256:6d50983e2535220f3da911ac43e3fd13347d644474b3ebe694c4158c75a3df7f"},
]

[package.dependencies]
botocore-stubs = "*"
mypy-boto3-s3 = {version = ">=1.28.0,<1.29.0", optional = true, markers = "extra == \"s3\""}
types-s3transfer = "*"
typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""}

[package.extras]
accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.28.0,<1.29.0)"]
account = ["mypy-boto3-account (>=1.28.0,<1.29.0)"]
acm = ["mypy-boto3-acm (>=1.28.0,<1.29.0)"]
acm-pca = ["mypy-boto3-acm-pca (>=1.28.0,<1.29.0)"]
alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.28.0,<1.29.0)"]
all = ["mypy-boto3-accessanalyzer (>=1.28.0,<1.29.0)", "mypy-boto3-account (>=1.28.0,<1.29.0)", "mypy-boto3-acm (>=1.28.0,<1.29.0)", "mypy-boto3-acm-pca (>=1.28.0,<1.29.0)", "mypy-boto3-alexaforbusiness (>=1.28.0,<1.29.0)", "mypy-boto3-amp (>=1.28.0,<1.29.0)", "mypy-boto3-amplify (>=1.28.0,<1.29.0)", "mypy-boto3-amplifybackend (>=1.28.0,<1.29.0)", "mypy-boto3-amplifyuibuilder (>=1.28.0,<1.29.0)", "mypy-boto3-apigateway (>=1.28.0,<1.29.0)", "mypy-boto3-apigatewaymanagementapi (>=1.28.0,<1.29.0)", "mypy-boto3-apigatewayv2 (>=1.28.0,<1.29.0)", "mypy-boto3-appconfig (>=1.28.0,<1.29.0)", "mypy-boto3-appconfigdata (>=1.28.0,<1.29.0)", "mypy-boto3-appfabric (>=1.28.0,<1.29.0)", "mypy-boto3-appflow (>=1.28.0,<1.29.0)", "mypy-boto3-appintegrations (>=1.28.0,<1.29.0)",
"mypy-boto3-application-autoscaling (>=1.28.0,<1.29.0)", "mypy-boto3-application-insights (>=1.28.0,<1.29.0)", "mypy-boto3-applicationcostprofiler (>=1.28.0,<1.29.0)", "mypy-boto3-appmesh (>=1.28.0,<1.29.0)", "mypy-boto3-apprunner (>=1.28.0,<1.29.0)", "mypy-boto3-appstream (>=1.28.0,<1.29.0)", "mypy-boto3-appsync (>=1.28.0,<1.29.0)", "mypy-boto3-arc-zonal-shift (>=1.28.0,<1.29.0)", "mypy-boto3-athena (>=1.28.0,<1.29.0)", "mypy-boto3-auditmanager (>=1.28.0,<1.29.0)", "mypy-boto3-autoscaling (>=1.28.0,<1.29.0)", "mypy-boto3-autoscaling-plans (>=1.28.0,<1.29.0)", "mypy-boto3-backup (>=1.28.0,<1.29.0)", "mypy-boto3-backup-gateway (>=1.28.0,<1.29.0)", "mypy-boto3-backupstorage (>=1.28.0,<1.29.0)", "mypy-boto3-batch (>=1.28.0,<1.29.0)", "mypy-boto3-bedrock (>=1.28.0,<1.29.0)", "mypy-boto3-bedrock-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-billingconductor (>=1.28.0,<1.29.0)", "mypy-boto3-braket (>=1.28.0,<1.29.0)", "mypy-boto3-budgets (>=1.28.0,<1.29.0)", "mypy-boto3-ce (>=1.28.0,<1.29.0)", "mypy-boto3-chime (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-identity (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-meetings (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-messaging (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-voice (>=1.28.0,<1.29.0)", "mypy-boto3-cleanrooms (>=1.28.0,<1.29.0)", "mypy-boto3-cloud9 (>=1.28.0,<1.29.0)", "mypy-boto3-cloudcontrol (>=1.28.0,<1.29.0)", "mypy-boto3-clouddirectory (>=1.28.0,<1.29.0)", "mypy-boto3-cloudformation (>=1.28.0,<1.29.0)", "mypy-boto3-cloudfront (>=1.28.0,<1.29.0)", "mypy-boto3-cloudhsm (>=1.28.0,<1.29.0)", "mypy-boto3-cloudhsmv2 (>=1.28.0,<1.29.0)", "mypy-boto3-cloudsearch (>=1.28.0,<1.29.0)", "mypy-boto3-cloudsearchdomain (>=1.28.0,<1.29.0)", "mypy-boto3-cloudtrail (>=1.28.0,<1.29.0)", "mypy-boto3-cloudtrail-data (>=1.28.0,<1.29.0)", "mypy-boto3-cloudwatch (>=1.28.0,<1.29.0)", "mypy-boto3-codeartifact (>=1.28.0,<1.29.0)", "mypy-boto3-codebuild (>=1.28.0,<1.29.0)", "mypy-boto3-codecatalyst (>=1.28.0,<1.29.0)", "mypy-boto3-codecommit (>=1.28.0,<1.29.0)", "mypy-boto3-codedeploy (>=1.28.0,<1.29.0)", "mypy-boto3-codeguru-reviewer (>=1.28.0,<1.29.0)", "mypy-boto3-codeguru-security (>=1.28.0,<1.29.0)", "mypy-boto3-codeguruprofiler (>=1.28.0,<1.29.0)", "mypy-boto3-codepipeline (>=1.28.0,<1.29.0)", "mypy-boto3-codestar (>=1.28.0,<1.29.0)", "mypy-boto3-codestar-connections (>=1.28.0,<1.29.0)", "mypy-boto3-codestar-notifications (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-identity (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-idp (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-sync (>=1.28.0,<1.29.0)", "mypy-boto3-comprehend (>=1.28.0,<1.29.0)", "mypy-boto3-comprehendmedical (>=1.28.0,<1.29.0)", "mypy-boto3-compute-optimizer (>=1.28.0,<1.29.0)", "mypy-boto3-config (>=1.28.0,<1.29.0)", "mypy-boto3-connect (>=1.28.0,<1.29.0)", "mypy-boto3-connect-contact-lens (>=1.28.0,<1.29.0)", "mypy-boto3-connectcampaigns (>=1.28.0,<1.29.0)", "mypy-boto3-connectcases (>=1.28.0,<1.29.0)", "mypy-boto3-connectparticipant (>=1.28.0,<1.29.0)", "mypy-boto3-controltower (>=1.28.0,<1.29.0)", "mypy-boto3-cur (>=1.28.0,<1.29.0)", "mypy-boto3-customer-profiles (>=1.28.0,<1.29.0)", "mypy-boto3-databrew (>=1.28.0,<1.29.0)", "mypy-boto3-dataexchange (>=1.28.0,<1.29.0)", "mypy-boto3-datapipeline (>=1.28.0,<1.29.0)", "mypy-boto3-datasync (>=1.28.0,<1.29.0)", "mypy-boto3-datazone (>=1.28.0,<1.29.0)", "mypy-boto3-dax (>=1.28.0,<1.29.0)", "mypy-boto3-detective (>=1.28.0,<1.29.0)", "mypy-boto3-devicefarm (>=1.28.0,<1.29.0)", "mypy-boto3-devops-guru (>=1.28.0,<1.29.0)", 
"mypy-boto3-directconnect (>=1.28.0,<1.29.0)", "mypy-boto3-discovery (>=1.28.0,<1.29.0)", "mypy-boto3-dlm (>=1.28.0,<1.29.0)", "mypy-boto3-dms (>=1.28.0,<1.29.0)", "mypy-boto3-docdb (>=1.28.0,<1.29.0)", "mypy-boto3-docdb-elastic (>=1.28.0,<1.29.0)", "mypy-boto3-drs (>=1.28.0,<1.29.0)", "mypy-boto3-ds (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodb (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodbstreams (>=1.28.0,<1.29.0)", "mypy-boto3-ebs (>=1.28.0,<1.29.0)", "mypy-boto3-ec2 (>=1.28.0,<1.29.0)", "mypy-boto3-ec2-instance-connect (>=1.28.0,<1.29.0)", "mypy-boto3-ecr (>=1.28.0,<1.29.0)", "mypy-boto3-ecr-public (>=1.28.0,<1.29.0)", "mypy-boto3-ecs (>=1.28.0,<1.29.0)", "mypy-boto3-efs (>=1.28.0,<1.29.0)", "mypy-boto3-eks (>=1.28.0,<1.29.0)", "mypy-boto3-elastic-inference (>=1.28.0,<1.29.0)", "mypy-boto3-elasticache (>=1.28.0,<1.29.0)", "mypy-boto3-elasticbeanstalk (>=1.28.0,<1.29.0)", "mypy-boto3-elastictranscoder (>=1.28.0,<1.29.0)", "mypy-boto3-elb (>=1.28.0,<1.29.0)", "mypy-boto3-elbv2 (>=1.28.0,<1.29.0)", "mypy-boto3-emr (>=1.28.0,<1.29.0)", "mypy-boto3-emr-containers (>=1.28.0,<1.29.0)", "mypy-boto3-emr-serverless (>=1.28.0,<1.29.0)", "mypy-boto3-entityresolution (>=1.28.0,<1.29.0)", "mypy-boto3-es (>=1.28.0,<1.29.0)", "mypy-boto3-events (>=1.28.0,<1.29.0)", "mypy-boto3-evidently (>=1.28.0,<1.29.0)", "mypy-boto3-finspace (>=1.28.0,<1.29.0)", "mypy-boto3-finspace-data (>=1.28.0,<1.29.0)", "mypy-boto3-firehose (>=1.28.0,<1.29.0)", "mypy-boto3-fis (>=1.28.0,<1.29.0)", "mypy-boto3-fms (>=1.28.0,<1.29.0)", "mypy-boto3-forecast (>=1.28.0,<1.29.0)", "mypy-boto3-forecastquery (>=1.28.0,<1.29.0)", "mypy-boto3-frauddetector (>=1.28.0,<1.29.0)", "mypy-boto3-fsx (>=1.28.0,<1.29.0)", "mypy-boto3-gamelift (>=1.28.0,<1.29.0)", "mypy-boto3-glacier (>=1.28.0,<1.29.0)", "mypy-boto3-globalaccelerator (>=1.28.0,<1.29.0)", "mypy-boto3-glue (>=1.28.0,<1.29.0)", "mypy-boto3-grafana (>=1.28.0,<1.29.0)", "mypy-boto3-greengrass (>=1.28.0,<1.29.0)", "mypy-boto3-greengrassv2 (>=1.28.0,<1.29.0)", "mypy-boto3-groundstation (>=1.28.0,<1.29.0)", "mypy-boto3-guardduty (>=1.28.0,<1.29.0)", "mypy-boto3-health (>=1.28.0,<1.29.0)", "mypy-boto3-healthlake (>=1.28.0,<1.29.0)", "mypy-boto3-honeycode (>=1.28.0,<1.29.0)", "mypy-boto3-iam (>=1.28.0,<1.29.0)", "mypy-boto3-identitystore (>=1.28.0,<1.29.0)", "mypy-boto3-imagebuilder (>=1.28.0,<1.29.0)", "mypy-boto3-importexport (>=1.28.0,<1.29.0)", "mypy-boto3-inspector (>=1.28.0,<1.29.0)", "mypy-boto3-inspector2 (>=1.28.0,<1.29.0)", "mypy-boto3-internetmonitor (>=1.28.0,<1.29.0)", "mypy-boto3-iot (>=1.28.0,<1.29.0)", "mypy-boto3-iot-data (>=1.28.0,<1.29.0)", "mypy-boto3-iot-jobs-data (>=1.28.0,<1.29.0)", "mypy-boto3-iot-roborunner (>=1.28.0,<1.29.0)", "mypy-boto3-iot1click-devices (>=1.28.0,<1.29.0)", "mypy-boto3-iot1click-projects (>=1.28.0,<1.29.0)", "mypy-boto3-iotanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-iotdeviceadvisor (>=1.28.0,<1.29.0)", "mypy-boto3-iotevents (>=1.28.0,<1.29.0)", "mypy-boto3-iotevents-data (>=1.28.0,<1.29.0)", "mypy-boto3-iotfleethub (>=1.28.0,<1.29.0)", "mypy-boto3-iotfleetwise (>=1.28.0,<1.29.0)", "mypy-boto3-iotsecuretunneling (>=1.28.0,<1.29.0)", "mypy-boto3-iotsitewise (>=1.28.0,<1.29.0)", "mypy-boto3-iotthingsgraph (>=1.28.0,<1.29.0)", "mypy-boto3-iottwinmaker (>=1.28.0,<1.29.0)", "mypy-boto3-iotwireless (>=1.28.0,<1.29.0)", "mypy-boto3-ivs (>=1.28.0,<1.29.0)", "mypy-boto3-ivs-realtime (>=1.28.0,<1.29.0)", "mypy-boto3-ivschat (>=1.28.0,<1.29.0)", "mypy-boto3-kafka (>=1.28.0,<1.29.0)", "mypy-boto3-kafkaconnect (>=1.28.0,<1.29.0)", "mypy-boto3-kendra 
(>=1.28.0,<1.29.0)", "mypy-boto3-kendra-ranking (>=1.28.0,<1.29.0)", "mypy-boto3-keyspaces (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-archived-media (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-media (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-signaling (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisvideo (>=1.28.0,<1.29.0)", "mypy-boto3-kms (>=1.28.0,<1.29.0)", "mypy-boto3-lakeformation (>=1.28.0,<1.29.0)", "mypy-boto3-lambda (>=1.28.0,<1.29.0)", "mypy-boto3-lex-models (>=1.28.0,<1.29.0)", "mypy-boto3-lex-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-lexv2-models (>=1.28.0,<1.29.0)", "mypy-boto3-lexv2-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.28.0,<1.29.0)", "mypy-boto3-lightsail (>=1.28.0,<1.29.0)", "mypy-boto3-location (>=1.28.0,<1.29.0)", "mypy-boto3-logs (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutequipment (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutmetrics (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutvision (>=1.28.0,<1.29.0)", "mypy-boto3-m2 (>=1.28.0,<1.29.0)", "mypy-boto3-machinelearning (>=1.28.0,<1.29.0)", "mypy-boto3-macie (>=1.28.0,<1.29.0)", "mypy-boto3-macie2 (>=1.28.0,<1.29.0)", "mypy-boto3-managedblockchain (>=1.28.0,<1.29.0)", "mypy-boto3-managedblockchain-query (>=1.28.0,<1.29.0)", "mypy-boto3-marketplace-catalog (>=1.28.0,<1.29.0)", "mypy-boto3-marketplace-entitlement (>=1.28.0,<1.29.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-mediaconnect (>=1.28.0,<1.29.0)", "mypy-boto3-mediaconvert (>=1.28.0,<1.29.0)", "mypy-boto3-medialive (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackage (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackage-vod (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackagev2 (>=1.28.0,<1.29.0)", "mypy-boto3-mediastore (>=1.28.0,<1.29.0)", "mypy-boto3-mediastore-data (>=1.28.0,<1.29.0)", "mypy-boto3-mediatailor (>=1.28.0,<1.29.0)", "mypy-boto3-medical-imaging (>=1.28.0,<1.29.0)", "mypy-boto3-memorydb (>=1.28.0,<1.29.0)", "mypy-boto3-meteringmarketplace (>=1.28.0,<1.29.0)", "mypy-boto3-mgh (>=1.28.0,<1.29.0)", "mypy-boto3-mgn (>=1.28.0,<1.29.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhub-config (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhuborchestrator (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhubstrategy (>=1.28.0,<1.29.0)", "mypy-boto3-mobile (>=1.28.0,<1.29.0)", "mypy-boto3-mq (>=1.28.0,<1.29.0)", "mypy-boto3-mturk (>=1.28.0,<1.29.0)", "mypy-boto3-mwaa (>=1.28.0,<1.29.0)", "mypy-boto3-neptune (>=1.28.0,<1.29.0)", "mypy-boto3-neptunedata (>=1.28.0,<1.29.0)", "mypy-boto3-network-firewall (>=1.28.0,<1.29.0)", "mypy-boto3-networkmanager (>=1.28.0,<1.29.0)", "mypy-boto3-nimble (>=1.28.0,<1.29.0)", "mypy-boto3-oam (>=1.28.0,<1.29.0)", "mypy-boto3-omics (>=1.28.0,<1.29.0)", "mypy-boto3-opensearch (>=1.28.0,<1.29.0)", "mypy-boto3-opensearchserverless (>=1.28.0,<1.29.0)", "mypy-boto3-opsworks (>=1.28.0,<1.29.0)", "mypy-boto3-opsworkscm (>=1.28.0,<1.29.0)", "mypy-boto3-organizations (>=1.28.0,<1.29.0)", "mypy-boto3-osis (>=1.28.0,<1.29.0)", "mypy-boto3-outposts (>=1.28.0,<1.29.0)", "mypy-boto3-panorama (>=1.28.0,<1.29.0)", "mypy-boto3-payment-cryptography (>=1.28.0,<1.29.0)", "mypy-boto3-payment-cryptography-data (>=1.28.0,<1.29.0)", "mypy-boto3-pca-connector-ad 
(>=1.28.0,<1.29.0)", "mypy-boto3-personalize (>=1.28.0,<1.29.0)", "mypy-boto3-personalize-events (>=1.28.0,<1.29.0)", "mypy-boto3-personalize-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-pi (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-email (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-sms-voice (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.28.0,<1.29.0)", "mypy-boto3-pipes (>=1.28.0,<1.29.0)", "mypy-boto3-polly (>=1.28.0,<1.29.0)", "mypy-boto3-pricing (>=1.28.0,<1.29.0)", "mypy-boto3-privatenetworks (>=1.28.0,<1.29.0)", "mypy-boto3-proton (>=1.28.0,<1.29.0)", "mypy-boto3-qldb (>=1.28.0,<1.29.0)", "mypy-boto3-qldb-session (>=1.28.0,<1.29.0)", "mypy-boto3-quicksight (>=1.28.0,<1.29.0)", "mypy-boto3-ram (>=1.28.0,<1.29.0)", "mypy-boto3-rbin (>=1.28.0,<1.29.0)", "mypy-boto3-rds (>=1.28.0,<1.29.0)", "mypy-boto3-rds-data (>=1.28.0,<1.29.0)", "mypy-boto3-redshift (>=1.28.0,<1.29.0)", "mypy-boto3-redshift-data (>=1.28.0,<1.29.0)", "mypy-boto3-redshift-serverless (>=1.28.0,<1.29.0)", "mypy-boto3-rekognition (>=1.28.0,<1.29.0)", "mypy-boto3-resiliencehub (>=1.28.0,<1.29.0)", "mypy-boto3-resource-explorer-2 (>=1.28.0,<1.29.0)", "mypy-boto3-resource-groups (>=1.28.0,<1.29.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.28.0,<1.29.0)", "mypy-boto3-robomaker (>=1.28.0,<1.29.0)", "mypy-boto3-rolesanywhere (>=1.28.0,<1.29.0)", "mypy-boto3-route53 (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-cluster (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-control-config (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-readiness (>=1.28.0,<1.29.0)", "mypy-boto3-route53domains (>=1.28.0,<1.29.0)", "mypy-boto3-route53resolver (>=1.28.0,<1.29.0)", "mypy-boto3-rum (>=1.28.0,<1.29.0)", "mypy-boto3-s3 (>=1.28.0,<1.29.0)", "mypy-boto3-s3control (>=1.28.0,<1.29.0)", "mypy-boto3-s3outposts (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-edge (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-geospatial (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-metrics (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-savingsplans (>=1.28.0,<1.29.0)", "mypy-boto3-scheduler (>=1.28.0,<1.29.0)", "mypy-boto3-schemas (>=1.28.0,<1.29.0)", "mypy-boto3-sdb (>=1.28.0,<1.29.0)", "mypy-boto3-secretsmanager (>=1.28.0,<1.29.0)", "mypy-boto3-securityhub (>=1.28.0,<1.29.0)", "mypy-boto3-securitylake (>=1.28.0,<1.29.0)", "mypy-boto3-serverlessrepo (>=1.28.0,<1.29.0)", "mypy-boto3-service-quotas (>=1.28.0,<1.29.0)", "mypy-boto3-servicecatalog (>=1.28.0,<1.29.0)", "mypy-boto3-servicecatalog-appregistry (>=1.28.0,<1.29.0)", "mypy-boto3-servicediscovery (>=1.28.0,<1.29.0)", "mypy-boto3-ses (>=1.28.0,<1.29.0)", "mypy-boto3-sesv2 (>=1.28.0,<1.29.0)", "mypy-boto3-shield (>=1.28.0,<1.29.0)", "mypy-boto3-signer (>=1.28.0,<1.29.0)", "mypy-boto3-simspaceweaver (>=1.28.0,<1.29.0)", "mypy-boto3-sms (>=1.28.0,<1.29.0)", "mypy-boto3-sms-voice (>=1.28.0,<1.29.0)", "mypy-boto3-snow-device-management (>=1.28.0,<1.29.0)", "mypy-boto3-snowball (>=1.28.0,<1.29.0)", "mypy-boto3-sns (>=1.28.0,<1.29.0)", "mypy-boto3-sqs (>=1.28.0,<1.29.0)", "mypy-boto3-ssm (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-contacts (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-incidents (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-sap (>=1.28.0,<1.29.0)", "mypy-boto3-sso (>=1.28.0,<1.29.0)", "mypy-boto3-sso-admin (>=1.28.0,<1.29.0)", "mypy-boto3-sso-oidc (>=1.28.0,<1.29.0)", 
"mypy-boto3-stepfunctions (>=1.28.0,<1.29.0)", "mypy-boto3-storagegateway (>=1.28.0,<1.29.0)", "mypy-boto3-sts (>=1.28.0,<1.29.0)", "mypy-boto3-support (>=1.28.0,<1.29.0)", "mypy-boto3-support-app (>=1.28.0,<1.29.0)", "mypy-boto3-swf (>=1.28.0,<1.29.0)", "mypy-boto3-synthetics (>=1.28.0,<1.29.0)", "mypy-boto3-textract (>=1.28.0,<1.29.0)", "mypy-boto3-timestream-query (>=1.28.0,<1.29.0)", "mypy-boto3-timestream-write (>=1.28.0,<1.29.0)", "mypy-boto3-tnb (>=1.28.0,<1.29.0)", "mypy-boto3-transcribe (>=1.28.0,<1.29.0)", "mypy-boto3-transfer (>=1.28.0,<1.29.0)", "mypy-boto3-translate (>=1.28.0,<1.29.0)", "mypy-boto3-verifiedpermissions (>=1.28.0,<1.29.0)", "mypy-boto3-voice-id (>=1.28.0,<1.29.0)", "mypy-boto3-vpc-lattice (>=1.28.0,<1.29.0)", "mypy-boto3-waf (>=1.28.0,<1.29.0)", "mypy-boto3-waf-regional (>=1.28.0,<1.29.0)", "mypy-boto3-wafv2 (>=1.28.0,<1.29.0)", "mypy-boto3-wellarchitected (>=1.28.0,<1.29.0)", "mypy-boto3-wisdom (>=1.28.0,<1.29.0)", "mypy-boto3-workdocs (>=1.28.0,<1.29.0)", "mypy-boto3-worklink (>=1.28.0,<1.29.0)", "mypy-boto3-workmail (>=1.28.0,<1.29.0)", "mypy-boto3-workmailmessageflow (>=1.28.0,<1.29.0)", "mypy-boto3-workspaces (>=1.28.0,<1.29.0)", "mypy-boto3-workspaces-web (>=1.28.0,<1.29.0)", "mypy-boto3-xray (>=1.28.0,<1.29.0)"] 67 | amp = ["mypy-boto3-amp (>=1.28.0,<1.29.0)"] 68 | amplify = ["mypy-boto3-amplify (>=1.28.0,<1.29.0)"] 69 | amplifybackend = ["mypy-boto3-amplifybackend (>=1.28.0,<1.29.0)"] 70 | amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.28.0,<1.29.0)"] 71 | apigateway = ["mypy-boto3-apigateway (>=1.28.0,<1.29.0)"] 72 | apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.28.0,<1.29.0)"] 73 | apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.28.0,<1.29.0)"] 74 | appconfig = ["mypy-boto3-appconfig (>=1.28.0,<1.29.0)"] 75 | appconfigdata = ["mypy-boto3-appconfigdata (>=1.28.0,<1.29.0)"] 76 | appfabric = ["mypy-boto3-appfabric (>=1.28.0,<1.29.0)"] 77 | appflow = ["mypy-boto3-appflow (>=1.28.0,<1.29.0)"] 78 | appintegrations = ["mypy-boto3-appintegrations (>=1.28.0,<1.29.0)"] 79 | application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.28.0,<1.29.0)"] 80 | application-insights = ["mypy-boto3-application-insights (>=1.28.0,<1.29.0)"] 81 | applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.28.0,<1.29.0)"] 82 | appmesh = ["mypy-boto3-appmesh (>=1.28.0,<1.29.0)"] 83 | apprunner = ["mypy-boto3-apprunner (>=1.28.0,<1.29.0)"] 84 | appstream = ["mypy-boto3-appstream (>=1.28.0,<1.29.0)"] 85 | appsync = ["mypy-boto3-appsync (>=1.28.0,<1.29.0)"] 86 | arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.28.0,<1.29.0)"] 87 | athena = ["mypy-boto3-athena (>=1.28.0,<1.29.0)"] 88 | auditmanager = ["mypy-boto3-auditmanager (>=1.28.0,<1.29.0)"] 89 | autoscaling = ["mypy-boto3-autoscaling (>=1.28.0,<1.29.0)"] 90 | autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.28.0,<1.29.0)"] 91 | backup = ["mypy-boto3-backup (>=1.28.0,<1.29.0)"] 92 | backup-gateway = ["mypy-boto3-backup-gateway (>=1.28.0,<1.29.0)"] 93 | backupstorage = ["mypy-boto3-backupstorage (>=1.28.0,<1.29.0)"] 94 | batch = ["mypy-boto3-batch (>=1.28.0,<1.29.0)"] 95 | bedrock = ["mypy-boto3-bedrock (>=1.28.0,<1.29.0)"] 96 | bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.28.0,<1.29.0)"] 97 | billingconductor = ["mypy-boto3-billingconductor (>=1.28.0,<1.29.0)"] 98 | boto3 = ["boto3 (==1.28.70)", "botocore (==1.31.70)"] 99 | braket = ["mypy-boto3-braket (>=1.28.0,<1.29.0)"] 100 | budgets = ["mypy-boto3-budgets (>=1.28.0,<1.29.0)"] 101 | ce = ["mypy-boto3-ce 
(>=1.28.0,<1.29.0)"] 102 | chime = ["mypy-boto3-chime (>=1.28.0,<1.29.0)"] 103 | chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.28.0,<1.29.0)"] 104 | chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.28.0,<1.29.0)"] 105 | chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.28.0,<1.29.0)"] 106 | chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.28.0,<1.29.0)"] 107 | chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.28.0,<1.29.0)"] 108 | cleanrooms = ["mypy-boto3-cleanrooms (>=1.28.0,<1.29.0)"] 109 | cloud9 = ["mypy-boto3-cloud9 (>=1.28.0,<1.29.0)"] 110 | cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.28.0,<1.29.0)"] 111 | clouddirectory = ["mypy-boto3-clouddirectory (>=1.28.0,<1.29.0)"] 112 | cloudformation = ["mypy-boto3-cloudformation (>=1.28.0,<1.29.0)"] 113 | cloudfront = ["mypy-boto3-cloudfront (>=1.28.0,<1.29.0)"] 114 | cloudhsm = ["mypy-boto3-cloudhsm (>=1.28.0,<1.29.0)"] 115 | cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.28.0,<1.29.0)"] 116 | cloudsearch = ["mypy-boto3-cloudsearch (>=1.28.0,<1.29.0)"] 117 | cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.28.0,<1.29.0)"] 118 | cloudtrail = ["mypy-boto3-cloudtrail (>=1.28.0,<1.29.0)"] 119 | cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.28.0,<1.29.0)"] 120 | cloudwatch = ["mypy-boto3-cloudwatch (>=1.28.0,<1.29.0)"] 121 | codeartifact = ["mypy-boto3-codeartifact (>=1.28.0,<1.29.0)"] 122 | codebuild = ["mypy-boto3-codebuild (>=1.28.0,<1.29.0)"] 123 | codecatalyst = ["mypy-boto3-codecatalyst (>=1.28.0,<1.29.0)"] 124 | codecommit = ["mypy-boto3-codecommit (>=1.28.0,<1.29.0)"] 125 | codedeploy = ["mypy-boto3-codedeploy (>=1.28.0,<1.29.0)"] 126 | codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.28.0,<1.29.0)"] 127 | codeguru-security = ["mypy-boto3-codeguru-security (>=1.28.0,<1.29.0)"] 128 | codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.28.0,<1.29.0)"] 129 | codepipeline = ["mypy-boto3-codepipeline (>=1.28.0,<1.29.0)"] 130 | codestar = ["mypy-boto3-codestar (>=1.28.0,<1.29.0)"] 131 | codestar-connections = ["mypy-boto3-codestar-connections (>=1.28.0,<1.29.0)"] 132 | codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.28.0,<1.29.0)"] 133 | cognito-identity = ["mypy-boto3-cognito-identity (>=1.28.0,<1.29.0)"] 134 | cognito-idp = ["mypy-boto3-cognito-idp (>=1.28.0,<1.29.0)"] 135 | cognito-sync = ["mypy-boto3-cognito-sync (>=1.28.0,<1.29.0)"] 136 | comprehend = ["mypy-boto3-comprehend (>=1.28.0,<1.29.0)"] 137 | comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.28.0,<1.29.0)"] 138 | compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.28.0,<1.29.0)"] 139 | config = ["mypy-boto3-config (>=1.28.0,<1.29.0)"] 140 | connect = ["mypy-boto3-connect (>=1.28.0,<1.29.0)"] 141 | connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.28.0,<1.29.0)"] 142 | connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.28.0,<1.29.0)"] 143 | connectcases = ["mypy-boto3-connectcases (>=1.28.0,<1.29.0)"] 144 | connectparticipant = ["mypy-boto3-connectparticipant (>=1.28.0,<1.29.0)"] 145 | controltower = ["mypy-boto3-controltower (>=1.28.0,<1.29.0)"] 146 | cur = ["mypy-boto3-cur (>=1.28.0,<1.29.0)"] 147 | customer-profiles = ["mypy-boto3-customer-profiles (>=1.28.0,<1.29.0)"] 148 | databrew = ["mypy-boto3-databrew (>=1.28.0,<1.29.0)"] 149 | dataexchange = ["mypy-boto3-dataexchange (>=1.28.0,<1.29.0)"] 150 | datapipeline = ["mypy-boto3-datapipeline (>=1.28.0,<1.29.0)"] 151 | datasync = ["mypy-boto3-datasync (>=1.28.0,<1.29.0)"] 152 | datazone = ["mypy-boto3-datazone 
(>=1.28.0,<1.29.0)"] 153 | dax = ["mypy-boto3-dax (>=1.28.0,<1.29.0)"] 154 | detective = ["mypy-boto3-detective (>=1.28.0,<1.29.0)"] 155 | devicefarm = ["mypy-boto3-devicefarm (>=1.28.0,<1.29.0)"] 156 | devops-guru = ["mypy-boto3-devops-guru (>=1.28.0,<1.29.0)"] 157 | directconnect = ["mypy-boto3-directconnect (>=1.28.0,<1.29.0)"] 158 | discovery = ["mypy-boto3-discovery (>=1.28.0,<1.29.0)"] 159 | dlm = ["mypy-boto3-dlm (>=1.28.0,<1.29.0)"] 160 | dms = ["mypy-boto3-dms (>=1.28.0,<1.29.0)"] 161 | docdb = ["mypy-boto3-docdb (>=1.28.0,<1.29.0)"] 162 | docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.28.0,<1.29.0)"] 163 | drs = ["mypy-boto3-drs (>=1.28.0,<1.29.0)"] 164 | ds = ["mypy-boto3-ds (>=1.28.0,<1.29.0)"] 165 | dynamodb = ["mypy-boto3-dynamodb (>=1.28.0,<1.29.0)"] 166 | dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.28.0,<1.29.0)"] 167 | ebs = ["mypy-boto3-ebs (>=1.28.0,<1.29.0)"] 168 | ec2 = ["mypy-boto3-ec2 (>=1.28.0,<1.29.0)"] 169 | ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.28.0,<1.29.0)"] 170 | ecr = ["mypy-boto3-ecr (>=1.28.0,<1.29.0)"] 171 | ecr-public = ["mypy-boto3-ecr-public (>=1.28.0,<1.29.0)"] 172 | ecs = ["mypy-boto3-ecs (>=1.28.0,<1.29.0)"] 173 | efs = ["mypy-boto3-efs (>=1.28.0,<1.29.0)"] 174 | eks = ["mypy-boto3-eks (>=1.28.0,<1.29.0)"] 175 | elastic-inference = ["mypy-boto3-elastic-inference (>=1.28.0,<1.29.0)"] 176 | elasticache = ["mypy-boto3-elasticache (>=1.28.0,<1.29.0)"] 177 | elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.28.0,<1.29.0)"] 178 | elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.28.0,<1.29.0)"] 179 | elb = ["mypy-boto3-elb (>=1.28.0,<1.29.0)"] 180 | elbv2 = ["mypy-boto3-elbv2 (>=1.28.0,<1.29.0)"] 181 | emr = ["mypy-boto3-emr (>=1.28.0,<1.29.0)"] 182 | emr-containers = ["mypy-boto3-emr-containers (>=1.28.0,<1.29.0)"] 183 | emr-serverless = ["mypy-boto3-emr-serverless (>=1.28.0,<1.29.0)"] 184 | entityresolution = ["mypy-boto3-entityresolution (>=1.28.0,<1.29.0)"] 185 | es = ["mypy-boto3-es (>=1.28.0,<1.29.0)"] 186 | essential = ["mypy-boto3-cloudformation (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodb (>=1.28.0,<1.29.0)", "mypy-boto3-ec2 (>=1.28.0,<1.29.0)", "mypy-boto3-lambda (>=1.28.0,<1.29.0)", "mypy-boto3-rds (>=1.28.0,<1.29.0)", "mypy-boto3-s3 (>=1.28.0,<1.29.0)", "mypy-boto3-sqs (>=1.28.0,<1.29.0)"] 187 | events = ["mypy-boto3-events (>=1.28.0,<1.29.0)"] 188 | evidently = ["mypy-boto3-evidently (>=1.28.0,<1.29.0)"] 189 | finspace = ["mypy-boto3-finspace (>=1.28.0,<1.29.0)"] 190 | finspace-data = ["mypy-boto3-finspace-data (>=1.28.0,<1.29.0)"] 191 | firehose = ["mypy-boto3-firehose (>=1.28.0,<1.29.0)"] 192 | fis = ["mypy-boto3-fis (>=1.28.0,<1.29.0)"] 193 | fms = ["mypy-boto3-fms (>=1.28.0,<1.29.0)"] 194 | forecast = ["mypy-boto3-forecast (>=1.28.0,<1.29.0)"] 195 | forecastquery = ["mypy-boto3-forecastquery (>=1.28.0,<1.29.0)"] 196 | frauddetector = ["mypy-boto3-frauddetector (>=1.28.0,<1.29.0)"] 197 | fsx = ["mypy-boto3-fsx (>=1.28.0,<1.29.0)"] 198 | gamelift = ["mypy-boto3-gamelift (>=1.28.0,<1.29.0)"] 199 | glacier = ["mypy-boto3-glacier (>=1.28.0,<1.29.0)"] 200 | globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.28.0,<1.29.0)"] 201 | glue = ["mypy-boto3-glue (>=1.28.0,<1.29.0)"] 202 | grafana = ["mypy-boto3-grafana (>=1.28.0,<1.29.0)"] 203 | greengrass = ["mypy-boto3-greengrass (>=1.28.0,<1.29.0)"] 204 | greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.28.0,<1.29.0)"] 205 | groundstation = ["mypy-boto3-groundstation (>=1.28.0,<1.29.0)"] 206 | guardduty = ["mypy-boto3-guardduty (>=1.28.0,<1.29.0)"] 207 | 
health = ["mypy-boto3-health (>=1.28.0,<1.29.0)"] 208 | healthlake = ["mypy-boto3-healthlake (>=1.28.0,<1.29.0)"] 209 | honeycode = ["mypy-boto3-honeycode (>=1.28.0,<1.29.0)"] 210 | iam = ["mypy-boto3-iam (>=1.28.0,<1.29.0)"] 211 | identitystore = ["mypy-boto3-identitystore (>=1.28.0,<1.29.0)"] 212 | imagebuilder = ["mypy-boto3-imagebuilder (>=1.28.0,<1.29.0)"] 213 | importexport = ["mypy-boto3-importexport (>=1.28.0,<1.29.0)"] 214 | inspector = ["mypy-boto3-inspector (>=1.28.0,<1.29.0)"] 215 | inspector2 = ["mypy-boto3-inspector2 (>=1.28.0,<1.29.0)"] 216 | internetmonitor = ["mypy-boto3-internetmonitor (>=1.28.0,<1.29.0)"] 217 | iot = ["mypy-boto3-iot (>=1.28.0,<1.29.0)"] 218 | iot-data = ["mypy-boto3-iot-data (>=1.28.0,<1.29.0)"] 219 | iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.28.0,<1.29.0)"] 220 | iot-roborunner = ["mypy-boto3-iot-roborunner (>=1.28.0,<1.29.0)"] 221 | iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.28.0,<1.29.0)"] 222 | iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.28.0,<1.29.0)"] 223 | iotanalytics = ["mypy-boto3-iotanalytics (>=1.28.0,<1.29.0)"] 224 | iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.28.0,<1.29.0)"] 225 | iotevents = ["mypy-boto3-iotevents (>=1.28.0,<1.29.0)"] 226 | iotevents-data = ["mypy-boto3-iotevents-data (>=1.28.0,<1.29.0)"] 227 | iotfleethub = ["mypy-boto3-iotfleethub (>=1.28.0,<1.29.0)"] 228 | iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.28.0,<1.29.0)"] 229 | iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.28.0,<1.29.0)"] 230 | iotsitewise = ["mypy-boto3-iotsitewise (>=1.28.0,<1.29.0)"] 231 | iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.28.0,<1.29.0)"] 232 | iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.28.0,<1.29.0)"] 233 | iotwireless = ["mypy-boto3-iotwireless (>=1.28.0,<1.29.0)"] 234 | ivs = ["mypy-boto3-ivs (>=1.28.0,<1.29.0)"] 235 | ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.28.0,<1.29.0)"] 236 | ivschat = ["mypy-boto3-ivschat (>=1.28.0,<1.29.0)"] 237 | kafka = ["mypy-boto3-kafka (>=1.28.0,<1.29.0)"] 238 | kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.28.0,<1.29.0)"] 239 | kendra = ["mypy-boto3-kendra (>=1.28.0,<1.29.0)"] 240 | kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.28.0,<1.29.0)"] 241 | keyspaces = ["mypy-boto3-keyspaces (>=1.28.0,<1.29.0)"] 242 | kinesis = ["mypy-boto3-kinesis (>=1.28.0,<1.29.0)"] 243 | kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.28.0,<1.29.0)"] 244 | kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.28.0,<1.29.0)"] 245 | kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.28.0,<1.29.0)"] 246 | kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.28.0,<1.29.0)"] 247 | kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.28.0,<1.29.0)"] 248 | kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.28.0,<1.29.0)"] 249 | kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.28.0,<1.29.0)"] 250 | kms = ["mypy-boto3-kms (>=1.28.0,<1.29.0)"] 251 | lakeformation = ["mypy-boto3-lakeformation (>=1.28.0,<1.29.0)"] 252 | lambda = ["mypy-boto3-lambda (>=1.28.0,<1.29.0)"] 253 | lex-models = ["mypy-boto3-lex-models (>=1.28.0,<1.29.0)"] 254 | lex-runtime = ["mypy-boto3-lex-runtime (>=1.28.0,<1.29.0)"] 255 | lexv2-models = ["mypy-boto3-lexv2-models (>=1.28.0,<1.29.0)"] 256 | lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.28.0,<1.29.0)"] 257 | license-manager = ["mypy-boto3-license-manager (>=1.28.0,<1.29.0)"] 258 | license-manager-linux-subscriptions = 
["mypy-boto3-license-manager-linux-subscriptions (>=1.28.0,<1.29.0)"] 259 | license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.28.0,<1.29.0)"] 260 | lightsail = ["mypy-boto3-lightsail (>=1.28.0,<1.29.0)"] 261 | location = ["mypy-boto3-location (>=1.28.0,<1.29.0)"] 262 | logs = ["mypy-boto3-logs (>=1.28.0,<1.29.0)"] 263 | lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.28.0,<1.29.0)"] 264 | lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.28.0,<1.29.0)"] 265 | lookoutvision = ["mypy-boto3-lookoutvision (>=1.28.0,<1.29.0)"] 266 | m2 = ["mypy-boto3-m2 (>=1.28.0,<1.29.0)"] 267 | machinelearning = ["mypy-boto3-machinelearning (>=1.28.0,<1.29.0)"] 268 | macie = ["mypy-boto3-macie (>=1.28.0,<1.29.0)"] 269 | macie2 = ["mypy-boto3-macie2 (>=1.28.0,<1.29.0)"] 270 | managedblockchain = ["mypy-boto3-managedblockchain (>=1.28.0,<1.29.0)"] 271 | managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.28.0,<1.29.0)"] 272 | marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.28.0,<1.29.0)"] 273 | marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.28.0,<1.29.0)"] 274 | marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.28.0,<1.29.0)"] 275 | mediaconnect = ["mypy-boto3-mediaconnect (>=1.28.0,<1.29.0)"] 276 | mediaconvert = ["mypy-boto3-mediaconvert (>=1.28.0,<1.29.0)"] 277 | medialive = ["mypy-boto3-medialive (>=1.28.0,<1.29.0)"] 278 | mediapackage = ["mypy-boto3-mediapackage (>=1.28.0,<1.29.0)"] 279 | mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.28.0,<1.29.0)"] 280 | mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.28.0,<1.29.0)"] 281 | mediastore = ["mypy-boto3-mediastore (>=1.28.0,<1.29.0)"] 282 | mediastore-data = ["mypy-boto3-mediastore-data (>=1.28.0,<1.29.0)"] 283 | mediatailor = ["mypy-boto3-mediatailor (>=1.28.0,<1.29.0)"] 284 | medical-imaging = ["mypy-boto3-medical-imaging (>=1.28.0,<1.29.0)"] 285 | memorydb = ["mypy-boto3-memorydb (>=1.28.0,<1.29.0)"] 286 | meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.28.0,<1.29.0)"] 287 | mgh = ["mypy-boto3-mgh (>=1.28.0,<1.29.0)"] 288 | mgn = ["mypy-boto3-mgn (>=1.28.0,<1.29.0)"] 289 | migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.28.0,<1.29.0)"] 290 | migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.28.0,<1.29.0)"] 291 | migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.28.0,<1.29.0)"] 292 | migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.28.0,<1.29.0)"] 293 | mobile = ["mypy-boto3-mobile (>=1.28.0,<1.29.0)"] 294 | mq = ["mypy-boto3-mq (>=1.28.0,<1.29.0)"] 295 | mturk = ["mypy-boto3-mturk (>=1.28.0,<1.29.0)"] 296 | mwaa = ["mypy-boto3-mwaa (>=1.28.0,<1.29.0)"] 297 | neptune = ["mypy-boto3-neptune (>=1.28.0,<1.29.0)"] 298 | neptunedata = ["mypy-boto3-neptunedata (>=1.28.0,<1.29.0)"] 299 | network-firewall = ["mypy-boto3-network-firewall (>=1.28.0,<1.29.0)"] 300 | networkmanager = ["mypy-boto3-networkmanager (>=1.28.0,<1.29.0)"] 301 | nimble = ["mypy-boto3-nimble (>=1.28.0,<1.29.0)"] 302 | oam = ["mypy-boto3-oam (>=1.28.0,<1.29.0)"] 303 | omics = ["mypy-boto3-omics (>=1.28.0,<1.29.0)"] 304 | opensearch = ["mypy-boto3-opensearch (>=1.28.0,<1.29.0)"] 305 | opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.28.0,<1.29.0)"] 306 | opsworks = ["mypy-boto3-opsworks (>=1.28.0,<1.29.0)"] 307 | opsworkscm = ["mypy-boto3-opsworkscm (>=1.28.0,<1.29.0)"] 308 | organizations = ["mypy-boto3-organizations (>=1.28.0,<1.29.0)"] 309 | osis = ["mypy-boto3-osis 
(>=1.28.0,<1.29.0)"] 310 | outposts = ["mypy-boto3-outposts (>=1.28.0,<1.29.0)"] 311 | panorama = ["mypy-boto3-panorama (>=1.28.0,<1.29.0)"] 312 | payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.28.0,<1.29.0)"] 313 | payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.28.0,<1.29.0)"] 314 | pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.28.0,<1.29.0)"] 315 | personalize = ["mypy-boto3-personalize (>=1.28.0,<1.29.0)"] 316 | personalize-events = ["mypy-boto3-personalize-events (>=1.28.0,<1.29.0)"] 317 | personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.28.0,<1.29.0)"] 318 | pi = ["mypy-boto3-pi (>=1.28.0,<1.29.0)"] 319 | pinpoint = ["mypy-boto3-pinpoint (>=1.28.0,<1.29.0)"] 320 | pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.28.0,<1.29.0)"] 321 | pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.28.0,<1.29.0)"] 322 | pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.28.0,<1.29.0)"] 323 | pipes = ["mypy-boto3-pipes (>=1.28.0,<1.29.0)"] 324 | polly = ["mypy-boto3-polly (>=1.28.0,<1.29.0)"] 325 | pricing = ["mypy-boto3-pricing (>=1.28.0,<1.29.0)"] 326 | privatenetworks = ["mypy-boto3-privatenetworks (>=1.28.0,<1.29.0)"] 327 | proton = ["mypy-boto3-proton (>=1.28.0,<1.29.0)"] 328 | qldb = ["mypy-boto3-qldb (>=1.28.0,<1.29.0)"] 329 | qldb-session = ["mypy-boto3-qldb-session (>=1.28.0,<1.29.0)"] 330 | quicksight = ["mypy-boto3-quicksight (>=1.28.0,<1.29.0)"] 331 | ram = ["mypy-boto3-ram (>=1.28.0,<1.29.0)"] 332 | rbin = ["mypy-boto3-rbin (>=1.28.0,<1.29.0)"] 333 | rds = ["mypy-boto3-rds (>=1.28.0,<1.29.0)"] 334 | rds-data = ["mypy-boto3-rds-data (>=1.28.0,<1.29.0)"] 335 | redshift = ["mypy-boto3-redshift (>=1.28.0,<1.29.0)"] 336 | redshift-data = ["mypy-boto3-redshift-data (>=1.28.0,<1.29.0)"] 337 | redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.28.0,<1.29.0)"] 338 | rekognition = ["mypy-boto3-rekognition (>=1.28.0,<1.29.0)"] 339 | resiliencehub = ["mypy-boto3-resiliencehub (>=1.28.0,<1.29.0)"] 340 | resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.28.0,<1.29.0)"] 341 | resource-groups = ["mypy-boto3-resource-groups (>=1.28.0,<1.29.0)"] 342 | resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.28.0,<1.29.0)"] 343 | robomaker = ["mypy-boto3-robomaker (>=1.28.0,<1.29.0)"] 344 | rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.28.0,<1.29.0)"] 345 | route53 = ["mypy-boto3-route53 (>=1.28.0,<1.29.0)"] 346 | route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.28.0,<1.29.0)"] 347 | route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.28.0,<1.29.0)"] 348 | route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.28.0,<1.29.0)"] 349 | route53domains = ["mypy-boto3-route53domains (>=1.28.0,<1.29.0)"] 350 | route53resolver = ["mypy-boto3-route53resolver (>=1.28.0,<1.29.0)"] 351 | rum = ["mypy-boto3-rum (>=1.28.0,<1.29.0)"] 352 | s3 = ["mypy-boto3-s3 (>=1.28.0,<1.29.0)"] 353 | s3control = ["mypy-boto3-s3control (>=1.28.0,<1.29.0)"] 354 | s3outposts = ["mypy-boto3-s3outposts (>=1.28.0,<1.29.0)"] 355 | sagemaker = ["mypy-boto3-sagemaker (>=1.28.0,<1.29.0)"] 356 | sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.28.0,<1.29.0)"] 357 | sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.28.0,<1.29.0)"] 358 | sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.28.0,<1.29.0)"] 359 | sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.28.0,<1.29.0)"] 360 | sagemaker-metrics = 
["mypy-boto3-sagemaker-metrics (>=1.28.0,<1.29.0)"] 361 | sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.28.0,<1.29.0)"] 362 | savingsplans = ["mypy-boto3-savingsplans (>=1.28.0,<1.29.0)"] 363 | scheduler = ["mypy-boto3-scheduler (>=1.28.0,<1.29.0)"] 364 | schemas = ["mypy-boto3-schemas (>=1.28.0,<1.29.0)"] 365 | sdb = ["mypy-boto3-sdb (>=1.28.0,<1.29.0)"] 366 | secretsmanager = ["mypy-boto3-secretsmanager (>=1.28.0,<1.29.0)"] 367 | securityhub = ["mypy-boto3-securityhub (>=1.28.0,<1.29.0)"] 368 | securitylake = ["mypy-boto3-securitylake (>=1.28.0,<1.29.0)"] 369 | serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.28.0,<1.29.0)"] 370 | service-quotas = ["mypy-boto3-service-quotas (>=1.28.0,<1.29.0)"] 371 | servicecatalog = ["mypy-boto3-servicecatalog (>=1.28.0,<1.29.0)"] 372 | servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.28.0,<1.29.0)"] 373 | servicediscovery = ["mypy-boto3-servicediscovery (>=1.28.0,<1.29.0)"] 374 | ses = ["mypy-boto3-ses (>=1.28.0,<1.29.0)"] 375 | sesv2 = ["mypy-boto3-sesv2 (>=1.28.0,<1.29.0)"] 376 | shield = ["mypy-boto3-shield (>=1.28.0,<1.29.0)"] 377 | signer = ["mypy-boto3-signer (>=1.28.0,<1.29.0)"] 378 | simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.28.0,<1.29.0)"] 379 | sms = ["mypy-boto3-sms (>=1.28.0,<1.29.0)"] 380 | sms-voice = ["mypy-boto3-sms-voice (>=1.28.0,<1.29.0)"] 381 | snow-device-management = ["mypy-boto3-snow-device-management (>=1.28.0,<1.29.0)"] 382 | snowball = ["mypy-boto3-snowball (>=1.28.0,<1.29.0)"] 383 | sns = ["mypy-boto3-sns (>=1.28.0,<1.29.0)"] 384 | sqs = ["mypy-boto3-sqs (>=1.28.0,<1.29.0)"] 385 | ssm = ["mypy-boto3-ssm (>=1.28.0,<1.29.0)"] 386 | ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.28.0,<1.29.0)"] 387 | ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.28.0,<1.29.0)"] 388 | ssm-sap = ["mypy-boto3-ssm-sap (>=1.28.0,<1.29.0)"] 389 | sso = ["mypy-boto3-sso (>=1.28.0,<1.29.0)"] 390 | sso-admin = ["mypy-boto3-sso-admin (>=1.28.0,<1.29.0)"] 391 | sso-oidc = ["mypy-boto3-sso-oidc (>=1.28.0,<1.29.0)"] 392 | stepfunctions = ["mypy-boto3-stepfunctions (>=1.28.0,<1.29.0)"] 393 | storagegateway = ["mypy-boto3-storagegateway (>=1.28.0,<1.29.0)"] 394 | sts = ["mypy-boto3-sts (>=1.28.0,<1.29.0)"] 395 | support = ["mypy-boto3-support (>=1.28.0,<1.29.0)"] 396 | support-app = ["mypy-boto3-support-app (>=1.28.0,<1.29.0)"] 397 | swf = ["mypy-boto3-swf (>=1.28.0,<1.29.0)"] 398 | synthetics = ["mypy-boto3-synthetics (>=1.28.0,<1.29.0)"] 399 | textract = ["mypy-boto3-textract (>=1.28.0,<1.29.0)"] 400 | timestream-query = ["mypy-boto3-timestream-query (>=1.28.0,<1.29.0)"] 401 | timestream-write = ["mypy-boto3-timestream-write (>=1.28.0,<1.29.0)"] 402 | tnb = ["mypy-boto3-tnb (>=1.28.0,<1.29.0)"] 403 | transcribe = ["mypy-boto3-transcribe (>=1.28.0,<1.29.0)"] 404 | transfer = ["mypy-boto3-transfer (>=1.28.0,<1.29.0)"] 405 | translate = ["mypy-boto3-translate (>=1.28.0,<1.29.0)"] 406 | verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.28.0,<1.29.0)"] 407 | voice-id = ["mypy-boto3-voice-id (>=1.28.0,<1.29.0)"] 408 | vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.28.0,<1.29.0)"] 409 | waf = ["mypy-boto3-waf (>=1.28.0,<1.29.0)"] 410 | waf-regional = ["mypy-boto3-waf-regional (>=1.28.0,<1.29.0)"] 411 | wafv2 = ["mypy-boto3-wafv2 (>=1.28.0,<1.29.0)"] 412 | wellarchitected = ["mypy-boto3-wellarchitected (>=1.28.0,<1.29.0)"] 413 | wisdom = ["mypy-boto3-wisdom (>=1.28.0,<1.29.0)"] 414 | workdocs = ["mypy-boto3-workdocs (>=1.28.0,<1.29.0)"] 415 | worklink = ["mypy-boto3-worklink (>=1.28.0,<1.29.0)"] 416 | workmail = 
["mypy-boto3-workmail (>=1.28.0,<1.29.0)"] 417 | workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.28.0,<1.29.0)"] 418 | workspaces = ["mypy-boto3-workspaces (>=1.28.0,<1.29.0)"] 419 | workspaces-web = ["mypy-boto3-workspaces-web (>=1.28.0,<1.29.0)"] 420 | xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] 421 | 422 | [[package]] 423 | name = "botocore" 424 | version = "1.31.70" 425 | description = "Low-level, data-driven core of boto 3." 426 | optional = false 427 | python-versions = ">= 3.7" 428 | files = [ 429 | {file = "botocore-1.31.70-py3-none-any.whl", hash = "sha256:049bbf526c95b6169f59617a5ff1b0061cb7a0e44992b8c27c6955832b383988"}, 430 | {file = "botocore-1.31.70.tar.gz", hash = "sha256:5f49def4ec2e4216dd0195d23d9811027d02ee6c8a37b031e2b2fe38e8c77ddc"}, 431 | ] 432 | 433 | [package.dependencies] 434 | jmespath = ">=0.7.1,<2.0.0" 435 | python-dateutil = ">=2.1,<3.0.0" 436 | urllib3 = [ 437 | {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, 438 | {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}, 439 | ] 440 | 441 | [package.extras] 442 | crt = ["awscrt (==0.16.26)"] 443 | 444 | [[package]] 445 | name = "botocore-stubs" 446 | version = "1.31.70" 447 | description = "Type annotations and code completion for botocore" 448 | optional = false 449 | python-versions = ">=3.7,<4.0" 450 | files = [ 451 | {file = "botocore_stubs-1.31.70-py3-none-any.whl", hash = "sha256:205b952cef00bfdf5e2b1a7fb7ef40c7008729af7934663703ef006f9a420a29"}, 452 | {file = "botocore_stubs-1.31.70.tar.gz", hash = "sha256:5a7f77649bf54d326461cb380b935338ed41370b0330a7495b3a82f6277369be"}, 453 | ] 454 | 455 | [package.dependencies] 456 | types-awscrt = "*" 457 | typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} 458 | 459 | [[package]] 460 | name = "click" 461 | version = "7.1.2" 462 | description = "Composable command line interface toolkit" 463 | optional = false 464 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 465 | files = [ 466 | {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, 467 | {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, 468 | ] 469 | 470 | [[package]] 471 | name = "colorama" 472 | version = "0.4.6" 473 | description = "Cross-platform colored terminal text." 
474 | optional = false 475 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 476 | files = [ 477 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 478 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 479 | ] 480 | 481 | [[package]] 482 | name = "coverage" 483 | version = "7.2.7" 484 | description = "Code coverage measurement for Python" 485 | optional = false 486 | python-versions = ">=3.7" 487 | files = [ 488 | {file = "coverage-7.2.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d39b5b4f2a66ccae8b7263ac3c8170994b65266797fb96cbbfd3fb5b23921db8"}, 489 | {file = "coverage-7.2.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d040ef7c9859bb11dfeb056ff5b3872436e3b5e401817d87a31e1750b9ae2fb"}, 490 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba90a9563ba44a72fda2e85302c3abc71c5589cea608ca16c22b9804262aaeb6"}, 491 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d9405291c6928619403db1d10bd07888888ec1abcbd9748fdaa971d7d661b2"}, 492 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31563e97dae5598556600466ad9beea39fb04e0229e61c12eaa206e0aa202063"}, 493 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ebba1cd308ef115925421d3e6a586e655ca5a77b5bf41e02eb0e4562a111f2d1"}, 494 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cb017fd1b2603ef59e374ba2063f593abe0fc45f2ad9abdde5b4d83bd922a353"}, 495 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62a5c7dad11015c66fbb9d881bc4caa5b12f16292f857842d9d1871595f4495"}, 496 | {file = "coverage-7.2.7-cp310-cp310-win32.whl", hash = "sha256:ee57190f24fba796e36bb6d3aa8a8783c643d8fa9760c89f7a98ab5455fbf818"}, 497 | {file = "coverage-7.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:f75f7168ab25dd93110c8a8117a22450c19976afbc44234cbf71481094c1b850"}, 498 | {file = "coverage-7.2.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f"}, 499 | {file = "coverage-7.2.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe"}, 500 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3"}, 501 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f"}, 502 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb"}, 503 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833"}, 504 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97"}, 505 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a"}, 506 | {file = "coverage-7.2.7-cp311-cp311-win32.whl", hash = "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a"}, 507 | {file = "coverage-7.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562"}, 508 | {file = "coverage-7.2.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4"}, 509 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4"}, 510 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01"}, 511 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6"}, 512 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d"}, 513 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de"}, 514 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d"}, 515 | {file = "coverage-7.2.7-cp312-cp312-win32.whl", hash = "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511"}, 516 | {file = "coverage-7.2.7-cp312-cp312-win_amd64.whl", hash = "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3"}, 517 | {file = "coverage-7.2.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:58c2ccc2f00ecb51253cbe5d8d7122a34590fac9646a960d1430d5b15321d95f"}, 518 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d22656368f0e6189e24722214ed8d66b8022db19d182927b9a248a2a8a2f67eb"}, 519 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a895fcc7b15c3fc72beb43cdcbdf0ddb7d2ebc959edac9cef390b0d14f39f8a9"}, 520 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84606b74eb7de6ff581a7915e2dab7a28a0517fbe1c9239eb227e1354064dcd"}, 521 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0a5f9e1dbd7fbe30196578ca36f3fba75376fb99888c395c5880b355e2875f8a"}, 522 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:419bfd2caae268623dd469eff96d510a920c90928b60f2073d79f8fe2bbc5959"}, 523 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2aee274c46590717f38ae5e4650988d1af340fe06167546cc32fe2f58ed05b02"}, 524 | {file = "coverage-7.2.7-cp37-cp37m-win32.whl", hash = "sha256:61b9a528fb348373c433e8966535074b802c7a5d7f23c4f421e6c6e2f1697a6f"}, 525 | {file = "coverage-7.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:b1c546aca0ca4d028901d825015dc8e4d56aac4b541877690eb76490f1dc8ed0"}, 526 | {file = "coverage-7.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:54b896376ab563bd38453cecb813c295cf347cf5906e8b41d340b0321a5433e5"}, 527 | {file = "coverage-7.2.7-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:3d376df58cc111dc8e21e3b6e24606b5bb5dee6024f46a5abca99124b2229ef5"}, 528 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e330fc79bd7207e46c7d7fd2bb4af2963f5f635703925543a70b99574b0fea9"}, 529 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e9d683426464e4a252bf70c3498756055016f99ddaec3774bf368e76bbe02b6"}, 530 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d13c64ee2d33eccf7437961b6ea7ad8673e2be040b4f7fd4fd4d4d28d9ccb1e"}, 531 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b7aa5f8a41217360e600da646004f878250a0d6738bcdc11a0a39928d7dc2050"}, 532 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fa03bce9bfbeeef9f3b160a8bed39a221d82308b4152b27d82d8daa7041fee5"}, 533 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:245167dd26180ab4c91d5e1496a30be4cd721a5cf2abf52974f965f10f11419f"}, 534 | {file = "coverage-7.2.7-cp38-cp38-win32.whl", hash = "sha256:d2c2db7fd82e9b72937969bceac4d6ca89660db0a0967614ce2481e81a0b771e"}, 535 | {file = "coverage-7.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:2e07b54284e381531c87f785f613b833569c14ecacdcb85d56b25c4622c16c3c"}, 536 | {file = "coverage-7.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:537891ae8ce59ef63d0123f7ac9e2ae0fc8b72c7ccbe5296fec45fd68967b6c9"}, 537 | {file = "coverage-7.2.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06fb182e69f33f6cd1d39a6c597294cff3143554b64b9825d1dc69d18cc2fff2"}, 538 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201e7389591af40950a6480bd9edfa8ed04346ff80002cec1a66cac4549c1ad7"}, 539 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f6951407391b639504e3b3be51b7ba5f3528adbf1a8ac3302b687ecababf929e"}, 540 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1"}, 541 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b29019c76039dc3c0fd815c41392a044ce555d9bcdd38b0fb60fb4cd8e475ba9"}, 542 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:81c13a1fc7468c40f13420732805a4c38a105d89848b7c10af65a90beff25250"}, 543 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:975d70ab7e3c80a3fe86001d8751f6778905ec723f5b110aed1e450da9d4b7f2"}, 544 | {file = "coverage-7.2.7-cp39-cp39-win32.whl", hash = "sha256:7ee7d9d4822c8acc74a5e26c50604dff824710bc8de424904c0982e25c39c6cb"}, 545 | {file = "coverage-7.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb393e5ebc85245347950143969b241d08b52b88a3dc39479822e073a1a8eb27"}, 546 | {file = "coverage-7.2.7-pp37.pp38.pp39-none-any.whl", hash = "sha256:b7b4c971f05e6ae490fef852c218b0e79d4e52f79ef0c8475566584a8fb3e01d"}, 547 | {file = "coverage-7.2.7.tar.gz", hash = "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59"}, 548 | ] 549 | 550 | [package.dependencies] 551 | tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} 552 | 553 | [package.extras] 554 | toml = ["tomli"] 555 | 556 | [[package]] 557 | name = "exceptiongroup" 558 | version = "1.1.3" 559 | 
description = "Backport of PEP 654 (exception groups)" 560 | optional = false 561 | python-versions = ">=3.7" 562 | files = [ 563 | {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, 564 | {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, 565 | ] 566 | 567 | [package.extras] 568 | test = ["pytest (>=6)"] 569 | 570 | [[package]] 571 | name = "importlib-metadata" 572 | version = "6.7.0" 573 | description = "Read metadata from Python packages" 574 | optional = false 575 | python-versions = ">=3.7" 576 | files = [ 577 | {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, 578 | {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, 579 | ] 580 | 581 | [package.dependencies] 582 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 583 | zipp = ">=0.5" 584 | 585 | [package.extras] 586 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 587 | perf = ["ipython"] 588 | testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] 589 | 590 | [[package]] 591 | name = "iniconfig" 592 | version = "2.0.0" 593 | description = "brain-dead simple config-ini parsing" 594 | optional = false 595 | python-versions = ">=3.7" 596 | files = [ 597 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 598 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 599 | ] 600 | 601 | [[package]] 602 | name = "jmespath" 603 | version = "1.0.1" 604 | description = "JSON Matching Expressions" 605 | optional = false 606 | python-versions = ">=3.7" 607 | files = [ 608 | {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, 609 | {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, 610 | ] 611 | 612 | [[package]] 613 | name = "markdown-it-py" 614 | version = "2.2.0" 615 | description = "Python port of markdown-it. Markdown parsing, done right!" 
616 | optional = false 617 | python-versions = ">=3.7" 618 | files = [ 619 | {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"}, 620 | {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"}, 621 | ] 622 | 623 | [package.dependencies] 624 | mdurl = ">=0.1,<1.0" 625 | typing_extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} 626 | 627 | [package.extras] 628 | benchmarking = ["psutil", "pytest", "pytest-benchmark"] 629 | code-style = ["pre-commit (>=3.0,<4.0)"] 630 | compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] 631 | linkify = ["linkify-it-py (>=1,<3)"] 632 | plugins = ["mdit-py-plugins"] 633 | profiling = ["gprof2dot"] 634 | rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] 635 | testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] 636 | 637 | [[package]] 638 | name = "mdurl" 639 | version = "0.1.2" 640 | description = "Markdown URL utilities" 641 | optional = false 642 | python-versions = ">=3.7" 643 | files = [ 644 | {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, 645 | {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, 646 | ] 647 | 648 | [[package]] 649 | name = "mypy-boto3-s3" 650 | version = "1.28.55" 651 | description = "Type annotations for boto3.S3 1.28.55 service generated with mypy-boto3-builder 7.19.0" 652 | optional = false 653 | python-versions = ">=3.7" 654 | files = [ 655 | {file = "mypy-boto3-s3-1.28.55.tar.gz", hash = "sha256:b008809f448e74075012d4fc54b0176de0b4f49bc38e39de30ca0e764eb75056"}, 656 | {file = "mypy_boto3_s3-1.28.55-py3-none-any.whl", hash = "sha256:11a3db97398973d4ae28489b94c010778a0a5c65f99e00268456c3fea67eca79"}, 657 | ] 658 | 659 | [package.dependencies] 660 | typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} 661 | 662 | [[package]] 663 | name = "packaging" 664 | version = "23.2" 665 | description = "Core utilities for Python packages" 666 | optional = false 667 | python-versions = ">=3.7" 668 | files = [ 669 | {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, 670 | {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, 671 | ] 672 | 673 | [[package]] 674 | name = "pluggy" 675 | version = "1.2.0" 676 | description = "plugin and hook calling mechanisms for python" 677 | optional = false 678 | python-versions = ">=3.7" 679 | files = [ 680 | {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, 681 | {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, 682 | ] 683 | 684 | [package.dependencies] 685 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 686 | 687 | [package.extras] 688 | dev = ["pre-commit", "tox"] 689 | testing = ["pytest", "pytest-benchmark"] 690 | 691 | [[package]] 692 | name = "pyfakefs" 693 | version = "5.1.0" 694 | description = "pyfakefs implements a fake file system that mocks the Python file system modules." 
695 | optional = false 696 | python-versions = ">=3.7" 697 | files = [ 698 | {file = "pyfakefs-5.1.0-py3-none-any.whl", hash = "sha256:e6f34a8224b41f1b1ab25aa8d430121dac42e3c6e981e01eae76b3343fba47d0"}, 699 | {file = "pyfakefs-5.1.0.tar.gz", hash = "sha256:316c6026640d14a6b4fbde71fd9674576d1b5710deda8fabde8aad51d785dbc3"}, 700 | ] 701 | 702 | [[package]] 703 | name = "pygments" 704 | version = "2.16.1" 705 | description = "Pygments is a syntax highlighting package written in Python." 706 | optional = false 707 | python-versions = ">=3.7" 708 | files = [ 709 | {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, 710 | {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, 711 | ] 712 | 713 | [package.extras] 714 | plugins = ["importlib-metadata"] 715 | 716 | [[package]] 717 | name = "pytest" 718 | version = "7.2.2" 719 | description = "pytest: simple powerful testing with Python" 720 | optional = false 721 | python-versions = ">=3.7" 722 | files = [ 723 | {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, 724 | {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, 725 | ] 726 | 727 | [package.dependencies] 728 | attrs = ">=19.2.0" 729 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 730 | exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} 731 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 732 | iniconfig = "*" 733 | packaging = "*" 734 | pluggy = ">=0.12,<2.0" 735 | tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} 736 | 737 | [package.extras] 738 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] 739 | 740 | [[package]] 741 | name = "pytest-cov" 742 | version = "4.1.0" 743 | description = "Pytest plugin for measuring coverage." 
744 | optional = false 745 | python-versions = ">=3.7" 746 | files = [ 747 | {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, 748 | {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, 749 | ] 750 | 751 | [package.dependencies] 752 | coverage = {version = ">=5.2.1", extras = ["toml"]} 753 | pytest = ">=4.6" 754 | 755 | [package.extras] 756 | testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] 757 | 758 | [[package]] 759 | name = "python-dateutil" 760 | version = "2.8.2" 761 | description = "Extensions to the standard Python datetime module" 762 | optional = false 763 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 764 | files = [ 765 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 766 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 767 | ] 768 | 769 | [package.dependencies] 770 | six = ">=1.5" 771 | 772 | [[package]] 773 | name = "pyyaml" 774 | version = "5.3.1" 775 | description = "YAML parser and emitter for Python" 776 | optional = false 777 | python-versions = "*" 778 | files = [ 779 | {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, 780 | {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, 781 | {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, 782 | {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, 783 | {file = "PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, 784 | {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, 785 | {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, 786 | {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, 787 | {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, 788 | {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, 789 | {file = "PyYAML-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a"}, 790 | {file = "PyYAML-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e"}, 791 | {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, 792 | ] 793 | 794 | [[package]] 795 | name = "rich" 796 | version = "13.6.0" 797 | description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" 798 | optional = false 799 | python-versions = ">=3.7.0" 800 | files = [ 801 | {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"}, 802 | {file = "rich-13.6.0.tar.gz", hash = 
"sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"}, 803 | ] 804 | 805 | [package.dependencies] 806 | markdown-it-py = ">=2.2.0" 807 | pygments = ">=2.13.0,<3.0.0" 808 | typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} 809 | 810 | [package.extras] 811 | jupyter = ["ipywidgets (>=7.5.1,<9)"] 812 | 813 | [[package]] 814 | name = "s3transfer" 815 | version = "0.7.0" 816 | description = "An Amazon S3 Transfer Manager" 817 | optional = false 818 | python-versions = ">= 3.7" 819 | files = [ 820 | {file = "s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a"}, 821 | {file = "s3transfer-0.7.0.tar.gz", hash = "sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e"}, 822 | ] 823 | 824 | [package.dependencies] 825 | botocore = ">=1.12.36,<2.0a.0" 826 | 827 | [package.extras] 828 | crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] 829 | 830 | [[package]] 831 | name = "six" 832 | version = "1.16.0" 833 | description = "Python 2 and 3 compatibility utilities" 834 | optional = false 835 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 836 | files = [ 837 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 838 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 839 | ] 840 | 841 | [[package]] 842 | name = "tomli" 843 | version = "2.0.1" 844 | description = "A lil' TOML parser" 845 | optional = false 846 | python-versions = ">=3.7" 847 | files = [ 848 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 849 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 850 | ] 851 | 852 | [[package]] 853 | name = "types-awscrt" 854 | version = "0.19.3" 855 | description = "Type annotations and code completion for awscrt" 856 | optional = false 857 | python-versions = ">=3.7,<4.0" 858 | files = [ 859 | {file = "types_awscrt-0.19.3-py3-none-any.whl", hash = "sha256:7b55f5a12ccd4407bc8f1e35c69bb40c931f8513ce1ad81a4527fce3989003fd"}, 860 | {file = "types_awscrt-0.19.3.tar.gz", hash = "sha256:9a21caac4287c113dd52665707785c45bb1d3242b7a2b8aeb57c49e9e749a330"}, 861 | ] 862 | 863 | [[package]] 864 | name = "types-s3transfer" 865 | version = "0.7.0" 866 | description = "Type annotations and code completion for s3transfer" 867 | optional = false 868 | python-versions = ">=3.7,<4.0" 869 | files = [ 870 | {file = "types_s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:ae9ed9273465d9f43da8b96307383da410c6b59c3b2464c88d20b578768e97c6"}, 871 | {file = "types_s3transfer-0.7.0.tar.gz", hash = "sha256:aca0f2486d0a3a5037cd5b8f3e20a4522a29579a8dd183281ff0aa1c4e2c8aa7"}, 872 | ] 873 | 874 | [[package]] 875 | name = "typing-extensions" 876 | version = "4.7.1" 877 | description = "Backported and Experimental Type Hints for Python 3.7+" 878 | optional = false 879 | python-versions = ">=3.7" 880 | files = [ 881 | {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, 882 | {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, 883 | ] 884 | 885 | [[package]] 886 | name = "urllib3" 887 | version = "1.26.18" 888 | description = "HTTP library with thread-safe connection pooling, file post, 
and more." 889 | optional = false 890 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 891 | files = [ 892 | {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, 893 | {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, 894 | ] 895 | 896 | [package.extras] 897 | brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] 898 | secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] 899 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 900 | 901 | [[package]] 902 | name = "urllib3" 903 | version = "2.0.7" 904 | description = "HTTP library with thread-safe connection pooling, file post, and more." 905 | optional = false 906 | python-versions = ">=3.7" 907 | files = [ 908 | {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, 909 | {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, 910 | ] 911 | 912 | [package.extras] 913 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] 914 | secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] 915 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] 916 | zstd = ["zstandard (>=0.18.0)"] 917 | 918 | [[package]] 919 | name = "zipp" 920 | version = "3.15.0" 921 | description = "Backport of pathlib-compatible object wrapper for zip files" 922 | optional = false 923 | python-versions = ">=3.7" 924 | files = [ 925 | {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, 926 | {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, 927 | ] 928 | 929 | [package.extras] 930 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 931 | testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] 932 | 933 | [metadata] 934 | lock-version = "2.0" 935 | python-versions = "^3.7" 936 | content-hash = "923c988f79a30772ee1f38990f9a6609f360edcf9d7bf60e77822f2c947e1f26" 937 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "emr-cli" 3 | version = "0.0.16" 4 | description = "A command-line interface for packaging, deploying, and running your PySpark jobs on EMR." 
5 | authors = ["Amazon EMR "] 6 | license = "Apache-2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.7" 11 | click = "^7.1.2" 12 | boto3 = "^1.26.6" 13 | pyyaml = "5.3.1" 14 | rich = "^13.4.2" 15 | importlib-metadata = {version = "6.7.0", python = "3.7"} 16 | 17 | [tool.poetry.group.dev.dependencies] 18 | pytest = "7.2.2" 19 | pytest-cov = "^4.0.0" 20 | pyfakefs = "5.1.0" 21 | boto3-stubs = {extras = ["s3"], version = "^1.28.70"} 22 | 23 | [build-system] 24 | requires = ["poetry-core"] 25 | build-backend = "poetry.core.masonry.api" 26 | 27 | [tool.poetry.scripts] 28 | emr = "emr_cli.emr_cli:cli" 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.3.5 2 | pyarrow==8.0.0 -------------------------------------------------------------------------------- /src/emr_cli/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | from emr_cli.utils import console_log 6 | 7 | DEFAULT_CONFIG_PATH = ".emr/config.yaml" 8 | 9 | 10 | class ConfigReader: 11 | @classmethod 12 | def read(cls): 13 | config = {} 14 | # Look for a config file - if we don't find one, that's fine. :) 15 | p = Path(DEFAULT_CONFIG_PATH) 16 | if not p.is_file(): 17 | return config 18 | 19 | with p.open() as infile: 20 | try: 21 | config = yaml.safe_load(infile) 22 | return config 23 | except yaml.YAMLError as exc: 24 | console_log(f"There was an error parsing the config file: {exc}") 25 | return config 26 | 27 | 28 | class ConfigWriter: 29 | @classmethod 30 | def write(cls, config): 31 | """ 32 | Write the passed config, overwriting any existing config. 33 | """ 34 | p = Path(DEFAULT_CONFIG_PATH) 35 | 36 | p.parent.mkdir(parents=True, exist_ok=True) 37 | 38 | with p.open("w") as outfile: 39 | outfile.write(yaml.dump(config)) 40 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | 4 | class SparkParams: 5 | """ 6 | SparkParams allows deployment packages to specify different sets of 7 | Spark `--conf` parameters based on the environment being deployed to. 8 | """ 9 | 10 | SUPPORTED_ENVIRONMENTS = ["emr_serverless", "emr_ec2", "emr_eks"] 11 | 12 | def __init__( 13 | self, 14 | common_params: Optional[Dict[str, str]] = None, 15 | emr_serverless_params: Optional[Dict[str, str]] = None, 16 | emr_ec2_params: Optional[Dict[str, str]] = None, 17 | emr_eks_params: Optional[Dict[str, str]] = None, 18 | ) -> None: 19 | self._common = common_params or {} 20 | self._environment_params = { 21 | "emr_serverless": emr_serverless_params or {}, 22 | "emr_ec2": emr_ec2_params or {}, 23 | "emr_eks": emr_eks_params or {}, 24 | } 25 | 26 | def params_for(self, deployment_type: str) -> str: 27 | """ 28 | Return a set of string spark-submit parameters for the provided deployment type. 
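Example (hypothetical values; environment-specific parameters override
any overlapping common parameters):

    >>> sp = SparkParams(
    ...     common_params={"spark.executor.cores": "4"},
    ...     emr_serverless_params={"spark.dynamicAllocation.enabled": "false"},
    ... )
    >>> sp.params_for("emr_serverless")
    '--conf spark.executor.cores=4 --conf spark.dynamicAllocation.enabled=false'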
29 | """ 30 | if deployment_type not in self.SUPPORTED_ENVIRONMENTS: 31 | raise ValueError(f"{deployment_type} environment is not supported.") 32 | 33 | conf_items = {} 34 | 35 | for k, v in self._common.items(): 36 | conf_items[k] = v 37 | 38 | for k, v in self._environment_params[deployment_type].items(): 39 | conf_items[k] = v 40 | 41 | return " ".join([f"--conf {k}={v}" for k, v in conf_items.items()]) 42 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_ec2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shlex 3 | import sys 4 | import time 5 | from os.path import join 6 | from typing import List, Optional 7 | 8 | import boto3 9 | from botocore.exceptions import ClientError, WaiterError 10 | from emr_cli.deployments.emr_serverless import DeploymentPackage 11 | from emr_cli.utils import console_log, parse_bucket_uri, print_s3_gz 12 | 13 | LOG_WAITER_DELAY_SEC = 30 14 | 15 | 16 | class Bootstrap: 17 | DEFAULT_S3_POLICY_NAME = "emr-cli-S3Access" 18 | DEFAULT_GLUE_POLICY_NAME = "emr-cli-GlueAccess" 19 | 20 | def __init__( 21 | self, 22 | code_bucket: str, 23 | log_bucket: str, 24 | instance_role_name: str, 25 | job_role_name: str, 26 | ): 27 | self.code_bucket = code_bucket 28 | self.log_bucket = log_bucket or code_bucket 29 | self.instance_role_name = instance_role_name 30 | self.job_role_name = job_role_name 31 | self.s3_client = boto3.client("s3") 32 | self.iam_client = boto3.client("iam") 33 | self.emr_client = boto3.client("emr") 34 | 35 | def create_environment(self): 36 | self._create_s3_buckets() 37 | service_role_arn = self._create_service_role() 38 | 39 | # Make sure the role exists - there can be a tiny lag that will break setting up trust policies. 40 | # Unfortunately, using a waiter or querying or the role didn't help here. 41 | # There's a terraform issue about it here: https://github.com/hashicorp/terraform-provider-aws/issues/8905 42 | # It looks like the fix is just querying or the role, but that didn't work. 
43 | time.sleep(10) 44 | console_log("Waited for IAM role propagation") 45 | 46 | job_role_arn = self._create_runtime_role(service_role_arn) 47 | 48 | # Allow the EC2 instance profile to assume the job role 49 | self.iam_client.put_role_policy( 50 | RoleName=self.instance_role_name, 51 | PolicyName="AssumeRuntimeRole", 52 | PolicyDocument=self._runtime_role_policy(job_role_arn), 53 | ) 54 | 55 | security_config = self._create_security_config() # returns "emr-cli-runtime-roles" 56 | cluster_id = self._create_cluster(security_config, self.instance_role_name) 57 | return { 58 | "cluster_id": cluster_id, 59 | "job_role_arn": job_role_arn, 60 | "code_bucket": self.code_bucket, 61 | "log_bucket": self.log_bucket, 62 | } 63 | 64 | def print_destroy_commands(self, cluster_id: str): 65 | # fmt: off 66 | print(f"aws emr terminate-clusters --cluster-ids {cluster_id}") 67 | print(f"aws emr wait cluster-terminated --cluster-id {cluster_id}") 68 | for bucket in set([self.log_bucket, self.code_bucket]): 69 | print(f"aws s3 rm s3://{bucket} --recursive") 70 | print(f"aws s3api delete-bucket --bucket {bucket}") 71 | print(f"aws iam remove-role-from-instance-profile --instance-profile-name {self.instance_role_name} --role-name {self.instance_role_name}") # noqa E501 72 | print(f"aws iam delete-instance-profile --instance-profile-name {self.instance_role_name}") # noqa E501 73 | for role_name in [self.instance_role_name, self.job_role_name]: 74 | for policy in self.iam_client.list_attached_role_policies(RoleName=role_name).get('AttachedPolicies'): # noqa E501 75 | arn = policy.get('PolicyArn') 76 | print(f"aws iam detach-role-policy --role-name {role_name} --policy-arn {arn}") # noqa E501 77 | print(f"aws iam delete-policy --policy-arn {arn}") # noqa E501 78 | for name in self.iam_client.list_role_policies(RoleName=role_name).get('PolicyNames'): # noqa E501 79 | print(f"aws iam delete-role-policy --role-name {role_name} --policy-name {name}") # noqa E501 80 | print(f"aws iam delete-role --role-name {role_name}") 81 | print("aws emr delete-security-configuration --name emr-cli-runtime-roles") # noqa E501 82 | # fmt: on 83 | 84 | def _create_s3_buckets(self): 85 | """ 86 | Creates the code and log buckets and applies a default bucket policy to each.
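Each bucket also gets a default bucket policy that denies any request made without TLS (i.e. when aws:SecureTransport is "false").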
87 | """ 88 | for bucket_name in set([self.code_bucket, self.log_bucket]): 89 | self.s3_client.create_bucket( 90 | Bucket=bucket_name, 91 | CreateBucketConfiguration={"LocationConstraint": self.s3_client.meta.region_name}, 92 | ) 93 | console_log(f"Created S3 bucket: s3://{bucket_name}") 94 | self.s3_client.put_bucket_policy(Bucket=bucket_name, Policy=self._default_s3_bucket_policy(bucket_name)) 95 | 96 | def _default_s3_bucket_policy(self, bucket_name) -> str: 97 | bucket_policy = { 98 | "Version": "2012-10-17", 99 | "Statement": [ 100 | { 101 | "Sid": "RequireSecureTransport", 102 | "Effect": "Deny", 103 | "Principal": "*", 104 | "Action": "s3:*", 105 | "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"], 106 | "Condition": { 107 | "Bool": {"aws:SecureTransport": "false", "aws:SourceArn": f"arn:aws:s3:::{bucket_name} "} 108 | }, 109 | } 110 | ], 111 | } 112 | return json.dumps(bucket_policy) 113 | 114 | def _create_service_role(self): 115 | """ 116 | Create an EC2 instance profile and role for use with EMR 117 | https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-role-for-ec2.html 118 | """ 119 | # First create a role that can be assumed by EC2 120 | response = self.iam_client.create_role( 121 | RoleName=self.instance_role_name, 122 | AssumeRolePolicyDocument=json.dumps( 123 | { 124 | "Version": "2012-10-17", 125 | "Statement": [ 126 | { 127 | "Effect": "Allow", 128 | "Principal": {"Service": "ec2.amazonaws.com"}, 129 | "Action": "sts:AssumeRole", 130 | } 131 | ], 132 | } 133 | ), 134 | ) 135 | role_arn = response.get("Role").get("Arn") 136 | console_log(f"Created IAM Role: {role_arn}") 137 | 138 | self.iam_client.create_instance_profile(InstanceProfileName=self.instance_role_name) 139 | self.iam_client.add_role_to_instance_profile( 140 | InstanceProfileName=self.instance_role_name, 141 | RoleName=self.instance_role_name, 142 | ) 143 | return role_arn 144 | 145 | def _create_runtime_role(self, instance_profile_role_arn: str): 146 | response = self.iam_client.create_role( 147 | RoleName=self.job_role_name, 148 | AssumeRolePolicyDocument=json.dumps( 149 | { 150 | "Version": "2012-10-17", 151 | "Statement": [ 152 | { 153 | "Effect": "Allow", 154 | "Principal": {"AWS": instance_profile_role_arn}, 155 | "Action": "sts:AssumeRole", 156 | } 157 | ], 158 | } 159 | ), 160 | ) 161 | role_arn = response.get("Role").get("Arn") 162 | console_log(f"Created IAM Role: {role_arn}") 163 | 164 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_s3_policy()) 165 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_glue_policy()) 166 | 167 | return role_arn 168 | 169 | def _create_s3_policy(self): 170 | bucket_arns = [f"arn:aws:s3:::{name}" for name in [self.code_bucket, self.log_bucket]] 171 | policy_doc = { 172 | "Version": "2012-10-17", 173 | "Statement": [ 174 | { 175 | "Sid": "AllowListBuckets", 176 | "Effect": "Allow", 177 | "Action": ["s3:ListBucket"], 178 | "Resource": bucket_arns, 179 | }, 180 | { 181 | "Sid": "WriteToCodeAndLogBuckets", 182 | "Effect": "Allow", 183 | "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"], 184 | "Resource": [f"{arn}/*" for arn in bucket_arns], 185 | }, 186 | ], 187 | } 188 | response = self.iam_client.create_policy( 189 | PolicyName=self.DEFAULT_S3_POLICY_NAME, 190 | PolicyDocument=json.dumps(policy_doc), 191 | ) 192 | return response.get("Policy").get("Arn") 193 | 194 | def _create_glue_policy(self): 195 | policy_doc = { 196 | "Version": "2012-10-17", 
197 | "Statement": [ 198 | { 199 | "Sid": "GlueCreateAndReadDataCatalog", 200 | "Effect": "Allow", 201 | "Action": [ 202 | "glue:GetDatabase", 203 | "glue:GetDataBases", 204 | "glue:CreateTable", 205 | "glue:GetTable", 206 | "glue:GetTables", 207 | "glue:GetPartition", 208 | "glue:GetPartitions", 209 | "glue:CreatePartition", 210 | "glue:BatchCreatePartition", 211 | "glue:GetUserDefinedFunctions", 212 | ], 213 | "Resource": "*", 214 | }, 215 | ], 216 | } 217 | response = self.iam_client.create_policy( 218 | PolicyName=self.DEFAULT_GLUE_POLICY_NAME, 219 | PolicyDocument=json.dumps(policy_doc), 220 | ) 221 | return response.get("Policy").get("Arn") 222 | 223 | def _runtime_role_policy(self, runtime_role_arn: str): 224 | return json.dumps( 225 | { 226 | "Version": "2012-10-17", 227 | "Statement": [ 228 | { 229 | "Sid": "AllowRuntimeRoleUsage", 230 | "Effect": "Allow", 231 | "Action": ["sts:AssumeRole", "sts:TagSession"], 232 | "Resource": [runtime_role_arn], 233 | } 234 | ], 235 | } 236 | ) 237 | 238 | def _create_security_config(self): 239 | response = self.emr_client.create_security_configuration( 240 | Name="emr-cli-runtime-roles", 241 | SecurityConfiguration="""{ 242 | "AuthorizationConfiguration":{ 243 | "IAMConfiguration":{ 244 | "EnableApplicationScopedIAMRole":true 245 | } 246 | } 247 | }""", 248 | ) 249 | return response.get("Name") 250 | 251 | def _create_cluster(self, security_config_name: str, instance_profile_name: str): 252 | """ 253 | Create a simple Spark EMR on EC2 cluster. 254 | 255 | **WARNING** This cluster is only intended for demo/development purposes only. 256 | 257 | It is deployed in a public subnet by default and will auto-terminate in 4 hours. 258 | Runtime roles are enabled so you can submit jobs with the created job-role. 259 | 260 | To customize the cluster or create a cluster for production, use the AWS CLI 261 | or other Infrastructure as Code services like Terraform, CDK, or CloudFormation. 
262 | """
263 | response = self.emr_client.run_job_flow(
264 | Name="emr-cli-demo",
265 | ReleaseLabel="emr-6.9.0",
266 | LogUri=f"s3://{self.log_bucket}/logs/emr/",
267 | Applications=[
268 | {"Name": "Spark"},
269 | {"Name": "Livy"},
270 | {"Name": "JupyterEnterpriseGateway"},
271 | ],
272 | AutoTerminationPolicy={"IdleTimeout": 14400},
273 | SecurityConfiguration=security_config_name,
274 | ServiceRole="EMR_DefaultRole",
275 | JobFlowRole=instance_profile_name,
276 | Instances={
277 | "KeepJobFlowAliveWhenNoSteps": True,
278 | "InstanceFleets": [
279 | {
280 | "Name": "Primary",
281 | "InstanceFleetType": "MASTER",
282 | "TargetOnDemandCapacity": 1,
283 | "TargetSpotCapacity": 0,
284 | "InstanceTypeConfigs": [
285 | {"InstanceType": "r5.2xlarge"},
286 | {"InstanceType": "r5b.2xlarge"},
287 | {"InstanceType": "r5d.2xlarge"},
288 | {"InstanceType": "r5a.2xlarge"},
289 | ],
290 | },
291 | {
292 | "Name": "Core",
293 | "InstanceFleetType": "CORE",
294 | "TargetOnDemandCapacity": 0,
295 | "TargetSpotCapacity": 1,
296 | "InstanceTypeConfigs": [
297 | {"InstanceType": "c5a.2xlarge"},
298 | {"InstanceType": "m5a.2xlarge"},
299 | {"InstanceType": "r5a.2xlarge"},
300 | ],
301 | "LaunchSpecifications": {
302 | "OnDemandSpecification": {"AllocationStrategy": "lowest-price"},
303 | "SpotSpecification": {
304 | "TimeoutDurationMinutes": 10,
305 | "TimeoutAction": "SWITCH_TO_ON_DEMAND",
306 | "AllocationStrategy": "capacity-optimized",
307 | },
308 | },
309 | },
310 | ],
311 | },
312 | )
313 | cluster_id = response.get("JobFlowId")
314 | console_log(f"Created EMR Cluster: {cluster_id}")
315 | return cluster_id
316 | 
317 | 
318 | class EMREC2:
319 | def __init__(
320 | self,
321 | cluster_id: str,
322 | deployment_package: DeploymentPackage,
323 | job_role: Optional[str] = None,
324 | region: str = "",
325 | ) -> None:
326 | self.cluster_id = cluster_id
327 | self.dp = deployment_package
328 | self.job_role = job_role
329 | self.client = boto3.client("emr", region_name=region) if region else boto3.client("emr")  # honor an explicit region, like the other deployment clients
330 | self.s3_client = boto3.client("s3")
331 | 
332 | def run_job(
333 | self,
334 | job_name: str,
335 | job_args: Optional[List[str]] = None,
336 | spark_submit_opts: Optional[str] = None,
337 | wait: bool = True,
338 | show_logs: bool = False,
339 | ):
340 | """
341 | Run a Spark job on EMR on EC2. Some important notes:
342 | 1. --deploy-mode cluster is important for distributing dependencies
343 | 2. entrypoint script must be the last argument
344 | 3. 
show_logs implies `wait=True` 345 | """ 346 | deploy_mode = "client" if show_logs else "cluster" 347 | spark_submit_params = self.dp.spark_submit_parameters().params_for("emr_ec2") 348 | 349 | if spark_submit_opts: 350 | spark_submit_params = f"{spark_submit_params} {spark_submit_opts}".strip() 351 | 352 | # Escape job args if they're provided 353 | if job_args: 354 | job_args = [shlex.quote(arg) for arg in job_args] 355 | 356 | # show_logs is only compatible with client mode 357 | # --conf spark.archives is only compatible with cluster mode 358 | # So if we have both, we have to throw an error 359 | # See https://issues.apache.org/jira/browse/SPARK-36088 360 | if show_logs and ("--conf spark.archives" in spark_submit_params or "--archives" in spark_submit_params): 361 | raise RuntimeError( 362 | "--show-stdout is not compatible with projects that make use of " 363 | + "dependencies.\nPlease 👍 this GitHub issue to voice your support: " 364 | + "https://github.com/awslabs/amazon-emr-cli/issues/12" 365 | ) 366 | 367 | # define params for emr.add_job_flow_steps 368 | add_job_flow_steps_params = { 369 | "JobFlowId": self.cluster_id, 370 | "Steps": [ 371 | { 372 | "Name": job_name, 373 | "ActionOnFailure": "CONTINUE", 374 | "HadoopJarStep": { 375 | "Jar": "command-runner.jar", 376 | "Args": [ 377 | "spark-submit", 378 | "--deploy-mode", 379 | deploy_mode, 380 | ] 381 | + spark_submit_params.split(" ") 382 | + [self.dp.entrypoint_uri()] 383 | + (job_args or []), 384 | }, 385 | } 386 | ], 387 | } 388 | 389 | # conditionally add ExecutionRoleArn to add_job_flow_steps if a runtime role is requested for this step 390 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-steps-runtime-roles.html 391 | if self.job_role: 392 | add_job_flow_steps_params["ExecutionRoleArn"] = self.job_role 393 | 394 | try: 395 | response = self.client.add_job_flow_steps(**add_job_flow_steps_params) 396 | except ClientError as err: 397 | console_log(err) 398 | sys.exit(1) 399 | 400 | step_id = response.get("StepIds")[0] 401 | console_log(f"Job submitted to EMR on EC2 (Step ID: {step_id})") 402 | if not wait and not show_logs: 403 | return step_id 404 | 405 | console_log("Waiting for step to complete...") 406 | waiter = self.client.get_waiter("step_complete") 407 | job_failed = False 408 | try: 409 | waiter.wait( 410 | ClusterId=self.cluster_id, 411 | StepId=step_id, 412 | ) 413 | console_log("Job completed successfully!") 414 | except WaiterError: 415 | console_log("EMR on EC2 step failed!") 416 | job_failed = True # So we can exit(1) later 417 | if not show_logs: 418 | sys.exit(1) 419 | 420 | if show_logs: 421 | # We need to validate s3-logging is enabled and fetch the location of the logs 422 | try: 423 | logs_location = self._fetch_log_location() 424 | stdout_location = self._wait_for_logs(step_id, logs_location, 30 * 60) 425 | console_log(f"stdout for {step_id}\n{'-'*36}") 426 | print_s3_gz(self.s3_client, stdout_location) 427 | if job_failed: 428 | sys.exit(1) 429 | except RuntimeError as e: 430 | console_log(f"ERR: {e}") 431 | sys.exit(1) 432 | except WaiterError as e: 433 | console_log(f"ERR: While waiting for logs to appear: {e}") 434 | sys.exit(1) 435 | 436 | return step_id 437 | 438 | def _fetch_log_location(self) -> str: 439 | """ 440 | Fetch the cluster and ensure it has the loguri set, 441 | then return the s3 location. 
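Legacy s3n:// URIs are normalized to s3:// (e.g. s3n://bucket/logs/ becomes
s3://bucket/logs/), matching what the rest of the CLI expects.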
442 | """ 443 | cluster_info = self.client.describe_cluster(ClusterId=self.cluster_id) 444 | loguri = cluster_info.get("Cluster").get("LogUri") 445 | if loguri is None: 446 | raise RuntimeError("Cluster does not have S3 logging enabled") 447 | return loguri.replace("s3n:", "s3:") 448 | 449 | def _wait_for_logs(self, step_id: str, log_base: str, timeout_secs: int) -> str: 450 | """ 451 | Waits for stdout logs to appear in S3. Checks every LOG_WAITER_DELAY_SEC seconds 452 | until `timeout_secs`. 453 | """ 454 | object_name = join(log_base, self.cluster_id, "steps", step_id, "stdout.gz") 455 | console_log(f"Waiting for logs to appear in {object_name} ...") 456 | bucket_name, key = parse_bucket_uri(object_name) 457 | waiter = self.s3_client.get_waiter("object_exists") 458 | waiter.wait( 459 | Bucket=bucket_name, 460 | Key=key, 461 | WaiterConfig={ 462 | "Delay": LOG_WAITER_DELAY_SEC, 463 | "MaxAttempts": int(timeout_secs / LOG_WAITER_DELAY_SEC), 464 | }, 465 | ) 466 | return object_name 467 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_eks.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from os.path import join 4 | from platform import release 5 | from time import sleep 6 | from typing import List, Optional 7 | 8 | import boto3 9 | from emr_cli.deployments.emr_serverless import DeploymentPackage 10 | from emr_cli.utils import console_log, print_s3_gz 11 | 12 | 13 | class EMREKS: 14 | def __init__( 15 | self, virtual_cluster_id: str, job_role: str, deployment_package: DeploymentPackage, region: str = "" 16 | ) -> None: 17 | self.virtual_cluster_id = virtual_cluster_id 18 | self.job_role = job_role 19 | self.dp = deployment_package 20 | self.s3_client = boto3.client("s3") 21 | if region: 22 | self.client = boto3.client("emr-containers", region_name=region) 23 | self.emr_client = boto3.client("emr", region_name=region) 24 | else: 25 | # Note that boto3 uses AWS_DEFAULT_REGION, not AWS_REGION 26 | # We may want to add an extra check here for the latter. 27 | self.client = boto3.client("emr-containers") 28 | self.emr_client = boto3.client("emr") 29 | 30 | def fetch_latest_release_label(self): 31 | response = self.emr_client.list_release_labels( 32 | Filters={"Application": "Spark", "Prefix": "emr-6"}, MaxResults=1 33 | ) 34 | if len(response["ReleaseLabels"]) == 0: 35 | console_log("Error: No release labels found") 36 | sys.exit(1) 37 | return response["ReleaseLabels"][0] 38 | 39 | def run_job( 40 | self, 41 | job_name: str, 42 | job_args: Optional[List[str]] = None, 43 | spark_submit_opts: Optional[str] = None, 44 | wait: bool = True, 45 | show_logs: bool = False, 46 | s3_logs_uri: Optional[str] = None, 47 | release_label: Optional[str] = None, 48 | ): 49 | if show_logs and not s3_logs_uri: 50 | raise RuntimeError("--show-stdout requires --s3-logs-uri to be set.") 51 | 52 | if release_label is None: 53 | release_label = self.fetch_latest_release_label() 54 | console_log(f"Using latest release label {release_label}") 55 | release_label = f"{release_label}-latest" 56 | 57 | # If job_name is the default, just replace the space. 
58 | # Otherwise throw an error
59 | if job_name == "emr-cli job":
60 | job_name = "emr-cli_job"
61 | elif not re.fullmatch(r"[\.\-_/#A-Za-z0-9]+", job_name):
62 | console_log(rf"Invalid characters in job name {job_name} - EMR on EKS must match [\.\-_/#A-Za-z0-9]+")
63 | sys.exit(1)
64 | 
65 | jobDriver = {
66 | "sparkSubmitJobDriver": {
67 | "entryPoint": self.dp.entrypoint_uri(),
68 | }
69 | }
70 | spark_submit_parameters = self.dp.spark_submit_parameters().params_for("emr_eks")
71 | 
72 | if spark_submit_opts:
73 | spark_submit_parameters = f"{spark_submit_parameters} {spark_submit_opts}".strip()
74 | 
75 | if spark_submit_parameters:
76 | jobDriver["sparkSubmitJobDriver"]["sparkSubmitParameters"] = spark_submit_parameters
77 | 
78 | if job_args:
79 | jobDriver["sparkSubmitJobDriver"]["entryPointArguments"] = job_args # type: ignore
80 | 
81 | config_overrides = {}
82 | if s3_logs_uri:
83 | config_overrides = {"monitoringConfiguration": {"s3MonitoringConfiguration": {"logUri": s3_logs_uri}}}
84 | 
85 | response = self.client.start_job_run(
86 | virtualClusterId=self.virtual_cluster_id,
87 | executionRoleArn=self.job_role,
88 | name=job_name,
89 | jobDriver=jobDriver,
90 | configurationOverrides=config_overrides,
91 | releaseLabel=release_label,
92 | )
93 | job_run_id = response.get("id")
94 | 
95 | console_log(f"Job submitted to EMR Virtual Cluster (Job Run ID: {job_run_id})")
96 | if not wait and not show_logs:
97 | return job_run_id
98 | 
99 | console_log("Waiting for job to complete...")
100 | job_done = False
101 | job_state = "SUBMITTED"
102 | jr_response = {}
103 | while not job_done:
104 | jr_response = self.get_job_run(job_run_id)
105 | new_state = jr_response.get("state")
106 | if new_state != job_state:
107 | console_log(f"Job state is now: {new_state}")
108 | job_state = new_state
109 | job_done = new_state in [
110 | "COMPLETED",
111 | "FAILED",
112 | "CANCEL_PENDING",
113 | "CANCELLED",
114 | ]
115 | sleep(2)
116 | 
117 | if show_logs:
118 | console_log(f"stdout for {job_run_id}\n{'-'*38}")
119 | log_location = join(
120 | f"{s3_logs_uri}",
121 | self.virtual_cluster_id,
122 | "jobs",
123 | job_run_id,
124 | "containers",
125 | f"spark-{job_run_id}",
126 | f"spark-{job_run_id}-driver",
127 | "stdout.gz",
128 | )
129 | print_s3_gz(self.s3_client, log_location)
130 | 
131 | if jr_response.get("state") != "COMPLETED":
132 | console_log(f"EMR Containers job failed: {jr_response.get('stateDetails')}")
133 | sys.exit(1)
134 | console_log("Job completed successfully!")
135 | 
136 | return job_run_id
137 | 
138 | def get_job_run(self, job_run_id: str) -> dict:
139 | response = self.client.describe_job_run(virtualClusterId=self.virtual_cluster_id, id=job_run_id)
140 | return response.get("jobRun")
141 | 
-------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_serverless.py: --------------------------------------------------------------------------------
1 | import abc
2 | import json
3 | import os
4 | import sys
5 | import zipfile
6 | from os.path import join
7 | from time import sleep
8 | from typing import List, Optional
9 | 
10 | import boto3
11 | from emr_cli.deployments import SparkParams
12 | from emr_cli.utils import console_log, find_files, mkdir, print_s3_gz
13 | 
14 | 
15 | class DeploymentPackage(metaclass=abc.ABCMeta):
16 | def __init__(self, entry_point_path: str = "entrypoint.py", s3_target_uri: str = "") -> None:
17 | self.entry_point_path = entry_point_path
18 | self.dist_dir = "dist"
19 | 
20 | # We might not populate this until we 
actually deploy
21 | self.s3_uri_base = s3_target_uri
22 | 
23 | def spark_submit_parameters(self) -> SparkParams:
24 | """
25 | Returns any additional arguments necessary for spark-submit
26 | """
27 | return SparkParams()
28 | 
29 | def entrypoint_uri(self) -> str:
30 | """
31 | Returns the full S3 URI to the entrypoint file, e.g. s3://bucket/path/somecode.py
32 | """
33 | if not self.s3_uri_base:  # covers both None and the "" default
34 | raise Exception("S3 URI has not been set, aborting")
35 | return os.path.join(self.s3_uri_base, self.entry_point_path)
36 | 
37 | def _zip_local_pyfiles(self):
38 | """
39 | Zip all the files except for the entrypoint file.
40 | """
41 | py_files = find_files(os.getcwd(), [".venv"], ".py")
42 | py_files.remove(os.path.abspath(self.entry_point_path))
43 | cwd = os.getcwd()
44 | mkdir(self.dist_dir)
45 | with zipfile.ZipFile(f"{self.dist_dir}/pyfiles.zip", "w") as zf:
46 | for file in py_files:
47 | relpath = os.path.relpath(file, cwd)
48 | zf.write(file, relpath)
49 | 
50 | 
51 | class Bootstrap:
52 | # Maybe add some UUIDs to these?
53 | DEFAULT_S3_POLICY_NAME = "emr-cli-S3Access"
54 | DEFAULT_GLUE_POLICY_NAME = "emr-cli-GlueAccess"
55 | 
56 | def __init__(self, code_bucket: str, log_bucket: str, job_role_name: str):
57 | self.code_bucket = code_bucket
58 | self.log_bucket = log_bucket or code_bucket
59 | self.job_role_name = job_role_name
60 | self.s3_client = boto3.client("s3")
61 | self.iam_client = boto3.client("iam")
62 | self.emrs_client = boto3.client("emr-serverless")
63 | 
64 | def create_environment(self):
65 | self._create_s3_buckets()
66 | job_role_arn = self._create_job_role()
67 | app_id = self._create_application()
68 | return {
69 | "application_id": app_id,
70 | "job_role_arn": job_role_arn,
71 | "code_bucket": self.code_bucket,
72 | "log_bucket": self.log_bucket,
73 | }
74 | 
75 | def print_destroy_commands(self, application_id: str):
76 | # fmt: off
77 | for bucket in set([self.log_bucket, self.code_bucket]):
78 | print(f"aws s3 rm s3://{bucket} --recursive")
79 | print(f"aws s3api delete-bucket --bucket {bucket}")
80 | for policy in self.iam_client.list_attached_role_policies(RoleName=self.job_role_name).get('AttachedPolicies'): # noqa E501
81 | arn = policy.get('PolicyArn')
82 | print(f"aws iam detach-role-policy --role-name {self.job_role_name} --policy-arn {arn}") # noqa E501
83 | print(f"aws iam delete-policy --policy-arn {arn}") # noqa E501
84 | print(f"aws iam delete-role --role-name {self.job_role_name}")
85 | print(f"aws emr-serverless stop-application --application-id {application_id}")
86 | print(f"aws emr-serverless delete-application --application-id {application_id}") # noqa E501
87 | # fmt: on
88 | 
89 | def _create_s3_buckets(self):
90 | """
91 | Creates both the source and log buckets if they don't already exist.
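A deny-unless-TLS bucket policy (aws:SecureTransport) is attached to each
bucket after creation.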
92 | """
93 | for bucket_name in set([self.code_bucket, self.log_bucket]):
94 | self.s3_client.create_bucket(
95 | Bucket=bucket_name,
96 | CreateBucketConfiguration={
97 | "LocationConstraint": self.s3_client.meta.region_name # type: ignore
98 | },
99 | )
100 | 
101 | console_log(f"Created S3 bucket: s3://{bucket_name}")
102 | self.s3_client.put_bucket_policy(Bucket=bucket_name, Policy=self._default_s3_bucket_policy(bucket_name))
103 | 
104 | def _default_s3_bucket_policy(self, bucket_name) -> str:
105 | bucket_policy = {
106 | "Version": "2012-10-17",
107 | "Statement": [
108 | {
109 | "Sid": "RequireSecureTransport",
110 | "Effect": "Deny",
111 | "Principal": "*",
112 | "Action": "s3:*",
113 | "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"],
114 | "Condition": {
115 | "Bool": {"aws:SecureTransport": "false"}
116 | },
117 | }
118 | ],
119 | }
120 | return json.dumps(bucket_policy)
121 | 
122 | def _create_job_role(self):
123 | # First create a role that can be assumed by EMR Serverless jobs
124 | response = self.iam_client.create_role(
125 | RoleName=self.job_role_name,
126 | AssumeRolePolicyDocument=json.dumps(
127 | {
128 | "Version": "2012-10-17",
129 | "Statement": [
130 | {
131 | "Effect": "Allow",
132 | "Principal": {"Service": "emr-serverless.amazonaws.com"},
133 | "Action": "sts:AssumeRole",
134 | }
135 | ],
136 | }
137 | ),
138 | )
139 | role_arn = response.get("Role").get("Arn")
140 | console_log(f"Created IAM Role: {role_arn}")
141 | 
142 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_s3_policy())
143 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_glue_policy())
144 | 
145 | return role_arn
146 | 
147 | def _create_s3_policy(self):
148 | bucket_arns = [f"arn:aws:s3:::{name}" for name in [self.code_bucket, self.log_bucket]]
149 | policy_doc = {
150 | "Version": "2012-10-17",
151 | "Statement": [
152 | {
153 | "Sid": "AllowListBuckets",
154 | "Effect": "Allow",
155 | "Action": ["s3:ListBucket"],
156 | "Resource": bucket_arns,
157 | },
158 | {
159 | "Sid": "WriteToCodeAndLogBuckets",
160 | "Effect": "Allow",
161 | "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
162 | "Resource": [f"{arn}/*" for arn in bucket_arns],
163 | },
164 | ],
165 | }
166 | response = self.iam_client.create_policy(
167 | PolicyName=self.DEFAULT_S3_POLICY_NAME,
168 | PolicyDocument=json.dumps(policy_doc),
169 | )
170 | return response.get("Policy").get("Arn")
171 | 
172 | def _create_glue_policy(self):
173 | policy_doc = {
174 | "Version": "2012-10-17",
175 | "Statement": [
176 | {
177 | "Sid": "GlueCreateAndReadDataCatalog",
178 | "Effect": "Allow",
179 | "Action": [
180 | "glue:GetDatabase",
181 | "glue:GetDatabases",
182 | "glue:CreateTable",
183 | "glue:GetTable",
184 | "glue:GetTables",
185 | "glue:GetPartition",
186 | "glue:GetPartitions",
187 | "glue:CreatePartition",
188 | "glue:BatchCreatePartition",
189 | "glue:GetUserDefinedFunctions",
190 | ],
191 | "Resource": "*",
192 | },
193 | ],
194 | }
195 | response = self.iam_client.create_policy(
196 | PolicyName=self.DEFAULT_GLUE_POLICY_NAME,
197 | PolicyDocument=json.dumps(policy_doc),
198 | )
199 | return response.get("Policy").get("Arn")
200 | 
201 | def _create_application(self):
202 | """
203 | Create a simple Spark EMR Serverless application with a default (but minimal)
204 | pre-initialized capacity.
205 | 
206 | This application is intended for demo purposes only. 
To customize the 207 | application or create an application for production, use the AWS CLI or other 208 | Infrastructure as Code services like Terraform, CDK, or CloudFormation. 209 | """ 210 | response = self.emrs_client.create_application( 211 | name="emr-cli-demo", 212 | releaseLabel="emr-6.9.0", 213 | type="SPARK", 214 | ) 215 | app_id = response.get("applicationId") 216 | console_log(f"Created EMR Serverless application: {app_id}") 217 | self.emrs_client.start_application(applicationId=app_id) 218 | return app_id 219 | 220 | 221 | class EMRServerless: 222 | def __init__( 223 | self, 224 | application_id: str, 225 | job_role: str, 226 | deployment_package: DeploymentPackage, 227 | region: str = "", 228 | ) -> None: 229 | self.application_id = application_id 230 | self.job_role = job_role 231 | self.dp = deployment_package 232 | self.s3_client = boto3.client("s3") 233 | if region: 234 | self.client = boto3.client("emr-serverless", region_name=region) 235 | else: 236 | # Note that boto3 uses AWS_DEFAULT_REGION, not AWS_REGION 237 | # We may want to add an extra check here for the latter. 238 | self.client = boto3.client("emr-serverless") 239 | 240 | def run_job( 241 | self, 242 | job_name: str, 243 | job_args: Optional[List[str]] = None, 244 | spark_submit_opts: Optional[str] = None, 245 | wait: bool = True, 246 | show_logs: bool = False, 247 | s3_logs_uri: Optional[str] = None, 248 | timeout: Optional[int] = None, 249 | ): 250 | if show_logs and not s3_logs_uri: 251 | raise RuntimeError("--show-stdout requires --s3-logs-uri to be set.") 252 | 253 | jobDriver = { 254 | "sparkSubmit": { 255 | "entryPoint": self.dp.entrypoint_uri(), 256 | } 257 | } 258 | spark_submit_parameters = self.dp.spark_submit_parameters().params_for("emr_serverless") 259 | 260 | if spark_submit_opts: 261 | spark_submit_parameters = f"{spark_submit_parameters} {spark_submit_opts}".strip() 262 | 263 | if spark_submit_parameters: 264 | jobDriver["sparkSubmit"]["sparkSubmitParameters"] = spark_submit_parameters 265 | 266 | if job_args: 267 | jobDriver["sparkSubmit"]["entryPointArguments"] = job_args # type: ignore 268 | 269 | config_overrides = {} 270 | if s3_logs_uri: 271 | config_overrides = {"monitoringConfiguration": {"s3MonitoringConfiguration": {"logUri": s3_logs_uri}}} 272 | 273 | response = self.client.start_job_run( 274 | applicationId=self.application_id, 275 | executionRoleArn=self.job_role, 276 | name=job_name, 277 | jobDriver=jobDriver, 278 | configurationOverrides=config_overrides, 279 | executionTimeoutMinutes=timeout, 280 | ) 281 | job_run_id = response.get("jobRunId") 282 | 283 | console_log(f"Job submitted to EMR Serverless (Job Run ID: {job_run_id})") 284 | if not wait and not show_logs: 285 | return job_run_id 286 | 287 | console_log("Waiting for job to complete...") 288 | job_done = False 289 | job_state = "SUBMITTED" 290 | jr_response = {} 291 | while not job_done: 292 | jr_response = self.get_job_run(job_run_id) 293 | new_state = jr_response.get("state") 294 | if new_state != job_state: 295 | console_log(f"Job state is now: {new_state}") 296 | job_state = new_state 297 | job_done = new_state in [ 298 | "SUCCESS", 299 | "FAILED", 300 | "CANCELLING", 301 | "CANCELLED", 302 | ] 303 | sleep(2) 304 | 305 | if show_logs: 306 | console_log(f"stdout for {job_run_id}\n{'-'*38}") 307 | log_location = join( 308 | f"{s3_logs_uri}", 309 | "applications", 310 | self.application_id, 311 | "jobs", 312 | job_run_id, 313 | "SPARK_DRIVER", 314 | "stdout.gz", 315 | ) 316 | print_s3_gz(self.s3_client, log_location) 
317 | 
318 | if jr_response.get("state") != "SUCCESS":
319 | console_log(f"EMR Serverless job failed: {jr_response.get('stateDetails')}")
320 | sys.exit(1)
321 | console_log("Job completed successfully!")
322 | 
323 | return job_run_id
324 | 
325 | def get_job_run(self, job_run_id: str) -> dict:
326 | response = self.client.get_job_run(applicationId=self.application_id, jobRunId=job_run_id)
327 | return response.get("jobRun")
328 | 
-------------------------------------------------------------------------------- /src/emr_cli/emr_cli.py: --------------------------------------------------------------------------------
1 | try:
2 | from importlib.metadata import version
3 | except ModuleNotFoundError:
4 | # Python 3.7 compatibility
5 | # https://github.com/python/importlib_metadata#compatibility-with-python-3.7
6 | from importlib_metadata import version
7 | 
8 | import click
9 | from emr_cli.config import DEFAULT_CONFIG_PATH, ConfigReader, ConfigWriter
10 | from emr_cli.deployments.emr_ec2 import EMREC2
11 | from emr_cli.deployments.emr_ec2 import Bootstrap as BootstrapEMRonEC2
12 | from emr_cli.deployments.emr_eks import EMREKS
13 | from emr_cli.packaging.detector import ProjectDetector
14 | from emr_cli.utils import console_log
15 | 
16 | from .deployments.emr_serverless import Bootstrap as BootstrapEMRServerless
17 | from .deployments.emr_serverless import EMRServerless
18 | from .packaging.python_project import PythonProject
19 | 
20 | 
21 | @click.group()
22 | @click.pass_context
23 | def cli(ctx):
24 | """
25 | Package, deploy, and run PySpark projects on EMR.
26 | """
27 | # If we want the user to be able to force a project type, check out click.Choice
28 | ctx.obj = ProjectDetector().detect()
29 | 
30 | # If a config file exists, set those as defaults for all other options
31 | ctx.default_map = ConfigReader.read()
32 | if ctx.default_map:
33 | console_log(f"Using config file: {DEFAULT_CONFIG_PATH}")
34 | 
35 | 
36 | @click.command()
37 | @click.pass_obj
38 | def status(project):
39 | console_log("")
40 | print(f"Project type:\t\t{project.__name__}")
41 | print(f"EMR CLI version:\t{version('emr-cli')}")
42 | 
43 | 
44 | @click.command()
45 | @click.option(
46 | "--target",
47 | type=click.Choice(["emr-serverless", "emr-ec2"]),
48 | help="The type of environment to bootstrap.",
49 | )
50 | @click.option("--code-bucket", help="Bucket where source code will be uploaded", required=True)
51 | @click.option("--logs-bucket", help="Bucket where logs will be uploaded")
52 | @click.option(
53 | "--instance-profile-name",
54 | help="""
55 | The name of the IAM role to be created for your EMR on EC2 instances.
56 | """,
57 | required=False,
58 | )
59 | @click.option(
60 | "--job-role-name",
61 | help="""
62 | The name of the IAM role to be created for your EMR Serverless jobs.
63 | This role has access to read and write to the source code and logs buckets,
64 | and access to read and create tables in the Glue Data Catalog.""",
65 | required=True,
66 | )
67 | @click.option(
68 | "--destroy",
69 | default=False,
70 | is_flag=True,
71 | help="Prints the commands necessary to destroy the created environment.",
72 | )
73 | def bootstrap(target, code_bucket, logs_bucket, instance_profile_name, job_role_name, destroy):
74 | """
75 | Bootstrap an EMR Serverless or EMR on EC2 environment.
76 | 
77 | Includes creating S3 buckets, tightly-scoped IAM roles, an EMR Serverless
78 | application or EMR on EC2 cluster, and an emr-cli configuration file.
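Example invocation (a sketch; the bucket and role names are placeholders):
emr bootstrap --target emr-serverless --code-bucket my-emr-code-bucket --job-role-name emr-cli-job-role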
79 | """ 80 | # EMR on EC2 additionally needs an instance profile role 81 | if target == "emr-ec2" and instance_profile_name is None: 82 | raise click.BadArgumentUsage("EMR on EC2 clusters require --instance-profile-name to be set.") 83 | 84 | if target == "emr-serverless": 85 | b = BootstrapEMRServerless(code_bucket, logs_bucket, job_role_name) 86 | else: 87 | b = BootstrapEMRonEC2(code_bucket, logs_bucket, instance_profile_name, job_role_name) 88 | 89 | resource_id = "application_id" if target == "emr-serverless" else "cluster_id" 90 | if destroy: 91 | c = ConfigReader.read() 92 | b.print_destroy_commands(c.get("run", {}).get(resource_id, None)) 93 | exit(0) 94 | 95 | # For EMR Serverless, we need to create an S3 bucket, a job role, and an Application 96 | config = b.create_environment() 97 | 98 | # The resulting config is relevant for the "run" command 99 | run_config = { 100 | "run": { 101 | resource_id: config.get(resource_id), 102 | "job_role": config.get("job_role_arn"), 103 | "s3_code_uri": f"s3://{config.get('code_bucket')}/code/pyspark/", 104 | "s3_logs_uri": f"s3://{config.get('log_bucket')}/logs/pyspark/", 105 | } 106 | } 107 | ConfigWriter.write(run_config) 108 | 109 | 110 | @click.command() 111 | @click.argument("path") 112 | @click.option( 113 | "--dockerfile", 114 | default=False, 115 | is_flag=True, 116 | help="Only create a sample Dockerfile for packaging Python dependencies", 117 | ) 118 | @click.option( 119 | "--project-type", 120 | type=click.Choice(["python", "poetry"]), 121 | help="The type of project to create.", 122 | default="python", 123 | ) 124 | def init(path, dockerfile, project_type): 125 | """ 126 | Initialize a local PySpark project. 127 | """ 128 | if dockerfile: 129 | click.echo("Creating sample Dockerfile...") 130 | PythonProject().copy_single_file("Dockerfile") 131 | else: 132 | kls = ProjectDetector().detect(project_type) 133 | kls().initialize(path) 134 | 135 | 136 | @click.command() 137 | @click.option( 138 | "--entry-point", 139 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 140 | help="Entrypoint file", 141 | required=True, 142 | ) 143 | @click.pass_obj 144 | def package(project, entry_point): 145 | """ 146 | Package a project and dependencies into dist/ 147 | """ 148 | p = project(entry_point) 149 | p.build() 150 | 151 | 152 | @click.command() 153 | @click.option( 154 | "--entry-point", 155 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 156 | help="PySpark file to deploy", 157 | required=True, 158 | ) 159 | @click.option( 160 | "--s3-code-uri", 161 | help="Where to copy code artifacts to", 162 | required=True, 163 | ) 164 | @click.pass_obj 165 | def deploy(project, entry_point, s3_code_uri): 166 | """ 167 | Copy a local project to S3. 
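Example (the bucket and prefix are placeholders):
emr deploy --entry-point entrypoint.py --s3-code-uri s3://my-bucket/code/pyspark/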
168 | """ 169 | p = project(entry_point) 170 | p.deploy(s3_code_uri) 171 | 172 | 173 | @click.command() 174 | @click.option("--application-id", help="EMR Serverless Application ID") 175 | @click.option("--cluster-id", help="EMR on EC2 Cluster ID") 176 | @click.option("--virtual-cluster-id", help="EMR on EKS Virtual Cluster ID") 177 | @click.option( 178 | "--entry-point", 179 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 180 | help="Python or Jar file for the main entrypoint", 181 | ) 182 | @click.option("--job-role", help="IAM Role ARN to use for the job execution") 183 | @click.option("--wait", default=False, is_flag=True, help="Wait for job to finish") 184 | @click.option("--s3-code-uri", help="Where to copy/run code artifacts to/from") 185 | @click.option("--s3-logs-uri", help="Where to send EMR Serverless logs to") 186 | @click.option("--job-name", help="The name of the job", default="emr-cli job") 187 | @click.option( 188 | "--job-args", 189 | help="Comma-delimited string of arguments to be passed to Spark job", 190 | default=None, 191 | ) 192 | @click.option( 193 | "--spark-submit-opts", 194 | help="String of spark-submit options", 195 | default=None, 196 | ) 197 | @click.option( 198 | "--build", 199 | help="Package and deploy job artifacts", 200 | default=False, 201 | is_flag=True, 202 | ) 203 | @click.option( 204 | "--show-stdout", 205 | help="Show the stdout of the job after it's finished", 206 | default=False, 207 | is_flag=True, 208 | ) 209 | @click.option( 210 | "--save-config", 211 | help="Update the config file with the provided options", 212 | is_flag=True, 213 | ) 214 | @click.option( 215 | "--emr-eks-release-label", help="EMR on EKS release label (emr-6.15.0) - defaults to latest release", default=None 216 | ) 217 | @click.option( 218 | "--emr-serverless-timeout", 219 | help="EMR Serverless job timeout in minutes - defaults to 12 hours", 220 | default=720, # set to AWS default value (12 hours in minutes) 221 | type=int 222 | ) 223 | @click.pass_obj 224 | @click.pass_context 225 | def run( 226 | ctx, 227 | project, 228 | application_id, 229 | cluster_id, 230 | virtual_cluster_id, 231 | entry_point, 232 | job_role, 233 | wait, 234 | s3_code_uri, 235 | s3_logs_uri, 236 | job_name, 237 | job_args, 238 | spark_submit_opts, 239 | build, 240 | show_stdout, 241 | save_config, 242 | emr_eks_release_label, 243 | emr_serverless_timeout, 244 | ): 245 | """ 246 | Run a project on EMR, optionally build and deploy 247 | """ 248 | resource_ids = [cluster_id, application_id, virtual_cluster_id] 249 | 250 | # A resource ID must be specified 251 | if not any(resource_ids): 252 | raise click.BadArgumentUsage( 253 | "One of --application-id, --cluster-id, or --virtual-cluster-id must be specified." 
254 | ) 255 | 256 | # Only one resource ID can be specified 257 | if resource_ids.count(None) != (len(resource_ids) - 1): 258 | raise click.BadArgumentUsage( 259 | "Only one of --application-id, --cluster-id, or --virtual-cluster-id can be specified" 260 | ) 261 | 262 | # We require entry-point and s3-code-uri 263 | if entry_point is None or s3_code_uri is None: 264 | raise click.BadArgumentUsage("--entry-point and --s3-code-uri are required.") 265 | p = project(entry_point, s3_code_uri) 266 | 267 | # Do a brief validation of the EMR on EKS release label 268 | if emr_eks_release_label: 269 | if not virtual_cluster_id: 270 | raise click.BadArgumentUsage("--emr-eks-release-label can only be used with --virtual-cluster-id") 271 | elif not emr_eks_release_label.startswith("emr-"): 272 | raise click.BadArgumentUsage(f"--emr-eks-release-label must start with 'emr-', provided '{emr_eks_release_label}'") 273 | 274 | # If the user passes --save-config, update our stored config file 275 | if save_config: 276 | run_config = {"run": ctx.__dict__.get("params")} 277 | del run_config["run"]["save_config"] 278 | ConfigWriter.write(run_config) 279 | console_log(f"Config file saved to {DEFAULT_CONFIG_PATH}. Use `emr run` to re-use your configuration.") # noqa: E501 280 | 281 | if build: 282 | p.build() 283 | p.deploy(s3_code_uri) 284 | 285 | if any([application_id, virtual_cluster_id]): 286 | # We require entry-point and job-role 287 | if entry_point is None or job_role is None: 288 | raise click.BadArgumentUsage( 289 | "--entry-point and --job-role are required if --application-id or --virtual-cluster-id is used." 290 | ) 291 | 292 | if emr_serverless_timeout < 0: 293 | raise click.BadArgumentUsage("--emr-serverless-timeout must be greater than or equal to 0.") 294 | 295 | # application_id indicates EMR Serverless job 296 | if application_id is not None: 297 | if job_args: 298 | job_args = job_args.split(",") 299 | emrs = EMRServerless(application_id, job_role, p) 300 | emrs.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout, s3_logs_uri, emr_serverless_timeout) 301 | 302 | # cluster_id indicates EMR on EC2 job 303 | if cluster_id is not None: 304 | if job_args: 305 | job_args = job_args.split(",") 306 | emr = EMREC2(cluster_id, p, job_role) 307 | emr.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout) 308 | 309 | # virtual_cluster_id is EMR on EKS 310 | if virtual_cluster_id is not None: 311 | if job_args: 312 | job_args = job_args.split(",") 313 | emreks = EMREKS(virtual_cluster_id, job_role, p) 314 | emreks.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout, s3_logs_uri, emr_eks_release_label) 315 | 316 | 317 | cli.add_command(package) 318 | cli.add_command(deploy) 319 | cli.add_command(run) 320 | cli.add_command(init) 321 | cli.add_command(bootstrap) 322 | cli.add_command(status) 323 | 324 | if __name__ == "__main__": 325 | cli() # type: ignore 326 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from emr_cli.deployments.emr_serverless import DeploymentPackage 4 | 5 | from emr_cli.packaging.python_files_project import PythonFilesProject 6 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject 7 | from emr_cli.packaging.python_project import PythonProject 8 | from emr_cli.packaging.simple_project import SimpleProject 9 | from emr_cli.utils import 
find_files
10 | 
11 | 
12 | class ProjectDetector:
13 | """
14 | Detects the type of package used for Spark deployment.
15 | - Single PySpark file
16 | - setuptools-based project
17 | - poetry project
18 | - requirements.txt
19 | """
20 | 
21 | PROJECT_TYPE_MAPPINGS = {
22 | "single-file": SimpleProject,
23 | "python": PythonProject,
24 | "poetry": PythonPoetryProject,
25 | }
26 | 
27 | def detect(self, project_type: Optional[str] = None) -> DeploymentPackage.__class__:
28 | if project_type:
29 | if project_type not in self.PROJECT_TYPE_MAPPINGS:
30 | raise ValueError(f"Unknown project type {project_type}")
31 | return self.PROJECT_TYPE_MAPPINGS.get(project_type) # type: ignore
32 | 
33 | # We default to a single file project - if the user has just a .py or .jar
34 | project = SimpleProject
35 | 
36 | # If there are multiple .py files, we escalate to a PythonFilesProject
37 | if len(find_files(os.getcwd(), [".venv"], ".py")) > 1:
38 | project = PythonFilesProject
39 | 
40 | # If we have a pyproject.toml or setup.py, we have a python project
41 | if find_files(os.getcwd(), [".venv"], "pyproject.toml") or find_files(
42 | os.getcwd(), [".venv"], "setup.py"
43 | ):
44 | project = PythonProject
45 | 
46 | # If we have a poetry.lock, it's a poetry project
47 | if find_files(os.getcwd(), [".venv"], "poetry.lock"):
48 | project = PythonPoetryProject
49 | 
50 | return project
51 | 
-------------------------------------------------------------------------------- /src/emr_cli/packaging/python_files_project.py: --------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | 
4 | import boto3
5 | from emr_cli.deployments import SparkParams
6 | from emr_cli.deployments.emr_serverless import DeploymentPackage
7 | from emr_cli.utils import (
8 | PrettyUploader,
9 | console_log,
10 | find_files,
11 | mkdir,
12 | parse_bucket_uri,
13 | )
14 | 
15 | 
16 | class PythonFilesProject(DeploymentPackage):
17 | """
18 | A PythonFilesProject is a simple project that includes multiple `.py` files.
19 | 
20 | This is a simple project that has no external dependencies and requires no
21 | additional packaging. The files in the project are simply zipped up.
22 | """
23 | 
24 | def build(self):
25 | """
26 | Zip all the files except for the entrypoint file.
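The resulting dist/pyfiles.zip is distributed to executors via the
spark.submit.pyFiles parameter (see spark_submit_parameters below).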
27 | """ 28 | py_files = find_files(os.getcwd(), [".venv"], ".py") 29 | py_files.remove(os.path.abspath(self.entry_point_path)) 30 | cwd = os.getcwd() 31 | mkdir(self.dist_dir) 32 | with zipfile.ZipFile(f"{self.dist_dir}/pyfiles.zip", "w") as zf: 33 | for file in py_files: 34 | relpath = os.path.relpath(file, cwd) 35 | zf.write(file, relpath) 36 | 37 | def deploy(self, s3_code_uri: str) -> str: 38 | """ 39 | Copies local code to S3 and returns the path to the uploaded entrypoint 40 | """ 41 | s3_client = boto3.client("s3") 42 | bucket, prefix = parse_bucket_uri(s3_code_uri) 43 | filename = os.path.basename(self.entry_point_path) 44 | 45 | console_log(f"Deploying {filename} and local python modules to {s3_code_uri}") 46 | 47 | uploader = PrettyUploader( 48 | s3_client, 49 | bucket, 50 | { 51 | self.entry_point_path: os.path.join(prefix, filename), 52 | os.path.join(self.dist_dir, "pyfiles.zip"): os.path.join( 53 | prefix, "pyfiles.zip" 54 | ), 55 | }, 56 | ) 57 | uploader.run() 58 | 59 | return f"s3://{bucket}/{prefix}/{filename}" 60 | 61 | def spark_submit_parameters(self) -> SparkParams: 62 | zip_path = os.path.join(self.s3_uri_base, "pyfiles.zip") 63 | return SparkParams( 64 | common_params={ 65 | "spark.submit.pyFiles": zip_path, 66 | }, 67 | ) 68 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/python_poetry_project.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | from pathlib import Path 5 | from typing import List 6 | from urllib.parse import urlparse 7 | 8 | import boto3 9 | 10 | from emr_cli.deployments import SparkParams 11 | from emr_cli.deployments.emr_serverless import DeploymentPackage 12 | from emr_cli.utils import ( 13 | PrettyUploader, 14 | console_log, 15 | copy_template, 16 | validate_build_target, 17 | ) 18 | 19 | 20 | class PythonPoetryProject(DeploymentPackage): 21 | def initialize(self, target_dir: str = os.getcwd()): 22 | """ 23 | Initializes a poetry-based pyspark project in the provided directory. 
24 | - Creates a basic poetry project
25 | - Creates a pyproject.toml file
26 | - Creates a Dockerfile
27 | """
28 | console_log(f"Initializing project in {target_dir}")
29 | copy_template("pyspark", target_dir)
30 | copy_template("poetry", target_dir)
31 | console_log("Project initialized.")
32 | 
33 | def build(self):
34 | if not Path("poetry.lock").exists():
35 | print("Error: No poetry.lock present, please set up your poetry project.")
36 | sys.exit(1)
37 | 
38 | console_log(f"Packaging assets into {self.dist_dir}/")
39 | # TODO: Add an option for --force-local-build
40 | self._run_docker_build(self.dist_dir)
41 | 
42 | def _run_local_build(self, output_dir: str = "dist"):
43 | subprocess.run(
44 | ["poetry", "bundle", "venv", "poeticemrbundle", "--without", "dev"],
45 | check=True,
46 | )
47 | 
48 | def _run_docker_build(self, output_dir: str):
49 | validate_build_target("export-poetry")
50 | subprocess.run(
51 | [
52 | "docker",
53 | "build",
54 | "--target",
55 | "export-poetry",
56 | "--output",
57 | output_dir,
58 | "--file",
59 | self._dockerfile_path(),
60 | ".",
61 | ],
62 | check=True,
63 | env=dict(os.environ, DOCKER_BUILDKIT="1"),
64 | )
65 | 
66 | def _dockerfile_path(self) -> str:
67 | if Path("Dockerfile").is_file():
68 | return "Dockerfile"
69 | 
70 | templates = os.path.abspath(
71 | os.path.join(os.path.dirname(__file__), "..", "templates", "pyspark")
72 | )
73 | return os.path.join(templates, "Dockerfile")
74 | 
75 | def deploy(self, s3_code_uri: str) -> str:
76 | """
77 | Copies local code to S3 and returns the path to the uploaded entrypoint
78 | """
79 | s3_client = boto3.client("s3")
80 | bucket, prefix = self._parse_bucket_uri(s3_code_uri)
81 | filename = os.path.basename(self.entry_point_path)
82 | 
83 | console_log(f"Deploying {filename} and dependencies to {s3_code_uri}")
84 | 
85 | uploader = PrettyUploader(
86 | s3_client,
87 | bucket,
88 | {
89 | self.entry_point_path: os.path.join(prefix, filename),
90 | os.path.join(self.dist_dir, "pyspark_deps.tar.gz"): os.path.join(
91 | prefix, "pyspark_deps.tar.gz"
92 | ),
93 | },
94 | )
95 | uploader.run()
96 | 
97 | return f"s3://{bucket}/{prefix}/{filename}"
98 | 
99 | def spark_submit_parameters(self) -> SparkParams:
100 | tar_path = os.path.join(self.s3_uri_base, "pyspark_deps.tar.gz")
101 | return SparkParams(
102 | common_params={
103 | "spark.archives": f"{tar_path}#environment",
104 | },
105 | emr_serverless_params={
106 | "spark.emr-serverless.driverEnv.PYSPARK_DRIVER_PYTHON": "./environment/bin/python",
107 | "spark.emr-serverless.driverEnv.PYSPARK_PYTHON": "./environment/bin/python",
108 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python",
109 | },
110 | emr_ec2_params={
111 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python",
112 | "spark.yarn.appMasterEnv.PYSPARK_PYTHON": "./environment/bin/python",
113 | },
114 | emr_eks_params={
115 | "spark.pyspark.python": "./environment/bin/python",
116 | },
117 | )
118 | 
119 | def _parse_bucket_uri(self, uri: str) -> List[str]:
120 | result = urlparse(uri, allow_fragments=False)
121 | return [result.netloc, result.path.strip("/")]
122 | 
-------------------------------------------------------------------------------- /src/emr_cli/packaging/python_project.py: --------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import sys
4 | from pathlib import Path
5 | from shutil import copy
6 | 
7 | import boto3
8 | 
9 | from emr_cli.deployments import SparkParams
10 | from 
emr_cli.deployments.emr_serverless import DeploymentPackage 11 | from emr_cli.utils import ( 12 | PrettyUploader, 13 | console_log, 14 | copy_template, 15 | parse_bucket_uri, 16 | validate_build_target, 17 | ) 18 | 19 | 20 | class PythonProject(DeploymentPackage): 21 | def initialize(self, target_dir: str = os.getcwd()): 22 | """ 23 | Initializes a pyspark project in the provided directory. 24 | - Creates a basic project 25 | - Creates a pyproject.toml file 26 | - Creates a Dockerfile 27 | """ 28 | console_log(f"Initializing project in {target_dir}") 29 | copy_template("pyspark", target_dir) 30 | console_log("Project initialized.") 31 | 32 | def copy_single_file(self, relative_file_path: str, target_dir: str = os.getcwd()): 33 | """ 34 | Copies a single file from the template directory to the target directory. 35 | """ 36 | template_path = ( 37 | Path(__file__).parent.parent / "templates" / "pyspark" / relative_file_path 38 | ) 39 | target_path = Path(target_dir) 40 | copy(template_path, target_path) 41 | 42 | def build(self): 43 | """ 44 | For now, uses a pre-existing Docker file and setuptools 45 | """ 46 | if not Path("Dockerfile").exists(): 47 | print( 48 | "Error: No Dockerfile present, use 'emr-cli init --dockerfile' to generate one" # noqa: E501 49 | ) 50 | sys.exit(1) 51 | if not Path("pyproject.toml").exists(): 52 | print("Error: No pyproject.toml present, please set one up before building") 53 | sys.exit(1) 54 | 55 | console_log(f"Packaging assets into {self.dist_dir}/") 56 | self._run_docker_build(self.dist_dir) 57 | 58 | def _run_docker_build(self, output_dir: str): 59 | validate_build_target("export-python") 60 | subprocess.run( 61 | [ 62 | "docker", 63 | "build", 64 | "--target", 65 | "export-python", 66 | "--output", 67 | output_dir, 68 | ".", 69 | ], 70 | check=True, 71 | env=dict(os.environ, DOCKER_BUILDKIT="1"), 72 | ) 73 | 74 | def deploy(self, s3_code_uri: str) -> str: 75 | """ 76 | Copies local code to S3 and returns the path to the uploaded entrypoint 77 | """ 78 | self.s3_uri_base = s3_code_uri 79 | s3_client = boto3.client("s3") 80 | bucket, prefix = parse_bucket_uri(self.s3_uri_base) 81 | filename = os.path.basename(self.entry_point_path) 82 | 83 | console_log(f"Deploying {filename} and dependencies to {self.s3_uri_base}") 84 | 85 | uploader = PrettyUploader( 86 | s3_client, 87 | bucket, 88 | { 89 | self.entry_point_path: os.path.join(prefix, filename), 90 | os.path.join(self.dist_dir, "pyspark_deps.tar.gz"): os.path.join( 91 | prefix, "pyspark_deps.tar.gz" 92 | ), 93 | }, 94 | ) 95 | uploader.run() 96 | 97 | return f"s3://{bucket}/{prefix}/{filename}" 98 | 99 | def spark_submit_parameters(self) -> SparkParams: 100 | tar_path = os.path.join(self.s3_uri_base, "pyspark_deps.tar.gz") 101 | return SparkParams( 102 | common_params={ 103 | "spark.archives": f"{tar_path}#environment", 104 | }, 105 | emr_serverless_params={ 106 | "spark.emr-serverless.driverEnv.PYSPARK_DRIVER_PYTHON": "./environment/bin/python", 107 | "spark.emr-serverless.driverEnv.PYSPARK_PYTHON": "./environment/bin/python", 108 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python", 109 | }, 110 | emr_ec2_params={ 111 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python", 112 | "spark.yarn.appMasterEnv.PYSPARK_PYTHON": "./environment/bin/python", 113 | }, 114 | emr_eks_params={ 115 | "spark.pyspark.python": "./environment/bin/python", 116 | }, 117 | ) 118 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/simple_project.py: 
-------------------------------------------------------------------------------- 
1 | import os
2 | 
3 | import boto3
4 | 
5 | from emr_cli.deployments.emr_serverless import DeploymentPackage
6 | from emr_cli.utils import PrettyUploader, console_log, parse_bucket_uri
7 | 
8 | 
9 | class SimpleProject(DeploymentPackage):
10 | """
11 | A simple project only has a single entry point file.
12 | This can be a pyspark file or packaged jar file.
13 | """
14 | 
15 | def build(self):
16 | pass
17 | 
18 | def deploy(self, s3_code_uri: str) -> str:
19 | """
20 | Copies local code to S3 and returns the path to the uploaded entrypoint
21 | """
22 | s3_client = boto3.client("s3")
23 | bucket, prefix = parse_bucket_uri(s3_code_uri)
24 | filename = os.path.basename(self.entry_point_path)
25 | 
26 | console_log(f"Deploying {filename} to {s3_code_uri}")
27 | uploader = PrettyUploader(
28 | s3_client,
29 | bucket,
30 | {
31 | self.entry_point_path: os.path.join(prefix, filename),
32 | },
33 | )
34 | uploader.run()
35 | 
36 | return f"s3://{bucket}/{prefix}/{filename}"
37 | 
-------------------------------------------------------------------------------- /src/emr_cli/templates/poetry/README.md: --------------------------------------------------------------------------------
1 | # EMR Serverless Poetry Template
2 | 
3 | Welcome to your new EMR Serverless Poetry PySpark project!
4 | 
5 | To get started, change into the project you just created and run the `install` command.
6 | 
7 | ```bash
8 | poetry install
9 | ```
10 | 
11 | Your dependencies should now all be resolved and you should have a new `poetry.lock` file in your project.
12 | 
13 | ## Deploy!
14 | 
15 | Now we can go ahead and build our project and deploy it on EMR Serverless.
16 | 
17 | > **Note** This tutorial assumes you have already [set up EMR Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/setting-up.html) and have an EMR Serverless application, job role, and S3 bucket you can use. You can also use the `emr bootstrap` command.
18 | 
19 | 1. Set your relevant variables
20 | 
21 | ```bash
22 | APPLICATION_ID=
23 | JOB_ROLE_ARN=
24 | S3_BUCKET=
25 | ```
26 | 
27 | 2. Package, deploy, and run your job all in one command. 
28 | 
29 | ```
30 | emr run \
31 | --entry-point entrypoint.py \
32 | --application-id ${APPLICATION_ID} \
33 | --job-role ${JOB_ROLE_ARN} \
34 | --s3-code-uri s3://${S3_BUCKET}/tmp/emr-cli-demo-poetry/ \
35 | --build --wait
36 | ```
-------------------------------------------------------------------------------- /src/emr_cli/templates/poetry/pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "mysparkjobs"
3 | version = "0.0.1"
4 | description = "EMR Serverless Spark jobs"
5 | authors = ["Amazon EMR"]
6 | readme = "README.md"
7 | packages = [{include = "jobs"}]
8 | 
9 | [tool.poetry.dependencies]
10 | python = "^3.7.10"
11 | pandas = "1.3.5"
12 | pyarrow = "8.0.0"
13 | 
14 | [tool.poetry.group.dev.dependencies]
15 | pyspark = "3.3.0"
16 | pytest = "^7.2.0"
17 | 
18 | [build-system]
19 | requires = ["poetry-core"]
20 | build-backend = "poetry.core.masonry.api"
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/.dockerignore: --------------------------------------------------------------------------------
1 | .venv/
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/.gitignore: --------------------------------------------------------------------------------
1 | .venv/
2 | dist/
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/Dockerfile: --------------------------------------------------------------------------------
1 | # This is a multi-stage Dockerfile that can be used to build many different types of
2 | # bundled dependencies for PySpark projects.
3 | # The `base` stage installs generic tools necessary for packaging.
4 | #
5 | # There are `export-` and `build-` stages for the different types of projects.
6 | # - python-packages - Generic support for Python projects with pyproject.toml
7 | # - poetry - Support for Poetry projects
8 | #
9 | # This Dockerfile is generated automatically as part of the emr-cli tool.
10 | # Feel free to modify it for your needs, but leave the `build-` and `export-`
11 | # stages related to your project.
12 | #
13 | # To build manually, you can use the following command, assuming
14 | # the Docker BuildKit backend is enabled. https://docs.docker.com/build/buildkit/
15 | #
16 | # Example for building a poetry project and saving the output to dist/ folder
17 | # docker build --target export-poetry --output dist .
18 | 
19 | 
20 | ## ----------------------------------------------------------------------------
21 | ## Base stage for python development
22 | ## ----------------------------------------------------------------------------
23 | FROM --platform=linux/amd64 amazonlinux:2 AS base
24 | 
25 | RUN yum install -y python3 tar gzip
26 | 
27 | ENV VIRTUAL_ENV=/opt/venv
28 | RUN python3 -m venv $VIRTUAL_ENV
29 | ENV PATH="$VIRTUAL_ENV/bin:$PATH"
30 | 
31 | # EMR 6.x uses Python 3.7 - limit Poetry version to 1.5.1
32 | ENV POETRY_VERSION=1.5.1
33 | RUN python3 -m pip install --upgrade pip
34 | RUN curl -sSL https://install.python-poetry.org | python3 -
35 | 
36 | ENV PATH="$PATH:/root/.local/bin"
37 | 
38 | WORKDIR /app
39 | 
40 | COPY . . 
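# Note: the .dockerignore shipped with this template excludes .venv/, keeping
# local virtualenvs out of the build context copied above.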
41 | 
42 | # Test stage - installs test dependencies defined in pyproject.toml
43 | FROM base as test
44 | RUN python3 -m pip install .[tests]
45 | 
46 | ## ----------------------------------------------------------------------------
47 | ## Build and export stages for standard Python projects
48 | ## ----------------------------------------------------------------------------
49 | # Build stage - installs required dependencies and creates a venv package
50 | FROM base as build-python
51 | RUN python3 -m pip install venv-pack==0.2.0 && \
52 | python3 -m pip install .
53 | RUN mkdir /output && venv-pack -o /output/pyspark_deps.tar.gz
54 | 
55 | # Export stage - used to copy packaged venv to local filesystem
56 | FROM scratch AS export-python
57 | COPY --from=build-python /output/pyspark_deps.tar.gz /
58 | 
59 | ## ----------------------------------------------------------------------------
60 | ## Build and export stages for Poetry Python projects
61 | ## ----------------------------------------------------------------------------
62 | # Build stage for poetry
63 | FROM base as build-poetry
64 | RUN poetry self add poetry-plugin-bundle && \
65 | poetry bundle venv dist/bundle --without dev && \
66 | tar -czvf dist/pyspark_deps.tar.gz -C dist/bundle . && \
67 | rm -rf dist/bundle
68 | 
69 | FROM scratch as export-poetry
70 | COPY --from=build-poetry /app/dist/pyspark_deps.tar.gz /
71 | 
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/entrypoint.py: --------------------------------------------------------------------------------
1 | import sys
2 | from datetime import date
3 | 
4 | from jobs.extreme_weather import ExtremeWeather
5 | 
6 | if __name__ == "__main__":
7 | """
8 | Usage: extreme-weather [year]
9 | Displays extreme weather stats (highest temperature, wind, precipitation) for the given, or latest, year.
10 | """
11 | if len(sys.argv) > 1:
12 | year = int(sys.argv[1])
13 | else:
14 | year = date.today().year
15 | 
16 | extreme_weather = ExtremeWeather(year)
17 | extreme_weather.run()
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/jobs/extreme_weather.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from datetime import date
3 | 
4 | import pandas as pd
5 | from pyspark.sql import DataFrame, Row, SparkSession
6 | from pyspark.sql import functions as F
7 | 
8 | GSOD_S3_BASE = "s3://noaa-gsod-pds"
9 | 
10 | 
11 | class ExtremeWeather:
12 | """
13 | Usage: extreme-weather [--year xxxx]
14 | 
15 | Displays extreme weather stats (highest temp, wind, precipitation) for the given year.
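Example (a sketch, assuming a local Spark installation):
spark-submit jobs/extreme_weather.py --year 2022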
16 | """
17 | 
18 | def __init__(self, year: int) -> None:
19 | self.year = year
20 | self.spark = SparkSession.builder.appName("ExtremeWeather").getOrCreate()
21 | 
22 | def run(self) -> None:
23 | df = self._fetch_data()
24 | for stat in [
25 | {"description": "Highest temperature", "column_name": "MAX", "units": "°F"},
26 | {
27 | "description": "Highest all-day average temperature",
28 | "column_name": "TEMP",
29 | "units": "°F",
30 | },
31 | ]:
32 | max_row = self.findLargest(df, stat.get("column_name"))
33 | print(f"--- {stat['description']}")
34 | print(
35 | f" {max_row[stat['column_name']]}{stat['units']} on {max_row.DATE} at {max_row.NAME} ({max_row.LATITUDE}, {max_row.LONGITUDE})"
36 | )
37 | 
38 | print("--- Top 10 Outliers")
39 | outliers = self.find_outliers_for_column(df, stat.get("column_name"))
40 | for i, row in outliers[:10].iterrows():
41 | print(
42 | f" {row['NAME']} ({row['DATE']}) – {row[stat['column_name']]}{stat['units']}"
43 | )
44 | print("\n")
45 | 
46 | def find_outliers_for_column(
47 | self,
48 | df: DataFrame,
49 | col: str,
50 | percent: float = 0.99,
51 | ) -> pd.DataFrame:
52 | """
53 | Converts the provided DataFrame to a Pandas DataFrame and returns the rows above the `percent` quantile.
54 | """
55 | dfp = df.toPandas()
56 | q = dfp.quantile(percent)
57 | return dfp[dfp[col] > q[col]]
58 | 
59 | def _gsod_year_uri(self, year: int) -> str:
60 | """
61 | Builds the s3 URI for the provided year
62 | """
63 | return f"{GSOD_S3_BASE}/{year}/"
64 | 
65 | def _fetch_data(self) -> DataFrame:
66 | """
67 | Reads GSOD csv data for the specified year.
68 | """
69 | df = self.spark.read.csv(
70 | self._gsod_year_uri(self.year), header=True, inferSchema=True
71 | )
72 | return df
73 | 
74 | def findLargest(self, df: DataFrame, col_name: str) -> Row:
75 | """
76 | Find the largest value in `col_name` column.
77 | Values of 99.99, 999.9 and 9999.9 are excluded because they indicate "no reading" for that attribute.
78 | While 99.99 _could_ be a valid value for temperature, for example, we know there are higher readings. 
79 | """ 80 | return ( 81 | df.select( 82 | "STATION", 83 | "DATE", 84 | "LATITUDE", 85 | "LONGITUDE", 86 | "ELEVATION", 87 | "NAME", 88 | col_name, 89 | ) 90 | .filter(~F.col(col_name).isin([99.99, 999.9, 9999.9])) 91 | .orderBy(F.desc(col_name)) 92 | .limit(1) 93 | .first() 94 | ) 95 | 96 | 97 | def parse_args() -> argparse.Namespace: 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("--year", type=int, required=False, default=date.today().year) 100 | return parser.parse_args() 101 | 102 | 103 | if __name__ == "__main__": 104 | args = parse_args() 105 | weather_data = ExtremeWeather(args.year) 106 | weather_data.run() -------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mysparkjobs" 3 | version = "0.0.1" 4 | 5 | dependencies = [ 6 | 'pandas==1.3.5', 7 | 'pyarrow==8.0.0', 8 | ] 9 | 10 | [project.optional-dependencies] 11 | tests = [ 12 | 'pytest==7.1.2', 13 | ] -------------------------------------------------------------------------------- /src/emr_cli/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import re 4 | import sys 5 | from pathlib import Path 6 | from shutil import copyfile, copytree, ignore_patterns 7 | from typing import TYPE_CHECKING, Dict, List 8 | from urllib.parse import urlparse 9 | 10 | from rich.progress import Progress, TotalFileSizeColumn 11 | 12 | if TYPE_CHECKING: 13 | from mypy_boto3_s3 import S3Client 14 | else: 15 | S3Client = object 16 | 17 | 18 | def console_log(message): 19 | print(f"[emr-cli]: {message}") 20 | 21 | 22 | def find_files(directory, excluded_dirs=[], search=None) -> List[str]: 23 | files = [] 24 | for root, dirs, filenames in os.walk(directory): 25 | dirs[:] = [d for d in dirs if d not in excluded_dirs] 26 | for filename in filenames: 27 | if search is None or filename == search or filename.endswith(search): 28 | files.append(os.path.join(root, filename)) 29 | return files 30 | 31 | 32 | def parse_bucket_uri(uri: str) -> List[str]: 33 | result = urlparse(uri, allow_fragments=False) 34 | return [result.netloc, result.path.strip("/")] 35 | 36 | 37 | def mkdir(path: str): 38 | try: 39 | os.mkdir(path) 40 | except FileExistsError: 41 | pass 42 | 43 | 44 | def copy_template(source: str, target_dir: str): 45 | """ 46 | Copies the entire `source` directory to `target_dir`. 
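Existing files in `target_dir` are overwritten, and `__pycache__` directories
are skipped.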
47 | """ 48 | source = os.path.abspath(Path(__file__).parent.parent / "templates" / source) 49 | if sys.version_info.major == 3 and sys.version_info.minor == 7: 50 | py37_copytree(source, target_dir, ignore=ignore_patterns("__pycache__")) 51 | else: 52 | copytree( 53 | source, 54 | target_dir, 55 | dirs_exist_ok=True, 56 | ignore=ignore_patterns("__pycache__"), 57 | ) 58 | 59 | 60 | def py37_copytree(src, dest, ignore=None): 61 | """ 62 | A Python3 3.7 version of shutils.copytree since `dirs_exist_ok` was introduced in 3.8 63 | """ 64 | if os.path.isdir(src): 65 | if not os.path.isdir(dest): 66 | os.makedirs(dest) 67 | files = os.listdir(src) 68 | if ignore is not None: 69 | ignored = ignore(src, files) 70 | else: 71 | ignored = set() 72 | for f in files: 73 | if f not in ignored: 74 | py37_copytree(os.path.join(src, f), os.path.join(dest, f), ignore) 75 | else: 76 | copyfile(src, dest) 77 | 78 | 79 | def validate_build_target(name: str) -> bool: 80 | """ 81 | Grep the local Dockerfile for the desired target, raise an exception if it's not found 82 | """ 83 | r = None 84 | search_term = f"FROM .* AS {name}$" 85 | with open("Dockerfile", "r") as file: 86 | for line in file: 87 | r = re.search(search_term, line, flags=re.IGNORECASE) 88 | if r: 89 | return True 90 | if not r: 91 | console_log(f"ERR: Target `{name}` not found in Dockerfile.") 92 | console_log( 93 | "ERR: Try creating a new dockerfile with the `emr init --dockerfile .` command." 94 | ) 95 | sys.exit(1) 96 | 97 | return False 98 | 99 | 100 | def print_s3_gz(client: S3Client, s3_uri: str): 101 | """ 102 | Downloads and decompresses a gzip file from S3 and prints the logs to stdout. 103 | """ 104 | bucket, key = parse_bucket_uri(s3_uri) 105 | gz = client.get_object(Bucket=bucket, Key=key) 106 | with gzip.open(gz["Body"]) as data: 107 | print(data.read().decode()) 108 | 109 | 110 | class PrettyUploader: 111 | def __init__( 112 | self, 113 | s3_client: S3Client, 114 | bucket: str, 115 | src_target: Dict[str, str], 116 | ): 117 | self._s3_client = s3_client 118 | self._bucket = bucket 119 | self._src_target = src_target 120 | self._totalsize = sum( 121 | [float(os.path.getsize(filename)) for filename in self._src_target.keys()] 122 | ) 123 | self._seensize = 0 124 | self._progress = Progress( 125 | *Progress.get_default_columns(), TotalFileSizeColumn() 126 | ) 127 | self._task = self._progress.add_task("Uploading...", total=self._totalsize) 128 | 129 | def run(self): 130 | with self._progress: 131 | for src, target in self._src_target.items(): 132 | self._s3_client.upload_file(src, self._bucket, target, Callback=self) 133 | 134 | def __call__(self, bytes_amount): 135 | self._progress.update(self._task, advance=bytes_amount) 136 | -------------------------------------------------------------------------------- /tests/deployments/test_emr_ec2.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import MagicMock 3 | 4 | from emr_cli.deployments.emr_ec2 import EMREC2 5 | from emr_cli.deployments.emr_serverless import DeploymentPackage 6 | 7 | CLUSTER_ID = "j-11111111" 8 | 9 | 10 | class TestEMREC2(unittest.TestCase): 11 | def setUp(self): 12 | self.obj = EMREC2(CLUSTER_ID, DeploymentPackage()) 13 | 14 | def test_fetch_log_location_success(self): 15 | self.obj.client.describe_cluster = MagicMock( 16 | return_value={"Cluster": {"LogUri": "s3n://example-bucket/logs/"}} 17 | ) 18 | self.assertEqual(self.obj._fetch_log_location(), "s3://example-bucket/logs/") 19 | 20 | 
20 |     def test_fetch_log_location_no_loguri(self):
21 |         self.obj.client.describe_cluster = MagicMock(return_value={"Cluster": {}})
22 |         # Ensure that a RuntimeError is raised
23 |         with self.assertRaises(RuntimeError):
24 |             self.obj._fetch_log_location()
25 | 
26 |     def test_fetch_log_location_loguri_none(self):
27 |         self.obj.client.describe_cluster = MagicMock(
28 |             return_value={"Cluster": {"LogUri": None}}
29 |         )
30 |         # Ensure that a RuntimeError is raised
31 |         with self.assertRaises(RuntimeError):
32 |             self.obj._fetch_log_location()
33 | 
34 |     def test_fetch_log_location_replace_s3n_with_s3(self):
35 |         self.obj.client.describe_cluster = MagicMock(
36 |             return_value={"Cluster": {"LogUri": "s3n://example-bucket/logs/"}}
37 |         )
38 |         # Ensure that "s3n:" is replaced with "s3:" in the returned S3 location
39 |         self.assertEqual(self.obj._fetch_log_location(), "s3://example-bucket/logs/")
--------------------------------------------------------------------------------
/tests/packaging/test_python_files_project.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from emr_cli.deployments import SparkParams
3 | 
4 | from emr_cli.packaging.python_files_project import PythonFilesProject
5 | 
6 | 
7 | class TestPythonFilesProject:
8 |     def test_build(self, fs):
9 |         fs.create_file("main.py")
10 |         fs.create_file("lib/file1.py")
11 |         fs.create_file("lib/file2.py")
12 |         pfp = PythonFilesProject("main.py")
13 |         pfp.build()
14 |         assert Path("dist/pyfiles.zip").exists()
15 | 
16 |     def test_spark_submit(self, fs):
17 |         fs.create_file("main.py")
18 |         fs.create_file("lib/file1.py")
19 |         fs.create_file("lib/file2.py")
20 |         pfp = PythonFilesProject("main.py")
21 |         sp = pfp.spark_submit_parameters()
22 |         assert isinstance(sp, SparkParams)
23 |         assert sp.params_for("emr_serverless").startswith("--conf spark.submit.pyFiles=")
--------------------------------------------------------------------------------
/tests/packaging/test_python_poetry_project.py:
--------------------------------------------------------------------------------
1 | from emr_cli.deployments import SparkParams
2 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
3 | 
4 | 
5 | class TestPythonPoetryProject:
6 |     def test_spark_submit(self, fs):
7 |         fs.create_file("main.py")
8 |         fs.create_file("lib/file1.py")
9 |         fs.create_file("lib/file2.py")
10 |         ppp = PythonPoetryProject("main.py")
11 |         sp = ppp.spark_submit_parameters()
12 |         assert isinstance(sp, SparkParams)
13 |         assert "spark.archives" in sp.params_for("emr_serverless")
14 |         assert "spark.emr-serverless.driverEnv" in sp.params_for("emr_serverless")
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | 
3 | from emr_cli.emr_cli import cli
4 | 
5 | class TestCli:
6 |     def test_version(self):
7 |         runner = CliRunner()
8 |         result = runner.invoke(cli, ['status'])
9 |         assert result.exit_code == 0
10 |         assert 'EMR CLI version:' in result.output
11 | 
12 |     def test_project_detection(self):
13 |         runner = CliRunner()
14 |         with runner.isolated_filesystem():
15 |             with open('main.py', 'w') as f:
16 |                 f.write('print("Hello World")')
17 | 
18 |             result = runner.invoke(cli, ['status'])
19 |             assert result.exit_code == 0
20 |             assert 'Project type:\t\tSimpleProject' in result.output
21 | 
22 |     def test_resource_validation(self):
23 |         runner = CliRunner()
24 |         result = runner.invoke(cli, ['run'])
25 |         assert result.exit_code == 2
26 |         assert 'Error: One of' in result.output
27 |         assert "must be specified" in result.output
28 | 
29 |         result = runner.invoke(cli, ['run', '--application-id', '1234', '--cluster-id', '567'])
30 |         assert result.exit_code == 2
31 |         assert 'Error: Only one of' in result.output
32 |         assert "can be specified" in result.output
33 | 
34 |         for arg in ['--application-id', '--cluster-id', '--virtual-cluster-id']:
35 |             result = runner.invoke(cli, ['run', arg, '1234'])
36 |             assert result.exit_code == 2
37 |             assert 'Error: --entry-point' in result.output
38 | 
--------------------------------------------------------------------------------
/tests/test_detector.py:
--------------------------------------------------------------------------------
1 | from emr_cli.packaging.detector import ProjectDetector
2 | from emr_cli.packaging.python_files_project import PythonFilesProject
3 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
4 | from emr_cli.packaging.python_project import PythonProject
5 | from emr_cli.packaging.simple_project import SimpleProject
6 | 
7 | 
8 | class TestDetector:
9 |     def test_single_py_file(self, fs):
10 |         fs.create_file("main.py")
11 |         obj = ProjectDetector().detect()
12 |         assert obj == SimpleProject
13 | 
14 |     def test_multi_py_file(self, fs):
15 |         fs.create_file("main.py")
16 |         fs.create_file("lib/file1.py")
17 |         fs.create_file("lib/file2.py")
18 |         obj = ProjectDetector().detect()
19 |         assert obj == PythonFilesProject
20 | 
21 |     def test_poetry_project(self, fs):
22 |         fs.create_file("poetry.lock")
23 |         obj = ProjectDetector().detect()
24 |         assert obj == PythonPoetryProject
25 | 
26 |     def test_dependency_project(self, fs):
27 |         fs.create_file("main.py")
28 |         fs.create_file("pyproject.toml")
29 |         fs.create_file("lib/file1.py")
30 |         fs.create_file("lib/file2.py")
31 |         obj = ProjectDetector().detect()
32 |         assert obj == PythonProject
--------------------------------------------------------------------------------
/tests/test_init.py:
--------------------------------------------------------------------------------
1 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
2 | from emr_cli.packaging.python_project import PythonProject
3 | 
4 | 
5 | class TestInit:
6 |     def test_default_init(self, tmp_path):
7 |         p = PythonProject()
8 |         target_path = tmp_path / "python_proj"
9 |         p.initialize(target_path)
10 |         assert (target_path / "pyproject.toml").exists()
11 |         assert (target_path / "entrypoint.py").exists()
12 |         assert (target_path / "jobs" / "extreme_weather.py").exists()
13 |         assert not (target_path / "README.md").exists()
14 | 
15 |     def test_poetry_init(self, tmp_path):
16 |         p = PythonPoetryProject()
17 |         target_path = tmp_path / "python_poetry_proj"
18 |         p.initialize(target_path)
19 |         assert (target_path / "entrypoint.py").exists()
20 |         assert (target_path / "pyproject.toml").exists()
21 |         assert (target_path / "README.md").exists()
22 | 
23 |     def test_create_in_existing_folder(self, tmp_path):
24 |         pass
25 | 
--------------------------------------------------------------------------------