├── .github
│   └── workflows
│       ├── pypi-publish.yaml
│       └── unit-tests.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── NOTICE
├── README.md
├── SECURITY.md
├── poetry.lock
├── pyproject.toml
├── requirements.txt
├── src
│   └── emr_cli
│       ├── config.py
│       ├── deployments
│       │   ├── __init__.py
│       │   ├── emr_ec2.py
│       │   ├── emr_eks.py
│       │   └── emr_serverless.py
│       ├── emr_cli.py
│       ├── packaging
│       │   ├── detector.py
│       │   ├── python_files_project.py
│       │   ├── python_poetry_project.py
│       │   ├── python_project.py
│       │   └── simple_project.py
│       ├── templates
│       │   ├── poetry
│       │   │   ├── README.md
│       │   │   └── pyproject.toml
│       │   └── pyspark
│       │       ├── .dockerignore
│       │       ├── .gitignore
│       │       ├── Dockerfile
│       │       ├── entrypoint.py
│       │       ├── jobs
│       │       │   └── extreme_weather.py
│       │       └── pyproject.toml
│       └── utils
│           └── __init__.py
└── tests
    ├── deployments
    │   └── test_emr_ec2.py
    ├── packaging
    │   ├── test_python_files_project.py
    │   └── test_python_poetry_project.py
    ├── test_cli.py
    ├── test_detector.py
    └── test_init.py

/.github/workflows/pypi-publish.yaml:
--------------------------------------------------------------------------------
name: Publish to PyPI Repository
on:
  push:
    tags:
      - "v*.*.*"

env:
  POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }}

jobs:
  deploy:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.9
      - name: cache poetry install
        uses: actions/cache@v3
        with:
          path: ~/.local
          key: poetry-1.3.2-0
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Build and publish
        run: |
          poetry version ${{github.ref_name}}
          poetry publish --build

--------------------------------------------------------------------------------
/.github/workflows/unit-tests.yaml:
--------------------------------------------------------------------------------
name: Spark Job Unit Tests
on: [push]
env:
  AWS_DEFAULT_REGION: us-east-1
jobs:
  pytest:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.9", "3.10"]
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: cache poetry install
        uses: actions/cache@v3
        with:
          path: ~/.local
          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-poetry-1.5.1-0
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          # Something changed in 1.6.0, but unsure what, so pin to 1.5.1
          version: 1.5.1
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: |
          poetry install --no-interaction --no-root
      - name: Install project
        run: |
          poetry install --no-interaction
      - name: Run tests
        run: |
          source $VENV
          poetry run pytest --cov emr_cli tests/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Python
__pycache__
*.egg-info/
build/
dist/

# Dev
conf/deployment-sample.yaml
.venv/
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass (see the example after this list).
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
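
For a quick local check that mirrors the `unit-tests.yaml` workflow above, something like the following should work (a minimal sketch that assumes you already have [Poetry](https://python-poetry.org/) installed):

```bash
# Install the project and its dependencies into a local virtualenv,
# then run the same pytest invocation the CI workflow uses.
poetry install --no-interaction
poetry run pytest --cov emr_cli tests/
```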
GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to work on. Our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), so looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EMR CLI

So we're all working on data pipelines every day, but wouldn't it be nice to just hit a button and have our code automatically deployed to staging or test accounts? I thought so, too. That's why I created the EMR CLI (`emr`), which can help you package and deploy your EMR jobs so you don't have to.

The EMR CLI supports a wide variety of configuration options to adapt to _your_ data pipeline, not the other way around.

1. Packaging - Ensure a consistent approach to packaging your production Spark jobs.
2. Deployment - Easily deploy your Spark jobs across multiple EMR environments or deployment frameworks like EC2, EKS, and Serverless.
3. CI/CD - Easily test each iteration of your code without resorting to messy shell scripts. :)

The initial use cases are:

1. Consistent packaging for PySpark projects.
2. Use in CI/CD pipelines for packaging, deployment of artifacts, and integration testing.

> **Warning**: This tool is still under active development, so commands may change until a stable 1.0 release is made.

## Quick Start

You can use the EMR CLI to take a project from nothing to running on EMR Serverless in two steps.

First, let's install the `emr` command.

```bash
python3 -m pip install -U emr-cli
```

> **Note**: This tutorial assumes you have already [set up EMR Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/setting-up.html) and have an EMR Serverless application, job role, and S3 bucket you can use. If not, you can use the `emr bootstrap` command.

1. Create a sample project

```bash
emr init scratch
```

> 📔 Tip: Use `--project-type poetry` to create a [Poetry](https://python-poetry.org/) project!

You should now have a sample PySpark project in your scratch directory.

```
scratch
├── Dockerfile
├── entrypoint.py
├── jobs
│   └── extreme_weather.py
└── pyproject.toml

1 directory, 4 files
```

2. Now deploy and run on an EMR Serverless application!

```bash
emr run \
  --entry-point entrypoint.py \
  --application-id ${APPLICATION_ID} \
  --job-role ${JOB_ROLE_ARN} \
  --s3-code-uri s3://${S3_BUCKET}/tmp/emr-cli-demo/ \
  --s3-logs-uri s3://${S3_BUCKET}/logs/emr-cli-demo/ \
  --build \
  --show-stdout
```

This command performs the following actions:

- Packages your project dependencies into a Python virtual environment
- Uploads the Spark entrypoint and packaged dependencies to S3
- Starts an EMR Serverless job
- Waits for the job to run to completion and shows the `stdout` of the Spark driver when finished!

And you're done. Feel free to modify the project to experiment with different things. You can simply re-run the command above to re-package and re-deploy your job.

## EMR CLI Sub-commands

The EMR CLI has several subcommands that you can see by running `emr --help`:

```
Commands:
  bootstrap  Bootstrap an EMR Serverless environment.
  deploy     Copy a local project to S3.
  init       Initialize a local PySpark project.
  package    Package a project and dependencies into dist/
  run        Run a project on EMR, optionally build and deploy
  status
```

### bootstrap

`emr bootstrap` allows you to create a sample EMR Serverless or EMR on EC2 environment for testing. It assumes you have admin access and creates various resources for you using AWS APIs.

#### EMR Serverless

To create a bootstrap EMR Serverless environment, use the following command:

```shell
emr bootstrap \
  --target emr-serverless \
  --code-bucket <bucket-name> \
  --job-role-name <role-name>
```

When you do this, the CLI creates a new EMR CLI config file at `.emr/config.yaml` that will set default locations for your `emr run` command.
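
For reference, `.emr/config.yaml` is a plain YAML file holding default values for `emr run`. The exact schema depends on the options you save, so treat the snippet below as a hypothetical sketch rather than real output:

```yaml
# .emr/config.yaml - hypothetical example; bootstrap fills in your real values
run:
  application-id: 00f5ab1cdef2gh34                            # made-up application ID
  job-role: arn:aws:iam::123456789012:role/emr-cli-demo-role  # made-up role ARN
  s3-code-uri: s3://my-emr-cli-bucket/code/
```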

### init

The `init` command creates a new `pyproject.toml` or `poetry` project for you with a sample PySpark application.

`init` is required to create those project types, since it also initializes a `Dockerfile` used to package your dependencies. Single-file PySpark jobs and simple Python modules do not require the `init` command.

### package

The `package` command bundles your PySpark code and dependencies in preparation for deployment. Often you'll either use `package` and `deploy` to push new artifacts to S3, or you'll use the `--build` flag in the `emr run` command to handle both of those tasks for you.

The EMR CLI automatically detects what type of project you have and builds the necessary dependency packages.
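
As a concrete illustration, packaging the Quick Start project could look like this. The artifact names in the `dist/` listing are illustrative and vary by project type:

```bash
# Bundle the entrypoint and its dependencies; the CLI detects the project type.
emr package --entry-point entrypoint.py

# For a pyproject.toml-based project, you'd expect the entrypoint plus a
# dependency archive in dist/, along the lines of:
ls dist/
# entrypoint.py  pyspark_deps.tar.gz
```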

### deploy

The `deploy` command copies the packaged artifacts from the `dist/` folder to your specified S3 location.

### run

The `run` command is intended to help you package, deploy, and run your PySpark code across EMR on EC2, EMR on EKS, or EMR Serverless.

You must provide one of `--cluster-id`, `--virtual-cluster-id`, or `--application-id` to specify which environment to run your code on.

`emr run --help` shows all the available options:

```
Usage: emr run [OPTIONS]

  Run a project on EMR, optionally build and deploy

Options:
  --application-id TEXT         EMR Serverless Application ID
  --cluster-id TEXT             EMR on EC2 Cluster ID
  --virtual-cluster-id TEXT     EMR on EKS Virtual Cluster ID
  --entry-point FILE            Python or Jar file for the main entrypoint
  --job-role TEXT               IAM Role ARN to use for the job execution
  --wait                        Wait for job to finish
  --s3-code-uri TEXT            Where to copy/run code artifacts to/from
  --s3-logs-uri TEXT            Where to send EMR Serverless logs to
  --job-name TEXT               The name of the job
  --job-args TEXT               Comma-delimited string of arguments to be
                                passed to Spark job
  --spark-submit-opts TEXT      String of spark-submit options
  --build                       Package and deploy job artifacts
  --show-stdout                 Show the stdout of the job after it's finished
  --save-config                 Update the config file with the provided
                                options
  --emr-eks-release-label TEXT  EMR on EKS release label (emr-6.15.0) -
                                defaults to latest release
```
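
The sample commands below don't exercise `--job-args` or `--spark-submit-opts`, so here's a quick sketch of both. All values are made up, and the Spark setting is just one you might plausibly tune:

```bash
# Pass two comma-delimited arguments to the job and raise executor memory.
emr run \
  --entry-point entrypoint.py \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --s3-code-uri s3://<bucket>/code/ \
  --job-args "2023-01-01,2023-01-31" \
  --spark-submit-opts "--conf spark.executor.memory=4g" \
  --build --wait
```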

## Supported PySpark configurations

- Single-file project - Projects that have a single `.py` entrypoint file.
- Multi-file project - A more typical PySpark project, but without dependencies, that has multiple Python files or modules.
- Python module - A project with dependencies defined in a `pyproject.toml` file.
- Poetry project - A project using [Poetry](https://python-poetry.org/) for dependency management.

## Sample Commands

- Create a new PySpark project (other frameworks TBD)

```bash
emr init project-dir
```

- Package your project into a virtual environment archive

```bash
emr package --entry-point main.py
```

The EMR CLI auto-detects the project type and changes the packaging method appropriately.

If you have additional `.py` files, those will be included in the archive.

- Deploy an existing package artifact to S3.

```bash
emr deploy --entry-point main.py --s3-code-uri s3://<bucket>/code/
```

- Deploy a PySpark package to S3 and trigger an EMR Serverless job

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --application-id <application-id> \
  --job-role <job-role-arn>
```

- Build, deploy, and run an EMR Serverless job and wait for it to finish.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --build \
  --wait
```

- Re-run an already deployed job and show the `stdout` of the driver.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --application-id <application-id> \
  --job-role <job-role-arn> \
  --show-stdout
```

> **Note**: If the job fails, the command will exit with an error code.

- Re-run your jobs with 7 characters.

If you provide the `--save-config` flag to `emr run`, it will save a configuration file for you in `.emr/config.yaml`, and next time you can use `emr run` with no parameters to re-run your job.

```bash
emr run --entry-point main.py \
  ... \
  --save-config

[emr-cli]: Config file saved to .emr/config.yaml. Use `emr run` to re-use your configuration.
```

```bash
❯ emr run
[emr-cli]: Using config file: .emr/config.yaml
```

🥳

- Run the same job against an EMR on EC2 cluster

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --cluster-id <cluster-id> \
  --show-stdout
```

- Or an EMR on EKS virtual cluster.

```bash
emr run --entry-point main.py \
  --s3-code-uri s3://<bucket>/code/ \
  --s3-logs-uri s3://<bucket>/logs/ \
  --virtual-cluster-id <virtual-cluster-id> \
  --job-role <job-role-arn> \
  --show-stdout
```

## Security

See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

## License

This project is licensed under the Apache-2.0 License.
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
## Reporting a Vulnerability

If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security
via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to aws-security@amazon.com.

Please do **not** create a public GitHub issue.
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.

[[package]]
name = "attrs"
version = "23.1.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.7"
files = [
    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
]

[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}

[package.extras]
cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
dev = ["attrs[docs,tests]", "pre-commit"]
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
tests = ["attrs[tests-no-zope]", "zope-interface"]
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]

[[package]]
name = "boto3"
version = "1.28.70"
description = "The AWS SDK for Python"
optional = false
python-versions = ">= 3.7"
files = [
    {file = "boto3-1.28.70-py3-none-any.whl", hash = "sha256:22ec3b54801c81746657827c7b1c4a3b2e4cfa7c21be3b96218d32e9390ee5eb"},
    {file = "boto3-1.28.70.tar.gz", hash = "sha256:89002e1d8411c7c54110f9f8fc4a11d57d6d7977c0cb4ba064887ca5d4c788f7"},
]

[package.dependencies]
botocore = ">=1.31.70,<1.32.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.7.0,<0.8.0"

[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

[[package]]
name = "boto3-stubs"
version = "1.28.70"
description = "Type annotations for boto3 1.28.70 generated with mypy-boto3-builder 7.19.0"
optional = false
python-versions = ">=3.7"
files = [
    {file = "boto3-stubs-1.28.70.tar.gz", hash = "sha256:0c67fd217c9fd0d5688504aa64c5b760bccca9886ce05d675bbc613b568cba28"},
    {file = "boto3_stubs-1.28.70-py3-none-any.whl", hash = "sha256:6d50983e2535220f3da911ac43e3fd13347d644474b3ebe694c4158c75a3df7f"},
]

[package.dependencies]
botocore-stubs = "*"
mypy-boto3-s3 = {version = ">=1.28.0,<1.29.0", optional = true, markers = "extra == \"s3\""}
types-s3transfer = "*"
typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""}

[package.extras]
accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.28.0,<1.29.0)"]
account = ["mypy-boto3-account (>=1.28.0,<1.29.0)"]
acm = ["mypy-boto3-acm (>=1.28.0,<1.29.0)"]
acm-pca = ["mypy-boto3-acm-pca (>=1.28.0,<1.29.0)"]
alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.28.0,<1.29.0)"]
all = ["mypy-boto3-accessanalyzer (>=1.28.0,<1.29.0)", "mypy-boto3-account (>=1.28.0,<1.29.0)", "mypy-boto3-acm (>=1.28.0,<1.29.0)", "mypy-boto3-acm-pca (>=1.28.0,<1.29.0)", "mypy-boto3-alexaforbusiness (>=1.28.0,<1.29.0)", "mypy-boto3-amp (>=1.28.0,<1.29.0)", "mypy-boto3-amplify (>=1.28.0,<1.29.0)", "mypy-boto3-amplifybackend (>=1.28.0,<1.29.0)", "mypy-boto3-amplifyuibuilder (>=1.28.0,<1.29.0)", "mypy-boto3-apigateway (>=1.28.0,<1.29.0)", "mypy-boto3-apigatewaymanagementapi (>=1.28.0,<1.29.0)", "mypy-boto3-apigatewayv2 (>=1.28.0,<1.29.0)", "mypy-boto3-appconfig (>=1.28.0,<1.29.0)", "mypy-boto3-appconfigdata (>=1.28.0,<1.29.0)", "mypy-boto3-appfabric (>=1.28.0,<1.29.0)", "mypy-boto3-appflow (>=1.28.0,<1.29.0)", "mypy-boto3-appintegrations (>=1.28.0,<1.29.0)",
"mypy-boto3-application-autoscaling (>=1.28.0,<1.29.0)", "mypy-boto3-application-insights (>=1.28.0,<1.29.0)", "mypy-boto3-applicationcostprofiler (>=1.28.0,<1.29.0)", "mypy-boto3-appmesh (>=1.28.0,<1.29.0)", "mypy-boto3-apprunner (>=1.28.0,<1.29.0)", "mypy-boto3-appstream (>=1.28.0,<1.29.0)", "mypy-boto3-appsync (>=1.28.0,<1.29.0)", "mypy-boto3-arc-zonal-shift (>=1.28.0,<1.29.0)", "mypy-boto3-athena (>=1.28.0,<1.29.0)", "mypy-boto3-auditmanager (>=1.28.0,<1.29.0)", "mypy-boto3-autoscaling (>=1.28.0,<1.29.0)", "mypy-boto3-autoscaling-plans (>=1.28.0,<1.29.0)", "mypy-boto3-backup (>=1.28.0,<1.29.0)", "mypy-boto3-backup-gateway (>=1.28.0,<1.29.0)", "mypy-boto3-backupstorage (>=1.28.0,<1.29.0)", "mypy-boto3-batch (>=1.28.0,<1.29.0)", "mypy-boto3-bedrock (>=1.28.0,<1.29.0)", "mypy-boto3-bedrock-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-billingconductor (>=1.28.0,<1.29.0)", "mypy-boto3-braket (>=1.28.0,<1.29.0)", "mypy-boto3-budgets (>=1.28.0,<1.29.0)", "mypy-boto3-ce (>=1.28.0,<1.29.0)", "mypy-boto3-chime (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-identity (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-meetings (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-messaging (>=1.28.0,<1.29.0)", "mypy-boto3-chime-sdk-voice (>=1.28.0,<1.29.0)", "mypy-boto3-cleanrooms (>=1.28.0,<1.29.0)", "mypy-boto3-cloud9 (>=1.28.0,<1.29.0)", "mypy-boto3-cloudcontrol (>=1.28.0,<1.29.0)", "mypy-boto3-clouddirectory (>=1.28.0,<1.29.0)", "mypy-boto3-cloudformation (>=1.28.0,<1.29.0)", "mypy-boto3-cloudfront (>=1.28.0,<1.29.0)", "mypy-boto3-cloudhsm (>=1.28.0,<1.29.0)", "mypy-boto3-cloudhsmv2 (>=1.28.0,<1.29.0)", "mypy-boto3-cloudsearch (>=1.28.0,<1.29.0)", "mypy-boto3-cloudsearchdomain (>=1.28.0,<1.29.0)", "mypy-boto3-cloudtrail (>=1.28.0,<1.29.0)", "mypy-boto3-cloudtrail-data (>=1.28.0,<1.29.0)", "mypy-boto3-cloudwatch (>=1.28.0,<1.29.0)", "mypy-boto3-codeartifact (>=1.28.0,<1.29.0)", "mypy-boto3-codebuild (>=1.28.0,<1.29.0)", "mypy-boto3-codecatalyst (>=1.28.0,<1.29.0)", "mypy-boto3-codecommit (>=1.28.0,<1.29.0)", "mypy-boto3-codedeploy (>=1.28.0,<1.29.0)", "mypy-boto3-codeguru-reviewer (>=1.28.0,<1.29.0)", "mypy-boto3-codeguru-security (>=1.28.0,<1.29.0)", "mypy-boto3-codeguruprofiler (>=1.28.0,<1.29.0)", "mypy-boto3-codepipeline (>=1.28.0,<1.29.0)", "mypy-boto3-codestar (>=1.28.0,<1.29.0)", "mypy-boto3-codestar-connections (>=1.28.0,<1.29.0)", "mypy-boto3-codestar-notifications (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-identity (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-idp (>=1.28.0,<1.29.0)", "mypy-boto3-cognito-sync (>=1.28.0,<1.29.0)", "mypy-boto3-comprehend (>=1.28.0,<1.29.0)", "mypy-boto3-comprehendmedical (>=1.28.0,<1.29.0)", "mypy-boto3-compute-optimizer (>=1.28.0,<1.29.0)", "mypy-boto3-config (>=1.28.0,<1.29.0)", "mypy-boto3-connect (>=1.28.0,<1.29.0)", "mypy-boto3-connect-contact-lens (>=1.28.0,<1.29.0)", "mypy-boto3-connectcampaigns (>=1.28.0,<1.29.0)", "mypy-boto3-connectcases (>=1.28.0,<1.29.0)", "mypy-boto3-connectparticipant (>=1.28.0,<1.29.0)", "mypy-boto3-controltower (>=1.28.0,<1.29.0)", "mypy-boto3-cur (>=1.28.0,<1.29.0)", "mypy-boto3-customer-profiles (>=1.28.0,<1.29.0)", "mypy-boto3-databrew (>=1.28.0,<1.29.0)", "mypy-boto3-dataexchange (>=1.28.0,<1.29.0)", "mypy-boto3-datapipeline (>=1.28.0,<1.29.0)", "mypy-boto3-datasync (>=1.28.0,<1.29.0)", "mypy-boto3-datazone (>=1.28.0,<1.29.0)", "mypy-boto3-dax (>=1.28.0,<1.29.0)", "mypy-boto3-detective (>=1.28.0,<1.29.0)", "mypy-boto3-devicefarm (>=1.28.0,<1.29.0)", "mypy-boto3-devops-guru (>=1.28.0,<1.29.0)", 
"mypy-boto3-directconnect (>=1.28.0,<1.29.0)", "mypy-boto3-discovery (>=1.28.0,<1.29.0)", "mypy-boto3-dlm (>=1.28.0,<1.29.0)", "mypy-boto3-dms (>=1.28.0,<1.29.0)", "mypy-boto3-docdb (>=1.28.0,<1.29.0)", "mypy-boto3-docdb-elastic (>=1.28.0,<1.29.0)", "mypy-boto3-drs (>=1.28.0,<1.29.0)", "mypy-boto3-ds (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodb (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodbstreams (>=1.28.0,<1.29.0)", "mypy-boto3-ebs (>=1.28.0,<1.29.0)", "mypy-boto3-ec2 (>=1.28.0,<1.29.0)", "mypy-boto3-ec2-instance-connect (>=1.28.0,<1.29.0)", "mypy-boto3-ecr (>=1.28.0,<1.29.0)", "mypy-boto3-ecr-public (>=1.28.0,<1.29.0)", "mypy-boto3-ecs (>=1.28.0,<1.29.0)", "mypy-boto3-efs (>=1.28.0,<1.29.0)", "mypy-boto3-eks (>=1.28.0,<1.29.0)", "mypy-boto3-elastic-inference (>=1.28.0,<1.29.0)", "mypy-boto3-elasticache (>=1.28.0,<1.29.0)", "mypy-boto3-elasticbeanstalk (>=1.28.0,<1.29.0)", "mypy-boto3-elastictranscoder (>=1.28.0,<1.29.0)", "mypy-boto3-elb (>=1.28.0,<1.29.0)", "mypy-boto3-elbv2 (>=1.28.0,<1.29.0)", "mypy-boto3-emr (>=1.28.0,<1.29.0)", "mypy-boto3-emr-containers (>=1.28.0,<1.29.0)", "mypy-boto3-emr-serverless (>=1.28.0,<1.29.0)", "mypy-boto3-entityresolution (>=1.28.0,<1.29.0)", "mypy-boto3-es (>=1.28.0,<1.29.0)", "mypy-boto3-events (>=1.28.0,<1.29.0)", "mypy-boto3-evidently (>=1.28.0,<1.29.0)", "mypy-boto3-finspace (>=1.28.0,<1.29.0)", "mypy-boto3-finspace-data (>=1.28.0,<1.29.0)", "mypy-boto3-firehose (>=1.28.0,<1.29.0)", "mypy-boto3-fis (>=1.28.0,<1.29.0)", "mypy-boto3-fms (>=1.28.0,<1.29.0)", "mypy-boto3-forecast (>=1.28.0,<1.29.0)", "mypy-boto3-forecastquery (>=1.28.0,<1.29.0)", "mypy-boto3-frauddetector (>=1.28.0,<1.29.0)", "mypy-boto3-fsx (>=1.28.0,<1.29.0)", "mypy-boto3-gamelift (>=1.28.0,<1.29.0)", "mypy-boto3-glacier (>=1.28.0,<1.29.0)", "mypy-boto3-globalaccelerator (>=1.28.0,<1.29.0)", "mypy-boto3-glue (>=1.28.0,<1.29.0)", "mypy-boto3-grafana (>=1.28.0,<1.29.0)", "mypy-boto3-greengrass (>=1.28.0,<1.29.0)", "mypy-boto3-greengrassv2 (>=1.28.0,<1.29.0)", "mypy-boto3-groundstation (>=1.28.0,<1.29.0)", "mypy-boto3-guardduty (>=1.28.0,<1.29.0)", "mypy-boto3-health (>=1.28.0,<1.29.0)", "mypy-boto3-healthlake (>=1.28.0,<1.29.0)", "mypy-boto3-honeycode (>=1.28.0,<1.29.0)", "mypy-boto3-iam (>=1.28.0,<1.29.0)", "mypy-boto3-identitystore (>=1.28.0,<1.29.0)", "mypy-boto3-imagebuilder (>=1.28.0,<1.29.0)", "mypy-boto3-importexport (>=1.28.0,<1.29.0)", "mypy-boto3-inspector (>=1.28.0,<1.29.0)", "mypy-boto3-inspector2 (>=1.28.0,<1.29.0)", "mypy-boto3-internetmonitor (>=1.28.0,<1.29.0)", "mypy-boto3-iot (>=1.28.0,<1.29.0)", "mypy-boto3-iot-data (>=1.28.0,<1.29.0)", "mypy-boto3-iot-jobs-data (>=1.28.0,<1.29.0)", "mypy-boto3-iot-roborunner (>=1.28.0,<1.29.0)", "mypy-boto3-iot1click-devices (>=1.28.0,<1.29.0)", "mypy-boto3-iot1click-projects (>=1.28.0,<1.29.0)", "mypy-boto3-iotanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-iotdeviceadvisor (>=1.28.0,<1.29.0)", "mypy-boto3-iotevents (>=1.28.0,<1.29.0)", "mypy-boto3-iotevents-data (>=1.28.0,<1.29.0)", "mypy-boto3-iotfleethub (>=1.28.0,<1.29.0)", "mypy-boto3-iotfleetwise (>=1.28.0,<1.29.0)", "mypy-boto3-iotsecuretunneling (>=1.28.0,<1.29.0)", "mypy-boto3-iotsitewise (>=1.28.0,<1.29.0)", "mypy-boto3-iotthingsgraph (>=1.28.0,<1.29.0)", "mypy-boto3-iottwinmaker (>=1.28.0,<1.29.0)", "mypy-boto3-iotwireless (>=1.28.0,<1.29.0)", "mypy-boto3-ivs (>=1.28.0,<1.29.0)", "mypy-boto3-ivs-realtime (>=1.28.0,<1.29.0)", "mypy-boto3-ivschat (>=1.28.0,<1.29.0)", "mypy-boto3-kafka (>=1.28.0,<1.29.0)", "mypy-boto3-kafkaconnect (>=1.28.0,<1.29.0)", "mypy-boto3-kendra 
(>=1.28.0,<1.29.0)", "mypy-boto3-kendra-ranking (>=1.28.0,<1.29.0)", "mypy-boto3-keyspaces (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-archived-media (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-media (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-signaling (>=1.28.0,<1.29.0)", "mypy-boto3-kinesis-video-webrtc-storage (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.28.0,<1.29.0)", "mypy-boto3-kinesisvideo (>=1.28.0,<1.29.0)", "mypy-boto3-kms (>=1.28.0,<1.29.0)", "mypy-boto3-lakeformation (>=1.28.0,<1.29.0)", "mypy-boto3-lambda (>=1.28.0,<1.29.0)", "mypy-boto3-lex-models (>=1.28.0,<1.29.0)", "mypy-boto3-lex-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-lexv2-models (>=1.28.0,<1.29.0)", "mypy-boto3-lexv2-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager-linux-subscriptions (>=1.28.0,<1.29.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.28.0,<1.29.0)", "mypy-boto3-lightsail (>=1.28.0,<1.29.0)", "mypy-boto3-location (>=1.28.0,<1.29.0)", "mypy-boto3-logs (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutequipment (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutmetrics (>=1.28.0,<1.29.0)", "mypy-boto3-lookoutvision (>=1.28.0,<1.29.0)", "mypy-boto3-m2 (>=1.28.0,<1.29.0)", "mypy-boto3-machinelearning (>=1.28.0,<1.29.0)", "mypy-boto3-macie (>=1.28.0,<1.29.0)", "mypy-boto3-macie2 (>=1.28.0,<1.29.0)", "mypy-boto3-managedblockchain (>=1.28.0,<1.29.0)", "mypy-boto3-managedblockchain-query (>=1.28.0,<1.29.0)", "mypy-boto3-marketplace-catalog (>=1.28.0,<1.29.0)", "mypy-boto3-marketplace-entitlement (>=1.28.0,<1.29.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.28.0,<1.29.0)", "mypy-boto3-mediaconnect (>=1.28.0,<1.29.0)", "mypy-boto3-mediaconvert (>=1.28.0,<1.29.0)", "mypy-boto3-medialive (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackage (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackage-vod (>=1.28.0,<1.29.0)", "mypy-boto3-mediapackagev2 (>=1.28.0,<1.29.0)", "mypy-boto3-mediastore (>=1.28.0,<1.29.0)", "mypy-boto3-mediastore-data (>=1.28.0,<1.29.0)", "mypy-boto3-mediatailor (>=1.28.0,<1.29.0)", "mypy-boto3-medical-imaging (>=1.28.0,<1.29.0)", "mypy-boto3-memorydb (>=1.28.0,<1.29.0)", "mypy-boto3-meteringmarketplace (>=1.28.0,<1.29.0)", "mypy-boto3-mgh (>=1.28.0,<1.29.0)", "mypy-boto3-mgn (>=1.28.0,<1.29.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhub-config (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhuborchestrator (>=1.28.0,<1.29.0)", "mypy-boto3-migrationhubstrategy (>=1.28.0,<1.29.0)", "mypy-boto3-mobile (>=1.28.0,<1.29.0)", "mypy-boto3-mq (>=1.28.0,<1.29.0)", "mypy-boto3-mturk (>=1.28.0,<1.29.0)", "mypy-boto3-mwaa (>=1.28.0,<1.29.0)", "mypy-boto3-neptune (>=1.28.0,<1.29.0)", "mypy-boto3-neptunedata (>=1.28.0,<1.29.0)", "mypy-boto3-network-firewall (>=1.28.0,<1.29.0)", "mypy-boto3-networkmanager (>=1.28.0,<1.29.0)", "mypy-boto3-nimble (>=1.28.0,<1.29.0)", "mypy-boto3-oam (>=1.28.0,<1.29.0)", "mypy-boto3-omics (>=1.28.0,<1.29.0)", "mypy-boto3-opensearch (>=1.28.0,<1.29.0)", "mypy-boto3-opensearchserverless (>=1.28.0,<1.29.0)", "mypy-boto3-opsworks (>=1.28.0,<1.29.0)", "mypy-boto3-opsworkscm (>=1.28.0,<1.29.0)", "mypy-boto3-organizations (>=1.28.0,<1.29.0)", "mypy-boto3-osis (>=1.28.0,<1.29.0)", "mypy-boto3-outposts (>=1.28.0,<1.29.0)", "mypy-boto3-panorama (>=1.28.0,<1.29.0)", "mypy-boto3-payment-cryptography (>=1.28.0,<1.29.0)", "mypy-boto3-payment-cryptography-data (>=1.28.0,<1.29.0)", "mypy-boto3-pca-connector-ad 
(>=1.28.0,<1.29.0)", "mypy-boto3-personalize (>=1.28.0,<1.29.0)", "mypy-boto3-personalize-events (>=1.28.0,<1.29.0)", "mypy-boto3-personalize-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-pi (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-email (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-sms-voice (>=1.28.0,<1.29.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.28.0,<1.29.0)", "mypy-boto3-pipes (>=1.28.0,<1.29.0)", "mypy-boto3-polly (>=1.28.0,<1.29.0)", "mypy-boto3-pricing (>=1.28.0,<1.29.0)", "mypy-boto3-privatenetworks (>=1.28.0,<1.29.0)", "mypy-boto3-proton (>=1.28.0,<1.29.0)", "mypy-boto3-qldb (>=1.28.0,<1.29.0)", "mypy-boto3-qldb-session (>=1.28.0,<1.29.0)", "mypy-boto3-quicksight (>=1.28.0,<1.29.0)", "mypy-boto3-ram (>=1.28.0,<1.29.0)", "mypy-boto3-rbin (>=1.28.0,<1.29.0)", "mypy-boto3-rds (>=1.28.0,<1.29.0)", "mypy-boto3-rds-data (>=1.28.0,<1.29.0)", "mypy-boto3-redshift (>=1.28.0,<1.29.0)", "mypy-boto3-redshift-data (>=1.28.0,<1.29.0)", "mypy-boto3-redshift-serverless (>=1.28.0,<1.29.0)", "mypy-boto3-rekognition (>=1.28.0,<1.29.0)", "mypy-boto3-resiliencehub (>=1.28.0,<1.29.0)", "mypy-boto3-resource-explorer-2 (>=1.28.0,<1.29.0)", "mypy-boto3-resource-groups (>=1.28.0,<1.29.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.28.0,<1.29.0)", "mypy-boto3-robomaker (>=1.28.0,<1.29.0)", "mypy-boto3-rolesanywhere (>=1.28.0,<1.29.0)", "mypy-boto3-route53 (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-cluster (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-control-config (>=1.28.0,<1.29.0)", "mypy-boto3-route53-recovery-readiness (>=1.28.0,<1.29.0)", "mypy-boto3-route53domains (>=1.28.0,<1.29.0)", "mypy-boto3-route53resolver (>=1.28.0,<1.29.0)", "mypy-boto3-rum (>=1.28.0,<1.29.0)", "mypy-boto3-s3 (>=1.28.0,<1.29.0)", "mypy-boto3-s3control (>=1.28.0,<1.29.0)", "mypy-boto3-s3outposts (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-edge (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-geospatial (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-metrics (>=1.28.0,<1.29.0)", "mypy-boto3-sagemaker-runtime (>=1.28.0,<1.29.0)", "mypy-boto3-savingsplans (>=1.28.0,<1.29.0)", "mypy-boto3-scheduler (>=1.28.0,<1.29.0)", "mypy-boto3-schemas (>=1.28.0,<1.29.0)", "mypy-boto3-sdb (>=1.28.0,<1.29.0)", "mypy-boto3-secretsmanager (>=1.28.0,<1.29.0)", "mypy-boto3-securityhub (>=1.28.0,<1.29.0)", "mypy-boto3-securitylake (>=1.28.0,<1.29.0)", "mypy-boto3-serverlessrepo (>=1.28.0,<1.29.0)", "mypy-boto3-service-quotas (>=1.28.0,<1.29.0)", "mypy-boto3-servicecatalog (>=1.28.0,<1.29.0)", "mypy-boto3-servicecatalog-appregistry (>=1.28.0,<1.29.0)", "mypy-boto3-servicediscovery (>=1.28.0,<1.29.0)", "mypy-boto3-ses (>=1.28.0,<1.29.0)", "mypy-boto3-sesv2 (>=1.28.0,<1.29.0)", "mypy-boto3-shield (>=1.28.0,<1.29.0)", "mypy-boto3-signer (>=1.28.0,<1.29.0)", "mypy-boto3-simspaceweaver (>=1.28.0,<1.29.0)", "mypy-boto3-sms (>=1.28.0,<1.29.0)", "mypy-boto3-sms-voice (>=1.28.0,<1.29.0)", "mypy-boto3-snow-device-management (>=1.28.0,<1.29.0)", "mypy-boto3-snowball (>=1.28.0,<1.29.0)", "mypy-boto3-sns (>=1.28.0,<1.29.0)", "mypy-boto3-sqs (>=1.28.0,<1.29.0)", "mypy-boto3-ssm (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-contacts (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-incidents (>=1.28.0,<1.29.0)", "mypy-boto3-ssm-sap (>=1.28.0,<1.29.0)", "mypy-boto3-sso (>=1.28.0,<1.29.0)", "mypy-boto3-sso-admin (>=1.28.0,<1.29.0)", "mypy-boto3-sso-oidc (>=1.28.0,<1.29.0)", 
"mypy-boto3-stepfunctions (>=1.28.0,<1.29.0)", "mypy-boto3-storagegateway (>=1.28.0,<1.29.0)", "mypy-boto3-sts (>=1.28.0,<1.29.0)", "mypy-boto3-support (>=1.28.0,<1.29.0)", "mypy-boto3-support-app (>=1.28.0,<1.29.0)", "mypy-boto3-swf (>=1.28.0,<1.29.0)", "mypy-boto3-synthetics (>=1.28.0,<1.29.0)", "mypy-boto3-textract (>=1.28.0,<1.29.0)", "mypy-boto3-timestream-query (>=1.28.0,<1.29.0)", "mypy-boto3-timestream-write (>=1.28.0,<1.29.0)", "mypy-boto3-tnb (>=1.28.0,<1.29.0)", "mypy-boto3-transcribe (>=1.28.0,<1.29.0)", "mypy-boto3-transfer (>=1.28.0,<1.29.0)", "mypy-boto3-translate (>=1.28.0,<1.29.0)", "mypy-boto3-verifiedpermissions (>=1.28.0,<1.29.0)", "mypy-boto3-voice-id (>=1.28.0,<1.29.0)", "mypy-boto3-vpc-lattice (>=1.28.0,<1.29.0)", "mypy-boto3-waf (>=1.28.0,<1.29.0)", "mypy-boto3-waf-regional (>=1.28.0,<1.29.0)", "mypy-boto3-wafv2 (>=1.28.0,<1.29.0)", "mypy-boto3-wellarchitected (>=1.28.0,<1.29.0)", "mypy-boto3-wisdom (>=1.28.0,<1.29.0)", "mypy-boto3-workdocs (>=1.28.0,<1.29.0)", "mypy-boto3-worklink (>=1.28.0,<1.29.0)", "mypy-boto3-workmail (>=1.28.0,<1.29.0)", "mypy-boto3-workmailmessageflow (>=1.28.0,<1.29.0)", "mypy-boto3-workspaces (>=1.28.0,<1.29.0)", "mypy-boto3-workspaces-web (>=1.28.0,<1.29.0)", "mypy-boto3-xray (>=1.28.0,<1.29.0)"] 67 | amp = ["mypy-boto3-amp (>=1.28.0,<1.29.0)"] 68 | amplify = ["mypy-boto3-amplify (>=1.28.0,<1.29.0)"] 69 | amplifybackend = ["mypy-boto3-amplifybackend (>=1.28.0,<1.29.0)"] 70 | amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.28.0,<1.29.0)"] 71 | apigateway = ["mypy-boto3-apigateway (>=1.28.0,<1.29.0)"] 72 | apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.28.0,<1.29.0)"] 73 | apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.28.0,<1.29.0)"] 74 | appconfig = ["mypy-boto3-appconfig (>=1.28.0,<1.29.0)"] 75 | appconfigdata = ["mypy-boto3-appconfigdata (>=1.28.0,<1.29.0)"] 76 | appfabric = ["mypy-boto3-appfabric (>=1.28.0,<1.29.0)"] 77 | appflow = ["mypy-boto3-appflow (>=1.28.0,<1.29.0)"] 78 | appintegrations = ["mypy-boto3-appintegrations (>=1.28.0,<1.29.0)"] 79 | application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.28.0,<1.29.0)"] 80 | application-insights = ["mypy-boto3-application-insights (>=1.28.0,<1.29.0)"] 81 | applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.28.0,<1.29.0)"] 82 | appmesh = ["mypy-boto3-appmesh (>=1.28.0,<1.29.0)"] 83 | apprunner = ["mypy-boto3-apprunner (>=1.28.0,<1.29.0)"] 84 | appstream = ["mypy-boto3-appstream (>=1.28.0,<1.29.0)"] 85 | appsync = ["mypy-boto3-appsync (>=1.28.0,<1.29.0)"] 86 | arc-zonal-shift = ["mypy-boto3-arc-zonal-shift (>=1.28.0,<1.29.0)"] 87 | athena = ["mypy-boto3-athena (>=1.28.0,<1.29.0)"] 88 | auditmanager = ["mypy-boto3-auditmanager (>=1.28.0,<1.29.0)"] 89 | autoscaling = ["mypy-boto3-autoscaling (>=1.28.0,<1.29.0)"] 90 | autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.28.0,<1.29.0)"] 91 | backup = ["mypy-boto3-backup (>=1.28.0,<1.29.0)"] 92 | backup-gateway = ["mypy-boto3-backup-gateway (>=1.28.0,<1.29.0)"] 93 | backupstorage = ["mypy-boto3-backupstorage (>=1.28.0,<1.29.0)"] 94 | batch = ["mypy-boto3-batch (>=1.28.0,<1.29.0)"] 95 | bedrock = ["mypy-boto3-bedrock (>=1.28.0,<1.29.0)"] 96 | bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.28.0,<1.29.0)"] 97 | billingconductor = ["mypy-boto3-billingconductor (>=1.28.0,<1.29.0)"] 98 | boto3 = ["boto3 (==1.28.70)", "botocore (==1.31.70)"] 99 | braket = ["mypy-boto3-braket (>=1.28.0,<1.29.0)"] 100 | budgets = ["mypy-boto3-budgets (>=1.28.0,<1.29.0)"] 101 | ce = ["mypy-boto3-ce 
(>=1.28.0,<1.29.0)"] 102 | chime = ["mypy-boto3-chime (>=1.28.0,<1.29.0)"] 103 | chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.28.0,<1.29.0)"] 104 | chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.28.0,<1.29.0)"] 105 | chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.28.0,<1.29.0)"] 106 | chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.28.0,<1.29.0)"] 107 | chime-sdk-voice = ["mypy-boto3-chime-sdk-voice (>=1.28.0,<1.29.0)"] 108 | cleanrooms = ["mypy-boto3-cleanrooms (>=1.28.0,<1.29.0)"] 109 | cloud9 = ["mypy-boto3-cloud9 (>=1.28.0,<1.29.0)"] 110 | cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.28.0,<1.29.0)"] 111 | clouddirectory = ["mypy-boto3-clouddirectory (>=1.28.0,<1.29.0)"] 112 | cloudformation = ["mypy-boto3-cloudformation (>=1.28.0,<1.29.0)"] 113 | cloudfront = ["mypy-boto3-cloudfront (>=1.28.0,<1.29.0)"] 114 | cloudhsm = ["mypy-boto3-cloudhsm (>=1.28.0,<1.29.0)"] 115 | cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.28.0,<1.29.0)"] 116 | cloudsearch = ["mypy-boto3-cloudsearch (>=1.28.0,<1.29.0)"] 117 | cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.28.0,<1.29.0)"] 118 | cloudtrail = ["mypy-boto3-cloudtrail (>=1.28.0,<1.29.0)"] 119 | cloudtrail-data = ["mypy-boto3-cloudtrail-data (>=1.28.0,<1.29.0)"] 120 | cloudwatch = ["mypy-boto3-cloudwatch (>=1.28.0,<1.29.0)"] 121 | codeartifact = ["mypy-boto3-codeartifact (>=1.28.0,<1.29.0)"] 122 | codebuild = ["mypy-boto3-codebuild (>=1.28.0,<1.29.0)"] 123 | codecatalyst = ["mypy-boto3-codecatalyst (>=1.28.0,<1.29.0)"] 124 | codecommit = ["mypy-boto3-codecommit (>=1.28.0,<1.29.0)"] 125 | codedeploy = ["mypy-boto3-codedeploy (>=1.28.0,<1.29.0)"] 126 | codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.28.0,<1.29.0)"] 127 | codeguru-security = ["mypy-boto3-codeguru-security (>=1.28.0,<1.29.0)"] 128 | codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.28.0,<1.29.0)"] 129 | codepipeline = ["mypy-boto3-codepipeline (>=1.28.0,<1.29.0)"] 130 | codestar = ["mypy-boto3-codestar (>=1.28.0,<1.29.0)"] 131 | codestar-connections = ["mypy-boto3-codestar-connections (>=1.28.0,<1.29.0)"] 132 | codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.28.0,<1.29.0)"] 133 | cognito-identity = ["mypy-boto3-cognito-identity (>=1.28.0,<1.29.0)"] 134 | cognito-idp = ["mypy-boto3-cognito-idp (>=1.28.0,<1.29.0)"] 135 | cognito-sync = ["mypy-boto3-cognito-sync (>=1.28.0,<1.29.0)"] 136 | comprehend = ["mypy-boto3-comprehend (>=1.28.0,<1.29.0)"] 137 | comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.28.0,<1.29.0)"] 138 | compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.28.0,<1.29.0)"] 139 | config = ["mypy-boto3-config (>=1.28.0,<1.29.0)"] 140 | connect = ["mypy-boto3-connect (>=1.28.0,<1.29.0)"] 141 | connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.28.0,<1.29.0)"] 142 | connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.28.0,<1.29.0)"] 143 | connectcases = ["mypy-boto3-connectcases (>=1.28.0,<1.29.0)"] 144 | connectparticipant = ["mypy-boto3-connectparticipant (>=1.28.0,<1.29.0)"] 145 | controltower = ["mypy-boto3-controltower (>=1.28.0,<1.29.0)"] 146 | cur = ["mypy-boto3-cur (>=1.28.0,<1.29.0)"] 147 | customer-profiles = ["mypy-boto3-customer-profiles (>=1.28.0,<1.29.0)"] 148 | databrew = ["mypy-boto3-databrew (>=1.28.0,<1.29.0)"] 149 | dataexchange = ["mypy-boto3-dataexchange (>=1.28.0,<1.29.0)"] 150 | datapipeline = ["mypy-boto3-datapipeline (>=1.28.0,<1.29.0)"] 151 | datasync = ["mypy-boto3-datasync (>=1.28.0,<1.29.0)"] 152 | datazone = ["mypy-boto3-datazone 
(>=1.28.0,<1.29.0)"] 153 | dax = ["mypy-boto3-dax (>=1.28.0,<1.29.0)"] 154 | detective = ["mypy-boto3-detective (>=1.28.0,<1.29.0)"] 155 | devicefarm = ["mypy-boto3-devicefarm (>=1.28.0,<1.29.0)"] 156 | devops-guru = ["mypy-boto3-devops-guru (>=1.28.0,<1.29.0)"] 157 | directconnect = ["mypy-boto3-directconnect (>=1.28.0,<1.29.0)"] 158 | discovery = ["mypy-boto3-discovery (>=1.28.0,<1.29.0)"] 159 | dlm = ["mypy-boto3-dlm (>=1.28.0,<1.29.0)"] 160 | dms = ["mypy-boto3-dms (>=1.28.0,<1.29.0)"] 161 | docdb = ["mypy-boto3-docdb (>=1.28.0,<1.29.0)"] 162 | docdb-elastic = ["mypy-boto3-docdb-elastic (>=1.28.0,<1.29.0)"] 163 | drs = ["mypy-boto3-drs (>=1.28.0,<1.29.0)"] 164 | ds = ["mypy-boto3-ds (>=1.28.0,<1.29.0)"] 165 | dynamodb = ["mypy-boto3-dynamodb (>=1.28.0,<1.29.0)"] 166 | dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.28.0,<1.29.0)"] 167 | ebs = ["mypy-boto3-ebs (>=1.28.0,<1.29.0)"] 168 | ec2 = ["mypy-boto3-ec2 (>=1.28.0,<1.29.0)"] 169 | ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.28.0,<1.29.0)"] 170 | ecr = ["mypy-boto3-ecr (>=1.28.0,<1.29.0)"] 171 | ecr-public = ["mypy-boto3-ecr-public (>=1.28.0,<1.29.0)"] 172 | ecs = ["mypy-boto3-ecs (>=1.28.0,<1.29.0)"] 173 | efs = ["mypy-boto3-efs (>=1.28.0,<1.29.0)"] 174 | eks = ["mypy-boto3-eks (>=1.28.0,<1.29.0)"] 175 | elastic-inference = ["mypy-boto3-elastic-inference (>=1.28.0,<1.29.0)"] 176 | elasticache = ["mypy-boto3-elasticache (>=1.28.0,<1.29.0)"] 177 | elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.28.0,<1.29.0)"] 178 | elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.28.0,<1.29.0)"] 179 | elb = ["mypy-boto3-elb (>=1.28.0,<1.29.0)"] 180 | elbv2 = ["mypy-boto3-elbv2 (>=1.28.0,<1.29.0)"] 181 | emr = ["mypy-boto3-emr (>=1.28.0,<1.29.0)"] 182 | emr-containers = ["mypy-boto3-emr-containers (>=1.28.0,<1.29.0)"] 183 | emr-serverless = ["mypy-boto3-emr-serverless (>=1.28.0,<1.29.0)"] 184 | entityresolution = ["mypy-boto3-entityresolution (>=1.28.0,<1.29.0)"] 185 | es = ["mypy-boto3-es (>=1.28.0,<1.29.0)"] 186 | essential = ["mypy-boto3-cloudformation (>=1.28.0,<1.29.0)", "mypy-boto3-dynamodb (>=1.28.0,<1.29.0)", "mypy-boto3-ec2 (>=1.28.0,<1.29.0)", "mypy-boto3-lambda (>=1.28.0,<1.29.0)", "mypy-boto3-rds (>=1.28.0,<1.29.0)", "mypy-boto3-s3 (>=1.28.0,<1.29.0)", "mypy-boto3-sqs (>=1.28.0,<1.29.0)"] 187 | events = ["mypy-boto3-events (>=1.28.0,<1.29.0)"] 188 | evidently = ["mypy-boto3-evidently (>=1.28.0,<1.29.0)"] 189 | finspace = ["mypy-boto3-finspace (>=1.28.0,<1.29.0)"] 190 | finspace-data = ["mypy-boto3-finspace-data (>=1.28.0,<1.29.0)"] 191 | firehose = ["mypy-boto3-firehose (>=1.28.0,<1.29.0)"] 192 | fis = ["mypy-boto3-fis (>=1.28.0,<1.29.0)"] 193 | fms = ["mypy-boto3-fms (>=1.28.0,<1.29.0)"] 194 | forecast = ["mypy-boto3-forecast (>=1.28.0,<1.29.0)"] 195 | forecastquery = ["mypy-boto3-forecastquery (>=1.28.0,<1.29.0)"] 196 | frauddetector = ["mypy-boto3-frauddetector (>=1.28.0,<1.29.0)"] 197 | fsx = ["mypy-boto3-fsx (>=1.28.0,<1.29.0)"] 198 | gamelift = ["mypy-boto3-gamelift (>=1.28.0,<1.29.0)"] 199 | glacier = ["mypy-boto3-glacier (>=1.28.0,<1.29.0)"] 200 | globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.28.0,<1.29.0)"] 201 | glue = ["mypy-boto3-glue (>=1.28.0,<1.29.0)"] 202 | grafana = ["mypy-boto3-grafana (>=1.28.0,<1.29.0)"] 203 | greengrass = ["mypy-boto3-greengrass (>=1.28.0,<1.29.0)"] 204 | greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.28.0,<1.29.0)"] 205 | groundstation = ["mypy-boto3-groundstation (>=1.28.0,<1.29.0)"] 206 | guardduty = ["mypy-boto3-guardduty (>=1.28.0,<1.29.0)"] 207 | 
health = ["mypy-boto3-health (>=1.28.0,<1.29.0)"] 208 | healthlake = ["mypy-boto3-healthlake (>=1.28.0,<1.29.0)"] 209 | honeycode = ["mypy-boto3-honeycode (>=1.28.0,<1.29.0)"] 210 | iam = ["mypy-boto3-iam (>=1.28.0,<1.29.0)"] 211 | identitystore = ["mypy-boto3-identitystore (>=1.28.0,<1.29.0)"] 212 | imagebuilder = ["mypy-boto3-imagebuilder (>=1.28.0,<1.29.0)"] 213 | importexport = ["mypy-boto3-importexport (>=1.28.0,<1.29.0)"] 214 | inspector = ["mypy-boto3-inspector (>=1.28.0,<1.29.0)"] 215 | inspector2 = ["mypy-boto3-inspector2 (>=1.28.0,<1.29.0)"] 216 | internetmonitor = ["mypy-boto3-internetmonitor (>=1.28.0,<1.29.0)"] 217 | iot = ["mypy-boto3-iot (>=1.28.0,<1.29.0)"] 218 | iot-data = ["mypy-boto3-iot-data (>=1.28.0,<1.29.0)"] 219 | iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.28.0,<1.29.0)"] 220 | iot-roborunner = ["mypy-boto3-iot-roborunner (>=1.28.0,<1.29.0)"] 221 | iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.28.0,<1.29.0)"] 222 | iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.28.0,<1.29.0)"] 223 | iotanalytics = ["mypy-boto3-iotanalytics (>=1.28.0,<1.29.0)"] 224 | iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.28.0,<1.29.0)"] 225 | iotevents = ["mypy-boto3-iotevents (>=1.28.0,<1.29.0)"] 226 | iotevents-data = ["mypy-boto3-iotevents-data (>=1.28.0,<1.29.0)"] 227 | iotfleethub = ["mypy-boto3-iotfleethub (>=1.28.0,<1.29.0)"] 228 | iotfleetwise = ["mypy-boto3-iotfleetwise (>=1.28.0,<1.29.0)"] 229 | iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.28.0,<1.29.0)"] 230 | iotsitewise = ["mypy-boto3-iotsitewise (>=1.28.0,<1.29.0)"] 231 | iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.28.0,<1.29.0)"] 232 | iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.28.0,<1.29.0)"] 233 | iotwireless = ["mypy-boto3-iotwireless (>=1.28.0,<1.29.0)"] 234 | ivs = ["mypy-boto3-ivs (>=1.28.0,<1.29.0)"] 235 | ivs-realtime = ["mypy-boto3-ivs-realtime (>=1.28.0,<1.29.0)"] 236 | ivschat = ["mypy-boto3-ivschat (>=1.28.0,<1.29.0)"] 237 | kafka = ["mypy-boto3-kafka (>=1.28.0,<1.29.0)"] 238 | kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.28.0,<1.29.0)"] 239 | kendra = ["mypy-boto3-kendra (>=1.28.0,<1.29.0)"] 240 | kendra-ranking = ["mypy-boto3-kendra-ranking (>=1.28.0,<1.29.0)"] 241 | keyspaces = ["mypy-boto3-keyspaces (>=1.28.0,<1.29.0)"] 242 | kinesis = ["mypy-boto3-kinesis (>=1.28.0,<1.29.0)"] 243 | kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.28.0,<1.29.0)"] 244 | kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.28.0,<1.29.0)"] 245 | kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.28.0,<1.29.0)"] 246 | kinesis-video-webrtc-storage = ["mypy-boto3-kinesis-video-webrtc-storage (>=1.28.0,<1.29.0)"] 247 | kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.28.0,<1.29.0)"] 248 | kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.28.0,<1.29.0)"] 249 | kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.28.0,<1.29.0)"] 250 | kms = ["mypy-boto3-kms (>=1.28.0,<1.29.0)"] 251 | lakeformation = ["mypy-boto3-lakeformation (>=1.28.0,<1.29.0)"] 252 | lambda = ["mypy-boto3-lambda (>=1.28.0,<1.29.0)"] 253 | lex-models = ["mypy-boto3-lex-models (>=1.28.0,<1.29.0)"] 254 | lex-runtime = ["mypy-boto3-lex-runtime (>=1.28.0,<1.29.0)"] 255 | lexv2-models = ["mypy-boto3-lexv2-models (>=1.28.0,<1.29.0)"] 256 | lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.28.0,<1.29.0)"] 257 | license-manager = ["mypy-boto3-license-manager (>=1.28.0,<1.29.0)"] 258 | license-manager-linux-subscriptions = 
["mypy-boto3-license-manager-linux-subscriptions (>=1.28.0,<1.29.0)"] 259 | license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.28.0,<1.29.0)"] 260 | lightsail = ["mypy-boto3-lightsail (>=1.28.0,<1.29.0)"] 261 | location = ["mypy-boto3-location (>=1.28.0,<1.29.0)"] 262 | logs = ["mypy-boto3-logs (>=1.28.0,<1.29.0)"] 263 | lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.28.0,<1.29.0)"] 264 | lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.28.0,<1.29.0)"] 265 | lookoutvision = ["mypy-boto3-lookoutvision (>=1.28.0,<1.29.0)"] 266 | m2 = ["mypy-boto3-m2 (>=1.28.0,<1.29.0)"] 267 | machinelearning = ["mypy-boto3-machinelearning (>=1.28.0,<1.29.0)"] 268 | macie = ["mypy-boto3-macie (>=1.28.0,<1.29.0)"] 269 | macie2 = ["mypy-boto3-macie2 (>=1.28.0,<1.29.0)"] 270 | managedblockchain = ["mypy-boto3-managedblockchain (>=1.28.0,<1.29.0)"] 271 | managedblockchain-query = ["mypy-boto3-managedblockchain-query (>=1.28.0,<1.29.0)"] 272 | marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.28.0,<1.29.0)"] 273 | marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.28.0,<1.29.0)"] 274 | marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.28.0,<1.29.0)"] 275 | mediaconnect = ["mypy-boto3-mediaconnect (>=1.28.0,<1.29.0)"] 276 | mediaconvert = ["mypy-boto3-mediaconvert (>=1.28.0,<1.29.0)"] 277 | medialive = ["mypy-boto3-medialive (>=1.28.0,<1.29.0)"] 278 | mediapackage = ["mypy-boto3-mediapackage (>=1.28.0,<1.29.0)"] 279 | mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.28.0,<1.29.0)"] 280 | mediapackagev2 = ["mypy-boto3-mediapackagev2 (>=1.28.0,<1.29.0)"] 281 | mediastore = ["mypy-boto3-mediastore (>=1.28.0,<1.29.0)"] 282 | mediastore-data = ["mypy-boto3-mediastore-data (>=1.28.0,<1.29.0)"] 283 | mediatailor = ["mypy-boto3-mediatailor (>=1.28.0,<1.29.0)"] 284 | medical-imaging = ["mypy-boto3-medical-imaging (>=1.28.0,<1.29.0)"] 285 | memorydb = ["mypy-boto3-memorydb (>=1.28.0,<1.29.0)"] 286 | meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.28.0,<1.29.0)"] 287 | mgh = ["mypy-boto3-mgh (>=1.28.0,<1.29.0)"] 288 | mgn = ["mypy-boto3-mgn (>=1.28.0,<1.29.0)"] 289 | migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.28.0,<1.29.0)"] 290 | migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.28.0,<1.29.0)"] 291 | migrationhuborchestrator = ["mypy-boto3-migrationhuborchestrator (>=1.28.0,<1.29.0)"] 292 | migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.28.0,<1.29.0)"] 293 | mobile = ["mypy-boto3-mobile (>=1.28.0,<1.29.0)"] 294 | mq = ["mypy-boto3-mq (>=1.28.0,<1.29.0)"] 295 | mturk = ["mypy-boto3-mturk (>=1.28.0,<1.29.0)"] 296 | mwaa = ["mypy-boto3-mwaa (>=1.28.0,<1.29.0)"] 297 | neptune = ["mypy-boto3-neptune (>=1.28.0,<1.29.0)"] 298 | neptunedata = ["mypy-boto3-neptunedata (>=1.28.0,<1.29.0)"] 299 | network-firewall = ["mypy-boto3-network-firewall (>=1.28.0,<1.29.0)"] 300 | networkmanager = ["mypy-boto3-networkmanager (>=1.28.0,<1.29.0)"] 301 | nimble = ["mypy-boto3-nimble (>=1.28.0,<1.29.0)"] 302 | oam = ["mypy-boto3-oam (>=1.28.0,<1.29.0)"] 303 | omics = ["mypy-boto3-omics (>=1.28.0,<1.29.0)"] 304 | opensearch = ["mypy-boto3-opensearch (>=1.28.0,<1.29.0)"] 305 | opensearchserverless = ["mypy-boto3-opensearchserverless (>=1.28.0,<1.29.0)"] 306 | opsworks = ["mypy-boto3-opsworks (>=1.28.0,<1.29.0)"] 307 | opsworkscm = ["mypy-boto3-opsworkscm (>=1.28.0,<1.29.0)"] 308 | organizations = ["mypy-boto3-organizations (>=1.28.0,<1.29.0)"] 309 | osis = ["mypy-boto3-osis 
(>=1.28.0,<1.29.0)"] 310 | outposts = ["mypy-boto3-outposts (>=1.28.0,<1.29.0)"] 311 | panorama = ["mypy-boto3-panorama (>=1.28.0,<1.29.0)"] 312 | payment-cryptography = ["mypy-boto3-payment-cryptography (>=1.28.0,<1.29.0)"] 313 | payment-cryptography-data = ["mypy-boto3-payment-cryptography-data (>=1.28.0,<1.29.0)"] 314 | pca-connector-ad = ["mypy-boto3-pca-connector-ad (>=1.28.0,<1.29.0)"] 315 | personalize = ["mypy-boto3-personalize (>=1.28.0,<1.29.0)"] 316 | personalize-events = ["mypy-boto3-personalize-events (>=1.28.0,<1.29.0)"] 317 | personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.28.0,<1.29.0)"] 318 | pi = ["mypy-boto3-pi (>=1.28.0,<1.29.0)"] 319 | pinpoint = ["mypy-boto3-pinpoint (>=1.28.0,<1.29.0)"] 320 | pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.28.0,<1.29.0)"] 321 | pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.28.0,<1.29.0)"] 322 | pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.28.0,<1.29.0)"] 323 | pipes = ["mypy-boto3-pipes (>=1.28.0,<1.29.0)"] 324 | polly = ["mypy-boto3-polly (>=1.28.0,<1.29.0)"] 325 | pricing = ["mypy-boto3-pricing (>=1.28.0,<1.29.0)"] 326 | privatenetworks = ["mypy-boto3-privatenetworks (>=1.28.0,<1.29.0)"] 327 | proton = ["mypy-boto3-proton (>=1.28.0,<1.29.0)"] 328 | qldb = ["mypy-boto3-qldb (>=1.28.0,<1.29.0)"] 329 | qldb-session = ["mypy-boto3-qldb-session (>=1.28.0,<1.29.0)"] 330 | quicksight = ["mypy-boto3-quicksight (>=1.28.0,<1.29.0)"] 331 | ram = ["mypy-boto3-ram (>=1.28.0,<1.29.0)"] 332 | rbin = ["mypy-boto3-rbin (>=1.28.0,<1.29.0)"] 333 | rds = ["mypy-boto3-rds (>=1.28.0,<1.29.0)"] 334 | rds-data = ["mypy-boto3-rds-data (>=1.28.0,<1.29.0)"] 335 | redshift = ["mypy-boto3-redshift (>=1.28.0,<1.29.0)"] 336 | redshift-data = ["mypy-boto3-redshift-data (>=1.28.0,<1.29.0)"] 337 | redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.28.0,<1.29.0)"] 338 | rekognition = ["mypy-boto3-rekognition (>=1.28.0,<1.29.0)"] 339 | resiliencehub = ["mypy-boto3-resiliencehub (>=1.28.0,<1.29.0)"] 340 | resource-explorer-2 = ["mypy-boto3-resource-explorer-2 (>=1.28.0,<1.29.0)"] 341 | resource-groups = ["mypy-boto3-resource-groups (>=1.28.0,<1.29.0)"] 342 | resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.28.0,<1.29.0)"] 343 | robomaker = ["mypy-boto3-robomaker (>=1.28.0,<1.29.0)"] 344 | rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.28.0,<1.29.0)"] 345 | route53 = ["mypy-boto3-route53 (>=1.28.0,<1.29.0)"] 346 | route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.28.0,<1.29.0)"] 347 | route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.28.0,<1.29.0)"] 348 | route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.28.0,<1.29.0)"] 349 | route53domains = ["mypy-boto3-route53domains (>=1.28.0,<1.29.0)"] 350 | route53resolver = ["mypy-boto3-route53resolver (>=1.28.0,<1.29.0)"] 351 | rum = ["mypy-boto3-rum (>=1.28.0,<1.29.0)"] 352 | s3 = ["mypy-boto3-s3 (>=1.28.0,<1.29.0)"] 353 | s3control = ["mypy-boto3-s3control (>=1.28.0,<1.29.0)"] 354 | s3outposts = ["mypy-boto3-s3outposts (>=1.28.0,<1.29.0)"] 355 | sagemaker = ["mypy-boto3-sagemaker (>=1.28.0,<1.29.0)"] 356 | sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.28.0,<1.29.0)"] 357 | sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.28.0,<1.29.0)"] 358 | sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.28.0,<1.29.0)"] 359 | sagemaker-geospatial = ["mypy-boto3-sagemaker-geospatial (>=1.28.0,<1.29.0)"] 360 | sagemaker-metrics = 
["mypy-boto3-sagemaker-metrics (>=1.28.0,<1.29.0)"] 361 | sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.28.0,<1.29.0)"] 362 | savingsplans = ["mypy-boto3-savingsplans (>=1.28.0,<1.29.0)"] 363 | scheduler = ["mypy-boto3-scheduler (>=1.28.0,<1.29.0)"] 364 | schemas = ["mypy-boto3-schemas (>=1.28.0,<1.29.0)"] 365 | sdb = ["mypy-boto3-sdb (>=1.28.0,<1.29.0)"] 366 | secretsmanager = ["mypy-boto3-secretsmanager (>=1.28.0,<1.29.0)"] 367 | securityhub = ["mypy-boto3-securityhub (>=1.28.0,<1.29.0)"] 368 | securitylake = ["mypy-boto3-securitylake (>=1.28.0,<1.29.0)"] 369 | serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.28.0,<1.29.0)"] 370 | service-quotas = ["mypy-boto3-service-quotas (>=1.28.0,<1.29.0)"] 371 | servicecatalog = ["mypy-boto3-servicecatalog (>=1.28.0,<1.29.0)"] 372 | servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.28.0,<1.29.0)"] 373 | servicediscovery = ["mypy-boto3-servicediscovery (>=1.28.0,<1.29.0)"] 374 | ses = ["mypy-boto3-ses (>=1.28.0,<1.29.0)"] 375 | sesv2 = ["mypy-boto3-sesv2 (>=1.28.0,<1.29.0)"] 376 | shield = ["mypy-boto3-shield (>=1.28.0,<1.29.0)"] 377 | signer = ["mypy-boto3-signer (>=1.28.0,<1.29.0)"] 378 | simspaceweaver = ["mypy-boto3-simspaceweaver (>=1.28.0,<1.29.0)"] 379 | sms = ["mypy-boto3-sms (>=1.28.0,<1.29.0)"] 380 | sms-voice = ["mypy-boto3-sms-voice (>=1.28.0,<1.29.0)"] 381 | snow-device-management = ["mypy-boto3-snow-device-management (>=1.28.0,<1.29.0)"] 382 | snowball = ["mypy-boto3-snowball (>=1.28.0,<1.29.0)"] 383 | sns = ["mypy-boto3-sns (>=1.28.0,<1.29.0)"] 384 | sqs = ["mypy-boto3-sqs (>=1.28.0,<1.29.0)"] 385 | ssm = ["mypy-boto3-ssm (>=1.28.0,<1.29.0)"] 386 | ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.28.0,<1.29.0)"] 387 | ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.28.0,<1.29.0)"] 388 | ssm-sap = ["mypy-boto3-ssm-sap (>=1.28.0,<1.29.0)"] 389 | sso = ["mypy-boto3-sso (>=1.28.0,<1.29.0)"] 390 | sso-admin = ["mypy-boto3-sso-admin (>=1.28.0,<1.29.0)"] 391 | sso-oidc = ["mypy-boto3-sso-oidc (>=1.28.0,<1.29.0)"] 392 | stepfunctions = ["mypy-boto3-stepfunctions (>=1.28.0,<1.29.0)"] 393 | storagegateway = ["mypy-boto3-storagegateway (>=1.28.0,<1.29.0)"] 394 | sts = ["mypy-boto3-sts (>=1.28.0,<1.29.0)"] 395 | support = ["mypy-boto3-support (>=1.28.0,<1.29.0)"] 396 | support-app = ["mypy-boto3-support-app (>=1.28.0,<1.29.0)"] 397 | swf = ["mypy-boto3-swf (>=1.28.0,<1.29.0)"] 398 | synthetics = ["mypy-boto3-synthetics (>=1.28.0,<1.29.0)"] 399 | textract = ["mypy-boto3-textract (>=1.28.0,<1.29.0)"] 400 | timestream-query = ["mypy-boto3-timestream-query (>=1.28.0,<1.29.0)"] 401 | timestream-write = ["mypy-boto3-timestream-write (>=1.28.0,<1.29.0)"] 402 | tnb = ["mypy-boto3-tnb (>=1.28.0,<1.29.0)"] 403 | transcribe = ["mypy-boto3-transcribe (>=1.28.0,<1.29.0)"] 404 | transfer = ["mypy-boto3-transfer (>=1.28.0,<1.29.0)"] 405 | translate = ["mypy-boto3-translate (>=1.28.0,<1.29.0)"] 406 | verifiedpermissions = ["mypy-boto3-verifiedpermissions (>=1.28.0,<1.29.0)"] 407 | voice-id = ["mypy-boto3-voice-id (>=1.28.0,<1.29.0)"] 408 | vpc-lattice = ["mypy-boto3-vpc-lattice (>=1.28.0,<1.29.0)"] 409 | waf = ["mypy-boto3-waf (>=1.28.0,<1.29.0)"] 410 | waf-regional = ["mypy-boto3-waf-regional (>=1.28.0,<1.29.0)"] 411 | wafv2 = ["mypy-boto3-wafv2 (>=1.28.0,<1.29.0)"] 412 | wellarchitected = ["mypy-boto3-wellarchitected (>=1.28.0,<1.29.0)"] 413 | wisdom = ["mypy-boto3-wisdom (>=1.28.0,<1.29.0)"] 414 | workdocs = ["mypy-boto3-workdocs (>=1.28.0,<1.29.0)"] 415 | worklink = ["mypy-boto3-worklink (>=1.28.0,<1.29.0)"] 416 | workmail = 
["mypy-boto3-workmail (>=1.28.0,<1.29.0)"] 417 | workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.28.0,<1.29.0)"] 418 | workspaces = ["mypy-boto3-workspaces (>=1.28.0,<1.29.0)"] 419 | workspaces-web = ["mypy-boto3-workspaces-web (>=1.28.0,<1.29.0)"] 420 | xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] 421 | 422 | [[package]] 423 | name = "botocore" 424 | version = "1.31.70" 425 | description = "Low-level, data-driven core of boto 3." 426 | optional = false 427 | python-versions = ">= 3.7" 428 | files = [ 429 | {file = "botocore-1.31.70-py3-none-any.whl", hash = "sha256:049bbf526c95b6169f59617a5ff1b0061cb7a0e44992b8c27c6955832b383988"}, 430 | {file = "botocore-1.31.70.tar.gz", hash = "sha256:5f49def4ec2e4216dd0195d23d9811027d02ee6c8a37b031e2b2fe38e8c77ddc"}, 431 | ] 432 | 433 | [package.dependencies] 434 | jmespath = ">=0.7.1,<2.0.0" 435 | python-dateutil = ">=2.1,<3.0.0" 436 | urllib3 = [ 437 | {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, 438 | {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}, 439 | ] 440 | 441 | [package.extras] 442 | crt = ["awscrt (==0.16.26)"] 443 | 444 | [[package]] 445 | name = "botocore-stubs" 446 | version = "1.31.70" 447 | description = "Type annotations and code completion for botocore" 448 | optional = false 449 | python-versions = ">=3.7,<4.0" 450 | files = [ 451 | {file = "botocore_stubs-1.31.70-py3-none-any.whl", hash = "sha256:205b952cef00bfdf5e2b1a7fb7ef40c7008729af7934663703ef006f9a420a29"}, 452 | {file = "botocore_stubs-1.31.70.tar.gz", hash = "sha256:5a7f77649bf54d326461cb380b935338ed41370b0330a7495b3a82f6277369be"}, 453 | ] 454 | 455 | [package.dependencies] 456 | types-awscrt = "*" 457 | typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} 458 | 459 | [[package]] 460 | name = "click" 461 | version = "7.1.2" 462 | description = "Composable command line interface toolkit" 463 | optional = false 464 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 465 | files = [ 466 | {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, 467 | {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, 468 | ] 469 | 470 | [[package]] 471 | name = "colorama" 472 | version = "0.4.6" 473 | description = "Cross-platform colored terminal text." 
474 | optional = false 475 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 476 | files = [ 477 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 478 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 479 | ] 480 | 481 | [[package]] 482 | name = "coverage" 483 | version = "7.2.7" 484 | description = "Code coverage measurement for Python" 485 | optional = false 486 | python-versions = ">=3.7" 487 | files = [ 488 | {file = "coverage-7.2.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d39b5b4f2a66ccae8b7263ac3c8170994b65266797fb96cbbfd3fb5b23921db8"}, 489 | {file = "coverage-7.2.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d040ef7c9859bb11dfeb056ff5b3872436e3b5e401817d87a31e1750b9ae2fb"}, 490 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba90a9563ba44a72fda2e85302c3abc71c5589cea608ca16c22b9804262aaeb6"}, 491 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d9405291c6928619403db1d10bd07888888ec1abcbd9748fdaa971d7d661b2"}, 492 | {file = "coverage-7.2.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31563e97dae5598556600466ad9beea39fb04e0229e61c12eaa206e0aa202063"}, 493 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ebba1cd308ef115925421d3e6a586e655ca5a77b5bf41e02eb0e4562a111f2d1"}, 494 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cb017fd1b2603ef59e374ba2063f593abe0fc45f2ad9abdde5b4d83bd922a353"}, 495 | {file = "coverage-7.2.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62a5c7dad11015c66fbb9d881bc4caa5b12f16292f857842d9d1871595f4495"}, 496 | {file = "coverage-7.2.7-cp310-cp310-win32.whl", hash = "sha256:ee57190f24fba796e36bb6d3aa8a8783c643d8fa9760c89f7a98ab5455fbf818"}, 497 | {file = "coverage-7.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:f75f7168ab25dd93110c8a8117a22450c19976afbc44234cbf71481094c1b850"}, 498 | {file = "coverage-7.2.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f"}, 499 | {file = "coverage-7.2.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe"}, 500 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3"}, 501 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f"}, 502 | {file = "coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb"}, 503 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833"}, 504 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97"}, 505 | {file = "coverage-7.2.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a"}, 506 | {file = "coverage-7.2.7-cp311-cp311-win32.whl", hash = "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a"}, 507 | {file = "coverage-7.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562"}, 508 | {file = "coverage-7.2.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4"}, 509 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4"}, 510 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01"}, 511 | {file = "coverage-7.2.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6"}, 512 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d"}, 513 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de"}, 514 | {file = "coverage-7.2.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d"}, 515 | {file = "coverage-7.2.7-cp312-cp312-win32.whl", hash = "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511"}, 516 | {file = "coverage-7.2.7-cp312-cp312-win_amd64.whl", hash = "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3"}, 517 | {file = "coverage-7.2.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:58c2ccc2f00ecb51253cbe5d8d7122a34590fac9646a960d1430d5b15321d95f"}, 518 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d22656368f0e6189e24722214ed8d66b8022db19d182927b9a248a2a8a2f67eb"}, 519 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a895fcc7b15c3fc72beb43cdcbdf0ddb7d2ebc959edac9cef390b0d14f39f8a9"}, 520 | {file = "coverage-7.2.7-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84606b74eb7de6ff581a7915e2dab7a28a0517fbe1c9239eb227e1354064dcd"}, 521 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0a5f9e1dbd7fbe30196578ca36f3fba75376fb99888c395c5880b355e2875f8a"}, 522 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:419bfd2caae268623dd469eff96d510a920c90928b60f2073d79f8fe2bbc5959"}, 523 | {file = "coverage-7.2.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2aee274c46590717f38ae5e4650988d1af340fe06167546cc32fe2f58ed05b02"}, 524 | {file = "coverage-7.2.7-cp37-cp37m-win32.whl", hash = "sha256:61b9a528fb348373c433e8966535074b802c7a5d7f23c4f421e6c6e2f1697a6f"}, 525 | {file = "coverage-7.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:b1c546aca0ca4d028901d825015dc8e4d56aac4b541877690eb76490f1dc8ed0"}, 526 | {file = "coverage-7.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:54b896376ab563bd38453cecb813c295cf347cf5906e8b41d340b0321a5433e5"}, 527 | {file = "coverage-7.2.7-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:3d376df58cc111dc8e21e3b6e24606b5bb5dee6024f46a5abca99124b2229ef5"}, 528 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e330fc79bd7207e46c7d7fd2bb4af2963f5f635703925543a70b99574b0fea9"}, 529 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e9d683426464e4a252bf70c3498756055016f99ddaec3774bf368e76bbe02b6"}, 530 | {file = "coverage-7.2.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d13c64ee2d33eccf7437961b6ea7ad8673e2be040b4f7fd4fd4d4d28d9ccb1e"}, 531 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b7aa5f8a41217360e600da646004f878250a0d6738bcdc11a0a39928d7dc2050"}, 532 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fa03bce9bfbeeef9f3b160a8bed39a221d82308b4152b27d82d8daa7041fee5"}, 533 | {file = "coverage-7.2.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:245167dd26180ab4c91d5e1496a30be4cd721a5cf2abf52974f965f10f11419f"}, 534 | {file = "coverage-7.2.7-cp38-cp38-win32.whl", hash = "sha256:d2c2db7fd82e9b72937969bceac4d6ca89660db0a0967614ce2481e81a0b771e"}, 535 | {file = "coverage-7.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:2e07b54284e381531c87f785f613b833569c14ecacdcb85d56b25c4622c16c3c"}, 536 | {file = "coverage-7.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:537891ae8ce59ef63d0123f7ac9e2ae0fc8b72c7ccbe5296fec45fd68967b6c9"}, 537 | {file = "coverage-7.2.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06fb182e69f33f6cd1d39a6c597294cff3143554b64b9825d1dc69d18cc2fff2"}, 538 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201e7389591af40950a6480bd9edfa8ed04346ff80002cec1a66cac4549c1ad7"}, 539 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f6951407391b639504e3b3be51b7ba5f3528adbf1a8ac3302b687ecababf929e"}, 540 | {file = "coverage-7.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1"}, 541 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b29019c76039dc3c0fd815c41392a044ce555d9bcdd38b0fb60fb4cd8e475ba9"}, 542 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:81c13a1fc7468c40f13420732805a4c38a105d89848b7c10af65a90beff25250"}, 543 | {file = "coverage-7.2.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:975d70ab7e3c80a3fe86001d8751f6778905ec723f5b110aed1e450da9d4b7f2"}, 544 | {file = "coverage-7.2.7-cp39-cp39-win32.whl", hash = "sha256:7ee7d9d4822c8acc74a5e26c50604dff824710bc8de424904c0982e25c39c6cb"}, 545 | {file = "coverage-7.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb393e5ebc85245347950143969b241d08b52b88a3dc39479822e073a1a8eb27"}, 546 | {file = "coverage-7.2.7-pp37.pp38.pp39-none-any.whl", hash = "sha256:b7b4c971f05e6ae490fef852c218b0e79d4e52f79ef0c8475566584a8fb3e01d"}, 547 | {file = "coverage-7.2.7.tar.gz", hash = "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59"}, 548 | ] 549 | 550 | [package.dependencies] 551 | tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} 552 | 553 | [package.extras] 554 | toml = ["tomli"] 555 | 556 | [[package]] 557 | name = "exceptiongroup" 558 | version = "1.1.3" 559 | 
description = "Backport of PEP 654 (exception groups)" 560 | optional = false 561 | python-versions = ">=3.7" 562 | files = [ 563 | {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, 564 | {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, 565 | ] 566 | 567 | [package.extras] 568 | test = ["pytest (>=6)"] 569 | 570 | [[package]] 571 | name = "importlib-metadata" 572 | version = "6.7.0" 573 | description = "Read metadata from Python packages" 574 | optional = false 575 | python-versions = ">=3.7" 576 | files = [ 577 | {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, 578 | {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, 579 | ] 580 | 581 | [package.dependencies] 582 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 583 | zipp = ">=0.5" 584 | 585 | [package.extras] 586 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 587 | perf = ["ipython"] 588 | testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] 589 | 590 | [[package]] 591 | name = "iniconfig" 592 | version = "2.0.0" 593 | description = "brain-dead simple config-ini parsing" 594 | optional = false 595 | python-versions = ">=3.7" 596 | files = [ 597 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 598 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 599 | ] 600 | 601 | [[package]] 602 | name = "jmespath" 603 | version = "1.0.1" 604 | description = "JSON Matching Expressions" 605 | optional = false 606 | python-versions = ">=3.7" 607 | files = [ 608 | {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, 609 | {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, 610 | ] 611 | 612 | [[package]] 613 | name = "markdown-it-py" 614 | version = "2.2.0" 615 | description = "Python port of markdown-it. Markdown parsing, done right!" 
616 | optional = false 617 | python-versions = ">=3.7" 618 | files = [ 619 | {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"}, 620 | {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"}, 621 | ] 622 | 623 | [package.dependencies] 624 | mdurl = ">=0.1,<1.0" 625 | typing_extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} 626 | 627 | [package.extras] 628 | benchmarking = ["psutil", "pytest", "pytest-benchmark"] 629 | code-style = ["pre-commit (>=3.0,<4.0)"] 630 | compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] 631 | linkify = ["linkify-it-py (>=1,<3)"] 632 | plugins = ["mdit-py-plugins"] 633 | profiling = ["gprof2dot"] 634 | rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] 635 | testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] 636 | 637 | [[package]] 638 | name = "mdurl" 639 | version = "0.1.2" 640 | description = "Markdown URL utilities" 641 | optional = false 642 | python-versions = ">=3.7" 643 | files = [ 644 | {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, 645 | {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, 646 | ] 647 | 648 | [[package]] 649 | name = "mypy-boto3-s3" 650 | version = "1.28.55" 651 | description = "Type annotations for boto3.S3 1.28.55 service generated with mypy-boto3-builder 7.19.0" 652 | optional = false 653 | python-versions = ">=3.7" 654 | files = [ 655 | {file = "mypy-boto3-s3-1.28.55.tar.gz", hash = "sha256:b008809f448e74075012d4fc54b0176de0b4f49bc38e39de30ca0e764eb75056"}, 656 | {file = "mypy_boto3_s3-1.28.55-py3-none-any.whl", hash = "sha256:11a3db97398973d4ae28489b94c010778a0a5c65f99e00268456c3fea67eca79"}, 657 | ] 658 | 659 | [package.dependencies] 660 | typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} 661 | 662 | [[package]] 663 | name = "packaging" 664 | version = "23.2" 665 | description = "Core utilities for Python packages" 666 | optional = false 667 | python-versions = ">=3.7" 668 | files = [ 669 | {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, 670 | {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, 671 | ] 672 | 673 | [[package]] 674 | name = "pluggy" 675 | version = "1.2.0" 676 | description = "plugin and hook calling mechanisms for python" 677 | optional = false 678 | python-versions = ">=3.7" 679 | files = [ 680 | {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, 681 | {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, 682 | ] 683 | 684 | [package.dependencies] 685 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 686 | 687 | [package.extras] 688 | dev = ["pre-commit", "tox"] 689 | testing = ["pytest", "pytest-benchmark"] 690 | 691 | [[package]] 692 | name = "pyfakefs" 693 | version = "5.1.0" 694 | description = "pyfakefs implements a fake file system that mocks the Python file system modules." 
695 | optional = false 696 | python-versions = ">=3.7" 697 | files = [ 698 | {file = "pyfakefs-5.1.0-py3-none-any.whl", hash = "sha256:e6f34a8224b41f1b1ab25aa8d430121dac42e3c6e981e01eae76b3343fba47d0"}, 699 | {file = "pyfakefs-5.1.0.tar.gz", hash = "sha256:316c6026640d14a6b4fbde71fd9674576d1b5710deda8fabde8aad51d785dbc3"}, 700 | ] 701 | 702 | [[package]] 703 | name = "pygments" 704 | version = "2.16.1" 705 | description = "Pygments is a syntax highlighting package written in Python." 706 | optional = false 707 | python-versions = ">=3.7" 708 | files = [ 709 | {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, 710 | {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, 711 | ] 712 | 713 | [package.extras] 714 | plugins = ["importlib-metadata"] 715 | 716 | [[package]] 717 | name = "pytest" 718 | version = "7.2.2" 719 | description = "pytest: simple powerful testing with Python" 720 | optional = false 721 | python-versions = ">=3.7" 722 | files = [ 723 | {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, 724 | {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, 725 | ] 726 | 727 | [package.dependencies] 728 | attrs = ">=19.2.0" 729 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 730 | exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} 731 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 732 | iniconfig = "*" 733 | packaging = "*" 734 | pluggy = ">=0.12,<2.0" 735 | tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} 736 | 737 | [package.extras] 738 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] 739 | 740 | [[package]] 741 | name = "pytest-cov" 742 | version = "4.1.0" 743 | description = "Pytest plugin for measuring coverage." 
744 | optional = false 745 | python-versions = ">=3.7" 746 | files = [ 747 | {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, 748 | {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, 749 | ] 750 | 751 | [package.dependencies] 752 | coverage = {version = ">=5.2.1", extras = ["toml"]} 753 | pytest = ">=4.6" 754 | 755 | [package.extras] 756 | testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] 757 | 758 | [[package]] 759 | name = "python-dateutil" 760 | version = "2.8.2" 761 | description = "Extensions to the standard Python datetime module" 762 | optional = false 763 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 764 | files = [ 765 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 766 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 767 | ] 768 | 769 | [package.dependencies] 770 | six = ">=1.5" 771 | 772 | [[package]] 773 | name = "pyyaml" 774 | version = "5.3.1" 775 | description = "YAML parser and emitter for Python" 776 | optional = false 777 | python-versions = "*" 778 | files = [ 779 | {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, 780 | {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, 781 | {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, 782 | {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, 783 | {file = "PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, 784 | {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, 785 | {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, 786 | {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, 787 | {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, 788 | {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, 789 | {file = "PyYAML-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a"}, 790 | {file = "PyYAML-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e"}, 791 | {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, 792 | ] 793 | 794 | [[package]] 795 | name = "rich" 796 | version = "13.6.0" 797 | description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" 798 | optional = false 799 | python-versions = ">=3.7.0" 800 | files = [ 801 | {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"}, 802 | {file = "rich-13.6.0.tar.gz", hash = 
"sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"}, 803 | ] 804 | 805 | [package.dependencies] 806 | markdown-it-py = ">=2.2.0" 807 | pygments = ">=2.13.0,<3.0.0" 808 | typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} 809 | 810 | [package.extras] 811 | jupyter = ["ipywidgets (>=7.5.1,<9)"] 812 | 813 | [[package]] 814 | name = "s3transfer" 815 | version = "0.7.0" 816 | description = "An Amazon S3 Transfer Manager" 817 | optional = false 818 | python-versions = ">= 3.7" 819 | files = [ 820 | {file = "s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a"}, 821 | {file = "s3transfer-0.7.0.tar.gz", hash = "sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e"}, 822 | ] 823 | 824 | [package.dependencies] 825 | botocore = ">=1.12.36,<2.0a.0" 826 | 827 | [package.extras] 828 | crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] 829 | 830 | [[package]] 831 | name = "six" 832 | version = "1.16.0" 833 | description = "Python 2 and 3 compatibility utilities" 834 | optional = false 835 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 836 | files = [ 837 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 838 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 839 | ] 840 | 841 | [[package]] 842 | name = "tomli" 843 | version = "2.0.1" 844 | description = "A lil' TOML parser" 845 | optional = false 846 | python-versions = ">=3.7" 847 | files = [ 848 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 849 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 850 | ] 851 | 852 | [[package]] 853 | name = "types-awscrt" 854 | version = "0.19.3" 855 | description = "Type annotations and code completion for awscrt" 856 | optional = false 857 | python-versions = ">=3.7,<4.0" 858 | files = [ 859 | {file = "types_awscrt-0.19.3-py3-none-any.whl", hash = "sha256:7b55f5a12ccd4407bc8f1e35c69bb40c931f8513ce1ad81a4527fce3989003fd"}, 860 | {file = "types_awscrt-0.19.3.tar.gz", hash = "sha256:9a21caac4287c113dd52665707785c45bb1d3242b7a2b8aeb57c49e9e749a330"}, 861 | ] 862 | 863 | [[package]] 864 | name = "types-s3transfer" 865 | version = "0.7.0" 866 | description = "Type annotations and code completion for s3transfer" 867 | optional = false 868 | python-versions = ">=3.7,<4.0" 869 | files = [ 870 | {file = "types_s3transfer-0.7.0-py3-none-any.whl", hash = "sha256:ae9ed9273465d9f43da8b96307383da410c6b59c3b2464c88d20b578768e97c6"}, 871 | {file = "types_s3transfer-0.7.0.tar.gz", hash = "sha256:aca0f2486d0a3a5037cd5b8f3e20a4522a29579a8dd183281ff0aa1c4e2c8aa7"}, 872 | ] 873 | 874 | [[package]] 875 | name = "typing-extensions" 876 | version = "4.7.1" 877 | description = "Backported and Experimental Type Hints for Python 3.7+" 878 | optional = false 879 | python-versions = ">=3.7" 880 | files = [ 881 | {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, 882 | {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, 883 | ] 884 | 885 | [[package]] 886 | name = "urllib3" 887 | version = "1.26.18" 888 | description = "HTTP library with thread-safe connection pooling, file post, 
and more." 889 | optional = false 890 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 891 | files = [ 892 | {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, 893 | {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, 894 | ] 895 | 896 | [package.extras] 897 | brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] 898 | secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] 899 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 900 | 901 | [[package]] 902 | name = "urllib3" 903 | version = "2.0.7" 904 | description = "HTTP library with thread-safe connection pooling, file post, and more." 905 | optional = false 906 | python-versions = ">=3.7" 907 | files = [ 908 | {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, 909 | {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, 910 | ] 911 | 912 | [package.extras] 913 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] 914 | secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] 915 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] 916 | zstd = ["zstandard (>=0.18.0)"] 917 | 918 | [[package]] 919 | name = "zipp" 920 | version = "3.15.0" 921 | description = "Backport of pathlib-compatible object wrapper for zip files" 922 | optional = false 923 | python-versions = ">=3.7" 924 | files = [ 925 | {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, 926 | {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, 927 | ] 928 | 929 | [package.extras] 930 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 931 | testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] 932 | 933 | [metadata] 934 | lock-version = "2.0" 935 | python-versions = "^3.7" 936 | content-hash = "923c988f79a30772ee1f38990f9a6609f360edcf9d7bf60e77822f2c947e1f26" 937 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "emr-cli" 3 | version = "0.0.16" 4 | description = "A command-line interface for packaging, deploying, and running your PySpark jobs on EMR." 
5 | authors = ["Amazon EMR "] 6 | license = "Apache-2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.7" 11 | click = "^7.1.2" 12 | boto3 = "^1.26.6" 13 | pyyaml = "5.3.1" 14 | rich = "^13.4.2" 15 | importlib-metadata = {version = "6.7.0", python = "3.7"} 16 | 17 | [tool.poetry.group.dev.dependencies] 18 | pytest = "7.2.2" 19 | pytest-cov = "^4.0.0" 20 | pyfakefs = "5.1.0" 21 | boto3-stubs = {extras = ["s3"], version = "^1.28.70"} 22 | 23 | [build-system] 24 | requires = ["poetry-core"] 25 | build-backend = "poetry.core.masonry.api" 26 | 27 | [tool.poetry.scripts] 28 | emr = "emr_cli.emr_cli:cli" 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.3.5 2 | pyarrow==8.0.0 -------------------------------------------------------------------------------- /src/emr_cli/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | from emr_cli.utils import console_log 6 | 7 | DEFAULT_CONFIG_PATH = ".emr/config.yaml" 8 | 9 | 10 | class ConfigReader: 11 | @classmethod 12 | def read(cls): 13 | config = {} 14 | # Look for a config file - if we don't find one, that's fine. :) 15 | p = Path(DEFAULT_CONFIG_PATH) 16 | if not p.is_file(): 17 | return config 18 | 19 | with p.open() as infile: 20 | try: 21 | config = yaml.safe_load(infile) 22 | return config 23 | except yaml.YAMLError as exc: 24 | console_log(f"There was an error parsing the config file: {exc}") 25 | return config 26 | 27 | 28 | class ConfigWriter: 29 | @classmethod 30 | def write(cls, config): 31 | """ 32 | Write the passed config, overwriting any existing config. 33 | """ 34 | p = Path(DEFAULT_CONFIG_PATH) 35 | 36 | p.parent.mkdir(parents=True, exist_ok=True) 37 | 38 | with p.open("w") as outfile: 39 | outfile.write(yaml.dump(config)) 40 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | 4 | class SparkParams: 5 | """ 6 | SparkParams allows deployment packages to specify different sets of 7 | Spark `--conf` parameters based on the environment being deployed to. 8 | """ 9 | 10 | SUPPORTED_ENVIRONMENTS = ["emr_serverless", "emr_ec2", "emr_eks"] 11 | 12 | def __init__( 13 | self, 14 | common_params: Optional[Dict[str, str]] = None, 15 | emr_serverless_params: Optional[Dict[str, str]] = None, 16 | emr_ec2_params: Optional[Dict[str, str]] = None, 17 | emr_eks_params: Optional[Dict[str, str]] = None, 18 | ) -> None: 19 | self._common = common_params or {} 20 | self._environment_params = { 21 | "emr_serverless": emr_serverless_params or {}, 22 | "emr_ec2": emr_ec2_params or {}, 23 | "emr_eks": emr_eks_params or {}, 24 | } 25 | 26 | def params_for(self, deployment_type: str) -> str: 27 | """ 28 | Return a set of string spark-submit parameters for the provided deployment type. 
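Example (hypothetical values; environment-specific parameters override
any overlapping common parameters):

    >>> sp = SparkParams(
    ...     common_params={"spark.executor.cores": "4"},
    ...     emr_serverless_params={"spark.dynamicAllocation.enabled": "false"},
    ... )
    >>> sp.params_for("emr_serverless")
    '--conf spark.executor.cores=4 --conf spark.dynamicAllocation.enabled=false'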
29 | """ 30 | if deployment_type not in self.SUPPORTED_ENVIRONMENTS: 31 | raise ValueError(f"{deployment_type} environment is not supported.") 32 | 33 | conf_items = {} 34 | 35 | for k, v in self._common.items(): 36 | conf_items[k] = v 37 | 38 | for k, v in self._environment_params[deployment_type].items(): 39 | conf_items[k] = v 40 | 41 | return " ".join([f"--conf {k}={v}" for k, v in conf_items.items()]) 42 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_ec2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shlex 3 | import sys 4 | import time 5 | from os.path import join 6 | from typing import List, Optional 7 | 8 | import boto3 9 | from botocore.exceptions import ClientError, WaiterError 10 | from emr_cli.deployments.emr_serverless import DeploymentPackage 11 | from emr_cli.utils import console_log, parse_bucket_uri, print_s3_gz 12 | 13 | LOG_WAITER_DELAY_SEC = 30 14 | 15 | 16 | class Bootstrap: 17 | DEFAULT_S3_POLICY_NAME = "emr-cli-S3Access" 18 | DEFAULT_GLUE_POLICY_NAME = "emr-cli-GlueAccess" 19 | 20 | def __init__( 21 | self, 22 | code_bucket: str, 23 | log_bucket: str, 24 | instance_role_name: str, 25 | job_role_name: str, 26 | ): 27 | self.code_bucket = code_bucket 28 | self.log_bucket = log_bucket or code_bucket 29 | self.instance_role_name = instance_role_name 30 | self.job_role_name = job_role_name 31 | self.s3_client = boto3.client("s3") 32 | self.iam_client = boto3.client("iam") 33 | self.emr_client = boto3.client("emr") 34 | 35 | def create_environment(self): 36 | self._create_s3_buckets() 37 | service_role_arn = self._create_service_role() 38 | 39 | # Make sure the role exists - there can be a tiny lag that will break setting up trust policies. 40 | # Unfortunately, using a waiter or querying or the role didn't help here. 41 | # There's a terraform issue about it here: https://github.com/hashicorp/terraform-provider-aws/issues/8905 42 | # It looks like the fix is just querying or the role, but that didn't work. 
43 | time.sleep(10) 44 | console_log("Waited for IAM role propagation") 45 | 46 | job_role_arn = self._create_runtime_role(service_role_arn) 47 | 48 | # Allow the EC2 instance profile to assume the job role 49 | self.iam_client.put_role_policy( 50 | RoleName=self.instance_role_name, 51 | PolicyName="AssumeRuntimeRole", 52 | PolicyDocument=self._runtime_role_policy(job_role_arn), 53 | ) 54 | 55 | security_config = self._create_security_config() # returns "emr-cli-runtime-roles" 56 | cluster_id = self._create_cluster(security_config, self.instance_role_name) 57 | return { 58 | "cluster_id": cluster_id, 59 | "job_role_arn": job_role_arn, 60 | "code_bucket": self.code_bucket, 61 | "log_bucket": self.log_bucket, 62 | } 63 | 64 | def print_destroy_commands(self, cluster_id: str): 65 | # fmt: off 66 | print(f"aws emr terminate-clusters --cluster-ids {cluster_id}") 67 | print(f"aws emr wait cluster-terminated --cluster-id {cluster_id}") 68 | for bucket in set([self.log_bucket, self.code_bucket]): 69 | print(f"aws s3 rm s3://{bucket} --recursive") 70 | print(f"aws s3api delete-bucket --bucket {bucket}") 71 | print(f"aws iam remove-role-from-instance-profile --instance-profile-name {self.instance_role_name} --role-name {self.instance_role_name}") # noqa E501 72 | print(f"aws iam delete-instance-profile --instance-profile-name {self.instance_role_name}") # noqa E501 73 | for role_name in [self.instance_role_name, self.job_role_name]: 74 | for policy in self.iam_client.list_attached_role_policies(RoleName=role_name).get('AttachedPolicies'): # noqa E501 75 | arn = policy.get('PolicyArn') 76 | print(f"aws iam detach-role-policy --role-name {role_name} --policy-arn {arn}") # noqa E501 77 | print(f"aws iam delete-policy --policy-arn {arn}") # noqa E501 78 | for name in self.iam_client.list_role_policies(RoleName=role_name).get('PolicyNames'): # noqa E501 79 | print(f"aws iam delete-role-policy --role-name {role_name} --policy-name {name}") # noqa E501 80 | print(f"aws iam delete-role --role-name {role_name}") 81 | print("aws emr delete-security-configuration --name emr-cli-runtime-roles") # noqa E501 82 | # fmt: on 83 | 84 | def _create_s3_buckets(self): 85 | """ 86 | Creates the code and log buckets and applies a default bucket policy to each.
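Each bucket also gets a default bucket policy that denies any request made without TLS (i.e. when aws:SecureTransport is "false").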
87 | """ 88 | for bucket_name in set([self.code_bucket, self.log_bucket]): 89 | self.s3_client.create_bucket( 90 | Bucket=bucket_name, 91 | CreateBucketConfiguration={"LocationConstraint": self.s3_client.meta.region_name}, 92 | ) 93 | console_log(f"Created S3 bucket: s3://{bucket_name}") 94 | self.s3_client.put_bucket_policy(Bucket=bucket_name, Policy=self._default_s3_bucket_policy(bucket_name)) 95 | 96 | def _default_s3_bucket_policy(self, bucket_name) -> str: 97 | bucket_policy = { 98 | "Version": "2012-10-17", 99 | "Statement": [ 100 | { 101 | "Sid": "RequireSecureTransport", 102 | "Effect": "Deny", 103 | "Principal": "*", 104 | "Action": "s3:*", 105 | "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"], 106 | "Condition": { 107 | "Bool": {"aws:SecureTransport": "false", "aws:SourceArn": f"arn:aws:s3:::{bucket_name} "} 108 | }, 109 | } 110 | ], 111 | } 112 | return json.dumps(bucket_policy) 113 | 114 | def _create_service_role(self): 115 | """ 116 | Create an EC2 instance profile and role for use with EMR 117 | https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-role-for-ec2.html 118 | """ 119 | # First create a role that can be assumed by EC2 120 | response = self.iam_client.create_role( 121 | RoleName=self.instance_role_name, 122 | AssumeRolePolicyDocument=json.dumps( 123 | { 124 | "Version": "2012-10-17", 125 | "Statement": [ 126 | { 127 | "Effect": "Allow", 128 | "Principal": {"Service": "ec2.amazonaws.com"}, 129 | "Action": "sts:AssumeRole", 130 | } 131 | ], 132 | } 133 | ), 134 | ) 135 | role_arn = response.get("Role").get("Arn") 136 | console_log(f"Created IAM Role: {role_arn}") 137 | 138 | self.iam_client.create_instance_profile(InstanceProfileName=self.instance_role_name) 139 | self.iam_client.add_role_to_instance_profile( 140 | InstanceProfileName=self.instance_role_name, 141 | RoleName=self.instance_role_name, 142 | ) 143 | return role_arn 144 | 145 | def _create_runtime_role(self, instance_profile_role_arn: str): 146 | response = self.iam_client.create_role( 147 | RoleName=self.job_role_name, 148 | AssumeRolePolicyDocument=json.dumps( 149 | { 150 | "Version": "2012-10-17", 151 | "Statement": [ 152 | { 153 | "Effect": "Allow", 154 | "Principal": {"AWS": instance_profile_role_arn}, 155 | "Action": "sts:AssumeRole", 156 | } 157 | ], 158 | } 159 | ), 160 | ) 161 | role_arn = response.get("Role").get("Arn") 162 | console_log(f"Created IAM Role: {role_arn}") 163 | 164 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_s3_policy()) 165 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_glue_policy()) 166 | 167 | return role_arn 168 | 169 | def _create_s3_policy(self): 170 | bucket_arns = [f"arn:aws:s3:::{name}" for name in [self.code_bucket, self.log_bucket]] 171 | policy_doc = { 172 | "Version": "2012-10-17", 173 | "Statement": [ 174 | { 175 | "Sid": "AllowListBuckets", 176 | "Effect": "Allow", 177 | "Action": ["s3:ListBucket"], 178 | "Resource": bucket_arns, 179 | }, 180 | { 181 | "Sid": "WriteToCodeAndLogBuckets", 182 | "Effect": "Allow", 183 | "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"], 184 | "Resource": [f"{arn}/*" for arn in bucket_arns], 185 | }, 186 | ], 187 | } 188 | response = self.iam_client.create_policy( 189 | PolicyName=self.DEFAULT_S3_POLICY_NAME, 190 | PolicyDocument=json.dumps(policy_doc), 191 | ) 192 | return response.get("Policy").get("Arn") 193 | 194 | def _create_glue_policy(self): 195 | policy_doc = { 196 | "Version": "2012-10-17", 
197 | "Statement": [ 198 | { 199 | "Sid": "GlueCreateAndReadDataCatalog", 200 | "Effect": "Allow", 201 | "Action": [ 202 | "glue:GetDatabase", 203 | "glue:GetDataBases", 204 | "glue:CreateTable", 205 | "glue:GetTable", 206 | "glue:GetTables", 207 | "glue:GetPartition", 208 | "glue:GetPartitions", 209 | "glue:CreatePartition", 210 | "glue:BatchCreatePartition", 211 | "glue:GetUserDefinedFunctions", 212 | ], 213 | "Resource": "*", 214 | }, 215 | ], 216 | } 217 | response = self.iam_client.create_policy( 218 | PolicyName=self.DEFAULT_GLUE_POLICY_NAME, 219 | PolicyDocument=json.dumps(policy_doc), 220 | ) 221 | return response.get("Policy").get("Arn") 222 | 223 | def _runtime_role_policy(self, runtime_role_arn: str): 224 | return json.dumps( 225 | { 226 | "Version": "2012-10-17", 227 | "Statement": [ 228 | { 229 | "Sid": "AllowRuntimeRoleUsage", 230 | "Effect": "Allow", 231 | "Action": ["sts:AssumeRole", "sts:TagSession"], 232 | "Resource": [runtime_role_arn], 233 | } 234 | ], 235 | } 236 | ) 237 | 238 | def _create_security_config(self): 239 | response = self.emr_client.create_security_configuration( 240 | Name="emr-cli-runtime-roles", 241 | SecurityConfiguration="""{ 242 | "AuthorizationConfiguration":{ 243 | "IAMConfiguration":{ 244 | "EnableApplicationScopedIAMRole":true 245 | } 246 | } 247 | }""", 248 | ) 249 | return response.get("Name") 250 | 251 | def _create_cluster(self, security_config_name: str, instance_profile_name: str): 252 | """ 253 | Create a simple Spark EMR on EC2 cluster. 254 | 255 | **WARNING** This cluster is only intended for demo/development purposes only. 256 | 257 | It is deployed in a public subnet by default and will auto-terminate in 4 hours. 258 | Runtime roles are enabled so you can submit jobs with the created job-role. 259 | 260 | To customize the cluster or create a cluster for production, use the AWS CLI 261 | or other Infrastructure as Code services like Terraform, CDK, or CloudFormation. 
262 | """
263 | response = self.emr_client.run_job_flow(
264 | Name="emr-cli-demo",
265 | ReleaseLabel="emr-6.9.0",
266 | LogUri=f"s3://{self.log_bucket}/logs/emr/",
267 | Applications=[
268 | {"Name": "Spark"},
269 | {"Name": "Livy"},
270 | {"Name": "JupyterEnterpriseGateway"},
271 | ],
272 | AutoTerminationPolicy={"IdleTimeout": 14400},
273 | SecurityConfiguration=security_config_name,
274 | ServiceRole="EMR_DefaultRole",
275 | JobFlowRole=instance_profile_name,
276 | Instances={
277 | "KeepJobFlowAliveWhenNoSteps": True,
278 | "InstanceFleets": [
279 | {
280 | "Name": "Primary",
281 | "InstanceFleetType": "MASTER",
282 | "TargetOnDemandCapacity": 1,
283 | "TargetSpotCapacity": 0,
284 | "InstanceTypeConfigs": [
285 | {"InstanceType": "r5.2xlarge"},
286 | {"InstanceType": "r5b.2xlarge"},
287 | {"InstanceType": "r5d.2xlarge"},
288 | {"InstanceType": "r5a.2xlarge"},
289 | ],
290 | },
291 | {
292 | "Name": "Core",
293 | "InstanceFleetType": "CORE",
294 | "TargetOnDemandCapacity": 0,
295 | "TargetSpotCapacity": 1,
296 | "InstanceTypeConfigs": [
297 | {"InstanceType": "c5a.2xlarge"},
298 | {"InstanceType": "m5a.2xlarge"},
299 | {"InstanceType": "r5a.2xlarge"},
300 | ],
301 | "LaunchSpecifications": {
302 | "OnDemandSpecification": {"AllocationStrategy": "lowest-price"},
303 | "SpotSpecification": {
304 | "TimeoutDurationMinutes": 10,
305 | "TimeoutAction": "SWITCH_TO_ON_DEMAND",
306 | "AllocationStrategy": "capacity-optimized",
307 | },
308 | },
309 | },
310 | ],
311 | },
312 | )
313 | cluster_id = response.get("JobFlowId")
314 | console_log(f"Created EMR Cluster: {cluster_id}")
315 | return cluster_id
316 | 
317 | 
318 | class EMREC2:
319 | def __init__(
320 | self,
321 | cluster_id: str,
322 | deployment_package: DeploymentPackage,
323 | job_role: Optional[str] = None,
324 | region: str = "",
325 | ) -> None:
326 | self.cluster_id = cluster_id
327 | self.dp = deployment_package
328 | self.job_role = job_role
329 | self.client = boto3.client("emr", region_name=region) if region else boto3.client("emr")  # honor an explicit region, like the other deployment clients
330 | self.s3_client = boto3.client("s3")
331 | 
332 | def run_job(
333 | self,
334 | job_name: str,
335 | job_args: Optional[List[str]] = None,
336 | spark_submit_opts: Optional[str] = None,
337 | wait: bool = True,
338 | show_logs: bool = False,
339 | ):
340 | """
341 | Run a Spark job on EMR on EC2. Some important notes:
342 | 1. --deploy-mode cluster is important for distributing dependencies
343 | 2. entrypoint script must be the last argument
344 | 3. 
show_logs implies `wait=True` 345 | """ 346 | deploy_mode = "client" if show_logs else "cluster" 347 | spark_submit_params = self.dp.spark_submit_parameters().params_for("emr_ec2") 348 | 349 | if spark_submit_opts: 350 | spark_submit_params = f"{spark_submit_params} {spark_submit_opts}".strip() 351 | 352 | # Escape job args if they're provided 353 | if job_args: 354 | job_args = [shlex.quote(arg) for arg in job_args] 355 | 356 | # show_logs is only compatible with client mode 357 | # --conf spark.archives is only compatible with cluster mode 358 | # So if we have both, we have to throw an error 359 | # See https://issues.apache.org/jira/browse/SPARK-36088 360 | if show_logs and ("--conf spark.archives" in spark_submit_params or "--archives" in spark_submit_params): 361 | raise RuntimeError( 362 | "--show-stdout is not compatible with projects that make use of " 363 | + "dependencies.\nPlease 👍 this GitHub issue to voice your support: " 364 | + "https://github.com/awslabs/amazon-emr-cli/issues/12" 365 | ) 366 | 367 | # define params for emr.add_job_flow_steps 368 | add_job_flow_steps_params = { 369 | "JobFlowId": self.cluster_id, 370 | "Steps": [ 371 | { 372 | "Name": job_name, 373 | "ActionOnFailure": "CONTINUE", 374 | "HadoopJarStep": { 375 | "Jar": "command-runner.jar", 376 | "Args": [ 377 | "spark-submit", 378 | "--deploy-mode", 379 | deploy_mode, 380 | ] 381 | + spark_submit_params.split(" ") 382 | + [self.dp.entrypoint_uri()] 383 | + (job_args or []), 384 | }, 385 | } 386 | ], 387 | } 388 | 389 | # conditionally add ExecutionRoleArn to add_job_flow_steps if a runtime role is requested for this step 390 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-steps-runtime-roles.html 391 | if self.job_role: 392 | add_job_flow_steps_params["ExecutionRoleArn"] = self.job_role 393 | 394 | try: 395 | response = self.client.add_job_flow_steps(**add_job_flow_steps_params) 396 | except ClientError as err: 397 | console_log(err) 398 | sys.exit(1) 399 | 400 | step_id = response.get("StepIds")[0] 401 | console_log(f"Job submitted to EMR on EC2 (Step ID: {step_id})") 402 | if not wait and not show_logs: 403 | return step_id 404 | 405 | console_log("Waiting for step to complete...") 406 | waiter = self.client.get_waiter("step_complete") 407 | job_failed = False 408 | try: 409 | waiter.wait( 410 | ClusterId=self.cluster_id, 411 | StepId=step_id, 412 | ) 413 | console_log("Job completed successfully!") 414 | except WaiterError: 415 | console_log("EMR on EC2 step failed!") 416 | job_failed = True # So we can exit(1) later 417 | if not show_logs: 418 | sys.exit(1) 419 | 420 | if show_logs: 421 | # We need to validate s3-logging is enabled and fetch the location of the logs 422 | try: 423 | logs_location = self._fetch_log_location() 424 | stdout_location = self._wait_for_logs(step_id, logs_location, 30 * 60) 425 | console_log(f"stdout for {step_id}\n{'-'*36}") 426 | print_s3_gz(self.s3_client, stdout_location) 427 | if job_failed: 428 | sys.exit(1) 429 | except RuntimeError as e: 430 | console_log(f"ERR: {e}") 431 | sys.exit(1) 432 | except WaiterError as e: 433 | console_log(f"ERR: While waiting for logs to appear: {e}") 434 | sys.exit(1) 435 | 436 | return step_id 437 | 438 | def _fetch_log_location(self) -> str: 439 | """ 440 | Fetch the cluster and ensure it has the loguri set, 441 | then return the s3 location. 
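Legacy s3n:// URIs are normalized to s3:// (e.g. s3n://bucket/logs/ becomes
s3://bucket/logs/), matching what the rest of the CLI expects.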
442 | """ 443 | cluster_info = self.client.describe_cluster(ClusterId=self.cluster_id) 444 | loguri = cluster_info.get("Cluster").get("LogUri") 445 | if loguri is None: 446 | raise RuntimeError("Cluster does not have S3 logging enabled") 447 | return loguri.replace("s3n:", "s3:") 448 | 449 | def _wait_for_logs(self, step_id: str, log_base: str, timeout_secs: int) -> str: 450 | """ 451 | Waits for stdout logs to appear in S3. Checks every LOG_WAITER_DELAY_SEC seconds 452 | until `timeout_secs`. 453 | """ 454 | object_name = join(log_base, self.cluster_id, "steps", step_id, "stdout.gz") 455 | console_log(f"Waiting for logs to appear in {object_name} ...") 456 | bucket_name, key = parse_bucket_uri(object_name) 457 | waiter = self.s3_client.get_waiter("object_exists") 458 | waiter.wait( 459 | Bucket=bucket_name, 460 | Key=key, 461 | WaiterConfig={ 462 | "Delay": LOG_WAITER_DELAY_SEC, 463 | "MaxAttempts": int(timeout_secs / LOG_WAITER_DELAY_SEC), 464 | }, 465 | ) 466 | return object_name 467 | -------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_eks.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from os.path import join 4 | from platform import release 5 | from time import sleep 6 | from typing import List, Optional 7 | 8 | import boto3 9 | from emr_cli.deployments.emr_serverless import DeploymentPackage 10 | from emr_cli.utils import console_log, print_s3_gz 11 | 12 | 13 | class EMREKS: 14 | def __init__( 15 | self, virtual_cluster_id: str, job_role: str, deployment_package: DeploymentPackage, region: str = "" 16 | ) -> None: 17 | self.virtual_cluster_id = virtual_cluster_id 18 | self.job_role = job_role 19 | self.dp = deployment_package 20 | self.s3_client = boto3.client("s3") 21 | if region: 22 | self.client = boto3.client("emr-containers", region_name=region) 23 | self.emr_client = boto3.client("emr", region_name=region) 24 | else: 25 | # Note that boto3 uses AWS_DEFAULT_REGION, not AWS_REGION 26 | # We may want to add an extra check here for the latter. 27 | self.client = boto3.client("emr-containers") 28 | self.emr_client = boto3.client("emr") 29 | 30 | def fetch_latest_release_label(self): 31 | response = self.emr_client.list_release_labels( 32 | Filters={"Application": "Spark", "Prefix": "emr-6"}, MaxResults=1 33 | ) 34 | if len(response["ReleaseLabels"]) == 0: 35 | console_log("Error: No release labels found") 36 | sys.exit(1) 37 | return response["ReleaseLabels"][0] 38 | 39 | def run_job( 40 | self, 41 | job_name: str, 42 | job_args: Optional[List[str]] = None, 43 | spark_submit_opts: Optional[str] = None, 44 | wait: bool = True, 45 | show_logs: bool = False, 46 | s3_logs_uri: Optional[str] = None, 47 | release_label: Optional[str] = None, 48 | ): 49 | if show_logs and not s3_logs_uri: 50 | raise RuntimeError("--show-stdout requires --s3-logs-uri to be set.") 51 | 52 | if release_label is None: 53 | release_label = self.fetch_latest_release_label() 54 | console_log(f"Using latest release label {release_label}") 55 | release_label = f"{release_label}-latest" 56 | 57 | # If job_name is the default, just replace the space. 
58 | # Otherwise throw an error
59 | if job_name == "emr-cli job":
60 | job_name = "emr-cli_job"
61 | elif not re.fullmatch(r"[\.\-_/#A-Za-z0-9]+", job_name):
62 | console_log(rf"Invalid characters in job name {job_name} - EMR on EKS must match [\.\-_/#A-Za-z0-9]+")
63 | sys.exit(1)
64 | 
65 | jobDriver = {
66 | "sparkSubmitJobDriver": {
67 | "entryPoint": self.dp.entrypoint_uri(),
68 | }
69 | }
70 | spark_submit_parameters = self.dp.spark_submit_parameters().params_for("emr_eks")
71 | 
72 | if spark_submit_opts:
73 | spark_submit_parameters = f"{spark_submit_parameters} {spark_submit_opts}".strip()
74 | 
75 | if spark_submit_parameters:
76 | jobDriver["sparkSubmitJobDriver"]["sparkSubmitParameters"] = spark_submit_parameters
77 | 
78 | if job_args:
79 | jobDriver["sparkSubmitJobDriver"]["entryPointArguments"] = job_args # type: ignore
80 | 
81 | config_overrides = {}
82 | if s3_logs_uri:
83 | config_overrides = {"monitoringConfiguration": {"s3MonitoringConfiguration": {"logUri": s3_logs_uri}}}
84 | 
85 | response = self.client.start_job_run(
86 | virtualClusterId=self.virtual_cluster_id,
87 | executionRoleArn=self.job_role,
88 | name=job_name,
89 | jobDriver=jobDriver,
90 | configurationOverrides=config_overrides,
91 | releaseLabel=release_label,
92 | )
93 | job_run_id = response.get("id")
94 | 
95 | console_log(f"Job submitted to EMR Virtual Cluster (Job Run ID: {job_run_id})")
96 | if not wait and not show_logs:
97 | return job_run_id
98 | 
99 | console_log("Waiting for job to complete...")
100 | job_done = False
101 | job_state = "SUBMITTED"
102 | jr_response = {}
103 | while not job_done:
104 | jr_response = self.get_job_run(job_run_id)
105 | new_state = jr_response.get("state")
106 | if new_state != job_state:
107 | console_log(f"Job state is now: {new_state}")
108 | job_state = new_state
109 | job_done = new_state in [
110 | "COMPLETED",
111 | "FAILED",
112 | "CANCEL_PENDING",
113 | "CANCELLED",
114 | ]
115 | sleep(2)
116 | 
117 | if show_logs:
118 | console_log(f"stdout for {job_run_id}\n{'-'*38}")
119 | log_location = join(
120 | f"{s3_logs_uri}",
121 | self.virtual_cluster_id,
122 | "jobs",
123 | job_run_id,
124 | "containers",
125 | f"spark-{job_run_id}",
126 | f"spark-{job_run_id}-driver",
127 | "stdout.gz",
128 | )
129 | print_s3_gz(self.s3_client, log_location)
130 | 
131 | if jr_response.get("state") != "COMPLETED":
132 | console_log(f"EMR Containers job failed: {jr_response.get('stateDetails')}")
133 | sys.exit(1)
134 | console_log("Job completed successfully!")
135 | 
136 | return job_run_id
137 | 
138 | def get_job_run(self, job_run_id: str) -> dict:
139 | response = self.client.describe_job_run(virtualClusterId=self.virtual_cluster_id, id=job_run_id)
140 | return response.get("jobRun")
141 | 
-------------------------------------------------------------------------------- /src/emr_cli/deployments/emr_serverless.py: --------------------------------------------------------------------------------
1 | import abc
2 | import json
3 | import os
4 | import sys
5 | import zipfile
6 | from os.path import join
7 | from time import sleep
8 | from typing import List, Optional
9 | 
10 | import boto3
11 | from emr_cli.deployments import SparkParams
12 | from emr_cli.utils import console_log, find_files, mkdir, print_s3_gz
13 | 
14 | 
15 | class DeploymentPackage(metaclass=abc.ABCMeta):
16 | def __init__(self, entry_point_path: str = "entrypoint.py", s3_target_uri: str = "") -> None:
17 | self.entry_point_path = entry_point_path
18 | self.dist_dir = "dist"
19 | 
20 | # We might not populate this until we 
actually deploy
21 | self.s3_uri_base = s3_target_uri
22 | 
23 | def spark_submit_parameters(self) -> SparkParams:
24 | """
25 | Returns any additional arguments necessary for spark-submit
26 | """
27 | return SparkParams()
28 | 
29 | def entrypoint_uri(self) -> str:
30 | """
31 | Returns the full S3 URI to the entrypoint file, e.g. s3://bucket/path/somecode.py
32 | """
33 | if not self.s3_uri_base:  # covers both None and the "" default
34 | raise Exception("S3 URI has not been set, aborting")
35 | return os.path.join(self.s3_uri_base, self.entry_point_path)
36 | 
37 | def _zip_local_pyfiles(self):
38 | """
39 | Zip all the files except for the entrypoint file.
40 | """
41 | py_files = find_files(os.getcwd(), [".venv"], ".py")
42 | py_files.remove(os.path.abspath(self.entry_point_path))
43 | cwd = os.getcwd()
44 | mkdir(self.dist_dir)
45 | with zipfile.ZipFile(f"{self.dist_dir}/pyfiles.zip", "w") as zf:
46 | for file in py_files:
47 | relpath = os.path.relpath(file, cwd)
48 | zf.write(file, relpath)
49 | 
50 | 
51 | class Bootstrap:
52 | # Maybe add some UUIDs to these?
53 | DEFAULT_S3_POLICY_NAME = "emr-cli-S3Access"
54 | DEFAULT_GLUE_POLICY_NAME = "emr-cli-GlueAccess"
55 | 
56 | def __init__(self, code_bucket: str, log_bucket: str, job_role_name: str):
57 | self.code_bucket = code_bucket
58 | self.log_bucket = log_bucket or code_bucket
59 | self.job_role_name = job_role_name
60 | self.s3_client = boto3.client("s3")
61 | self.iam_client = boto3.client("iam")
62 | self.emrs_client = boto3.client("emr-serverless")
63 | 
64 | def create_environment(self):
65 | self._create_s3_buckets()
66 | job_role_arn = self._create_job_role()
67 | app_id = self._create_application()
68 | return {
69 | "application_id": app_id,
70 | "job_role_arn": job_role_arn,
71 | "code_bucket": self.code_bucket,
72 | "log_bucket": self.log_bucket,
73 | }
74 | 
75 | def print_destroy_commands(self, application_id: str):
76 | # fmt: off
77 | for bucket in set([self.log_bucket, self.code_bucket]):
78 | print(f"aws s3 rm s3://{bucket} --recursive")
79 | print(f"aws s3api delete-bucket --bucket {bucket}")
80 | for policy in self.iam_client.list_attached_role_policies(RoleName=self.job_role_name).get('AttachedPolicies'): # noqa E501
81 | arn = policy.get('PolicyArn')
82 | print(f"aws iam detach-role-policy --role-name {self.job_role_name} --policy-arn {arn}") # noqa E501
83 | print(f"aws iam delete-policy --policy-arn {arn}") # noqa E501
84 | print(f"aws iam delete-role --role-name {self.job_role_name}")
85 | print(f"aws emr-serverless stop-application --application-id {application_id}")
86 | print(f"aws emr-serverless delete-application --application-id {application_id}") # noqa E501
87 | # fmt: on
88 | 
89 | def _create_s3_buckets(self):
90 | """
91 | Creates both the source and log buckets if they don't already exist.
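A deny-unless-TLS bucket policy (aws:SecureTransport) is attached to each
bucket after creation.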
92 | """
93 | for bucket_name in set([self.code_bucket, self.log_bucket]):
94 | self.s3_client.create_bucket(
95 | Bucket=bucket_name,
96 | CreateBucketConfiguration={
97 | "LocationConstraint": self.s3_client.meta.region_name # type: ignore
98 | },
99 | )
100 | 
101 | console_log(f"Created S3 bucket: s3://{bucket_name}")
102 | self.s3_client.put_bucket_policy(Bucket=bucket_name, Policy=self._default_s3_bucket_policy(bucket_name))
103 | 
104 | def _default_s3_bucket_policy(self, bucket_name) -> str:
105 | bucket_policy = {
106 | "Version": "2012-10-17",
107 | "Statement": [
108 | {
109 | "Sid": "RequireSecureTransport",
110 | "Effect": "Deny",
111 | "Principal": "*",
112 | "Action": "s3:*",
113 | "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"],
114 | "Condition": {
115 | "Bool": {"aws:SecureTransport": "false"}
116 | },
117 | }
118 | ],
119 | }
120 | return json.dumps(bucket_policy)
121 | 
122 | def _create_job_role(self):
123 | # First create a role that can be assumed by EMR Serverless jobs
124 | response = self.iam_client.create_role(
125 | RoleName=self.job_role_name,
126 | AssumeRolePolicyDocument=json.dumps(
127 | {
128 | "Version": "2012-10-17",
129 | "Statement": [
130 | {
131 | "Effect": "Allow",
132 | "Principal": {"Service": "emr-serverless.amazonaws.com"},
133 | "Action": "sts:AssumeRole",
134 | }
135 | ],
136 | }
137 | ),
138 | )
139 | role_arn = response.get("Role").get("Arn")
140 | console_log(f"Created IAM Role: {role_arn}")
141 | 
142 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_s3_policy())
143 | self.iam_client.attach_role_policy(RoleName=self.job_role_name, PolicyArn=self._create_glue_policy())
144 | 
145 | return role_arn
146 | 
147 | def _create_s3_policy(self):
148 | bucket_arns = [f"arn:aws:s3:::{name}" for name in [self.code_bucket, self.log_bucket]]
149 | policy_doc = {
150 | "Version": "2012-10-17",
151 | "Statement": [
152 | {
153 | "Sid": "AllowListBuckets",
154 | "Effect": "Allow",
155 | "Action": ["s3:ListBucket"],
156 | "Resource": bucket_arns,
157 | },
158 | {
159 | "Sid": "WriteToCodeAndLogBuckets",
160 | "Effect": "Allow",
161 | "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
162 | "Resource": [f"{arn}/*" for arn in bucket_arns],
163 | },
164 | ],
165 | }
166 | response = self.iam_client.create_policy(
167 | PolicyName=self.DEFAULT_S3_POLICY_NAME,
168 | PolicyDocument=json.dumps(policy_doc),
169 | )
170 | return response.get("Policy").get("Arn")
171 | 
172 | def _create_glue_policy(self):
173 | policy_doc = {
174 | "Version": "2012-10-17",
175 | "Statement": [
176 | {
177 | "Sid": "GlueCreateAndReadDataCatalog",
178 | "Effect": "Allow",
179 | "Action": [
180 | "glue:GetDatabase",
181 | "glue:GetDatabases",
182 | "glue:CreateTable",
183 | "glue:GetTable",
184 | "glue:GetTables",
185 | "glue:GetPartition",
186 | "glue:GetPartitions",
187 | "glue:CreatePartition",
188 | "glue:BatchCreatePartition",
189 | "glue:GetUserDefinedFunctions",
190 | ],
191 | "Resource": "*",
192 | },
193 | ],
194 | }
195 | response = self.iam_client.create_policy(
196 | PolicyName=self.DEFAULT_GLUE_POLICY_NAME,
197 | PolicyDocument=json.dumps(policy_doc),
198 | )
199 | return response.get("Policy").get("Arn")
200 | 
201 | def _create_application(self):
202 | """
203 | Create a simple Spark EMR Serverless application with a default (but minimal)
204 | pre-initialized capacity.
205 | 
206 | This application is intended for demo purposes only. 
To customize the 207 | application or create an application for production, use the AWS CLI or other 208 | Infrastructure as Code services like Terraform, CDK, or CloudFormation. 209 | """ 210 | response = self.emrs_client.create_application( 211 | name="emr-cli-demo", 212 | releaseLabel="emr-6.9.0", 213 | type="SPARK", 214 | ) 215 | app_id = response.get("applicationId") 216 | console_log(f"Created EMR Serverless application: {app_id}") 217 | self.emrs_client.start_application(applicationId=app_id) 218 | return app_id 219 | 220 | 221 | class EMRServerless: 222 | def __init__( 223 | self, 224 | application_id: str, 225 | job_role: str, 226 | deployment_package: DeploymentPackage, 227 | region: str = "", 228 | ) -> None: 229 | self.application_id = application_id 230 | self.job_role = job_role 231 | self.dp = deployment_package 232 | self.s3_client = boto3.client("s3") 233 | if region: 234 | self.client = boto3.client("emr-serverless", region_name=region) 235 | else: 236 | # Note that boto3 uses AWS_DEFAULT_REGION, not AWS_REGION 237 | # We may want to add an extra check here for the latter. 238 | self.client = boto3.client("emr-serverless") 239 | 240 | def run_job( 241 | self, 242 | job_name: str, 243 | job_args: Optional[List[str]] = None, 244 | spark_submit_opts: Optional[str] = None, 245 | wait: bool = True, 246 | show_logs: bool = False, 247 | s3_logs_uri: Optional[str] = None, 248 | timeout: Optional[int] = None, 249 | ): 250 | if show_logs and not s3_logs_uri: 251 | raise RuntimeError("--show-stdout requires --s3-logs-uri to be set.") 252 | 253 | jobDriver = { 254 | "sparkSubmit": { 255 | "entryPoint": self.dp.entrypoint_uri(), 256 | } 257 | } 258 | spark_submit_parameters = self.dp.spark_submit_parameters().params_for("emr_serverless") 259 | 260 | if spark_submit_opts: 261 | spark_submit_parameters = f"{spark_submit_parameters} {spark_submit_opts}".strip() 262 | 263 | if spark_submit_parameters: 264 | jobDriver["sparkSubmit"]["sparkSubmitParameters"] = spark_submit_parameters 265 | 266 | if job_args: 267 | jobDriver["sparkSubmit"]["entryPointArguments"] = job_args # type: ignore 268 | 269 | config_overrides = {} 270 | if s3_logs_uri: 271 | config_overrides = {"monitoringConfiguration": {"s3MonitoringConfiguration": {"logUri": s3_logs_uri}}} 272 | 273 | response = self.client.start_job_run( 274 | applicationId=self.application_id, 275 | executionRoleArn=self.job_role, 276 | name=job_name, 277 | jobDriver=jobDriver, 278 | configurationOverrides=config_overrides, 279 | executionTimeoutMinutes=timeout, 280 | ) 281 | job_run_id = response.get("jobRunId") 282 | 283 | console_log(f"Job submitted to EMR Serverless (Job Run ID: {job_run_id})") 284 | if not wait and not show_logs: 285 | return job_run_id 286 | 287 | console_log("Waiting for job to complete...") 288 | job_done = False 289 | job_state = "SUBMITTED" 290 | jr_response = {} 291 | while not job_done: 292 | jr_response = self.get_job_run(job_run_id) 293 | new_state = jr_response.get("state") 294 | if new_state != job_state: 295 | console_log(f"Job state is now: {new_state}") 296 | job_state = new_state 297 | job_done = new_state in [ 298 | "SUCCESS", 299 | "FAILED", 300 | "CANCELLING", 301 | "CANCELLED", 302 | ] 303 | sleep(2) 304 | 305 | if show_logs: 306 | console_log(f"stdout for {job_run_id}\n{'-'*38}") 307 | log_location = join( 308 | f"{s3_logs_uri}", 309 | "applications", 310 | self.application_id, 311 | "jobs", 312 | job_run_id, 313 | "SPARK_DRIVER", 314 | "stdout.gz", 315 | ) 316 | print_s3_gz(self.s3_client, log_location) 
317 | 
318 | if jr_response.get("state") != "SUCCESS":
319 | console_log(f"EMR Serverless job failed: {jr_response.get('stateDetails')}")
320 | sys.exit(1)
321 | console_log("Job completed successfully!")
322 | 
323 | return job_run_id
324 | 
325 | def get_job_run(self, job_run_id: str) -> dict:
326 | response = self.client.get_job_run(applicationId=self.application_id, jobRunId=job_run_id)
327 | return response.get("jobRun")
328 | 
-------------------------------------------------------------------------------- /src/emr_cli/emr_cli.py: --------------------------------------------------------------------------------
1 | try:
2 | from importlib.metadata import version
3 | except ModuleNotFoundError:
4 | # Python 3.7 compatibility
5 | # https://github.com/python/importlib_metadata#compatibility-with-python-3.7
6 | from importlib_metadata import version
7 | 
8 | import click
9 | from emr_cli.config import DEFAULT_CONFIG_PATH, ConfigReader, ConfigWriter
10 | from emr_cli.deployments.emr_ec2 import EMREC2
11 | from emr_cli.deployments.emr_ec2 import Bootstrap as BootstrapEMRonEC2
12 | from emr_cli.deployments.emr_eks import EMREKS
13 | from emr_cli.packaging.detector import ProjectDetector
14 | from emr_cli.utils import console_log
15 | 
16 | from .deployments.emr_serverless import Bootstrap as BootstrapEMRServerless
17 | from .deployments.emr_serverless import EMRServerless
18 | from .packaging.python_project import PythonProject
19 | 
20 | 
21 | @click.group()
22 | @click.pass_context
23 | def cli(ctx):
24 | """
25 | Package, deploy, and run PySpark projects on EMR.
26 | """
27 | # If we want the user to be able to force a project type, check out click.Choice
28 | ctx.obj = ProjectDetector().detect()
29 | 
30 | # If a config file exists, set those as defaults for all other options
31 | ctx.default_map = ConfigReader.read()
32 | if ctx.default_map:
33 | console_log(f"Using config file: {DEFAULT_CONFIG_PATH}")
34 | 
35 | 
36 | @click.command()
37 | @click.pass_obj
38 | def status(project):
39 | console_log("")
40 | print(f"Project type:\t\t{project.__name__}")
41 | print(f"EMR CLI version:\t{version('emr-cli')}")
42 | 
43 | 
44 | @click.command()
45 | @click.option(
46 | "--target",
47 | type=click.Choice(["emr-serverless", "emr-ec2"]),
48 | help="The type of environment to bootstrap.",
49 | )
50 | @click.option("--code-bucket", help="Bucket where source code will be uploaded", required=True)
51 | @click.option("--logs-bucket", help="Bucket where logs will be uploaded")
52 | @click.option(
53 | "--instance-profile-name",
54 | help="""
55 | The name of the IAM role to be created for your EMR on EC2 instances.
56 | """,
57 | required=False,
58 | )
59 | @click.option(
60 | "--job-role-name",
61 | help="""
62 | The name of the IAM role to be created for your EMR Serverless jobs.
63 | This role has access to read and write to the source code and logs buckets,
64 | and access to read and create tables in the Glue Data Catalog.""",
65 | required=True,
66 | )
67 | @click.option(
68 | "--destroy",
69 | default=False,
70 | is_flag=True,
71 | help="Prints the commands necessary to destroy the created environment.",
72 | )
73 | def bootstrap(target, code_bucket, logs_bucket, instance_profile_name, job_role_name, destroy):
74 | """
75 | Bootstrap an EMR Serverless or EMR on EC2 environment.
76 | 
77 | Includes creating S3 buckets, tightly-scoped IAM roles, an EMR Serverless
78 | application or EMR on EC2 cluster, and an emr-cli configuration file.
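Example invocation (a sketch; the bucket and role names are placeholders):
emr bootstrap --target emr-serverless --code-bucket my-emr-code-bucket --job-role-name emr-cli-job-role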
79 | """ 80 | # EMR on EC2 additionally needs an instance profile role 81 | if target == "emr-ec2" and instance_profile_name is None: 82 | raise click.BadArgumentUsage("EMR on EC2 clusters require --instance-profile-name to be set.") 83 | 84 | if target == "emr-serverless": 85 | b = BootstrapEMRServerless(code_bucket, logs_bucket, job_role_name) 86 | else: 87 | b = BootstrapEMRonEC2(code_bucket, logs_bucket, instance_profile_name, job_role_name) 88 | 89 | resource_id = "application_id" if target == "emr-serverless" else "cluster_id" 90 | if destroy: 91 | c = ConfigReader.read() 92 | b.print_destroy_commands(c.get("run", {}).get(resource_id, None)) 93 | exit(0) 94 | 95 | # For EMR Serverless, we need to create an S3 bucket, a job role, and an Application 96 | config = b.create_environment() 97 | 98 | # The resulting config is relevant for the "run" command 99 | run_config = { 100 | "run": { 101 | resource_id: config.get(resource_id), 102 | "job_role": config.get("job_role_arn"), 103 | "s3_code_uri": f"s3://{config.get('code_bucket')}/code/pyspark/", 104 | "s3_logs_uri": f"s3://{config.get('log_bucket')}/logs/pyspark/", 105 | } 106 | } 107 | ConfigWriter.write(run_config) 108 | 109 | 110 | @click.command() 111 | @click.argument("path") 112 | @click.option( 113 | "--dockerfile", 114 | default=False, 115 | is_flag=True, 116 | help="Only create a sample Dockerfile for packaging Python dependencies", 117 | ) 118 | @click.option( 119 | "--project-type", 120 | type=click.Choice(["python", "poetry"]), 121 | help="The type of project to create.", 122 | default="python", 123 | ) 124 | def init(path, dockerfile, project_type): 125 | """ 126 | Initialize a local PySpark project. 127 | """ 128 | if dockerfile: 129 | click.echo("Creating sample Dockerfile...") 130 | PythonProject().copy_single_file("Dockerfile") 131 | else: 132 | kls = ProjectDetector().detect(project_type) 133 | kls().initialize(path) 134 | 135 | 136 | @click.command() 137 | @click.option( 138 | "--entry-point", 139 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 140 | help="Entrypoint file", 141 | required=True, 142 | ) 143 | @click.pass_obj 144 | def package(project, entry_point): 145 | """ 146 | Package a project and dependencies into dist/ 147 | """ 148 | p = project(entry_point) 149 | p.build() 150 | 151 | 152 | @click.command() 153 | @click.option( 154 | "--entry-point", 155 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 156 | help="PySpark file to deploy", 157 | required=True, 158 | ) 159 | @click.option( 160 | "--s3-code-uri", 161 | help="Where to copy code artifacts to", 162 | required=True, 163 | ) 164 | @click.pass_obj 165 | def deploy(project, entry_point, s3_code_uri): 166 | """ 167 | Copy a local project to S3. 
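Example (the bucket and prefix are placeholders):
emr deploy --entry-point entrypoint.py --s3-code-uri s3://my-bucket/code/pyspark/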
168 | """ 169 | p = project(entry_point) 170 | p.deploy(s3_code_uri) 171 | 172 | 173 | @click.command() 174 | @click.option("--application-id", help="EMR Serverless Application ID") 175 | @click.option("--cluster-id", help="EMR on EC2 Cluster ID") 176 | @click.option("--virtual-cluster-id", help="EMR on EKS Virtual Cluster ID") 177 | @click.option( 178 | "--entry-point", 179 | type=click.Path(exists=True, dir_okay=False, allow_dash=False), 180 | help="Python or Jar file for the main entrypoint", 181 | ) 182 | @click.option("--job-role", help="IAM Role ARN to use for the job execution") 183 | @click.option("--wait", default=False, is_flag=True, help="Wait for job to finish") 184 | @click.option("--s3-code-uri", help="Where to copy/run code artifacts to/from") 185 | @click.option("--s3-logs-uri", help="Where to send EMR Serverless logs to") 186 | @click.option("--job-name", help="The name of the job", default="emr-cli job") 187 | @click.option( 188 | "--job-args", 189 | help="Comma-delimited string of arguments to be passed to Spark job", 190 | default=None, 191 | ) 192 | @click.option( 193 | "--spark-submit-opts", 194 | help="String of spark-submit options", 195 | default=None, 196 | ) 197 | @click.option( 198 | "--build", 199 | help="Package and deploy job artifacts", 200 | default=False, 201 | is_flag=True, 202 | ) 203 | @click.option( 204 | "--show-stdout", 205 | help="Show the stdout of the job after it's finished", 206 | default=False, 207 | is_flag=True, 208 | ) 209 | @click.option( 210 | "--save-config", 211 | help="Update the config file with the provided options", 212 | is_flag=True, 213 | ) 214 | @click.option( 215 | "--emr-eks-release-label", help="EMR on EKS release label (emr-6.15.0) - defaults to latest release", default=None 216 | ) 217 | @click.option( 218 | "--emr-serverless-timeout", 219 | help="EMR Serverless job timeout in minutes - defaults to 12 hours", 220 | default=720, # set to AWS default value (12 hours in minutes) 221 | type=int 222 | ) 223 | @click.pass_obj 224 | @click.pass_context 225 | def run( 226 | ctx, 227 | project, 228 | application_id, 229 | cluster_id, 230 | virtual_cluster_id, 231 | entry_point, 232 | job_role, 233 | wait, 234 | s3_code_uri, 235 | s3_logs_uri, 236 | job_name, 237 | job_args, 238 | spark_submit_opts, 239 | build, 240 | show_stdout, 241 | save_config, 242 | emr_eks_release_label, 243 | emr_serverless_timeout, 244 | ): 245 | """ 246 | Run a project on EMR, optionally build and deploy 247 | """ 248 | resource_ids = [cluster_id, application_id, virtual_cluster_id] 249 | 250 | # A resource ID must be specified 251 | if not any(resource_ids): 252 | raise click.BadArgumentUsage( 253 | "One of --application-id, --cluster-id, or --virtual-cluster-id must be specified." 
254 | ) 255 | 256 | # Only one resource ID can be specified 257 | if resource_ids.count(None) != (len(resource_ids) - 1): 258 | raise click.BadArgumentUsage( 259 | "Only one of --application-id, --cluster-id, or --virtual-cluster-id can be specified" 260 | ) 261 | 262 | # We require entry-point and s3-code-uri 263 | if entry_point is None or s3_code_uri is None: 264 | raise click.BadArgumentUsage("--entry-point and --s3-code-uri are required.") 265 | p = project(entry_point, s3_code_uri) 266 | 267 | # Do a brief validation of the EMR on EKS release label 268 | if emr_eks_release_label: 269 | if not virtual_cluster_id: 270 | raise click.BadArgumentUsage("--emr-eks-release-label can only be used with --virtual-cluster-id") 271 | elif not emr_eks_release_label.startswith("emr-"): 272 | raise click.BadArgumentUsage(f"--emr-eks-release-label must start with 'emr-', provided '{emr_eks_release_label}'") 273 | 274 | # If the user passes --save-config, update our stored config file 275 | if save_config: 276 | run_config = {"run": ctx.__dict__.get("params")} 277 | del run_config["run"]["save_config"] 278 | ConfigWriter.write(run_config) 279 | console_log(f"Config file saved to {DEFAULT_CONFIG_PATH}. Use `emr run` to re-use your configuration.") # noqa: E501 280 | 281 | if build: 282 | p.build() 283 | p.deploy(s3_code_uri) 284 | 285 | if any([application_id, virtual_cluster_id]): 286 | # We require entry-point and job-role 287 | if entry_point is None or job_role is None: 288 | raise click.BadArgumentUsage( 289 | "--entry-point and --job-role are required if --application-id or --virtual-cluster-id is used." 290 | ) 291 | 292 | if emr_serverless_timeout < 0: 293 | raise click.BadArgumentUsage("--emr-serverless-timeout must be greater than or equal to 0.") 294 | 295 | # application_id indicates EMR Serverless job 296 | if application_id is not None: 297 | if job_args: 298 | job_args = job_args.split(",") 299 | emrs = EMRServerless(application_id, job_role, p) 300 | emrs.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout, s3_logs_uri, emr_serverless_timeout) 301 | 302 | # cluster_id indicates EMR on EC2 job 303 | if cluster_id is not None: 304 | if job_args: 305 | job_args = job_args.split(",") 306 | emr = EMREC2(cluster_id, p, job_role) 307 | emr.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout) 308 | 309 | # virtual_cluster_id is EMR on EKS 310 | if virtual_cluster_id is not None: 311 | if job_args: 312 | job_args = job_args.split(",") 313 | emreks = EMREKS(virtual_cluster_id, job_role, p) 314 | emreks.run_job(job_name, job_args, spark_submit_opts, wait, show_stdout, s3_logs_uri, emr_eks_release_label) 315 | 316 | 317 | cli.add_command(package) 318 | cli.add_command(deploy) 319 | cli.add_command(run) 320 | cli.add_command(init) 321 | cli.add_command(bootstrap) 322 | cli.add_command(status) 323 | 324 | if __name__ == "__main__": 325 | cli() # type: ignore 326 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | from emr_cli.deployments.emr_serverless import DeploymentPackage 4 | 5 | from emr_cli.packaging.python_files_project import PythonFilesProject 6 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject 7 | from emr_cli.packaging.python_project import PythonProject 8 | from emr_cli.packaging.simple_project import SimpleProject 9 | from emr_cli.utils import 
find_files
10 | 
11 | 
12 | class ProjectDetector:
13 | """
14 | Detects the type of package used for Spark deployment.
15 | - Single PySpark file
16 | - setuptools-based project
17 | - poetry project
18 | - requirements.txt
19 | """
20 | 
21 | PROJECT_TYPE_MAPPINGS = {
22 | "single-file": SimpleProject,
23 | "python": PythonProject,
24 | "poetry": PythonPoetryProject,
25 | }
26 | 
27 | def detect(self, project_type: Optional[str] = None) -> DeploymentPackage.__class__:
28 | if project_type:
29 | if project_type not in self.PROJECT_TYPE_MAPPINGS:
30 | raise ValueError(f"Unknown project type {project_type}")
31 | return self.PROJECT_TYPE_MAPPINGS.get(project_type) # type: ignore
32 | 
33 | # We default to a single file project - if the user has just a .py or .jar
34 | project = SimpleProject
35 | 
36 | # If there are multiple .py files, we escalate to a PythonFilesProject
37 | if len(find_files(os.getcwd(), [".venv"], ".py")) > 1:
38 | project = PythonFilesProject
39 | 
40 | # If we have a pyproject.toml or setup.py, we have a python project
41 | if find_files(os.getcwd(), [".venv"], "pyproject.toml") or find_files(
42 | os.getcwd(), [".venv"], "setup.py"
43 | ):
44 | project = PythonProject
45 | 
46 | # If we have a poetry.lock, it's a poetry project
47 | if find_files(os.getcwd(), [".venv"], "poetry.lock"):
48 | project = PythonPoetryProject
49 | 
50 | return project
51 | 
-------------------------------------------------------------------------------- /src/emr_cli/packaging/python_files_project.py: --------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | 
4 | import boto3
5 | from emr_cli.deployments import SparkParams
6 | from emr_cli.deployments.emr_serverless import DeploymentPackage
7 | from emr_cli.utils import (
8 | PrettyUploader,
9 | console_log,
10 | find_files,
11 | mkdir,
12 | parse_bucket_uri,
13 | )
14 | 
15 | 
16 | class PythonFilesProject(DeploymentPackage):
17 | """
18 | A PythonFilesProject is a simple project that includes multiple `.py` files.
19 | 
20 | This is a simple project that has no external dependencies and requires no
21 | additional packaging. The files in the project are simply zipped up.
22 | """
23 | 
24 | def build(self):
25 | """
26 | Zip all the files except for the entrypoint file.
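The resulting dist/pyfiles.zip is distributed to executors via the
spark.submit.pyFiles parameter (see spark_submit_parameters below).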
27 | """ 28 | py_files = find_files(os.getcwd(), [".venv"], ".py") 29 | py_files.remove(os.path.abspath(self.entry_point_path)) 30 | cwd = os.getcwd() 31 | mkdir(self.dist_dir) 32 | with zipfile.ZipFile(f"{self.dist_dir}/pyfiles.zip", "w") as zf: 33 | for file in py_files: 34 | relpath = os.path.relpath(file, cwd) 35 | zf.write(file, relpath) 36 | 37 | def deploy(self, s3_code_uri: str) -> str: 38 | """ 39 | Copies local code to S3 and returns the path to the uploaded entrypoint 40 | """ 41 | s3_client = boto3.client("s3") 42 | bucket, prefix = parse_bucket_uri(s3_code_uri) 43 | filename = os.path.basename(self.entry_point_path) 44 | 45 | console_log(f"Deploying {filename} and local python modules to {s3_code_uri}") 46 | 47 | uploader = PrettyUploader( 48 | s3_client, 49 | bucket, 50 | { 51 | self.entry_point_path: os.path.join(prefix, filename), 52 | os.path.join(self.dist_dir, "pyfiles.zip"): os.path.join( 53 | prefix, "pyfiles.zip" 54 | ), 55 | }, 56 | ) 57 | uploader.run() 58 | 59 | return f"s3://{bucket}/{prefix}/{filename}" 60 | 61 | def spark_submit_parameters(self) -> SparkParams: 62 | zip_path = os.path.join(self.s3_uri_base, "pyfiles.zip") 63 | return SparkParams( 64 | common_params={ 65 | "spark.submit.pyFiles": zip_path, 66 | }, 67 | ) 68 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/python_poetry_project.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | from pathlib import Path 5 | from typing import List 6 | from urllib.parse import urlparse 7 | 8 | import boto3 9 | 10 | from emr_cli.deployments import SparkParams 11 | from emr_cli.deployments.emr_serverless import DeploymentPackage 12 | from emr_cli.utils import ( 13 | PrettyUploader, 14 | console_log, 15 | copy_template, 16 | validate_build_target, 17 | ) 18 | 19 | 20 | class PythonPoetryProject(DeploymentPackage): 21 | def initialize(self, target_dir: str = os.getcwd()): 22 | """ 23 | Initializes a poetry-based pyspark project in the provided directory. 
24 | - Creates a basic poetry project
25 | - Creates a pyproject.toml file
26 | - Creates a Dockerfile
27 | """
28 | console_log(f"Initializing project in {target_dir}")
29 | copy_template("pyspark", target_dir)
30 | copy_template("poetry", target_dir)
31 | console_log("Project initialized.")
32 | 
33 | def build(self):
34 | if not Path("poetry.lock").exists():
35 | print("Error: No poetry.lock present, please set up your poetry project.")
36 | sys.exit(1)
37 | 
38 | console_log(f"Packaging assets into {self.dist_dir}/")
39 | # TODO: Add an option for --force-local-build
40 | self._run_docker_build(self.dist_dir)
41 | 
42 | def _run_local_build(self, output_dir: str = "dist"):
43 | subprocess.run(
44 | ["poetry", "bundle", "venv", "poeticemrbundle", "--without", "dev"],
45 | check=True,
46 | )
47 | 
48 | def _run_docker_build(self, output_dir: str):
49 | validate_build_target("export-poetry")
50 | subprocess.run(
51 | [
52 | "docker",
53 | "build",
54 | "--target",
55 | "export-poetry",
56 | "--output",
57 | output_dir,
58 | "--file",
59 | self._dockerfile_path(),
60 | ".",
61 | ],
62 | check=True,
63 | env=dict(os.environ, DOCKER_BUILDKIT="1"),
64 | )
65 | 
66 | def _dockerfile_path(self) -> str:
67 | if Path("Dockerfile").is_file():
68 | return "Dockerfile"
69 | 
70 | templates = os.path.abspath(
71 | os.path.join(os.path.dirname(__file__), "..", "templates", "pyspark")
72 | )
73 | return os.path.join(templates, "Dockerfile")
74 | 
75 | def deploy(self, s3_code_uri: str) -> str:
76 | """
77 | Copies local code to S3 and returns the path to the uploaded entrypoint
78 | """
79 | s3_client = boto3.client("s3")
80 | bucket, prefix = self._parse_bucket_uri(s3_code_uri)
81 | filename = os.path.basename(self.entry_point_path)
82 | 
83 | console_log(f"Deploying {filename} and dependencies to {s3_code_uri}")
84 | 
85 | uploader = PrettyUploader(
86 | s3_client,
87 | bucket,
88 | {
89 | self.entry_point_path: os.path.join(prefix, filename),
90 | os.path.join(self.dist_dir, "pyspark_deps.tar.gz"): os.path.join(
91 | prefix, "pyspark_deps.tar.gz"
92 | ),
93 | },
94 | )
95 | uploader.run()
96 | 
97 | return f"s3://{bucket}/{prefix}/{filename}"
98 | 
99 | def spark_submit_parameters(self) -> SparkParams:
100 | tar_path = os.path.join(self.s3_uri_base, "pyspark_deps.tar.gz")
101 | return SparkParams(
102 | common_params={
103 | "spark.archives": f"{tar_path}#environment",
104 | },
105 | emr_serverless_params={
106 | "spark.emr-serverless.driverEnv.PYSPARK_DRIVER_PYTHON": "./environment/bin/python",
107 | "spark.emr-serverless.driverEnv.PYSPARK_PYTHON": "./environment/bin/python",
108 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python",
109 | },
110 | emr_ec2_params={
111 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python",
112 | "spark.yarn.appMasterEnv.PYSPARK_PYTHON": "./environment/bin/python",
113 | },
114 | emr_eks_params={
115 | "spark.pyspark.python": "./environment/bin/python",
116 | },
117 | )
118 | 
119 | def _parse_bucket_uri(self, uri: str) -> List[str]:
120 | result = urlparse(uri, allow_fragments=False)
121 | return [result.netloc, result.path.strip("/")]
122 | 
-------------------------------------------------------------------------------- /src/emr_cli/packaging/python_project.py: --------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import sys
4 | from pathlib import Path
5 | from shutil import copy
6 | 
7 | import boto3
8 | 
9 | from emr_cli.deployments import SparkParams
10 | from 
emr_cli.deployments.emr_serverless import DeploymentPackage 11 | from emr_cli.utils import ( 12 | PrettyUploader, 13 | console_log, 14 | copy_template, 15 | parse_bucket_uri, 16 | validate_build_target, 17 | ) 18 | 19 | 20 | class PythonProject(DeploymentPackage): 21 | def initialize(self, target_dir: str = os.getcwd()): 22 | """ 23 | Initializes a pyspark project in the provided directory. 24 | - Creates a basic project 25 | - Creates a pyproject.toml file 26 | - Creates a Dockerfile 27 | """ 28 | console_log(f"Initializing project in {target_dir}") 29 | copy_template("pyspark", target_dir) 30 | console_log("Project initialized.") 31 | 32 | def copy_single_file(self, relative_file_path: str, target_dir: str = os.getcwd()): 33 | """ 34 | Copies a single file from the template directory to the target directory. 35 | """ 36 | template_path = ( 37 | Path(__file__).parent.parent / "templates" / "pyspark" / relative_file_path 38 | ) 39 | target_path = Path(target_dir) 40 | copy(template_path, target_path) 41 | 42 | def build(self): 43 | """ 44 | For now, uses a pre-existing Docker file and setuptools 45 | """ 46 | if not Path("Dockerfile").exists(): 47 | print( 48 | "Error: No Dockerfile present, use 'emr-cli init --dockerfile' to generate one" # noqa: E501 49 | ) 50 | sys.exit(1) 51 | if not Path("pyproject.toml").exists(): 52 | print("Error: No pyproject.toml present, please set one up before building") 53 | sys.exit(1) 54 | 55 | console_log(f"Packaging assets into {self.dist_dir}/") 56 | self._run_docker_build(self.dist_dir) 57 | 58 | def _run_docker_build(self, output_dir: str): 59 | validate_build_target("export-python") 60 | subprocess.run( 61 | [ 62 | "docker", 63 | "build", 64 | "--target", 65 | "export-python", 66 | "--output", 67 | output_dir, 68 | ".", 69 | ], 70 | check=True, 71 | env=dict(os.environ, DOCKER_BUILDKIT="1"), 72 | ) 73 | 74 | def deploy(self, s3_code_uri: str) -> str: 75 | """ 76 | Copies local code to S3 and returns the path to the uploaded entrypoint 77 | """ 78 | self.s3_uri_base = s3_code_uri 79 | s3_client = boto3.client("s3") 80 | bucket, prefix = parse_bucket_uri(self.s3_uri_base) 81 | filename = os.path.basename(self.entry_point_path) 82 | 83 | console_log(f"Deploying {filename} and dependencies to {self.s3_uri_base}") 84 | 85 | uploader = PrettyUploader( 86 | s3_client, 87 | bucket, 88 | { 89 | self.entry_point_path: os.path.join(prefix, filename), 90 | os.path.join(self.dist_dir, "pyspark_deps.tar.gz"): os.path.join( 91 | prefix, "pyspark_deps.tar.gz" 92 | ), 93 | }, 94 | ) 95 | uploader.run() 96 | 97 | return f"s3://{bucket}/{prefix}/{filename}" 98 | 99 | def spark_submit_parameters(self) -> SparkParams: 100 | tar_path = os.path.join(self.s3_uri_base, "pyspark_deps.tar.gz") 101 | return SparkParams( 102 | common_params={ 103 | "spark.archives": f"{tar_path}#environment", 104 | }, 105 | emr_serverless_params={ 106 | "spark.emr-serverless.driverEnv.PYSPARK_DRIVER_PYTHON": "./environment/bin/python", 107 | "spark.emr-serverless.driverEnv.PYSPARK_PYTHON": "./environment/bin/python", 108 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python", 109 | }, 110 | emr_ec2_params={ 111 | "spark.executorEnv.PYSPARK_PYTHON": "./environment/bin/python", 112 | "spark.yarn.appMasterEnv.PYSPARK_PYTHON": "./environment/bin/python", 113 | }, 114 | emr_eks_params={ 115 | "spark.pyspark.python": "./environment/bin/python", 116 | }, 117 | ) 118 | -------------------------------------------------------------------------------- /src/emr_cli/packaging/simple_project.py: 
-------------------------------------------------------------------------------- 
1 | import os
2 | 
3 | import boto3
4 | 
5 | from emr_cli.deployments.emr_serverless import DeploymentPackage
6 | from emr_cli.utils import PrettyUploader, console_log, parse_bucket_uri
7 | 
8 | 
9 | class SimpleProject(DeploymentPackage):
10 | """
11 | A simple project only has a single entry point file.
12 | This can be a pyspark file or packaged jar file.
13 | """
14 | 
15 | def build(self):
16 | pass
17 | 
18 | def deploy(self, s3_code_uri: str) -> str:
19 | """
20 | Copies local code to S3 and returns the path to the uploaded entrypoint
21 | """
22 | s3_client = boto3.client("s3")
23 | bucket, prefix = parse_bucket_uri(s3_code_uri)
24 | filename = os.path.basename(self.entry_point_path)
25 | 
26 | console_log(f"Deploying {filename} to {s3_code_uri}")
27 | uploader = PrettyUploader(
28 | s3_client,
29 | bucket,
30 | {
31 | self.entry_point_path: os.path.join(prefix, filename),
32 | },
33 | )
34 | uploader.run()
35 | 
36 | return f"s3://{bucket}/{prefix}/{filename}"
37 | 
-------------------------------------------------------------------------------- /src/emr_cli/templates/poetry/README.md: --------------------------------------------------------------------------------
1 | # EMR Serverless Poetry Template
2 | 
3 | Welcome to your new EMR Serverless Poetry PySpark project!
4 | 
5 | To get started, change into the project you just created and run the `install` command.
6 | 
7 | ```bash
8 | poetry install
9 | ```
10 | 
11 | Your dependencies should now all be resolved and you should have a new `poetry.lock` file in your project.
12 | 
13 | ## Deploy!
14 | 
15 | Now we can go ahead and build our project and deploy it on EMR Serverless.
16 | 
17 | > **Note** This tutorial assumes you have already [set up EMR Serverless](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/setting-up.html) and have an EMR Serverless application, job role, and S3 bucket you can use. You can also use the `emr bootstrap` command.
18 | 
19 | 1. Set your relevant variables
20 | 
21 | ```bash
22 | APPLICATION_ID=
23 | JOB_ROLE_ARN=
24 | S3_BUCKET=
25 | ```
26 | 
27 | 2. Package, deploy, and run your job all in one command. 
28 | 
29 | ```
30 | emr run \
31 | --entry-point entrypoint.py \
32 | --application-id ${APPLICATION_ID} \
33 | --job-role ${JOB_ROLE_ARN} \
34 | --s3-code-uri s3://${S3_BUCKET}/tmp/emr-cli-demo-poetry/ \
35 | --build --wait
36 | ```
-------------------------------------------------------------------------------- /src/emr_cli/templates/poetry/pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "mysparkjobs"
3 | version = "0.0.1"
4 | description = "EMR Serverless Spark jobs"
5 | authors = ["Amazon EMR"]
6 | readme = "README.md"
7 | packages = [{include = "jobs"}]
8 | 
9 | [tool.poetry.dependencies]
10 | python = "^3.7.10"
11 | pandas = "1.3.5"
12 | pyarrow = "8.0.0"
13 | 
14 | [tool.poetry.group.dev.dependencies]
15 | pyspark = "3.3.0"
16 | pytest = "^7.2.0"
17 | 
18 | [build-system]
19 | requires = ["poetry-core"]
20 | build-backend = "poetry.core.masonry.api"
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/.dockerignore: --------------------------------------------------------------------------------
1 | .venv/
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/.gitignore: --------------------------------------------------------------------------------
1 | .venv/
2 | dist/
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/Dockerfile: --------------------------------------------------------------------------------
1 | # This is a multi-stage Dockerfile that can be used to build many different types of
2 | # bundled dependencies for PySpark projects.
3 | # The `base` stage installs generic tools necessary for packaging.
4 | #
5 | # There are `export-` and `build-` stages for the different types of projects.
6 | # - python-packages - Generic support for Python projects with pyproject.toml
7 | # - poetry - Support for Poetry projects
8 | #
9 | # This Dockerfile is generated automatically as part of the emr-cli tool.
10 | # Feel free to modify it for your needs, but leave the `build-` and `export-`
11 | # stages related to your project.
12 | #
13 | # To build manually, you can use the following command, assuming
14 | # the Docker BuildKit backend is enabled. https://docs.docker.com/build/buildkit/
15 | #
16 | # Example for building a poetry project and saving the output to dist/ folder
17 | # docker build --target export-poetry --output dist .
18 | 
19 | 
20 | ## ----------------------------------------------------------------------------
21 | ## Base stage for python development
22 | ## ----------------------------------------------------------------------------
23 | FROM --platform=linux/amd64 amazonlinux:2 AS base
24 | 
25 | RUN yum install -y python3 tar gzip
26 | 
27 | ENV VIRTUAL_ENV=/opt/venv
28 | RUN python3 -m venv $VIRTUAL_ENV
29 | ENV PATH="$VIRTUAL_ENV/bin:$PATH"
30 | 
31 | # EMR 6.x uses Python 3.7 - limit Poetry version to 1.5.1
32 | ENV POETRY_VERSION=1.5.1
33 | RUN python3 -m pip install --upgrade pip
34 | RUN curl -sSL https://install.python-poetry.org | python3 -
35 | 
36 | ENV PATH="$PATH:/root/.local/bin"
37 | 
38 | WORKDIR /app
39 | 
40 | COPY . . 
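# Note: the .dockerignore shipped with this template excludes .venv/, keeping
# local virtualenvs out of the build context copied above.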
41 | 
42 | # Test stage - installs test dependencies defined in pyproject.toml
43 | FROM base as test
44 | RUN python3 -m pip install .[tests]
45 | 
46 | ## ----------------------------------------------------------------------------
47 | ## Build and export stages for standard Python projects
48 | ## ----------------------------------------------------------------------------
49 | # Build stage - installs required dependencies and creates a venv package
50 | FROM base as build-python
51 | RUN python3 -m pip install venv-pack==0.2.0 && \
52 | python3 -m pip install .
53 | RUN mkdir /output && venv-pack -o /output/pyspark_deps.tar.gz
54 | 
55 | # Export stage - used to copy packaged venv to local filesystem
56 | FROM scratch AS export-python
57 | COPY --from=build-python /output/pyspark_deps.tar.gz /
58 | 
59 | ## ----------------------------------------------------------------------------
60 | ## Build and export stages for Poetry Python projects
61 | ## ----------------------------------------------------------------------------
62 | # Build stage for poetry
63 | FROM base as build-poetry
64 | RUN poetry self add poetry-plugin-bundle && \
65 | poetry bundle venv dist/bundle --without dev && \
66 | tar -czvf dist/pyspark_deps.tar.gz -C dist/bundle . && \
67 | rm -rf dist/bundle
68 | 
69 | FROM scratch as export-poetry
70 | COPY --from=build-poetry /app/dist/pyspark_deps.tar.gz /
71 | 
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/entrypoint.py: --------------------------------------------------------------------------------
1 | import sys
2 | from datetime import date
3 | 
4 | from jobs.extreme_weather import ExtremeWeather
5 | 
6 | if __name__ == "__main__":
7 | """
8 | Usage: extreme-weather [year]
9 | Displays extreme weather stats (highest temperature, wind, precipitation) for the given, or latest, year.
10 | """
11 | if len(sys.argv) > 1:
12 | year = int(sys.argv[1])
13 | else:
14 | year = date.today().year
15 | 
16 | extreme_weather = ExtremeWeather(year)
17 | extreme_weather.run()
-------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/jobs/extreme_weather.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from datetime import date
3 | 
4 | import pandas as pd
5 | from pyspark.sql import DataFrame, Row, SparkSession
6 | from pyspark.sql import functions as F
7 | 
8 | GSOD_S3_BASE = "s3://noaa-gsod-pds"
9 | 
10 | 
11 | class ExtremeWeather:
12 | """
13 | Usage: extreme-weather [--year xxxx]
14 | 
15 | Displays extreme weather stats (highest temp, wind, precipitation) for the given year.
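Example (a sketch, assuming a local Spark installation):
spark-submit jobs/extreme_weather.py --year 2022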
16 | """
17 | 
18 | def __init__(self, year: int) -> None:
19 | self.year = year
20 | self.spark = SparkSession.builder.appName("ExtremeWeather").getOrCreate()
21 | 
22 | def run(self) -> None:
23 | df = self._fetch_data()
24 | for stat in [
25 | {"description": "Highest temperature", "column_name": "MAX", "units": "°F"},
26 | {
27 | "description": "Highest all-day average temperature",
28 | "column_name": "TEMP",
29 | "units": "°F",
30 | },
31 | ]:
32 | max_row = self.findLargest(df, stat.get("column_name"))
33 | print(f"--- {stat['description']}")
34 | print(
35 | f" {max_row[stat['column_name']]}{stat['units']} on {max_row.DATE} at {max_row.NAME} ({max_row.LATITUDE}, {max_row.LONGITUDE})"
36 | )
37 | 
38 | print("--- Top 10 Outliers")
39 | outliers = self.find_outliers_for_column(df, stat.get("column_name"))
40 | for i, row in outliers[:10].iterrows():
41 | print(
42 | f" {row['NAME']} ({row['DATE']}) – {row[stat['column_name']]}{stat['units']}"
43 | )
44 | print("\n")
45 | 
46 | def find_outliers_for_column(
47 | self,
48 | df: DataFrame,
49 | col: str,
50 | percent: float = 0.99,
51 | ) -> pd.DataFrame:
52 | """
53 | Converts the provided DataFrame to a Pandas DataFrame and returns the rows above the `percent` quantile.
54 | """
55 | dfp = df.toPandas()
56 | q = dfp.quantile(percent)
57 | return dfp[dfp[col] > q[col]]
58 | 
59 | def _gsod_year_uri(self, year: int) -> str:
60 | """
61 | Builds the s3 URI for the provided year
62 | """
63 | return f"{GSOD_S3_BASE}/{year}/"
64 | 
65 | def _fetch_data(self) -> DataFrame:
66 | """
67 | Reads GSOD csv data for the specified year.
68 | """
69 | df = self.spark.read.csv(
70 | self._gsod_year_uri(self.year), header=True, inferSchema=True
71 | )
72 | return df
73 | 
74 | def findLargest(self, df: DataFrame, col_name: str) -> Row:
75 | """
76 | Find the largest value in `col_name` column.
77 | Values of 99.99, 999.9 and 9999.9 are excluded because they indicate "no reading" for that attribute.
78 | While 99.99 _could_ be a valid value for temperature, for example, we know there are higher readings. 
79 | """ 80 | return ( 81 | df.select( 82 | "STATION", 83 | "DATE", 84 | "LATITUDE", 85 | "LONGITUDE", 86 | "ELEVATION", 87 | "NAME", 88 | col_name, 89 | ) 90 | .filter(~F.col(col_name).isin([99.99, 999.9, 9999.9])) 91 | .orderBy(F.desc(col_name)) 92 | .limit(1) 93 | .first() 94 | ) 95 | 96 | 97 | def parse_args() -> argparse.Namespace: 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("--year", type=int, required=False, default=date.today().year) 100 | return parser.parse_args() 101 | 102 | 103 | if __name__ == "__main__": 104 | args = parse_args() 105 | weather_data = ExtremeWeather(args.year) 106 | weather_data.run() -------------------------------------------------------------------------------- /src/emr_cli/templates/pyspark/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mysparkjobs" 3 | version = "0.0.1" 4 | 5 | dependencies = [ 6 | 'pandas==1.3.5', 7 | 'pyarrow==8.0.0', 8 | ] 9 | 10 | [project.optional-dependencies] 11 | tests = [ 12 | 'pytest==7.1.2', 13 | ] -------------------------------------------------------------------------------- /src/emr_cli/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import re 4 | import sys 5 | from pathlib import Path 6 | from shutil import copyfile, copytree, ignore_patterns 7 | from typing import TYPE_CHECKING, Dict, List 8 | from urllib.parse import urlparse 9 | 10 | from rich.progress import Progress, TotalFileSizeColumn 11 | 12 | if TYPE_CHECKING: 13 | from mypy_boto3_s3 import S3Client 14 | else: 15 | S3Client = object 16 | 17 | 18 | def console_log(message): 19 | print(f"[emr-cli]: {message}") 20 | 21 | 22 | def find_files(directory, excluded_dirs=[], search=None) -> List[str]: 23 | files = [] 24 | for root, dirs, filenames in os.walk(directory): 25 | dirs[:] = [d for d in dirs if d not in excluded_dirs] 26 | for filename in filenames: 27 | if search is None or filename == search or filename.endswith(search): 28 | files.append(os.path.join(root, filename)) 29 | return files 30 | 31 | 32 | def parse_bucket_uri(uri: str) -> List[str]: 33 | result = urlparse(uri, allow_fragments=False) 34 | return [result.netloc, result.path.strip("/")] 35 | 36 | 37 | def mkdir(path: str): 38 | try: 39 | os.mkdir(path) 40 | except FileExistsError: 41 | pass 42 | 43 | 44 | def copy_template(source: str, target_dir: str): 45 | """ 46 | Copies the entire `source` directory to `target_dir`. 
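Existing files in `target_dir` are overwritten, and `__pycache__` directories
are skipped.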
47 | """ 48 | source = os.path.abspath(Path(__file__).parent.parent / "templates" / source) 49 | if sys.version_info.major == 3 and sys.version_info.minor == 7: 50 | py37_copytree(source, target_dir, ignore=ignore_patterns("__pycache__")) 51 | else: 52 | copytree( 53 | source, 54 | target_dir, 55 | dirs_exist_ok=True, 56 | ignore=ignore_patterns("__pycache__"), 57 | ) 58 | 59 | 60 | def py37_copytree(src, dest, ignore=None): 61 | """ 62 | A Python3 3.7 version of shutils.copytree since `dirs_exist_ok` was introduced in 3.8 63 | """ 64 | if os.path.isdir(src): 65 | if not os.path.isdir(dest): 66 | os.makedirs(dest) 67 | files = os.listdir(src) 68 | if ignore is not None: 69 | ignored = ignore(src, files) 70 | else: 71 | ignored = set() 72 | for f in files: 73 | if f not in ignored: 74 | py37_copytree(os.path.join(src, f), os.path.join(dest, f), ignore) 75 | else: 76 | copyfile(src, dest) 77 | 78 | 79 | def validate_build_target(name: str) -> bool: 80 | """ 81 | Grep the local Dockerfile for the desired target, raise an exception if it's not found 82 | """ 83 | r = None 84 | search_term = f"FROM .* AS {name}$" 85 | with open("Dockerfile", "r") as file: 86 | for line in file: 87 | r = re.search(search_term, line, flags=re.IGNORECASE) 88 | if r: 89 | return True 90 | if not r: 91 | console_log(f"ERR: Target `{name}` not found in Dockerfile.") 92 | console_log( 93 | "ERR: Try creating a new dockerfile with the `emr init --dockerfile .` command." 94 | ) 95 | sys.exit(1) 96 | 97 | return False 98 | 99 | 100 | def print_s3_gz(client: S3Client, s3_uri: str): 101 | """ 102 | Downloads and decompresses a gzip file from S3 and prints the logs to stdout. 103 | """ 104 | bucket, key = parse_bucket_uri(s3_uri) 105 | gz = client.get_object(Bucket=bucket, Key=key) 106 | with gzip.open(gz["Body"]) as data: 107 | print(data.read().decode()) 108 | 109 | 110 | class PrettyUploader: 111 | def __init__( 112 | self, 113 | s3_client: S3Client, 114 | bucket: str, 115 | src_target: Dict[str, str], 116 | ): 117 | self._s3_client = s3_client 118 | self._bucket = bucket 119 | self._src_target = src_target 120 | self._totalsize = sum( 121 | [float(os.path.getsize(filename)) for filename in self._src_target.keys()] 122 | ) 123 | self._seensize = 0 124 | self._progress = Progress( 125 | *Progress.get_default_columns(), TotalFileSizeColumn() 126 | ) 127 | self._task = self._progress.add_task("Uploading...", total=self._totalsize) 128 | 129 | def run(self): 130 | with self._progress: 131 | for src, target in self._src_target.items(): 132 | self._s3_client.upload_file(src, self._bucket, target, Callback=self) 133 | 134 | def __call__(self, bytes_amount): 135 | self._progress.update(self._task, advance=bytes_amount) 136 | -------------------------------------------------------------------------------- /tests/deployments/test_emr_ec2.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import MagicMock 3 | 4 | from emr_cli.deployments.emr_ec2 import EMREC2 5 | from emr_cli.deployments.emr_serverless import DeploymentPackage 6 | 7 | CLUSTER_ID = "j-11111111" 8 | 9 | 10 | class TestEMREC2(unittest.TestCase): 11 | def setUp(self): 12 | self.obj = EMREC2(CLUSTER_ID, DeploymentPackage()) 13 | 14 | def test_fetch_log_location_success(self): 15 | self.obj.client.describe_cluster = MagicMock( 16 | return_value={"Cluster": {"LogUri": "s3n://example-bucket/logs/"}} 17 | ) 18 | self.assertEqual(self.obj._fetch_log_location(), "s3://example-bucket/logs/") 19 | 20 | 
20 |     def test_fetch_log_location_no_loguri(self):
21 |         self.obj.client.describe_cluster = MagicMock(return_value={"Cluster": {}})
22 |         # Ensure that a RuntimeError is raised
23 |         with self.assertRaises(RuntimeError):
24 |             self.obj._fetch_log_location()
25 | 
26 |     def test_fetch_log_location_loguri_none(self):
27 |         self.obj.client.describe_cluster = MagicMock(
28 |             return_value={"Cluster": {"LogUri": None}}
29 |         )
30 |         # Ensure that a RuntimeError is raised
31 |         with self.assertRaises(RuntimeError):
32 |             self.obj._fetch_log_location()
33 | 
34 |     def test_fetch_log_location_replace_s3n_with_s3(self):
35 |         self.obj.client.describe_cluster = MagicMock(
36 |             return_value={"Cluster": {"LogUri": "s3n://example-bucket/logs/"}}
37 |         )
38 |         # Ensure that "s3n:" is replaced with "s3:" in the returned S3 location
39 |         self.assertEqual(self.obj._fetch_log_location(), "s3://example-bucket/logs/")
--------------------------------------------------------------------------------
/tests/packaging/test_python_files_project.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from emr_cli.deployments import SparkParams
3 | 
4 | from emr_cli.packaging.python_files_project import PythonFilesProject
5 | 
6 | 
7 | class TestPythonFilesProject:
8 |     def test_build(self, fs):
9 |         fs.create_file("main.py")
10 |         fs.create_file("lib/file1.py")
11 |         fs.create_file("lib/file2.py")
12 |         pfp = PythonFilesProject("main.py")
13 |         pfp.build()
14 |         assert Path("dist/pyfiles.zip").exists()
15 | 
16 |     def test_spark_submit(self, fs):
17 |         fs.create_file("main.py")
18 |         fs.create_file("lib/file1.py")
19 |         fs.create_file("lib/file2.py")
20 |         pfp = PythonFilesProject("main.py")
21 |         sp = pfp.spark_submit_parameters()
22 |         assert isinstance(sp, SparkParams)
23 |         assert sp.params_for("emr_serverless").startswith("--conf spark.submit.pyFiles=")
--------------------------------------------------------------------------------
/tests/packaging/test_python_poetry_project.py:
--------------------------------------------------------------------------------
1 | from emr_cli.deployments import SparkParams
2 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
3 | 
4 | 
5 | class TestPythonPoetryProject:
6 |     def test_spark_submit(self, fs):
7 |         fs.create_file("main.py")
8 |         fs.create_file("lib/file1.py")
9 |         fs.create_file("lib/file2.py")
10 |         ppp = PythonPoetryProject("main.py")
11 |         sp = ppp.spark_submit_parameters()
12 |         assert isinstance(sp, SparkParams)
13 |         assert "spark.archives" in sp.params_for("emr_serverless")
14 |         assert "spark.emr-serverless.driverEnv" in sp.params_for("emr_serverless")
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | 
3 | from emr_cli.emr_cli import cli
4 | 
5 | class TestCli:
6 |     def test_version(self):
7 |         runner = CliRunner()
8 |         result = runner.invoke(cli, ['status'])
9 |         assert result.exit_code == 0
10 |         assert 'EMR CLI version:' in result.output
11 | 
12 |     def test_project_detection(self):
13 |         runner = CliRunner()
14 |         with runner.isolated_filesystem():
15 |             with open('main.py', 'w') as f:
16 |                 f.write('print("Hello World")')
17 | 
18 |             result = runner.invoke(cli, ['status'])
19 |             assert result.exit_code == 0
20 |             assert 'Project type:\t\tSimpleProject' in result.output
21 | 
22 |     def test_resource_validation(self):
23 |         runner = CliRunner()
24 |         result = runner.invoke(cli, ['run'])
25 |         assert result.exit_code == 2
26 |         assert 'Error: One of' in result.output
27 |         assert "must be specified" in result.output
28 | 
29 |         result = runner.invoke(cli, ['run', '--application-id', '1234', '--cluster-id', '567'])
30 |         assert result.exit_code == 2
31 |         assert 'Error: Only one of' in result.output
32 |         assert "can be specified" in result.output
33 | 
34 |         for arg in ['--application-id', '--cluster-id', '--virtual-cluster-id']:
35 |             result = runner.invoke(cli, ['run', arg, '1234'])
36 |             assert result.exit_code == 2
37 |             assert 'Error: --entry-point' in result.output
38 | 
--------------------------------------------------------------------------------
/tests/test_detector.py:
--------------------------------------------------------------------------------
1 | from emr_cli.packaging.detector import ProjectDetector
2 | from emr_cli.packaging.python_files_project import PythonFilesProject
3 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
4 | from emr_cli.packaging.python_project import PythonProject
5 | from emr_cli.packaging.simple_project import SimpleProject
6 | 
7 | 
8 | class TestDetector:
9 |     def test_single_py_file(self, fs):
10 |         fs.create_file("main.py")
11 |         obj = ProjectDetector().detect()
12 |         assert obj == SimpleProject
13 | 
14 |     def test_multi_py_file(self, fs):
15 |         fs.create_file("main.py")
16 |         fs.create_file("lib/file1.py")
17 |         fs.create_file("lib/file2.py")
18 |         obj = ProjectDetector().detect()
19 |         assert obj == PythonFilesProject
20 | 
21 |     def test_poetry_project(self, fs):
22 |         fs.create_file("poetry.lock")
23 |         obj = ProjectDetector().detect()
24 |         assert obj == PythonPoetryProject
25 | 
26 |     def test_dependency_project(self, fs):
27 |         fs.create_file("main.py")
28 |         fs.create_file("pyproject.toml")
29 |         fs.create_file("lib/file1.py")
30 |         fs.create_file("lib/file2.py")
31 |         obj = ProjectDetector().detect()
32 |         assert obj == PythonProject
--------------------------------------------------------------------------------
/tests/test_init.py:
--------------------------------------------------------------------------------
1 | from emr_cli.packaging.python_poetry_project import PythonPoetryProject
2 | from emr_cli.packaging.python_project import PythonProject
3 | 
4 | 
5 | class TestInit:
6 |     def test_default_init(self, tmp_path):
7 |         p = PythonProject()
8 |         target_path = tmp_path / "python_proj"
9 |         p.initialize(target_path)
10 |         assert (target_path / "pyproject.toml").exists()
11 |         assert (target_path / "entrypoint.py").exists()
12 |         assert (target_path / "jobs" / "extreme_weather.py").exists()
13 |         assert not (target_path / "README.md").exists()
14 | 
15 |     def test_poetry_init(self, tmp_path):
16 |         p = PythonPoetryProject()
17 |         target_path = tmp_path / "python_poetry_proj"
18 |         p.initialize(target_path)
19 |         assert (target_path / "entrypoint.py").exists()
20 |         assert (target_path / "pyproject.toml").exists()
21 |         assert (target_path / "README.md").exists()
22 | 
23 |     def test_create_in_existing_folder(self, tmp_path):
24 |         pass
25 | 
--------------------------------------------------------------------------------