├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ └── privileged-run.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── NOTICE.txt ├── README.md ├── amazon_kclpy ├── __init__.py ├── checkpoint_error.py ├── dispatch.py ├── jars │ └── __init__.py ├── kcl.py ├── messages.py ├── v2 │ ├── __init__.py │ └── processor.py └── v3 │ ├── __init__.py │ └── processor.py ├── docs ├── Makefile ├── conf.py ├── guide │ ├── quickstart.rst │ ├── record_processor_v1.rst │ ├── record_processor_v2.rst │ └── sample.rst ├── index.rst └── make.bat ├── pom.xml ├── requirements.txt ├── samples ├── __init__.py ├── amazon_kclpy_helper.py ├── sample.properties ├── sample_kclpy_app.py └── sample_kinesis_wordputter.py ├── scripts └── build_deps.py ├── setup.cfg ├── setup.py ├── test ├── __init__.py ├── conftest.py ├── test_amazon_kclpy.py ├── test_amazon_kclpy_input_output_integration.py ├── utils.py └── v3 │ ├── __init__.py │ └── delegate_test.py └── test_requirements.txt /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "maven" 9 | directory: "/" 10 | open-pull-requests-limit: 4 11 | schedule: 12 | interval: "daily" 13 | -------------------------------------------------------------------------------- /.github/workflows/privileged-run.yml: -------------------------------------------------------------------------------- 1 | # This workflow will trigger on pushes, pull requests (to master branch), and manually from the GitHub Actions tab (when requested) 2 | # sample_run uses matrix to create 12 unique combinations of operating systems and python versions 3 | # each of the 12 runs download the jars needed to run the KCL, run the sample_kinesis_wordputter.py, and use a timeout command to run the sample_kclpy_app.py 4 | # auto_merge uses GitHub events to check if dependabot is the pull requester, and if the request fits the criteria the PR is automatically merged 5 | 6 | name: Sample Run and Dependabot Auto-merge 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request_target: 11 | branches: [ master ] 12 | workflow_dispatch: 13 | 14 | permissions: 15 | id-token: write 16 | contents: write 17 | pull-requests: write 18 | statuses: write 19 | 20 | jobs: 21 | sample-run: 22 | timeout-minutes: 8 23 | runs-on: ${{ matrix.os }} 24 | defaults: 25 | run: 26 | shell: bash 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | python-version: [ "3.9", "3.10", "3.11" ] 32 | jdk-version: [ "8", "11", "17", "21", "24" ] 33 | os: [ ubuntu-latest, macOS-latest, windows-latest ] 34 | 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | with: 39 | ref: ${{ github.event.pull_request.head.sha }} 40 | 41 | - name: Configure AWS Credentials 42 | uses: 
aws-actions/configure-aws-credentials@v4 43 | with: 44 | aws-region: us-east-1 45 | role-to-assume: arn:aws:iam::751999266872:role/GitHubPython 46 | role-session-name: myGitHubActionsPython 47 | 48 | - name: Set up JDK ${{ matrix.jdk-version }} 49 | uses: actions/setup-java@v4 50 | with: 51 | java-version: ${{ matrix.jdk-version }} 52 | distribution: 'corretto' 53 | 54 | - name: Set up Python ${{ matrix.python-version }} 55 | uses: actions/setup-python@v2 56 | with: 57 | python-version: ${{ matrix.python-version }} 58 | 59 | - name: Install Python and required pips 60 | run: | 61 | python -m pip install --upgrade pip 62 | pip install -r requirements.txt 63 | pip install -r test_requirements.txt 64 | pip install build 65 | 66 | - name: Test with Pytest 67 | run: | 68 | python -m pytest 69 | 70 | - name: Install .jar files 71 | run: | 72 | python -m build 73 | python setup.py download_jars 74 | python setup.py install 75 | env: 76 | KCL_MVN_REPO_SEARCH_URL: https://repo1.maven.org/maven2/ 77 | 78 | - name: Put words to sample stream 79 | run: | 80 | sample_kinesis_wordputter.py --stream kclpysample -w cat -w dog -w bird -w lobster -w octopus 81 | 82 | - name: Start KCL application (windows or ubuntu) 83 | if: matrix.os != 'macOS-latest' 84 | run: | 85 | timeout 45 $(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 86 | 87 | - name: Start KCL application (macOS) 88 | if: matrix.os == 'macOS-latest' 89 | run: | 90 | brew install coreutils 91 | gtimeout 45 $(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 92 | 93 | auto-merge-dependabot: 94 | needs: [sample-run] 95 | runs-on: ubuntu-latest 96 | if: github.actor == 'dependabot[bot]' && github.event.pull_request.user.login == 'dependabot[bot]' 97 | steps: 98 | - name: Fetch 
Dependabot metadata 99 | id: metadata 100 | uses: dependabot/fetch-metadata@v2 101 | with: 102 | alert-lookup: true 103 | github-token: "${{ secrets.GITHUB_TOKEN }}" 104 | 105 | - name: Approve PR 106 | if: steps.metadata.outputs.update-type != 'version-update:semver-major' 107 | run: gh pr review --approve "$PR_URL" 108 | env: 109 | PR_URL: ${{github.event.pull_request.html_url}} 110 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 111 | 112 | # - name: Enable auto-merge for Dependabot PRs 113 | # if: steps.metadata.outputs.update-type != 'version-update:semver-major' 114 | # run: gh pr merge --auto --merge "$PR_URL" 115 | # env: 116 | # PR_URL: ${{github.event.pull_request.html_url}} 117 | # GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.cache/ 2 | /amazon_kclpy.egg-info/ 3 | /amazon_kclpy/jars/ 4 | *.pyc 5 | /build/ 6 | /dist/ 7 | /docs/_build/ 8 | /.eggs/ 9 | 10 | # IntelliJ idea stuff 11 | .idea 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. 
Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-kinesis-client-python/issues), or [recently closed](https://github.com/awslabs/amazon-kinesis-client-python/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. 
Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/amazon-kinesis-client-python/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/amazon-kinesis-client-python/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
62 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | include README.md 4 | include pom.xml -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | AmazonKinesisClientLibraryForPython 2 | Copyright 2012-2014 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Kinesis Client Library for Python 2 | 3 | [![Version](https://img.shields.io/pypi/v/amazon-kclpy.svg?style=flat)](https://pypi.org/project/amazon-kclpy/) [![UnitTestCoverage](https://github.com/awslabs/amazon-kinesis-client-python/actions/workflows/run-unit-tests.yml/badge.svg)](https://github.com/awslabs/amazon-kinesis-client-python/actions/workflows/run-unit-tests.yml) 4 | 5 | This package provides an interface to the Amazon Kinesis Client Library (KCL) MultiLangDaemon, 6 | which is part of the [Amazon KCL for Java][kinesis-github]. 7 | Developers can use the [Amazon KCL][amazon-kcl] to build distributed applications that 8 | process streaming data reliably at scale. The [Amazon KCL][amazon-kcl] takes care of 9 | many of the complex tasks associated with distributed computing, such as load-balancing 10 | across multiple instances, responding to instance failures, checkpointing processed records, 11 | and reacting to changes in stream volume. 12 | This interface manages the interaction with the MultiLangDaemon so that developers can focus on 13 | implementing their record processor executable. 
A record processor executable 14 | typically looks something like: 15 | 16 | ```python 17 | #!env python 18 | from amazon_kclpy import kcl 19 | import json, base64 20 | 21 | class RecordProcessor(kcl.RecordProcessorBase): 22 | 23 | def initialize(self, initialiation_input): 24 | pass 25 | 26 | def process_records(self, process_records_input): 27 | pass 28 | 29 | def lease_lost(self, lease_lost_input): 30 | pass 31 | 32 | def shard_ended(self, shard_ended_input): 33 | pass 34 | 35 | def shutdown_requested(self, shutdown_requested_input): 36 | pass 37 | 38 | if __name__ == "__main__": 39 | kclprocess = kcl.KCLProcess(RecordProcessor()) 40 | kclprocess.run() 41 | ``` 42 | 43 | ## Before You Get Started 44 | 45 | Before running the samples, you'll want to make sure that your environment is 46 | configured to allow the samples to use your 47 | [AWS Security Credentials](http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html). 48 | 49 | By default the samples use the [DefaultCredentialsProvider][DefaultCredentialsProvider] 50 | so you'll want to make your credentials available to one of the credentials providers in that 51 | provider chain. There are several ways to do this such as providing a ~/.aws/credentials file, 52 | or if you're running on EC2, you can associate an IAM role with your instance with appropriate 53 | access. 54 | 55 | For questions regarding Amazon Kinesis Service and the client libraries please visit the 56 | [Amazon Kinesis Forums][kinesis-forum] 57 | 58 | ## Running the Sample 59 | 60 | Using the `amazon_kclpy` package requires the MultiLangDaemon which is provided 61 | by the [Amazon KCL for Java][kinesis-github]. These jars will be downloaded automatically 62 | by the `install` command, but you can explicitly download them with the `download_jars` command. 
63 | From the root of this repo, run: 64 | 65 | python setup.py download_jars 66 | python setup.py install 67 | 68 | If you'd like to override the default search location for the jars, you can set the `KCL_MVN_REPO_SEARCH_URL` 69 | environment variable to the location of the maven repository you'd like to use. 70 | 71 | export KCL_MVN_REPO_SEARCH_URL=https://path/to/maven/repo 72 | 73 | Now the `amazon_kclpy` and [boto][boto] (used by the sample putter script) and required 74 | jars should be installed in your environment. To start the sample putter, run: 75 | 76 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird -w lobster 77 | 78 | This will create an Amazon Kinesis stream called words and put the words 79 | specified by the -w options into the stream once each. Use -p SECONDS to 80 | indicate a period over which to repeatedly put these words. 81 | 82 | Now we would like to run an Amazon KCL for Python application that reads records 83 | from the stream we just created, but first take a look in the samples directory, 84 | you'll find a file called sample.properties, cat that file: 85 | 86 | cat samples/sample.properties 87 | 88 | You'll see several properties defined there. `executableName` indicates the 89 | executable for the MultiLangDaemon to run, `streamName` is the Kinesis stream 90 | to read from, `appName` is the Amazon KCL application name to use which will be the 91 | name of an Amazon DynamoDB table that gets created by the Amazon KCL, 92 | `initialPositionInStream` tells the Amazon KCL how to start reading from shards upon 93 | a fresh startup. To run the sample application you can use a helper script 94 | included in this package. Note you must provide a path to java (version 1.7 95 | or greater) to run the Amazon KCL. 
96 | 97 | amazon_kclpy_helper.py --print_command \ 98 | --java --properties samples/sample.properties 99 | 100 | This will print the command needed to run the sample which you can copy paste, 101 | or surround the command with back ticks to run it. 102 | 103 | `amazon_kclpy_helper.py --print_command \ 104 | --java --properties samples/sample.properties` 105 | 106 | Alternatively, if you don't have the source on hand, but want to run the sample 107 | app you can use the `--sample` argument to indicate you'd like to get the 108 | sample.properties file from the installation location. 109 | 110 | amazon_kclpy_helper.py --print_command --java --sample 111 | 112 | ## Running on EC2 113 | 114 | Running on EC2 is simple. Assuming you are already logged into an EC2 instance running 115 | Amazon Linux, the following steps will prepare your environment for running the sample 116 | app. Note the version of java that ships with Amazon Linux can be found at 117 | `/usr/bin/java` and should be 1.7 or greater. 118 | 119 | sudo yum install python-pip 120 | 121 | sudo pip install virtualenv 122 | 123 | virtualenv /tmp/kclpy-sample-env 124 | 125 | source /tmp/kclpy-sample-env/bin/activate 126 | 127 | pip install amazon_kclpy 128 | 129 | ## Under the Hood - What You Should Know about Amazon KCL's [MultiLangDaemon][multi-lang-daemon] 130 | Amazon KCL for Python uses [Amazon KCL for Java][kinesis-github] internally. We have implemented 131 | a Java-based daemon, called the *MultiLangDaemon* that does all the heavy lifting. Our approach 132 | has the daemon spawn the user-defined record processor script/program as a sub-process. The 133 | *MultiLangDaemon* communicates with this sub-process over standard input/output using a simple 134 | protocol, and therefore the record processor script/program can be written in any language. 
135 | 136 | At runtime, there will always be a one-to-one correspondence between a record processor, a child process, 137 | and an [Amazon Kinesis Shard][amazon-kinesis-shard]. The *MultiLangDaemon* will make sure of 138 | that, without any need for the developer to intervene. 139 | 140 | In this release, we have abstracted these implementation details away and exposed an interface that enables 141 | you to focus on writing record processing logic in Python. This approach enables [Amazon KCL][amazon-kcl] to 142 | be language agnostic, while providing identical features and similar parallel processing model across 143 | all languages. 144 | 145 | ## See Also 146 | * [Developing Consumer Applications for Amazon Kinesis Using the Amazon Kinesis Client Library][amazon-kcl] 147 | * The [Amazon KCL for Java][kinesis-github] 148 | * The [Amazon KCL for Ruby][amazon-kinesis-ruby-github] 149 | * The [Amazon Kinesis Documentation][amazon-kinesis-docs] 150 | * The [Amazon Kinesis Forum][kinesis-forum] 151 | 152 | ## Release Notes 153 | ### Release 3.0.3 (March 25, 2025) 154 | * Downgrade logback from 1.5.16 to 1.3.15 to maintain JDK 8 compatability 155 | 156 | ### Release 3.0.2 (March 24, 2025) 157 | #### :warning: [BREAKING CHANGES] - Release 3.0.2 contains a dependency version that is not compatible with JDK 8. Please upgrade to a later version if your KCL application requires JDK 8. 
158 | * [KCL 3.0.2 Changelog](https://github.com/awslabs/amazon-kinesis-client/blob/5263b4227ce7210d52bec6817191d43f047cd1b2/CHANGELOG.md) Upgrade KCL and KCL-Multilang dependencies from 3.0.0 to 3.0.2 159 | * [#266](https://github.com/awslabs/amazon-kinesis-client-python/pull/266) Upgrade netty.version from 4.1.108.Final to 4.1.118.Final 160 | * [#265](https://github.com/awslabs/amazon-kinesis-client-python/pull/265) Upgrade logback.version from 1.3.14 to 1.5.16 161 | 162 | ### Release 3.0.1 (November 6, 2024) 163 | * New lease assignment / load balancing algorithm 164 | * KCL 3.x introduces a new lease assignment and load balancing algorithm. It assigns leases among workers based on worker utilization metrics and throughput on each lease, replacing the previous lease count-based lease assignment algorithm. 165 | * When KCL detects higher variance in CPU utilization among workers, it proactively reassigns leases from over-utilized workers to under-utilized workers for even load balancing. This ensures even CPU utilization across workers and removes the need to over-provision the stream processing compute hosts. 166 | * Optimized DynamoDB RCU usage 167 | * KCL 3.x optimizes DynamoDB read capacity unit (RCU) usage on the lease table by implementing a global secondary index with leaseOwner as the partition key. This index mirrors the leaseKey attribute from the base lease table, allowing workers to efficiently discover their assigned leases by querying the index instead of scanning the entire table. 168 | * This approach significantly reduces read operations compared to earlier KCL versions, where workers performed full table scans, resulting in higher RCU consumption. 169 | * Graceful lease handoff 170 | * KCL 3.x introduces a feature called "graceful lease handoff" to minimize data reprocessing during lease reassignments. Graceful lease handoff allows the current worker to complete checkpointing of processed records before transferring the lease to another worker. 
For graceful lease handoff, you should implement checkpointing logic within the existing `shutdownRequested()` method. 171 | * This feature is enabled by default in KCL 3.x, but you can turn off this feature by adjusting the configuration property `isGracefulLeaseHandoffEnabled`. 172 | * While this approach significantly reduces the probability of data reprocessing during lease transfers, it doesn't completely eliminate the possibility. To maintain data integrity and consistency, it's crucial to design your downstream consumer applications to be idempotent. This ensures that the application can handle potential duplicate record processing without adverse effects. 173 | * New DynamoDB metadata management artifacts 174 | * KCL 3.x introduces two new DynamoDB tables for improved lease management: 175 | * Worker metrics table: Records CPU utilization metrics from each worker. KCL uses these metrics for optimal lease assignments, balancing resource utilization across workers. If CPU utilization metric is not available, KCL assigns leases to balance the total sum of shard throughput per worker instead. 176 | * Coordinator state table: Stores internal state information for workers. Used to coordinate in-place migration from KCL 2.x to KCL 3.x and leader election among workers. 177 | * Follow this [documentation](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html#kcl-migration-from-2-3-IAM-permissions) to add required IAM permissions for your KCL application. 178 | * Other improvements and changes 179 | * Dependency on the AWS SDK for Java 1.x has been fully removed. 180 | * The Glue Schema Registry integration functionality no longer depends on AWS SDK for Java 1.x. Previously, it required this as a transient dependency. 181 | * Multilangdaemon has been upgraded to use AWS SDK for Java 2.x. It no longer depends on AWS SDK for Java 1.x. 182 | * `idleTimeBetweenReadsInMillis` (PollingConfig) now has a minimum default value of 200. 
* This polling configuration property determines the [publisher's](https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java) wait time between GetRecords calls in both success and failure cases. Previously, setting this value below 200 caused unnecessary throttling. This is because Amazon Kinesis Data Streams supports up to five read transactions per second per shard for shared-throughput consumers.
The following operations are newly added in KCL 3.x: 192 | * `LeaseAssignmentManager` 193 | * `WorkerMetricStatsReporter` 194 | * `LeaseDiscovery` 195 | ### Release 3.0.0 (November 6, 2024) 196 | 197 | **We found an issue with the release 3.0.0 regarding the build failure. Please use the release 3.0.1 to use KCL 3.0.** 198 | 199 | --- 200 | For **2.x** and **1.x** release notes, please see [v2.x/README.md](https://github.com/awslabs/amazon-kinesis-client-python/blob/v2.x/README.md#release-notes) 201 | 202 | [amazon-kinesis-shard]: http://docs.aws.amazon.com/kinesis/latest/dev/key-concepts.html 203 | [amazon-kinesis-docs]: http://aws.amazon.com/documentation/kinesis/ 204 | [amazon-kcl]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html 205 | [multi-lang-daemon]: https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java 206 | [kinesis]: http://aws.amazon.com/kinesis 207 | [amazon-kinesis-ruby-github]: https://github.com/awslabs/amazon-kinesis-client-ruby 208 | [kinesis-github]: https://github.com/awslabs/amazon-kinesis-client 209 | [boto]: http://boto.readthedocs.org/en/latest/ 210 | [DefaultCredentialsProvider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 211 | [kinesis-forum]: http://developer.amazonwebservices.com/connect/forum.jspa?forumID=169 212 | 213 | ## License 214 | 215 | This library is licensed under the Apache 2.0 License. 216 | -------------------------------------------------------------------------------- /amazon_kclpy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """ 5 | This package provides an interface to the KCL MultiLangDaemon. 
class CheckpointError(Exception):
    """
    Wraps the name of an exception that the MultiLangDaemon reported through the input file
    in response to a checkpoint action.
    """

    def __init__(self, value):
        """
        :type value: str
        :param value: The name of the exception that was received while checkpointing. For more details see
            https://github.com/awslabs/amazon-kinesis-client/tree/master/src/main/java/com/amazonaws/services/kinesis/clientlibrary/exceptions
            Any of those exceptions' names could be returned by the MultiLangDaemon as a response to a
            checkpoint action.
        """
        self.value = value

    def __str__(self):
        # Mirror the stored exception name, quoted via repr, for readable error output.
        return repr(self.value)
class MalformedAction(Exception):
    """
    Raised when an action given by the MultiLangDaemon doesn't have all the appropriate attributes.
    """
    pass


# Maps each supported "action" value to the message class that deserializes it.
_serializers = {
    "initialize": messages.InitializeInput,
    "processRecords": messages.ProcessRecordsInput,
    "shutdown": messages.ShutdownInput,
    "checkpoint": messages.CheckpointInput,
    "record": messages.Record,
    "shutdownRequested": messages.ShutdownRequestedInput,
    "leaseLost": messages.LeaseLostInput,
    "shardEnded": messages.ShardEndedInput,
}


def _format_serializer_names():
    """Return the supported action names as a quoted, comma-separated string for error messages."""
    return ", ".join('"{k}"'.format(k=k) for k in _serializers.keys())


def message_decode(json_dict):
    """
    Translates incoming JSON commands into MessageDispatch classes

    :param dict json_dict: a dictionary of JSON data

    :return: an object that can be used to dispatch the received JSON command
    :rtype: amazon_kclpy.messages.MessageDispatcher

    :raises MalformedAction: if the JSON object is missing action, or an appropriate serializer for that
        action can't be found
    """
    try:
        action = json_dict["action"]
    except KeyError as key_error:
        # Chain the original KeyError (PEP 3134) so the root cause stays visible in tracebacks.
        raise MalformedAction("Action {json_dict} was expected to have key {key!s}".format(json_dict=json_dict,
                                                                                           key=key_error)) from key_error
    try:
        serializer = _serializers[action]
    except KeyError as key_error:
        raise MalformedAction("Received an action which couldn't be understood. Action was '{action}' -- Allowed {keys}"
                              .format(action=action, keys=_format_serializer_names())) from key_error

    return serializer(json_dict)
class Checkpointer(object):
    """
    Issues checkpoint requests to the MultiLangDaemon. A checkpoint records how far into a shard
    processing has successfully progressed; if this processor fails or loses its lease to the
    shard, the next processor (started by this MultiLangDaemon or another instance) resumes from
    the most recent checkpoint.
    """

    def __init__(self, io_handler):
        """
        :type io_handler: amazon_kclpy.kcl._IOHandler
        :param io_handler: Handles writing checkpoint requests to, and reading checkpoint
            responses from, the MultiLangDaemon.
        """
        self.io_handler = io_handler

    def _get_action(self):
        """
        Reads and decodes the next JSON message from STDIN.

        :rtype: object
        :return: Either a child of MessageDispatcher, or a housekeeping object type
        """
        raw_line = self.io_handler.read_line()
        return self.io_handler.load_action(raw_line)

    def checkpoint(self, sequence_number=None, sub_sequence_number=None):
        """
        Checkpoints at the given position, or at the end of the most recently delivered list of
        records when no sequence number is supplied.

        :param str or None sequence_number: The sequence number to checkpoint at, or None to
            checkpoint at the farthest record
        :param int or None sub_sequence_number: The sub-sequence number to checkpoint at, or None
            to checkpoint at the farthest sub_sequence_number

        :raises CheckpointError: when the daemon reports a checkpoint failure, or when an
            unexpected reply type is received
        """
        self.io_handler.write_action({
            "action": "checkpoint",
            "sequenceNumber": sequence_number,
            "subSequenceNumber": sub_sequence_number,
        })
        reply = self._get_action()
        if not isinstance(reply, messages.CheckpointInput):
            # Unexpected reply: the KCL (or KCLpy) is in an invalid state. Per the KCL
            # documentation this exception is NOT retryable, so client code should exit.
            raise CheckpointError('InvalidStateException')
        if reply.error is not None:
            raise CheckpointError(reply.error)
    @abc.abstractmethod
    def shutdown(self, checkpointer, reason):
        """
        Called by a KCLProcess instance to indicate that this record processor should shutdown. After this is called,
        there will be no more calls to any other methods of this record processor.

        :type checkpointer: amazon_kclpy.kcl.Checkpointer
        :param checkpointer: A checkpointer which accepts a sequence number or no parameters.

        :type reason: str
        :param reason: The reason this record processor is being shutdown, either TERMINATE or ZOMBIE. If ZOMBIE,
            clients should not checkpoint because there is possibly another record processor which has acquired the
            lease for this shard. If TERMINATE then checkpointer.checkpoint() should be called to checkpoint at the
            end of the shard so that this processor will be shutdown and new processor(s) will be created for the
            child(ren) of this shard.
        """
        raise NotImplementedError
class KCLProcess(object):
    """
    Drives the record-processing loop: reads action messages from the MultiLangDaemon,
    dispatches them to the record processor, and reports completion status back.
    """

    def __init__(self, record_processor, input_file=sys.stdin, output_file=sys.stdout, error_file=sys.stderr):
        """
        :type record_processor: RecordProcessorBase or amazon_kclpy.v2.processor.RecordProcessorBase
        :param record_processor: A record processor to use for processing a shard.

        :param file input_file: A file to read action messages from. Typically STDIN.

        :param file output_file: A file to write action messages to. Typically STDOUT.

        :param file error_file: A file to write error messages to. Typically STDERR.
        """
        self.io_handler = _IOHandler(input_file, output_file, error_file)
        self.checkpointer = Checkpointer(self.io_handler)
        # Older processor versions are wrapped in delegates so the dispatch loop only ever
        # talks to the v3 interface.
        if record_processor.version == 2:
            self.processor = v3processor.V2toV3Processor(record_processor)
        elif record_processor.version == 1:
            self.processor = v3processor.V2toV3Processor(v2processor.V1toV2Processor(record_processor))
        else:
            self.processor = record_processor

    def _perform_action(self, action):
        """
        Routes an input action to the appropriate method of the record processor.

        :type action:
        :param MessageDispatcher action: A derivative of MessageDispatcher that will handle the provided input

        :raises MalformedAction: Raised if the action is missing attributes.
        """
        try:
            action.dispatch(self.checkpointer, self.processor)
        except SystemExit:
            # A deliberate exit must not be swallowed below.
            raise
        except Exception as ex:
            # Client code may raise anything, and there is no way to recover if we let it
            # propagate. Like the KCL, swallow it but surface the stack trace on STDERR so the
            # failure is visible and debuggable.
            self.io_handler.error_file.write("Caught exception from action dispatch: {ex}".format(ex=str(ex)))
            traceback.print_exc(file=self.io_handler.error_file)
            self.io_handler.error_file.flush()

    def _report_done(self, response_for=None):
        """
        Writes a status message to the output file.

        :param response_for: Required parameter; the action that this status message is confirming completed.
        """
        self.io_handler.write_action({"action": "status", "responseFor": response_for})

    def _handle_a_line(self, line):
        """
        Parses one JSON action line, invokes the record processor, then acknowledges the action
        back to the MultiLangDaemon.

        :type line: str
        :param line: A line read from STDIN, expected to be a JSON encoded dictionary describing
            what action to take.
        """
        decoded = self.io_handler.load_action(line)
        self._perform_action(decoded)
        self._report_done(decoded.action)

    def run(self):
        """
        Starts this KCL processor's main loop.

        No attempt is made to stop errors from propagating and exiting the program, since there
        is nothing KCLpy can do to recover from e.g. an I/O error or a JSON decoding failure
        (the MultiLangDaemon should never write a non-JSON string to this process).
        """
        while True:
            line = self.io_handler.read_line()
            if not line:
                break
            self._handle_a_line(line)
class MessageDispatcher(object):
    """
    Base class for routing incoming actions to record processors. Each subclass knows which
    record-processor method it must invoke, and generally carries the action's parameters
    inside itself.
    """
    # NOTE(review): `__metaclass__` is the Python 2 spelling and has no effect under Python 3,
    # so this class is not actually abstract there; kept as-is to avoid a behavior change.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def dispatch(self, checkpointer, record_processor):
        """
        Dispatches the current message to the record processor.

        :param amazon_kclpy.kcl.Checkpointer checkpointer: The checkpointer that can be used by the record
            process to record its progress

        :param amazon_kclpy.v3.processor.RecordProcessorBase record_processor: The record processor that will
            receive, and process the message.

        :return: Nothing
        """
        raise NotImplementedError

    @abc.abstractmethod
    def action(self):
        """
        Retrieves the name of the action that caused the creation of this dispatcher.

        :return str: The name of the action e.g. initialize, or processRecords
        """
        raise NotImplementedError
class ProcessRecordsInput(MessageDispatcher):
    """
    Carries a batch of records, plus associated metadata, for calls to process_records.
    """

    def __init__(self, json_dict):
        self._records = json_dict["records"]
        self._millis_behind_latest = json_dict["millisBehindLatest"]
        self._action = json_dict['action']
        # Populated with the active checkpointer when this message is dispatched.
        self._checkpointer = None

    @property
    def records(self):
        """
        The records that are part of this request.

        :return: records that are part of this request
        :rtype: list[amazon_kclpy.messages.Record]
        """
        return self._records

    @property
    def millis_behind_latest(self):
        """
        Approximately how many milliseconds behind the tip of the stream this batch of records is.

        :return: the number of milliseconds
        :rtype: int
        """
        return self._millis_behind_latest

    @property
    def checkpointer(self):
        """
        The checkpointer that will confirm all records up to, and including, this batch of records.

        :return: the checkpointer for this request
        :rtype: amazon_kclpy.kcl.Checkpointer
        """
        return self._checkpointer

    @property
    def action(self):
        """
        The action that spawned this message

        :return: the original action value
        :rtype: str
        """
        return self._action

    def dispatch(self, checkpointer, record_processor):
        """Attach the checkpointer, then hand this batch to the record processor."""
        self._checkpointer = checkpointer
        record_processor.process_records(self)
174 | """ 175 | 176 | @staticmethod 177 | def zombie(): 178 | return ShutdownInput("ZOMBIE", LeaseLostCheckpointer()) 179 | 180 | @staticmethod 181 | def terminate(checkpointer): 182 | return ShutdownInput("TERMINATE", checkpointer) 183 | 184 | def __init__(self, reason, checkpointer): 185 | self._action = 'shutdown' 186 | self._reason = reason 187 | self._checkpointer = checkpointer 188 | 189 | @property 190 | def reason(self): 191 | """ 192 | The reason that this record processor is being shutdown, will be one of 193 | 194 | * TERMINATE 195 | * ZOMBIE 196 | 197 | :return: the reason for the shutdown 198 | :rtype: str 199 | """ 200 | return self._reason 201 | 202 | @property 203 | def checkpointer(self): 204 | """ 205 | The checkpointer that can be used to checkpoint this shutdown. 206 | 207 | :return: the checkpointer 208 | :rtype: amazon_kclpy.kcl.Checkpointer 209 | """ 210 | return self._checkpointer 211 | 212 | @property 213 | def action(self): 214 | """ 215 | The action that spawned this message 216 | 217 | :return: the original action value 218 | :rtype: str 219 | """ 220 | return self._action 221 | 222 | 223 | class LeaseLostInput(MessageDispatcher): 224 | """ 225 | Message, and input that is sent when the client has lost the lease for this shard. 
class ShardEndedInput(MessageDispatcher):
    """
    Message and input delivered to the record processor when the client has reached the end of
    the shard.
    """

    def __init__(self, json_dict):
        # The checkpointer is attached at dispatch time.
        self._checkpointer = None
        self._action = json_dict['action']

    @property
    def action(self):
        """
        The action that caused the creation of this message

        :return: the action name
        :rtype: str
        """
        return self._action

    @property
    def checkpointer(self):
        """
        The checkpointer that the record processor will use to checkpoint the end of the shard
        :return: the checkpointer
        :rtype: Checkpointer
        """
        return self._checkpointer

    def dispatch(self, checkpointer, record_processor):
        """
        Attaches the checkpointer and notifies the record processor that the shard has ended.

        :param checkpointer: the checkpointer to be used to officially end processing on the shard
        :param record_processor: the record processor that will handle the shard end message
        """
        self._checkpointer = checkpointer
        record_processor.shard_ended(self)
294 | """ 295 | def __init__(self, json_dict): 296 | self._checkpointer = None 297 | self._action = json_dict['action'] 298 | 299 | @property 300 | def checkpointer(self): 301 | """ 302 | The checkpointer that can be used to checkpoint before actual shutdown. 303 | 304 | :return: the checkpointer 305 | :rtype: amazon_kclpy.kcl.Checkpointer 306 | """ 307 | return self._checkpointer 308 | 309 | @property 310 | def action(self): 311 | """ 312 | The action that spawned this message 313 | 314 | :return: the original action value 315 | :rtype: str 316 | """ 317 | return self._action 318 | 319 | def dispatch(self, checkpointer, record_processor): 320 | self._checkpointer = checkpointer 321 | record_processor.shutdown_requested(self) 322 | 323 | 324 | class CheckpointInput(object): 325 | """ 326 | Used in preparing the response back during the checkpoint process. This shouldn't be used by record processors. 327 | """ 328 | def __init__(self, json_dict): 329 | """ 330 | Creates a new CheckpointInput object with the given sequence number, and sub-sequence number. 331 | The provided dictionary must contain: 332 | * sequenceNumber 333 | * subSequenceNumber 334 | 335 | The provided dictionary can optionally contain: 336 | * error 337 | 338 | :param dict json_dict: 339 | """ 340 | self._sequence_number = json_dict["sequenceNumber"] 341 | self._sub_sequence_number = json_dict["subSequenceNumber"] 342 | self._error = json_dict.get("error", None) 343 | 344 | @property 345 | def sequence_number(self): 346 | """ 347 | The sequence number that record processor intends to checkpoint at. Can be None if the default 348 | checkpoint behavior is desired. 349 | 350 | :return: the sequence number 351 | :rtype: str or None 352 | """ 353 | return self._sequence_number 354 | 355 | @property 356 | def sub_sequence_number(self): 357 | """ 358 | The sub-sequence number that the record processor intends to checkpoint at. Can be None if 359 | the default checkpoint behavior is desired. 
360 | 361 | :return: the sub-sequence number 362 | :rtype: int or None 363 | """ 364 | return self._sub_sequence_number 365 | 366 | @property 367 | def error(self): 368 | """ 369 | The error message that may have resulted from checkpointing. This will be None if no error occurred. 370 | 371 | :return: the error message 372 | :rtype: str or None 373 | """ 374 | return self._error 375 | 376 | 377 | class Record(object): 378 | """ 379 | Represents a single record as returned by Kinesis, or Disaggregated from the Kinesis Producer Library 380 | """ 381 | def __init__(self, json_dict): 382 | """ 383 | Creates a new Record object that represent a single record in Kinesis. Construction for the provided 384 | dictionary requires that the following fields are present: 385 | * sequenceNumber 386 | * subSequenceNumber 387 | * approximateArrivalTimestamp 388 | * partitionKey 389 | * data 390 | 391 | :param dict json_dict: 392 | """ 393 | self._sequence_number = json_dict["sequenceNumber"] 394 | self._sub_sequence_number = json_dict["subSequenceNumber"] 395 | 396 | self._timestamp_millis = int(json_dict["approximateArrivalTimestamp"]) 397 | self._approximate_arrival_timestamp = datetime.fromtimestamp(self._timestamp_millis / 1000.0) 398 | 399 | self._partition_key = json_dict["partitionKey"] 400 | self._data = json_dict["data"] 401 | self._json_dict = json_dict 402 | 403 | @property 404 | def binary_data(self): 405 | """ 406 | The raw binary data automatically decoded from the Base 64 representation provided by 407 | 408 | :py:attr:`data` the original source of the data 409 | 410 | :return: a string representing the raw bytes from 411 | :rtype: str 412 | """ 413 | return base64.b64decode(self._data) 414 | 415 | @property 416 | def sequence_number(self): 417 | """ 418 | The sequence number for this record. This number maybe the same for other records, if they're 419 | all part of an aggregated record. 
record produced by the Amazon Kinesis Producer Library
Base class for implementing a record processor. A RecordProcessor processes a shard in a stream.
10 | Its methods will be called with this pattern: 11 | 12 | - initialize will be called once 13 | - process_records will be called zero or more times 14 | - shutdown will be called if this MultiLangDaemon instance loses the lease to this shard 15 | """ 16 | __metaclass__ = abc.ABCMeta 17 | 18 | @abc.abstractmethod 19 | def initialize(self, initialize_input): 20 | """ 21 | Called once by a KCLProcess before any calls to process_records 22 | 23 | :param amazon_kclpy.messages.InitializeInput initialize_input: Information about the 24 | initialization request for the record processor 25 | """ 26 | raise NotImplementedError 27 | 28 | @abc.abstractmethod 29 | def process_records(self, process_records_input): 30 | """ 31 | Called by a KCLProcess with a list of records to be processed and a checkpointer which accepts sequence numbers 32 | from the records to indicate where in the stream to checkpoint. 33 | 34 | :param amazon_kclpy.messages.ProcessRecordsInput process_records_input: the records, and metadata about the 35 | records. 36 | 37 | """ 38 | raise NotImplementedError 39 | 40 | @abc.abstractmethod 41 | def shutdown(self, shutdown_input): 42 | """ 43 | Called by a KCLProcess instance to indicate that this record processor should shutdown. After this is called, 44 | there will be no more calls to any other methods of this record processor. 45 | 46 | As part of the shutdown process you must inspect :attr:`amazon_kclpy.messages.ShutdownInput.reason` to 47 | determine the steps to take. 48 | 49 | * Shutdown Reason ZOMBIE: 50 | **ATTEMPTING TO CHECKPOINT ONCE A LEASE IS LOST WILL FAIL** 51 | 52 | A record processor will be shutdown if it loses its lease. In this case the KCL will terminate the 53 | record processor. It is not possible to checkpoint once a record processor has lost its lease. 
54 | * Shutdown Reason TERMINATE: 55 | **THE RECORD PROCESSOR MUST CHECKPOINT OR THE KCL WILL BE UNABLE TO PROGRESS** 56 | 57 | A record processor will be shutdown once it reaches the end of a shard. A shard ending indicates that 58 | it has been either split into multiple shards or merged with another shard. To begin processing the new 59 | shard(s) it's required that a final checkpoint occurs. 60 | 61 | 62 | :param amazon_kclpy.messages.ShutdownInput shutdown_input: Information related to the shutdown request 63 | """ 64 | raise NotImplementedError 65 | 66 | def shutdown_requested(self, shutdown_requested_input): 67 | """ 68 | Called by a KCLProcess instance to indicate that this record processor is about to be be shutdown. This gives 69 | the record processor a chance to checkpoint, before the lease is terminated. 70 | 71 | :param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: 72 | Information related to shutdown requested. 73 | """ 74 | pass 75 | 76 | version = 2 77 | 78 | 79 | class V1toV2Processor(RecordProcessorBase): 80 | """ 81 | Provides a bridge between the new v2 RecordProcessorBase, and the original RecordProcessorBase. 82 | 83 | This handles the conversion of the new input types to the older expected forms. This normally shouldn't be used 84 | directly by record processors, since it's just a compatibility layer. 85 | 86 | The delegate should be a :py:class:`amazon_kclpy.kcl.RecordProcessorBase`: 87 | 88 | """ 89 | def __init__(self, delegate): 90 | """ 91 | Creates a new V1 to V2 record processor. 
:param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: information related to the shutdown request
Called once by the KCL to allow the record processor to configure itself before starting to process records.
Provides a bridge between the new v3 RecordProcessorBase, and the v2 RecordProcessorBase.
:param amazon_kclpy.v2.processor.RecordProcessorBase delegate: the delegate where requests will be forwarded to
:param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: information related to the record processor shutdown
16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/AmazonKinesisClientLibraryforPython.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/AmazonKinesisClientLibraryforPython.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 
112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/AmazonKinesisClientLibraryforPython" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/AmazonKinesisClientLibraryforPython" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 
214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Amazon Kinesis Client Library for Python documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Oct 24 12:24:53 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('../')) 22 | 23 | import amazon_kclpy 24 | import amazon_kclpy.v2 25 | import samples.sample_kclpy_app 26 | import samples.amazon_kclpy_helper 27 | 28 | # -- General configuration ------------------------------------------------ 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | # 32 | # needs_sphinx = '1.0' 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 
37 | extensions = [ 38 | 'sphinx.ext.autodoc', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.intersphinx', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # 49 | # source_suffix = ['.rst', '.md'] 50 | source_suffix = '.rst' 51 | 52 | # The encoding of source files. 53 | # 54 | # source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = u'Amazon Kinesis Client Library for Python' 61 | copyright = u'2016, Amazon.com, Inc.' 62 | author = 'Amazon.com, Inc.' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = u'1.3.1' 70 | # The full version, including alpha/beta/rc tags. 71 | release = u'1.3.1' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | # 83 | # today = '' 84 | # 85 | # Else, today_fmt is used as the format for a strftime call. 86 | # 87 | # today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 
91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | # 97 | # default_role = None 98 | 99 | # If true, '()' will be appended to :func: etc. cross-reference text. 100 | # 101 | # add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | # 106 | # add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | # 111 | # show_authors = False 112 | 113 | # The name of the Pygments (syntax highlighting) style to use. 114 | pygments_style = 'sphinx' 115 | 116 | # A list of ignored prefixes for module index sorting. 117 | # modindex_common_prefix = [] 118 | 119 | # If true, keep warnings as "system message" paragraphs in the built documents. 120 | # keep_warnings = False 121 | 122 | # If true, `todo` and `todoList` produce output, else they produce nothing. 123 | todo_include_todos = False 124 | 125 | 126 | # -- Options for HTML output ---------------------------------------------- 127 | 128 | # The theme to use for HTML and HTML Help pages. See the documentation for 129 | # a list of builtin themes. 130 | # 131 | html_theme = 'alabaster' 132 | 133 | # Theme options are theme-specific and customize the look and feel of a theme 134 | # further. For a list of options available for each theme, see the 135 | # documentation. 136 | # 137 | # html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | # html_theme_path = [] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 
144 | # 145 | # html_title = u'Amazon Kinesis Client Library for Python v1.3.1' 146 | 147 | # A shorter title for the navigation bar. Default is the same as html_title. 148 | # 149 | # html_short_title = None 150 | 151 | # The name of an image file (relative to this directory) to place at the top 152 | # of the sidebar. 153 | # 154 | # html_logo = None 155 | 156 | # The name of an image file (relative to this directory) to use as a favicon of 157 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 158 | # pixels large. 159 | # 160 | # html_favicon = None 161 | 162 | # Add any paths that contain custom static files (such as style sheets) here, 163 | # relative to this directory. They are copied after the builtin static files, 164 | # so a file named "default.css" will overwrite the builtin "default.css". 165 | html_static_path = ['_static'] 166 | 167 | # Add any extra paths that contain custom files (such as robots.txt or 168 | # .htaccess) here, relative to this directory. These files are copied 169 | # directly to the root of the documentation. 170 | # 171 | # html_extra_path = [] 172 | 173 | # If not None, a 'Last updated on:' timestamp is inserted at every page 174 | # bottom, using the given strftime format. 175 | # The empty string is equivalent to '%b %d, %Y'. 176 | # 177 | # html_last_updated_fmt = None 178 | 179 | # If true, SmartyPants will be used to convert quotes and dashes to 180 | # typographically correct entities. 181 | # 182 | # html_use_smartypants = True 183 | 184 | # Custom sidebar templates, maps document names to template names. 185 | # 186 | # html_sidebars = {} 187 | 188 | # Additional templates that should be rendered to pages, maps page names to 189 | # template names. 190 | # 191 | # html_additional_pages = {} 192 | 193 | # If false, no module index is generated. 194 | # 195 | # html_domain_indices = True 196 | 197 | # If false, no index is generated. 
198 | # 199 | # html_use_index = True 200 | 201 | # If true, the index is split into individual pages for each letter. 202 | # 203 | # html_split_index = False 204 | 205 | # If true, links to the reST sources are added to the pages. 206 | # 207 | # html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 210 | # 211 | # html_show_sphinx = True 212 | 213 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 214 | # 215 | # html_show_copyright = True 216 | 217 | # If true, an OpenSearch description file will be output, and all pages will 218 | # contain a tag referring to it. The value of this option must be the 219 | # base URL from which the finished HTML is served. 220 | # 221 | # html_use_opensearch = '' 222 | 223 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 224 | # html_file_suffix = None 225 | 226 | # Language to be used for generating the HTML full-text search index. 227 | # Sphinx supports the following languages: 228 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 229 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 230 | # 231 | # html_search_language = 'en' 232 | 233 | # A dictionary with options for the search language support, empty by default. 234 | # 'ja' uses this config value. 235 | # 'zh' user can custom change `jieba` dictionary path. 236 | # 237 | # html_search_options = {'type': 'default'} 238 | 239 | # The name of a javascript file (relative to the configuration directory) that 240 | # implements a search results scorer. If empty, the default will be used. 241 | # 242 | # html_search_scorer = 'scorer.js' 243 | 244 | # Output file base name for HTML help builder. 245 | htmlhelp_basename = 'AmazonKinesisClientLibraryforPythondoc' 246 | 247 | # -- Options for LaTeX output --------------------------------------------- 248 | 249 | latex_elements = { 250 | # The paper size ('letterpaper' or 'a4paper'). 
251 | # 252 | # 'papersize': 'letterpaper', 253 | 254 | # The font size ('10pt', '11pt' or '12pt'). 255 | # 256 | # 'pointsize': '10pt', 257 | 258 | # Additional stuff for the LaTeX preamble. 259 | # 260 | # 'preamble': '', 261 | 262 | # Latex figure (float) alignment 263 | # 264 | # 'figure_align': 'htbp', 265 | } 266 | 267 | # Grouping the document tree into LaTeX files. List of tuples 268 | # (source start file, target name, title, 269 | # author, documentclass [howto, manual, or own class]). 270 | latex_documents = [ 271 | (master_doc, 'AmazonKinesisClientLibraryforPython.tex', u'Amazon Kinesis Client Library for Python Documentation', 272 | u'Amazon.com', 'manual'), 273 | ] 274 | 275 | # The name of an image file (relative to this directory) to place at the top of 276 | # the title page. 277 | # 278 | # latex_logo = None 279 | 280 | # For "manual" documents, if this is true, then toplevel headings are parts, 281 | # not chapters. 282 | # 283 | # latex_use_parts = False 284 | 285 | # If true, show page references after internal links. 286 | # 287 | # latex_show_pagerefs = False 288 | 289 | # If true, show URL addresses after external links. 290 | # 291 | # latex_show_urls = False 292 | 293 | # Documents to append as an appendix to all manuals. 294 | # 295 | # latex_appendices = [] 296 | 297 | # It false, will not define \strong, \code, itleref, \crossref ... but only 298 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 299 | # packages. 300 | # 301 | # latex_keep_old_macro_names = True 302 | 303 | # If false, no module index is generated. 304 | # 305 | # latex_domain_indices = True 306 | 307 | 308 | # -- Options for manual page output --------------------------------------- 309 | 310 | # One entry per manual page. List of tuples 311 | # (source start file, name, description, authors, manual section). 
312 | man_pages = [ 313 | (master_doc, 'amazonkinesisclientlibraryforpython', u'Amazon Kinesis Client Library for Python Documentation', 314 | [author], 1) 315 | ] 316 | 317 | # If true, show URL addresses after external links. 318 | # 319 | # man_show_urls = False 320 | 321 | 322 | # -- Options for Texinfo output ------------------------------------------- 323 | 324 | # Grouping the document tree into Texinfo files. List of tuples 325 | # (source start file, target name, title, author, 326 | # dir menu entry, description, category) 327 | texinfo_documents = [ 328 | (master_doc, 'AmazonKinesisClientLibraryforPython', u'Amazon Kinesis Client Library for Python Documentation', 329 | author, 'AmazonKinesisClientLibraryforPython', 'One line description of project.', 330 | 'Miscellaneous'), 331 | ] 332 | 333 | # Documents to append as an appendix to all manuals. 334 | # 335 | # texinfo_appendices = [] 336 | 337 | # If false, no module index is generated. 338 | # 339 | # texinfo_domain_indices = True 340 | 341 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 342 | # 343 | # texinfo_show_urls = 'footnote' 344 | 345 | # If true, do not generate a @detailmenu in the "Top" node's menu. 346 | # 347 | # texinfo_no_detailmenu = False 348 | 349 | 350 | # Example configuration for intersphinx: refer to the Python standard library. 351 | intersphinx_mapping = {'https://docs.python.org/': None} 352 | -------------------------------------------------------------------------------- /docs/guide/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _guide_quickstart: 2 | 3 | Getting Started Using the Amazon Kinesis Client for Python 4 | ========================================================== 5 | This assumes you're already publishing data to Kinesis. If you're not publishing see :doc:`sample`. In 6 | addition you will need to ensure that you have a Java Runtime Environment (JRE) installed. 
The JRE must be version 7 | 1.7 or greater. 8 | 9 | Prerequisites 10 | ------------- 11 | There are a few prerequisites for using the Amazon Kinesis Client for Python. 12 | 13 | Publishing Data to Kinesis 14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | You must have an AWS Account, and be publishing some data to Kinesis that you intend to process. 17 | If you're not publishing you can use the sample publisher described in the :doc:`sample`. 18 | 19 | Install a Java Runtime Environment 20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | You must have a Java Runtime Environment (JRE) version 1.7 or greater installed. It's recommended you use the newest 23 | version of the JRE, which is currently 1.8 24 | 25 | To install the 1.8 version of the JRE on Amazon Linux you can run the following command:: 26 | 27 | sudo yum install java-1.8.0-openjdk.x86_64 28 | 29 | For other operating systems please refer to your system's documentation. 30 | 31 | It is also possible to download, and install a JRE from Oracle `Java SE Runtime Environment 8 `_ 32 | 33 | 34 | Installing the Amazon Kinesis Client for Python 35 | ----------------------------------------------- 36 | The first thing to do is install the Amazon Kinesis Client for Python (KCL). You can install the KCL from pip using:: 37 | 38 | pip install amazon_kclpy 39 | 40 | This should install the KCL, and automatically download the necessary jars. 41 | 42 | 43 | Create A Record Processor 44 | ------------------------- 45 | The record processor is how the KCL will communicate with your application. Create a file with a class that extends 46 | :class:`amazon_kclpy.v2.processor.RecordProcessorBase`. See the :doc:`sample` for an example of a record processor. 47 | 48 | Create A Properties File 49 | ------------------------ 50 | The KCL uses a Java properties file to configure itself. The Java process uses this file to configure the KCL, and 51 | determine which python script to run for record processing. 
See the 52 | :download:`sample.properties <../../samples/sample.properties>` for documentation, and required values. 53 | 54 | Create the Startup Command 55 | -------------------------- 56 | The KCL includes a script to help generate the command line to start the KCL application. TO create the startup 57 | command for your application use:: 58 | 59 | amazon_kclpy_helper.py --print_command \ 60 | --java --properties 61 | 62 | .. automodule:: samples.amazon_kclpy_helper 63 | :special-members: 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/guide/record_processor_v1.rst: -------------------------------------------------------------------------------- 1 | .. _guide_record_processor_v1 2 | 3 | Kinesis Client Record Process Version 1 4 | ======================================= 5 | The record processor is the central pillar of the Kinesis Client. This version of the record processor doesn't accept 6 | as much information, and so it's recommended that you don't use this version anymore. 7 | 8 | Record Processor API 9 | -------------------- 10 | 11 | .. autoclass:: amazon_kclpy.kcl.RecordProcessorBase 12 | :members: 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/guide/record_processor_v2.rst: -------------------------------------------------------------------------------- 1 | .. _guide_record_processor_v2 2 | 3 | Kinesis Client Record Process Version 2 4 | ======================================= 5 | The record processor is the central pillar of the Kinesis Client. This version of the record processor uses more 6 | complex objects to represent the inputs to the methods of the record processor. 7 | 8 | Record Processor API 9 | -------------------- 10 | 11 | .. autoclass:: amazon_kclpy.v2.processor.RecordProcessorBase 12 | :members: 13 | 14 | Record Processor Messages 15 | ------------------------- 16 | 17 | .. 
automodule:: amazon_kclpy.messages 18 | :members: 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/guide/sample.rst: -------------------------------------------------------------------------------- 1 | .. _guide_sample: 2 | 3 | Running the Sample Application 4 | ============================== 5 | The sample application provided with this module shows the basics of using the Amazon Kinesis Client for Python. 6 | 7 | Before Getting Started 8 | ---------------------- 9 | Before running the samples, you'll want to make sure that your environment is 10 | configured to allow the samples to use your 11 | `AWS Security Credentials `_. 12 | 13 | By default the samples use the `DefaultAWSCredentialsProviderChain `_ 14 | so you'll want to make your credentials available to one of the credentials providers in that 15 | provider chain. There are several ways to do this such as providing a ~/.aws/credentials file, 16 | or if you're running on EC2, you can associate an IAM role with your instance with appropriate 17 | access. 18 | 19 | For questions regarding Amazon Kinesis Service and the client libraries please visit the 20 | `Amazon Kinesis Forums `_ 21 | 22 | Running the Sample 23 | ------------------ 24 | 25 | Using the ``amazon_kclpy`` package requires the MultiLangDaemon which is provided 26 | by the `Amazon KCL for Java `. These jars will be downloaded automatically 27 | by the **install** command, but you can explicitly download them with the ``download_jars`` command. 28 | From the root of this repo, run:: 29 | 30 | python setup.py download_jars 31 | python setup.py install 32 | 33 | Now the ``amazon_kclpy`` and `boto < http://boto.readthedocs.org/en/latest/>`_ (used by the sample putter script) and required 34 | jars should be installed in your environment. 
To start the sample putter, run:: 35 | 36 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird -w lobster 37 | 38 | This will create an Amazon Kinesis stream called words and put the words 39 | specified by the -w options into the stream once each. Use -p SECONDS to 40 | indicate a period over which to repeatedly put these words. 41 | 42 | Now we would like to run an Amazon KCL for Python application that reads records 43 | from the stream we just created, but first take a look in the samples directory, 44 | you'll find a file called sample.properties, cat that file:: 45 | 46 | cat samples/sample.properties 47 | 48 | You'll see several properties defined there. ``executableName`` indicates the 49 | executable for the MultiLangDaemon to run, ``streamName`` is the Kinesis stream 50 | to read from, ``appName`` is the Amazon KCL application name to use which will be the 51 | name of an Amazon DynamoDB table that gets created by the Amazon KCL, 52 | ``initialPositionInStream`` tells the Amazon KCL how to start reading from shards upon 53 | a fresh startup. To run the sample application you can use a helper script 54 | included in this package. Note you must provide a path to java (version 1.7 55 | or greater) to run the Amazon KCL:: 56 | 57 | amazon_kclpy_helper.py --print_command \ 58 | --java --properties samples/sample.properties 59 | 60 | This will print the command needed to run the sample which you can copy paste, 61 | or surround the command with back ticks to run it:: 62 | 63 | `amazon_kclpy_helper.py --print_command \ 64 | --java --properties samples/sample.properties` 65 | 66 | Alternatively, if you don't have the source on hand, but want to run the sample 67 | app you can use the ``--sample`` argument to indicate you'd like to get the 68 | sample.properties file from the installation location:: 69 | 70 | amazon_kclpy_helper.py --print_command --java --sample 71 | 72 | The Sample Code 73 | --------------- 74 | .. 
autoclass:: samples.sample_kclpy_app.RecordProcessor 75 | :members: 76 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Amazon Kinesis Client Library for Python documentation master file, created by 2 | sphinx-quickstart on Mon Oct 24 12:24:53 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Amazon Kinesis Client Library for Python 7 | ======================================== 8 | This package provides an interface to the Amazon Kinesis Client Library (KCL) MultiLangDaemon, 9 | which is part of the `Amazon KCL for Java `_. 10 | Developers can use the `Amazon KCL `_ 11 | to build distributed applications that process streaming data reliably at scale. The 12 | `Amazon KCL `_ 13 | takes care of many of the complex tasks associated with distributed computing, such as load-balancing 14 | across multiple instances, responding to instance failures, checkpointing processed records, 15 | and reacting to changes in stream volume. 16 | This interface manages the interaction with the MultiLangDaemon so that developers can focus on 17 | implementing their record processor executable. A record processor executable 18 | typically looks something like: 19 | 20 | 21 | Guides 22 | ------ 23 | 24 | .. 
toctree:: 25 | :maxdepth: 2 26 | 27 | guide/quickstart 28 | guide/sample 29 | guide/record_processor_v1 30 | guide/record_processor_v2 31 | 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`modindex` 38 | * :ref:`search` 39 | 40 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. 
linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo. dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\AmazonKinesisClientLibraryforPython.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\AmazonKinesisClientLibraryforPython.ghc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. 
159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 
227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 
278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 2.25.64 6 | 3.0.2 7 | 4.2.1.Final 8 | 2.0.6 9 | 2.13.5 10 | 1.3.15 11 | 12 | 13 | 14 | software.amazon.kinesis 15 | amazon-kinesis-client-multilang 16 | ${kcl.version} 17 | 18 | 19 | software.amazon.kinesis 20 | amazon-kinesis-client 21 | ${kcl.version} 22 | 23 | 24 | software.amazon.awssdk 25 | kinesis 26 | ${awssdk.version} 27 | 28 | 29 | software.amazon.awssdk 30 | dynamodb 31 | ${awssdk.version} 32 | 33 | 34 | 35 | software.amazon.awssdk 36 | dynamodb-enhanced 37 | ${awssdk.version} 38 | 39 | 40 | 41 | com.amazonaws 42 | dynamodb-lock-client 43 | 1.3.0 44 | 45 | 46 | software.amazon.awssdk 47 | cloudwatch 48 | ${awssdk.version} 49 | 50 | 51 | software.amazon.awssdk 52 | netty-nio-client 53 | ${awssdk.version} 54 | 55 | 56 | software.amazon.awssdk 57 | metrics-spi 58 | ${awssdk.version} 59 | 60 | 61 | software.amazon.awssdk 62 | sts 63 | ${awssdk.version} 64 | 65 | 66 | software.amazon.awssdk 67 | protocol-core 68 | ${awssdk.version} 69 | 70 | 71 | software.amazon.awssdk 72 | aws-query-protocol 73 | ${awssdk.version} 74 | 75 | 76 | software.amazon.awssdk 77 | aws-cbor-protocol 78 | ${awssdk.version} 79 | 80 | 81 | software.amazon.awssdk 82 | aws-json-protocol 83 | ${awssdk.version} 84 | 85 | 86 | software.amazon.awssdk 87 | json-utils 88 | ${awssdk.version} 89 | 90 | 91 | software.amazon.awssdk 92 | third-party-jackson-core 93 | ${awssdk.version} 94 | 95 | 96 | software.amazon.awssdk 97 | third-party-jackson-dataformat-cbor 98 | ${awssdk.version} 99 | 100 | 101 | software.amazon.awssdk 102 | profiles 103 | ${awssdk.version} 104 | 105 | 106 | software.amazon.awssdk 107 | sdk-core 108 | ${awssdk.version} 109 | 110 | 111 | software.amazon.awssdk 112 | aws-core 113 | ${awssdk.version} 114 | 115 | 116 | software.amazon.awssdk 
117 | endpoints-spi 118 | ${awssdk.version} 119 | 120 | 121 | software.amazon.awssdk 122 | auth 123 | ${awssdk.version} 124 | 125 | 126 | software.amazon.awssdk 127 | http-client-spi 128 | ${awssdk.version} 129 | 130 | 131 | software.amazon.awssdk 132 | regions 133 | ${awssdk.version} 134 | 135 | 136 | software.amazon.awssdk 137 | annotations 138 | ${awssdk.version} 139 | 140 | 141 | software.amazon.awssdk 142 | utils 143 | ${awssdk.version} 144 | 145 | 146 | software.amazon.awssdk 147 | apache-client 148 | ${awssdk.version} 149 | 150 | 151 | software.amazon.awssdk 152 | arns 153 | ${awssdk.version} 154 | 155 | 156 | software.amazon.awssdk 157 | http-auth-spi 158 | ${awssdk.version} 159 | 160 | 161 | software.amazon.awssdk 162 | http-auth 163 | ${awssdk.version} 164 | 165 | 166 | software.amazon.awssdk 167 | http-auth-aws 168 | ${awssdk.version} 169 | 170 | 171 | software.amazon.awssdk 172 | checksums-spi 173 | ${awssdk.version} 174 | 175 | 176 | software.amazon.awssdk 177 | checksums 178 | ${awssdk.version} 179 | 180 | 181 | software.amazon.awssdk 182 | identity-spi 183 | ${awssdk.version} 184 | 185 | 186 | io.netty 187 | netty-codec-http 188 | ${netty.version} 189 | 190 | 191 | io.netty 192 | netty-codec-http2 193 | ${netty.version} 194 | 195 | 196 | io.netty 197 | netty-codec 198 | ${netty.version} 199 | 200 | 201 | io.netty 202 | netty-transport 203 | ${netty.version} 204 | 205 | 206 | io.netty 207 | netty-resolver 208 | ${netty.version} 209 | 210 | 211 | io.netty 212 | netty-common 213 | ${netty.version} 214 | 215 | 216 | io.netty 217 | netty-buffer 218 | ${netty.version} 219 | 220 | 221 | io.netty 222 | netty-handler 223 | ${netty.version} 224 | 225 | 226 | io.netty 227 | netty-transport-native-epoll 228 | ${netty.version} 229 | 230 | 231 | io.netty 232 | netty-transport-native-unix-common 233 | ${netty.version} 234 | 235 | 236 | com.typesafe.netty 237 | netty-reactive-streams-http 238 | ${netty-reactive.version} 239 | 240 | 241 | com.typesafe.netty 242 | 
netty-reactive-streams 243 | ${netty-reactive.version} 244 | 245 | 246 | org.reactivestreams 247 | reactive-streams 248 | 1.0.3 249 | 250 | 251 | com.google.guava 252 | guava 253 | 32.1.1-jre 254 | 255 | 256 | com.google.code.findbugs 257 | jsr305 258 | 3.0.2 259 | 260 | 261 | org.checkerframework 262 | checker-qual 263 | 3.49.4 264 | 265 | 266 | com.google.errorprone 267 | error_prone_annotations 268 | 2.7.1 269 | 270 | 271 | com.google.j2objc 272 | j2objc-annotations 273 | 1.3 274 | 275 | 276 | org.codehaus.mojo 277 | animal-sniffer-annotations 278 | 1.20 279 | 280 | 281 | com.google.protobuf 282 | protobuf-java 283 | 4.27.5 284 | 285 | 286 | org.apache.commons 287 | commons-lang3 288 | 3.14.0 289 | 290 | 291 | org.slf4j 292 | slf4j-api 293 | 2.0.13 294 | 295 | 296 | io.reactivex.rxjava3 297 | rxjava 298 | 3.1.8 299 | 300 | 301 | com.fasterxml.jackson.dataformat 302 | jackson-dataformat-cbor 303 | ${fasterxml-jackson.version} 304 | 305 | 306 | com.fasterxml.jackson.core 307 | jackson-core 308 | ${fasterxml-jackson.version} 309 | 310 | 311 | com.fasterxml.jackson.core 312 | jackson-databind 313 | ${fasterxml-jackson.version} 314 | 315 | 316 | com.fasterxml.jackson.core 317 | jackson-annotations 318 | ${fasterxml-jackson.version} 319 | 320 | 321 | software.amazon 322 | flow 323 | 1.7 324 | 325 | 326 | org.apache.httpcomponents 327 | httpclient 328 | 4.5.13 329 | 330 | 331 | commons-codec 332 | commons-codec 333 | 1.15 334 | 335 | 336 | org.apache.httpcomponents 337 | httpcore 338 | 4.4.15 339 | 340 | 341 | com.amazon.ion 342 | ion-java 343 | 1.11.4 344 | 345 | 346 | software.amazon.glue 347 | schema-registry-serde 348 | 1.1.19 349 | 350 | 351 | com.amazonaws 352 | aws-java-sdk-sts 353 | 354 | 355 | 356 | 357 | joda-time 358 | joda-time 359 | 2.10.13 360 | 361 | 362 | ch.qos.logback 363 | logback-classic 364 | ${logback.version} 365 | 366 | 367 | ch.qos.logback 368 | logback-core 369 | ${logback.version} 370 | 371 | 372 | com.beust 373 | jcommander 374 | 1.82 375 | 
376 | 377 | commons-io 378 | commons-io 379 | 2.16.1 380 | 381 | 382 | commons-logging 383 | commons-logging 384 | 1.1.3 385 | 386 | 387 | org.apache.commons 388 | commons-collections4 389 | 4.4 390 | 391 | 392 | commons-beanutils 393 | commons-beanutils 394 | 1.11.0 395 | 396 | 397 | commons-collections 398 | commons-collections 399 | 3.2.2 400 | 401 | 402 | 403 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | argparse ; python_version <= "3.1" 3 | -------------------------------------------------------------------------------- /samples/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | 5 | 6 | BEFORE YOU GET STARTED 7 | ====================== 8 | 9 | Before running the samples, you'll want to make sure that your environment is 10 | configured to allow the samples to use your AWS credentials. To familiarize 11 | yourself with AWS Credentials read this guide: 12 | 13 | http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html 14 | 15 | For the MultiLangDaemon and boto libs you'll want to make your credentials 16 | available to one of the credentials providers in the default credential 17 | providers chain such as providing a ~/.aws/credentials file 18 | 19 | http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html 20 | 21 | RUNNING THE SAMPLE 22 | ================== 23 | 24 | Navigate to the amazon_kclpy directory and install the package. Using the amazon_kclpy 25 | package requires the MultiLangDaemon which is provided by the java KCL. To get 26 | the necessary jars to this directory before installing, you'll want to run the 27 | "download_jars" command before running "install". 
If you just want the python 28 | KCL and plan to retrieve the necessary jars yourself, you can just do "install" 29 | 30 | python setup.py download_jars 31 | python setup.py install 32 | 33 | Now the amazon_kclpy and boto and required jars should be installed in your 34 | environment. To start the sample putter, run: 35 | 36 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird 37 | 38 | This will create a Kinesis stream called words and put the words specified by 39 | the -w options into the stream once each. Use -p SECONDS to indicate a period 40 | over which to repeatedly put these words. 41 | 42 | Now we would like to run a python KCL application that reads records from 43 | the stream we just created, but first take a look in the samples directory, 44 | you'll find a file called sample.properties, cat that file: 45 | 46 | cat samples/sample.properties 47 | 48 | You'll see several properties defined there. "executableName" indicates the 49 | executable for the MultiLangDaemon to run, "streamName" is the Kinesis stream 50 | to read from, "appName" is the KCL application name to use which will be the 51 | name of a DynamoDB table that gets created by the KCL, "initialPositionInStream" 52 | tells the KCL how to start reading from shards upon a fresh startup. To run the 53 | sample application you can use a helper script included in the package. 54 | 55 | amazon_kclpy_helper.py --print_command \ 56 | --java --properties samples/sample.properties 57 | 58 | This will print the command needed to run the sample which you can copy paste, 59 | or surround the command with back ticks, e.g. 
60 | 61 | `amazon_kclpy_helper.py --print_command \ 62 | --java --properties samples/sample.properties` 63 | ''' 64 | -------------------------------------------------------------------------------- /samples/amazon_kclpy_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | """ 5 | This script provides two utility functions: 6 | 7 | ``--print_classpath`` 8 | which prints a java class path. It optionally takes --properties 9 | and any number of --path options. It will generate a java class path which will include 10 | the properties file and paths and the location of the KCL jars based on the location of 11 | the amazon_kclpy.kcl module. 12 | 13 | ``--print_command`` 14 | which prints a command to run an Amazon KCLpy application. It requires a --java 15 | and --properties argument and optionally takes any number of --path arguments to prepend 16 | to the classpath that it generates for the command. 17 | """ 18 | from __future__ import print_function 19 | from amazon_kclpy import kcl 20 | from glob import glob 21 | import os 22 | import argparse 23 | import sys 24 | import samples 25 | 26 | 27 | 28 | 29 | def get_dir_of_file(f): 30 | ''' 31 | Returns the absolute path to the directory containing the specified file. 32 | 33 | :type f: str 34 | :param f: A path to a file, either absolute or relative 35 | 36 | :rtype: str 37 | :return: The absolute path of the directory represented by the relative path provided. 38 | ''' 39 | return os.path.dirname(os.path.abspath(f)) 40 | 41 | def get_kcl_dir(): 42 | ''' 43 | Returns the absolute path to the dir containing the amazon_kclpy.kcl module. 44 | 45 | :rtype: str 46 | :return: The absolute path of the KCL package. 
47 | ''' 48 | return get_dir_of_file(kcl.__file__) 49 | 50 | def get_kcl_jar_path(): 51 | ''' 52 | Returns the absolute path to the KCL jars needed to run an Amazon KCLpy app. 53 | 54 | :rtype: str 55 | :return: The absolute path of the KCL jar files needed to run the MultiLangDaemon. 56 | ''' 57 | if os.name == 'posix': 58 | return ':'.join(glob(os.path.join(get_kcl_dir(), 'jars', '*jar'))) 59 | else: 60 | return ';'.join(glob(os.path.join(get_kcl_dir(), 'jars', '*jar'))) 61 | 62 | def get_kcl_classpath(properties=None, paths=[]): 63 | ''' 64 | Generates a classpath that includes the location of the kcl jars, the 65 | properties file and the optional paths. 66 | 67 | :type properties: str 68 | :param properties: Path to properties file. 69 | 70 | :type paths: list 71 | :param paths: List of strings. The paths that will be prepended to the classpath. 72 | 73 | :rtype: str 74 | :return: A java class path that will allow your properties to be found and the MultiLangDaemon and its deps and 75 | any custom paths you provided. 76 | ''' 77 | # First make all the user provided paths absolute 78 | paths = [os.path.abspath(p) for p in paths] 79 | # We add our paths after the user provided paths because this permits users to 80 | # potentially inject stuff before our paths (otherwise our stuff would always 81 | # take precedence). 82 | paths.append(get_kcl_jar_path()) 83 | if properties: 84 | # Add the dir that the props file is in 85 | dir_of_file = get_dir_of_file(properties) 86 | paths.append(dir_of_file) 87 | if os.name == 'posix': 88 | return ":".join([p for p in paths if p != '']) 89 | else: 90 | return ";".join([p for p in paths if p != '']) 91 | 92 | def get_kcl_app_command(args, multi_lang_daemon_class, properties, log_configuration, paths=[]): 93 | ''' 94 | Generates a command to run the MultiLangDaemon. 
95 | 96 | :type java: str 97 | :param java: Path to java 98 | 99 | :type multi_lang_daemon_class: str 100 | :param multi_lang_daemon_class: Name of multi language daemon class e.g. com.amazonaws.services.kinesis.multilang.MultiLangDaemon 101 | 102 | :type properties: str 103 | :param properties: Optional properties file to be included in the classpath. 104 | 105 | :type paths: list 106 | :param paths: List of strings. Additional paths to prepend to the classpath. 107 | 108 | :rtype: str 109 | :return: A command that will run the MultiLangDaemon with your properties and custom paths and java. 110 | ''' 111 | return "{java} -cp {cp} {daemon} {props} {log_config}".format(java=args.java, 112 | cp = get_kcl_classpath(args.properties, paths), 113 | daemon = multi_lang_daemon_class, 114 | # Just need the basename because the path is added to the classpath 115 | props = properties, 116 | log_config = log_configuration) 117 | 118 | if __name__ == '__main__': 119 | parser = argparse.ArgumentParser("A script for generating a command to run an Amazon KCLpy app") 120 | parser.add_argument("--print_classpath", dest="print_classpath", action="store_true", 121 | default=False, 122 | help="Print a java class path.\noptional arguments: --path") 123 | parser.add_argument("--print_command", dest="print_command", action="store_true", 124 | default=False, 125 | help="Print a command for running an Amazon KCLpy app.\nrequired " 126 | + "args: --java --properties\noptional args: --classpath") 127 | parser.add_argument("-j", "--java", dest="java", 128 | help="The path to the java executable e.g. 
/jdk/bin/java", 129 | metavar="PATH_TO_JAVA") 130 | parser.add_argument("-p", "--properties", "--props", "--prop", dest="properties", 131 | help="The path to a properties file (relative to where you are running this script)", 132 | metavar="PATH_TO_PROPERTIES") 133 | parser.add_argument("--sample", "--sample-props", "--use-sample-properties", dest="use_sample_props", 134 | help="This will use the sample.properties file included in this package as the properties file.", 135 | action="store_true", default=False) 136 | parser.add_argument("-c", "--classpath", "--path", dest="paths", action="append", default=[], 137 | help="Additional path to add to java class path. May be specified any number of times", 138 | metavar="PATH") 139 | parser.add_argument("-l", "--log-configuration", dest="log_configuration", 140 | help="This will use the logback.xml which will be used by the KCL to log.", 141 | metavar="PATH_TO_LOG_CONFIGURATION") 142 | args = parser.parse_args() 143 | # Possibly replace the properties with the sample. Useful if they just want to run the sample app. 
144 | if args.use_sample_props: 145 | if args.properties: 146 | sys.stderr.write('Replacing provided properties with sample properties due to arg --sample\n') 147 | args.properties = os.path.join(get_dir_of_file(samples.__file__), 'sample.properties') 148 | 149 | # Print what the asked for 150 | if args.print_classpath: 151 | print(get_kcl_classpath(args.properties, args.paths)) 152 | elif args.print_command: 153 | if args.java and args.properties: 154 | multi_lang_daemon_class = 'software.amazon.kinesis.multilang.MultiLangDaemon' 155 | properties_argument = "--properties-file {props}".format(props = args.properties) 156 | log_argument = '' 157 | if args.log_configuration is not None: 158 | log_argument = "--log-configuration {log}".format(log = args.log_configuration) 159 | print(get_kcl_app_command(args, multi_lang_daemon_class, properties_argument, log_argument, paths=args.paths)) 160 | else: 161 | sys.stderr.write("Must provide arguments: --java and --properties\n") 162 | parser.print_usage() 163 | else: 164 | parser.print_usage() 165 | -------------------------------------------------------------------------------- /samples/sample.properties: -------------------------------------------------------------------------------- 1 | # The script that abides by the multi-language protocol. This script will 2 | # be executed by the MultiLangDaemon, which will communicate with this script 3 | # over STDIN and STDOUT according to the multi-language protocol. 4 | executableName = sample_kclpy_app.py 5 | 6 | # The Stream arn: arn:aws:kinesis:::stream/ 7 | # Important: streamArn takes precedence over streamName if both are set 8 | streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample 9 | 10 | # The name of an Amazon Kinesis stream to process. 11 | # Important: streamArn takes precedence over streamName if both are set 12 | streamName = kclpysample 13 | 14 | # Used by the KCL as the name of this application. 
Will be used as the name 15 | # of an Amazon DynamoDB table which will store the lease and checkpoint 16 | # information for workers with this application name 17 | applicationName = PythonKCLSample 18 | 19 | # Users can change the credentials provider the KCL will use to retrieve credentials. 20 | # Expected key name (case-sensitive): 21 | # AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch 22 | # The DefaultCredentialsProvider checks several other providers, which is 23 | # described here: 24 | # https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 25 | AwsCredentialsProvider = DefaultCredentialsProvider 26 | 27 | # Appended to the user agent of the KCL. Does not impact the functionality of the 28 | # KCL in any other way. 29 | processingLanguage = python/3.8 30 | 31 | # Valid options are TRIM_HORIZON or LATEST. 32 | # See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 33 | initialPositionInStream = TRIM_HORIZON 34 | 35 | # To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initialPositionInStreamExtended', 36 | # and uncomment the line below with the right timestamp value. 37 | # See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 38 | #initialPositionInStreamExtended = 1636609142 39 | 40 | # The following properties are also available for configuring the KCL Worker that is created 41 | # by the MultiLangDaemon. 42 | 43 | # The KCL defaults to us-east-1 44 | regionName = us-east-1 45 | 46 | # Fail over time in milliseconds. A worker which does not renew its lease within this time interval 47 | # will be regarded as having problems and its shards will be assigned to other workers.
48 | # For applications that have a large number of shards, this may be set to a higher number to reduce 49 | # the number of DynamoDB IOPS required for tracking leases 50 | #failoverTimeMillis = 10000 51 | 52 | # A worker id that uniquely identifies this worker among all workers using the same applicationName 53 | # If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself. 54 | #workerId = 55 | 56 | # Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks. 57 | #shardSyncIntervalMillis = 60000 58 | 59 | # Max records to fetch from Kinesis in a single GetRecords call. 60 | #maxRecords = 10000 61 | 62 | # Idle time between record reads in milliseconds. 63 | #idleTimeBetweenReadsInMillis = 1000 64 | 65 | # Enables applications to flush/checkpoint (if they have some data "in progress", but don't get new data for a while) 66 | #callProcessRecordsEvenForEmptyRecordList = false 67 | 68 | # Interval in milliseconds between polling to check for parent shard completion. 69 | # Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on 70 | # completion of parent shards). 71 | #parentShardPollIntervalMillis = 10000 72 | 73 | # Cleanup leases upon shard completion (don't wait until they expire in Kinesis). 74 | # Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try 75 | # to delete the ones we don't need any longer. 76 | #cleanupLeasesUponShardCompletion = true 77 | 78 | # Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures). 79 | #taskBackoffTimeMillis = 500 80 | 81 | # Buffer metrics for at most this long before publishing to CloudWatch. 82 | #metricsBufferTimeMillis = 10000 83 | 84 | # Buffer at most this many metrics before publishing to CloudWatch.
85 | #metricsMaxQueueSize = 10000 86 | 87 | # KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls 88 | # to RecordProcessorCheckpointer#checkpoint(String) by default. 89 | #validateSequenceNumberBeforeCheckpointing = true 90 | 91 | # The maximum number of active threads for the MultiLangDaemon to permit. 92 | # If a value is provided then a FixedThreadPool is used with the maximum 93 | # active threads set to the provided value. If a non-positive integer or no 94 | # value is provided a CachedThreadPool is used. 95 | #maxActiveThreads = 0 96 | 97 | ################### KclV3 configurations ################### 98 | # NOTE : These are just test configurations to show how to customize 99 | # all possible KCLv3 configurations. They are not necessarily the best 100 | # default values to use for production. 101 | 102 | # Coordinator config 103 | # Version the KCL needs to operate in. For more details check the KCLv3 migration 104 | # documentation. 
Default is CLIENT_VERSION_CONFIG_3X 105 | # clientVersionConfig = 106 | # Configurations to control how the CoordinatorState DDB table is created 107 | # Default name is applicationName-CoordinatorState in PAY_PER_REQUEST, 108 | # with PITR and deletion protection disabled and no tags 109 | # coordinatorStateTableName = 110 | # coordinatorStateBillingMode = 111 | # coordinatorStateReadCapacity = 112 | # coordinatorStateWriteCapacity = 113 | # coordinatorStatePointInTimeRecoveryEnabled = 114 | # coordinatorStateDeletionProtectionEnabled = 115 | # coordinatorStateTags = 116 | 117 | # Graceful handoff config - tuning of the shutdown behavior during lease transfers 118 | # default values are 30000 and true respectively 119 | # gracefulLeaseHandoffTimeoutMillis = 120 | # isGracefulLeaseHandoffEnabled = 121 | 122 | # WorkerMetricStats table config - control how the DDB table is created 123 | # Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST, 124 | # with PITR and deletion protection disabled and no tags 125 | # workerMetricsTableName = 126 | # workerMetricsBillingMode = 127 | # workerMetricsReadCapacity = 128 | # workerMetricsWriteCapacity = 129 | # workerMetricsPointInTimeRecoveryEnabled = 130 | # workerMetricsDeletionProtectionEnabled = 131 | # workerMetricsTags = 132 | 133 | # WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm 134 | # 135 | # frequency of capturing worker metrics in memory. Default is 1s 136 | # inMemoryWorkerMetricsCaptureFrequencyMillis = 137 | 138 | # frequency of reporting worker metric stats to storage. Default is 30s 139 | # workerMetricsReporterFreqInMillis = 140 | 141 | # No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10 142 | # noOfPersistedMetricsPerWorkerMetrics = 143 | 144 | # Disable use of worker metrics to balance lease, default is false. 145 | # If it is true, the algorithm balances lease based on worker's processing throughput. 
146 | # disableWorkerMetrics = 147 | 148 | # Max throughput per host 10 MBps, to limit processing to the given value 149 | # Default is unlimited. 150 | # maxThroughputPerHostKBps = 151 | 152 | # Dampen the load that is rebalanced during lease re-balancing, default is 60% 153 | # dampeningPercentage = 154 | 155 | # Configures the allowed variance range for worker utilization. The upper 156 | # limit is calculated as average * (1 + reBalanceThresholdPercentage/100). 157 | # The lower limit is average * (1 - reBalanceThresholdPercentage/100). If 158 | # any worker's utilization falls outside this range, lease re-balancing is 159 | # triggered. The re-balancing algorithm aims to bring variance within the 160 | # specified range. It also avoids thrashing by ensuring the utilization of 161 | # the worker receiving the load after re-balancing doesn't exceed the fleet 162 | # average. This might cause no re-balancing action even if the utilization is 163 | # out of the variance range. The default value is 10, representing +/-10% 164 | # variance from the average value. 165 | # reBalanceThresholdPercentage = 166 | 167 | # Whether at least one lease must be taken from a high utilization worker 168 | # during re-balancing when there is no lease assigned to that worker whose 169 | # throughput is less than or equal to the minimum throughput that needs to be 170 | # moved away from that worker to bring the worker back into the allowed variance. 171 | # Default is true. 172 | # allowThroughputOvershoot = 173 | 174 | # Lease assignment is performed every failoverTimeMillis but re-balance will 175 | # be attempted only once in 5 times based on the below config. Default is 3. 176 | # varianceBalancingFrequency = 177 | 178 | # Alpha value used for calculating exponential moving average of worker's metricStats. 179 | # workerMetricsEMAAlpha = 180 | # Duration after which workerMetricStats entry from WorkerMetricStats table will 181 | # be cleaned up.
182 | # Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days) 183 | # Refer to Duration.parse javadocs for more details 184 | # staleWorkerMetricsEntryCleanupDuration = 185 | -------------------------------------------------------------------------------- /samples/sample_kclpy_app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import print_function 7 | 8 | import sys 9 | import time 10 | 11 | from amazon_kclpy import kcl 12 | from amazon_kclpy.v3 import processor 13 | 14 | 15 | class RecordProcessor(processor.RecordProcessorBase): 16 | """ 17 | A RecordProcessor processes data from a shard in a stream. Its methods will be called with this pattern: 18 | 19 | * initialize will be called once 20 | * process_records will be called zero or more times 21 | * shutdown will be called if this MultiLangDaemon instance loses the lease to this shard, or the shard ends due 22 | a scaling change. 23 | """ 24 | def __init__(self): 25 | self._SLEEP_SECONDS = 5 26 | self._CHECKPOINT_RETRIES = 5 27 | self._CHECKPOINT_FREQ_SECONDS = 60 28 | self._largest_seq = (None, None) 29 | self._largest_sub_seq = None 30 | self._last_checkpoint_time = None 31 | 32 | def log(self, message): 33 | sys.stderr.write(message) 34 | 35 | def initialize(self, initialize_input): 36 | """ 37 | Called once by a KCLProcess before any calls to process_records 38 | 39 | :param amazon_kclpy.messages.InitializeInput initialize_input: Information about the lease that this record 40 | processor has been assigned. 41 | """ 42 | self._largest_seq = (None, None) 43 | self._last_checkpoint_time = time.time() 44 | 45 | def checkpoint(self, checkpointer, sequence_number=None, sub_sequence_number=None): 46 | """ 47 | Checkpoints with retries on retryable exceptions. 
48 | 49 | :param amazon_kclpy.kcl.Checkpointer checkpointer: the checkpointer provided to either process_records 50 | or shutdown 51 | :param str or None sequence_number: the sequence number to checkpoint at. 52 | :param int or None sub_sequence_number: the sub sequence number to checkpoint at. 53 | """ 54 | for n in range(0, self._CHECKPOINT_RETRIES): 55 | try: 56 | checkpointer.checkpoint(sequence_number, sub_sequence_number) 57 | return 58 | except kcl.CheckpointError as e: 59 | if 'ShutdownException' == e.value: 60 | # 61 | # A ShutdownException indicates that this record processor should be shutdown. This is due to 62 | # some failover event, e.g. another MultiLangDaemon has taken the lease for this shard. 63 | # 64 | print('Encountered shutdown exception, skipping checkpoint') 65 | return 66 | elif 'ThrottlingException' == e.value: 67 | # 68 | # A ThrottlingException indicates that one of our dependencies is is over burdened, e.g. too many 69 | # dynamo writes. We will sleep temporarily to let it recover. 70 | # 71 | if self._CHECKPOINT_RETRIES - 1 == n: 72 | sys.stderr.write('Failed to checkpoint after {n} attempts, giving up.\n'.format(n=n)) 73 | return 74 | else: 75 | print('Was throttled while checkpointing, will attempt again in {s} seconds' 76 | .format(s=self._SLEEP_SECONDS)) 77 | elif 'InvalidStateException' == e.value: 78 | sys.stderr.write('MultiLangDaemon reported an invalid state while checkpointing.\n') 79 | else: # Some other error 80 | sys.stderr.write('Encountered an error while checkpointing, error was {e}.\n'.format(e=e)) 81 | time.sleep(self._SLEEP_SECONDS) 82 | 83 | def process_record(self, data, partition_key, sequence_number, sub_sequence_number): 84 | """ 85 | Called for each record that is passed to process_records. 86 | 87 | :param str data: The blob of data that was contained in the record. 88 | :param str partition_key: The key associated with this record. 
89 | :param int sequence_number: The sequence number associated with this record. 90 | :param int sub_sequence_number: the sub sequence number associated with this record. 91 | """ 92 | #################################### 93 | # Insert your processing logic here 94 | #################################### 95 | self.log("Record (Partition Key: {pk}, Sequence Number: {seq}, Subsequence Number: {sseq}, Data Size: {ds}" 96 | .format(pk=partition_key, seq=sequence_number, sseq=sub_sequence_number, ds=len(data))) 97 | 98 | def should_update_sequence(self, sequence_number, sub_sequence_number): 99 | """ 100 | Determines whether a new larger sequence number is available 101 | 102 | :param int sequence_number: the sequence number from the current record 103 | :param int sub_sequence_number: the sub sequence number from the current record 104 | :return boolean: true if the largest sequence should be updated, false otherwise 105 | """ 106 | return self._largest_seq == (None, None) or sequence_number > self._largest_seq[0] or \ 107 | (sequence_number == self._largest_seq[0] and sub_sequence_number > self._largest_seq[1]) 108 | 109 | def process_records(self, process_records_input): 110 | """ 111 | Called by a KCLProcess with a list of records to be processed and a checkpointer which accepts sequence numbers 112 | from the records to indicate where in the stream to checkpoint. 113 | 114 | :param amazon_kclpy.messages.ProcessRecordsInput process_records_input: the records, and metadata about the 115 | records. 
116 | """ 117 | try: 118 | for record in process_records_input.records: 119 | data = record.binary_data 120 | seq = int(record.sequence_number) 121 | sub_seq = record.sub_sequence_number 122 | key = record.partition_key 123 | self.process_record(data, key, seq, sub_seq) 124 | if self.should_update_sequence(seq, sub_seq): 125 | self._largest_seq = (seq, sub_seq) 126 | 127 | # 128 | # Checkpoints every self._CHECKPOINT_FREQ_SECONDS seconds 129 | # 130 | if time.time() - self._last_checkpoint_time > self._CHECKPOINT_FREQ_SECONDS: 131 | self.checkpoint(process_records_input.checkpointer, str(self._largest_seq[0]), self._largest_seq[1]) 132 | self._last_checkpoint_time = time.time() 133 | 134 | except Exception as e: 135 | self.log("Encountered an exception while processing records. Exception was {e}\n".format(e=e)) 136 | 137 | def lease_lost(self, lease_lost_input): 138 | self.log("Lease has been lost") 139 | 140 | def shard_ended(self, shard_ended_input): 141 | self.log("Shard has ended checkpointing") 142 | shard_ended_input.checkpointer.checkpoint() 143 | 144 | def shutdown_requested(self, shutdown_requested_input): 145 | self.log("Shutdown has been requested, checkpointing.") 146 | shutdown_requested_input.checkpointer.checkpoint() 147 | 148 | 149 | if __name__ == "__main__": 150 | kcl_process = kcl.KCLProcess(RecordProcessor()) 151 | kcl_process.run() 152 | -------------------------------------------------------------------------------- /samples/sample_kinesis_wordputter.py: -------------------------------------------------------------------------------- 1 | #!env python 2 | ''' 3 | Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | SPDX-License-Identifier: Apache-2.0 5 | ''' 6 | from __future__ import print_function 7 | 8 | import argparse 9 | import sys 10 | import time 11 | 12 | import boto3 13 | 14 | def get_stream_status(kinesis, stream_name): 15 | ''' 16 | Query this provided connection object for the provided stream's status. 
17 | :type conn: Kinesis.Client 18 | :param conn: A connection to Amazon Kinesis 19 | :type stream_name: str 20 | :param stream_name: The name of a stream. 21 | :rtype: str 22 | :return: The stream's status 23 | ''' 24 | r = kinesis.describe_stream(StreamName=stream_name) 25 | description = r.get('StreamDescription') 26 | return description.get('StreamStatus') 27 | 28 | def wait_for_stream(kinesis, stream_name): 29 | ''' 30 | Wait for the provided stream to become active. 31 | :type kinesis: Kinesis.Client 32 | :param kinesis: A low-level client representing Amazon Kinesis 33 | :type stream_name: str 34 | :param stream_name: The name of a stream. 35 | ''' 36 | SLEEP_TIME_SECONDS = 3 37 | status = get_stream_status(kinesis, stream_name) 38 | while status != 'ACTIVE': 39 | print('{stream_name} has status: {status}, sleeping for {secs} seconds'.format( 40 | stream_name = stream_name, 41 | status = status, 42 | secs = SLEEP_TIME_SECONDS)) 43 | time.sleep(SLEEP_TIME_SECONDS) # sleep for 3 seconds 44 | status = get_stream_status(kinesis, stream_name) 45 | 46 | def put_words_in_stream(kinesis, stream_name, words): 47 | ''' 48 | Put each word in the provided list of words into the stream. 49 | :type kinesis: Kinesis.Client 50 | :param kinesis: A connection to Amazon Kinesis 51 | :type stream_name: str 52 | :param stream_name: The name of a stream. 53 | :type words: list 54 | :param words: A list of strings to put into the stream. 55 | ''' 56 | for w in words: 57 | try: 58 | kinesis.put_record(StreamName=stream_name, Data=w, PartitionKey=w) 59 | print("Put word: " + w + " into stream: " + stream_name) 60 | except Exception as e: 61 | sys.stderr.write("Encountered an exception while trying to put a word: " 62 | + w + " into stream: " + stream_name + " exception was: " + str(e)) 63 | 64 | def put_words_in_stream_periodically(conn, stream_name, words, period_seconds): 65 | ''' 66 | Puts words into a stream, then waits for the period to elapse then puts the words in again. 
There is no strict 67 | guarantee about how frequently we put each word into the stream, just that we will wait between iterations. 68 | :type conn: boto.kinesis.layer1.KinesisConnection 69 | :param conn: A connection to Amazon Kinesis 70 | :type stream_name: str 71 | :param stream_name: The name of a stream. 72 | :type words: list 73 | :param words: A list of strings to put into the stream. 74 | :type period_seconds: int 75 | :param period_seconds: How long to wait, in seconds, between iterations over the list of words. 76 | ''' 77 | while True: 78 | put_words_in_stream(conn, stream_name, words) 79 | print("Sleeping for {period_seconds} seconds".format(period_seconds=period_seconds)) 80 | time.sleep(period_seconds) 81 | 82 | if __name__ == '__main__': 83 | parser = argparse.ArgumentParser(''' 84 | Puts words into a stream. 85 | # Using the -w option multiple times 86 | sample_wordputter.py -s STREAM_NAME -w WORD1 -w WORD2 -w WORD3 -p 3 87 | # Passing input from STDIN 88 | echo "WORD1\\nWORD2\\nWORD3" | sample_wordputter.py -s STREAM_NAME -p 3 89 | ''') 90 | parser.add_argument("-s", "--stream", dest="stream_name", required=True, 91 | help="The stream you'd like to create.", metavar="STREAM_NAME",) 92 | parser.add_argument("-r", "--regionName", "--region", dest="region", default="us-east-1", 93 | help="The region you'd like to make this stream in. Default is 'us-east-1'", metavar="REGION_NAME",) 94 | parser.add_argument("-w", "--word", dest="words", default=[], action="append", 95 | help="A word to add to the stream. Can be specified multiple times to add multiple words.", metavar="WORD",) 96 | parser.add_argument("-p", "--period", dest="period", type=int, 97 | help="If you'd like to repeatedly put words into the stream, this option provides the period for putting " 98 | + "words into the stream in SECONDS. 
If no period is given then the words are put once.", 99 | metavar="SECONDS",) 100 | args = parser.parse_args() 101 | stream_name = args.stream_name 102 | 103 | ''' 104 | Getting a connection to Amazon Kinesis will require that you have your credentials available to 105 | one of the standard credentials providers. 106 | ''' 107 | print("Connecting to stream: {s} in {r}".format(s=stream_name, r=args.region)) 108 | kinesis = boto3.client('kinesis', region_name=args.region) 109 | 110 | try: 111 | status = get_stream_status(kinesis, stream_name) 112 | if 'DELETING' == status: 113 | print('The stream: {s} is being deleted, please rerun the script.'.format(s=stream_name)) 114 | sys.exit(1) 115 | elif 'ACTIVE' != status: 116 | wait_for_stream(kinesis, stream_name) 117 | except: 118 | # We'll assume the stream didn't exist so we will try to create it with just one shard 119 | kinesis.create_stream(StreamName=stream_name, ShardCount=1) 120 | wait_for_stream(kinesis, stream_name) 121 | # Now the stream should exist 122 | if len(args.words) == 0: 123 | print('No -w options provided. Waiting on input from STDIN') 124 | words = [l.strip() for l in sys.stdin.readlines() if l.strip() != ''] 125 | else: 126 | words = args.words 127 | if args.period != None: 128 | put_words_in_stream_periodically(kinesis, stream_name, words, args.period) 129 | else: 130 | put_words_in_stream(kinesis, stream_name, words) -------------------------------------------------------------------------------- /scripts/build_deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | """ 5 | Builds the dependency list used by setup.py from the maven dependency tree. This script must be run in the 6 | amazon-kinesis-client or amazon-kinesis-client-multilang directory, or where the pom.xml for the libraries are present. 
7 | """ 8 | import subprocess 9 | from tempfile import mkstemp 10 | from os import close 11 | import re 12 | 13 | 14 | def format_dependency(line): 15 | """ 16 | This attempts to extract Maven dependencies and versions from a line of output from mvn dependency:tree 17 | 18 | An example line without specifiers: 19 | 20 | ``[INFO] +- software.amazon.kinesis:amazon-kinesis-client:jar:2.1.2:compile`` 21 | 22 | This fields in the line in order are: 23 | 1. Group Id: software.amazon.kinesis 24 | 2. Artifact Id: amazon-kinesis-client 25 | 3. Packaging: jar (not used) 26 | 4. Version: 2.1.2 27 | 5. Dependency type: compile (this will be runtime or compile) 28 | 29 | An example line with specifiers: 30 | 31 | ``[INFO] | | +- io.netty:netty-transport-native-epoll:jar:linux-x86_64:4.1.32.Final:compile`` 32 | 33 | The fields in order are: 34 | 1. Group Id: io.netty 35 | 2. Artifact Id: netty-transport-native-epoll 36 | 3. Packaging: jar (not used) 37 | 4. Specifier: linux-x86_64 (not used) 38 | 5. Version: 4.1.32.Final 39 | 6. 
Dependency type: compile (this will be runtime or compile) 40 | 41 | :param str line: the line to extract version information from 42 | :return: the version information needed to retrieve the jars from Maven Central 43 | """ 44 | match = re.match(r'^[\\\s+|-]*(?P[^\s]+)', line) 45 | assert match is not None 46 | items = match.groupdict()['dep_line'].split(":") 47 | version_idx = 3 48 | if len(items) > 5: 49 | version_idx = 4 50 | 51 | return "('{group_id}', '{artifact_id}', '{version}')".format(group_id=items[0], 52 | artifact_id=items[1], 53 | version=items[version_idx]) 54 | 55 | 56 | def build_deps(): 57 | """ 58 | Extracts all the dependencies from the pom.xml and formats them into a form usable for setup.py or other 59 | multilang daemon implementations 60 | """ 61 | (fh, filename) = mkstemp() 62 | close(fh) 63 | output_command = '-Doutput={temp}'.format(temp=filename) 64 | subprocess.check_call(['mvn', 'dependency:tree', '-Dscope=runtime', output_command]) 65 | 66 | dependency_file = open(filename) 67 | 68 | dependencies = [format_dependency(line) for line in dependency_file] 69 | 70 | print(",\n".join(dependencies)) 71 | 72 | 73 | if __name__ == '__main__': 74 | build_deps() 75 | 76 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | [aliases] 4 | test=pytest -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | from __future__ import print_function 4 | 5 | import glob 6 | import sys 7 | 8 | import os 9 | import shutil 10 | import xml.etree.ElementTree as ET 11 | 12 | from setuptools import Command 13 | from setuptools import setup 14 | from setuptools.command.install import install 15 | 16 | if sys.version_info[0] >= 3: 17 | # Python 3 18 | from urllib.request import urlopen 19 | else: 20 | # Python 2 21 | from urllib2 import urlopen 22 | 23 | # 24 | # This script modifies the basic setuptools by adding some functionality to the standard 25 | # "install" command and by adding an additional command "download_jars" which 26 | # simplifies retrieval of the jars required to run the KCL multi-language daemon 27 | # which is required to run the sample app included in this package. 28 | # 29 | # If a user runs the basic install: 30 | # 31 | # python setup.py install 32 | # 33 | # They will be notified of any jars that are downloaded for this package. Those jars 34 | # will go in amazon_kclpy/jars so that they can be installed as part of this package's 35 | # data. 36 | # 37 | # python setup.py download_jars 38 | # 39 | # Will retrieve the configured jars from maven and then advise the user 40 | # to rerun the install command. 
41 | # 42 | 43 | PACKAGE_NAME = 'amazon_kclpy' 44 | JAR_DIRECTORY = os.path.join(PACKAGE_NAME, 'jars') 45 | PACKAGE_VERSION = '3.0.3' 46 | PYTHON_REQUIREMENTS = [ 47 | 'boto3', 48 | # argparse is part of python2.7 but must be declared for python2.6 49 | 'argparse', 50 | ] 51 | REMOTE_MAVEN_PACKAGES_FILE = 'pom.xml' 52 | 53 | class MavenJarDownloader: 54 | 55 | def __init__(self, on_completion, destdir=JAR_DIRECTORY, packages_file=REMOTE_MAVEN_PACKAGES_FILE): 56 | self.on_completion = on_completion 57 | self.destdir = destdir 58 | self.packages_file = packages_file 59 | self.packages = self.parse_packages_from_pom() 60 | 61 | def warning_string(self, missing_jars=[]): 62 | s = '''The following jars were not installed because they were not 63 | present in this package at the time of installation:''' 64 | for jar in missing_jars: 65 | s += '\n {jar}'.format(jar=jar) 66 | s += ''' 67 | This doesn't affect the rest of the installation, but may make it more 68 | difficult for you to run the sample app and get started. 69 | 70 | You should consider running: 71 | 72 | python setup.py download_jars 73 | python setup.py install 74 | 75 | Which will download the required jars and rerun the install. 
76 | ''' 77 | return s 78 | 79 | def parse_packages_from_pom(self): 80 | maven_root = ET.parse(self.packages_file).getroot() 81 | maven_version = '{http://maven.apache.org/POM/4.0.0}' 82 | # dictionary of common package versions encoded in `properties` section 83 | properties = {f"${{{child.tag.replace(maven_version, '')}}}": child.text 84 | for child in maven_root.find(f'{maven_version}properties').iter() if 'version' in child.tag} 85 | 86 | packages = [] 87 | for dep in maven_root.iter(f'{maven_version}dependency'): 88 | dependency = [] 89 | for attr in ['groupId', 'artifactId', 'version']: 90 | val = dep.find(maven_version + attr).text 91 | if val in properties: 92 | dependency.append(properties[val]) 93 | else: 94 | dependency.append(val) 95 | packages.append(tuple(dependency)) 96 | 97 | return packages 98 | 99 | def download_and_check(self): 100 | self.download_files() 101 | self.on_completion() 102 | missing_jars = self.missing_jars() 103 | if len(missing_jars) > 0: 104 | raise RuntimeError(self.warning_string(missing_jars)) 105 | 106 | def package_destination(self, artifact_id, version): 107 | return '{artifact_id}-{version}.jar'.format(artifact_id=artifact_id, version=version) 108 | 109 | def missing_jars(self): 110 | file_list = [os.path.join(self.destdir, self.package_destination(p[1], p[2])) for p in self.packages] 111 | return [f for f in file_list if not os.path.isfile(f)] # The missing files 112 | 113 | def package_url(self, group_id, artifact_id, version): 114 | # 115 | # Sample url: 116 | # https://search.maven.org/remotecontent?filepath=org/apache/httpcomponents/httpclient/4.2/httpclient-4.2.jar 117 | # https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.2/httpclient-4.2.jar 118 | # 119 | prefix = os.getenv("KCL_MVN_REPO_SEARCH_URL", 'https://repo1.maven.org/maven2/') 120 | return '{prefix}{path}/{artifact_id}/{version}/{dest}'.format( 121 | prefix=prefix, 122 | path='/'.join(group_id.split('.')), 123 | artifact_id=artifact_id, 
124 | version=version, 125 | dest=self.package_destination(artifact_id, version)) 126 | 127 | def download_file(self, url, dest): 128 | """ 129 | Downloads a file at the url to the destination. 130 | """ 131 | print('Attempting to retrieve remote jar {url}'.format(url=url)) 132 | try: 133 | response = urlopen(url) 134 | with open(dest, 'wb') as dest_file: 135 | shutil.copyfileobj(response, dest_file) 136 | print('Saving {url} -> {dest}'.format(url=url, dest=dest)) 137 | except Exception as e: 138 | print('Failed to retrieve {url}: {e}'.format(url=url, e=e)) 139 | return 140 | 141 | def download_files(self): 142 | for package in self.packages: 143 | dest = os.path.join(self.destdir, self.package_destination(package[1], package[2])) 144 | if os.path.isfile(dest): 145 | print('Skipping download of {dest}'.format(dest=dest)) 146 | else: 147 | url = self.package_url(package[0], package[1], package[2]) 148 | self.download_file(url, dest) 149 | 150 | 151 | class DownloadJarsCommand(Command): 152 | description = "Download the jar files needed to run the sample application" 153 | user_options = [] 154 | 155 | def initialize_options(self): 156 | pass 157 | 158 | def finalize_options(self): 159 | pass 160 | 161 | def run(self): 162 | """ 163 | Runs when this command is given to setup.py 164 | """ 165 | downloader = MavenJarDownloader(on_completion=lambda : None) 166 | downloader.download_files() 167 | print(''' 168 | Now you should run: 169 | 170 | python setup.py install 171 | 172 | Which will finish the installation. 173 | ''') 174 | 175 | 176 | class InstallThenCheckForJars(install): 177 | 178 | def do_install(self): 179 | install.run(self) 180 | 181 | def run(self): 182 | """ 183 | We override the basic install command. First we download jars then 184 | we run the basic install then we check whether the jars are present 185 | in this package. If they aren't present we warn the user and give 186 | them some advice on how to retry getting the jars. 
187 | """ 188 | downloader = MavenJarDownloader(self.do_install) 189 | downloader.download_and_check() 190 | 191 | 192 | try: 193 | from wheel.bdist_wheel import bdist_wheel 194 | 195 | 196 | class BdistWheelWithJars(bdist_wheel): 197 | """ 198 | This overrides the bdist_wheel command, that handles building a binary wheel of the package. 199 | Currently, as far as I can tell, binary wheel creation only occurs during the virtual environment creation. 200 | The package that bdist_wheel comes from isn't a modeled dependency of this package, but is required for virtual 201 | environment creation. 202 | """ 203 | 204 | def do_run(self): 205 | bdist_wheel.run(self) 206 | 207 | def run(self): 208 | downloader = MavenJarDownloader(self.do_run) 209 | downloader.download_and_check() 210 | 211 | except ImportError: 212 | pass 213 | 214 | if __name__ == '__main__': 215 | commands = { 216 | 'download_jars': DownloadJarsCommand, 217 | 'install': InstallThenCheckForJars, 218 | } 219 | try: 220 | # 221 | # BdistWheelWithJars will only be present if the wheel package is present, and that is present during 222 | # virtual environment creation. 223 | # It's important to note this is a hack. There doesn't appear to be a way to execute hooks around wheel 224 | # creation by design. See https://github.com/pypa/packaging-problems/issues/64 for more information. 
225 | # 226 | commands['bdist_wheel'] = BdistWheelWithJars 227 | except NameError: 228 | pass 229 | 230 | setup( 231 | name=PACKAGE_NAME, 232 | version=PACKAGE_VERSION, 233 | description='A python interface for the Amazon Kinesis Client Library MultiLangDaemon', 234 | license='Apache-2.0', 235 | packages=[PACKAGE_NAME, PACKAGE_NAME + "/v2", PACKAGE_NAME + "/v3", 'samples'], 236 | scripts=glob.glob('samples/*py'), 237 | package_data={ 238 | '': ['*.txt', '*.md'], 239 | PACKAGE_NAME: ['jars/*'], 240 | 'samples': ['sample.properties'], 241 | }, 242 | install_requires=PYTHON_REQUIREMENTS, 243 | setup_requires=["pytest-runner"], 244 | tests_require=["pytest", "mock"], 245 | cmdclass=commands, 246 | url="https://github.com/awslabs/amazon-kinesis-client-python", 247 | keywords="amazon kinesis client library python", 248 | zip_safe=False, 249 | ) 250 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import sys 5 | import os 6 | 7 | sys.path.append(os.path.dirname(__file__)) 8 | -------------------------------------------------------------------------------- /test/test_amazon_kclpy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import json 5 | from mock import Mock 6 | from amazon_kclpy import kcl, dispatch 7 | from utils import make_io_obj 8 | 9 | 10 | def build_basic_io_handler_mock(read_line_side_effects): 11 | """ 12 | 13 | :param read_line_side_effects: 14 | :rtype: kcl._IOHandler 15 | """ 16 | io_handler = Mock() 17 | io_handler.read_line.side_effect = read_line_side_effects 18 | io_handler.load_action.side_effect = lambda x: json.loads(x, object_hook=dispatch.message_decode) 19 | return io_handler 20 | 21 | 22 | def test_checkpointer_exception(): 23 | exception_name = 'ThisIsATestException' 24 | checkpointer = kcl.Checkpointer( 25 | build_basic_io_handler_mock(['{"action": "checkpoint",' 26 | '"checkpoint":"456", "sequenceNumber": "1234", "subSequenceNumber": 0, ' 27 | '"error" : "' + exception_name + '"}'])) 28 | try: 29 | checkpointer.checkpoint() 30 | assert 0, "Checkpointing should have raised an exception" 31 | except kcl.CheckpointError as e: 32 | assert e.value == exception_name 33 | 34 | 35 | def test_checkpointer_unexpected_message_after_checkpointing(): 36 | io_handler = Mock() 37 | io_handler.read_line.side_effect = ['{"action":"initialize", "shardId" : "shardid-123", ' 38 | '"sequenceNumber": "1234", "subSequenceNumber": 1}', ] 39 | io_handler.load_action.side_effect = lambda x: json.loads(x, object_hook=dispatch.message_decode) 40 | checkpointer = kcl.Checkpointer( 41 | build_basic_io_handler_mock( 42 | ['{"action":"initialize", "shardId" : "shardid-123", "sequenceNumber": "1234", "subSequenceNumber": 1}'])) 43 | 44 | try: 45 | checkpointer.checkpoint() 46 | assert 0, "Checkpointing should have raised an exception" 47 | except kcl.CheckpointError as e: 48 | assert e.value == 'InvalidStateException' 49 | 50 | 51 | def test_kcl_process_exits_on_record_processor_exception(): 52 | unique_string = "Super uniqe statement we can look for" 53 | errorFile = make_io_obj() 54 | class ClientException(Exception): 55 | pass 56 | 
mock_rp = Mock() # type: kcl.RecordProcessorBase 57 | # Our record processor will just fail during initialization 58 | mock_rp.initialize.side_effect = [ClientException(unique_string)] 59 | kcl_process = kcl.KCLProcess(mock_rp, 60 | input_file=make_io_obj('{"action":"initialize", "shardId" : "shardid-123", ' 61 | '"sequenceNumber": "1234", "subSequenceNumber": 1}'), 62 | output_file=make_io_obj(), 63 | error_file=errorFile) 64 | try: 65 | kcl_process.run() 66 | except ClientException: 67 | assert 0, "Should not have seen the ClientException propagate up the call stack." 68 | assert errorFile.getvalue().count(unique_string) > 0, 'We should see our error message printed to the error file' 69 | 70 | 71 | def test_kcl_process_exits_on_action_message_exception(): 72 | mock_rp = Mock() # type: kcl.RecordProcessorBase 73 | # Our record processor will just fail during initialization 74 | kcl_process = kcl.KCLProcess(mock_rp, 75 | # This will suffice because a checkpoint message won't be understood by 76 | # the KCLProcessor (only the Checkpointer understands them) 77 | input_file=make_io_obj('{"action":"invalid", "error" : "badstuff", ' 78 | '"sequenceNumber": "1234", "subSequenceNumber": 1}'), 79 | output_file=make_io_obj(), 80 | error_file=make_io_obj()) 81 | try: 82 | kcl_process.run() 83 | assert 0, 'Should have received an exception here' 84 | except dispatch.MalformedAction: 85 | pass 86 | 87 | -------------------------------------------------------------------------------- /test/test_amazon_kclpy_input_output_integration.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from amazon_kclpy import kcl 5 | from utils import make_io_obj 6 | 7 | 8 | # Dummy record processor 9 | class RecordProcessor(kcl.RecordProcessorBase): 10 | 11 | def __init__(self, expected_shard_id, expected_sequence_number): 12 | self.expected_shard_id = expected_shard_id 13 | self.expected_sequence_number = 
expected_sequence_number 14 | pass 15 | 16 | def initialize(self, shard_id): 17 | assert shard_id == self.expected_shard_id 18 | pass 19 | 20 | def process_records(self, records, checkpointer): 21 | seq = records[0].get('sequenceNumber') 22 | assert seq == self.expected_sequence_number 23 | try: 24 | checkpointer.checkpoint(seq) 25 | assert 0, "First checkpoint should fail" 26 | except Exception: 27 | # Try it one more time (this time it'll work) 28 | checkpointer.checkpoint(seq) 29 | 30 | def shutdown(self, checkpointer, reason): 31 | if 'TERMINATE' == reason: 32 | checkpointer.checkpoint() 33 | 34 | 35 | ''' 36 | An input string which we'll feed to a file for kcl.py to read from. 37 | ''' 38 | 39 | ''' 40 | This string is approximately what the output should look like. We remove whitespace when comparing this to what is 41 | written to the outputfile. 42 | ''' 43 | test_output_string = """ 44 | {"action": "status", "responseFor": "initialize"} 45 | {"action": "checkpoint", "checkpoint": "456"} 46 | {"action": "checkpoint", "checkpoint": "456"} 47 | {"action": "status", "responseFor": "processRecords"} 48 | {"action": "checkpoint", "checkpoint": null} 49 | {"action": "status", "responseFor": "shutdown"} 50 | """ 51 | 52 | test_output_messages = [ 53 | {"action": "status", "responseFor": "initialize"}, 54 | {"action": "checkpoint", "sequenceNumber": "456", "subSequenceNumber": None}, 55 | {"action": "checkpoint", "sequenceNumber": "456", "subSequenceNumber": None}, 56 | {"action": "status", "responseFor": "processRecords"}, 57 | {"action": "checkpoint", "sequenceNumber": None, "subSequenceNumber": None}, 58 | {"action": "status", "responseFor": "shardEnded"} 59 | ] 60 | 61 | 62 | def _strip_all_whitespace(s): 63 | return re.sub('\\s*', '', s) 64 | 65 | 66 | test_shard_id = "shardId-123" 67 | test_sequence_number = "456" 68 | 69 | test_input_messages = [ 70 | {"action": "initialize", "shardId": test_shard_id, "sequenceNumber": test_sequence_number, 
"subSequenceNumber": 0}, 71 | {"action": "processRecords", "millisBehindLatest": 1476889708000, "records": 72 | [ 73 | { 74 | "action": "record", "data": "bWVvdw==", "partitionKey": "cat", "sequenceNumber": test_sequence_number, 75 | "subSequenceNumber": 0, "approximateArrivalTimestamp": 1476889707000 76 | } 77 | ] 78 | }, 79 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0, "error": "Exception"}, 80 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0}, 81 | {"action": "shardEnded"}, 82 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0} 83 | ] 84 | 85 | 86 | def test_kcl_py_integration_test_perfect_input(): 87 | test_input_json = "\n".join(map(lambda j: json.dumps(j), test_input_messages)) 88 | input_file = make_io_obj(test_input_json) 89 | output_file = make_io_obj() 90 | error_file = make_io_obj() 91 | process = kcl.KCLProcess(RecordProcessor(test_shard_id, test_sequence_number), 92 | input_file=input_file, output_file=output_file, error_file=error_file) 93 | process.run() 94 | ''' 95 | The strings are approximately the same, modulo whitespace. 96 | ''' 97 | output_message_list = filter(lambda s: s != "", output_file.getvalue().split("\n")) 98 | responses = [json.loads(s) for s in output_message_list] 99 | assert len(responses) == len(test_output_messages) 100 | for i in range(len(responses)): 101 | assert responses[i] == test_output_messages[i] 102 | 103 | ''' 104 | There should be some error output but it seems like overly specific to make sure that a particular message is printed. 105 | ''' 106 | error_output = error_file.getvalue() 107 | assert error_output == "" 108 | -------------------------------------------------------------------------------- /test/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import sys 5 | import io 6 | 7 | 8 | def make_io_obj(json_text=None): 9 | if sys.version_info[0] >= 3: 10 | create_method = io.StringIO 11 | else: 12 | create_method = io.BytesIO 13 | 14 | if json_text is not None: 15 | return create_method(json_text) 16 | else: 17 | return create_method() -------------------------------------------------------------------------------- /test/v3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import pytest 5 | pytestmark = pytest.mark.webtests 6 | -------------------------------------------------------------------------------- /test/v3/delegate_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import mock 5 | import pytest 6 | 7 | from amazon_kclpy.v2 import processor as v2 8 | from amazon_kclpy.v3 import processor as v3 9 | from amazon_kclpy import messages 10 | from amazon_kclpy.kcl import Checkpointer, CheckpointError 11 | 12 | 13 | @pytest.fixture 14 | def delegate(): 15 | return mock.Mock(spec=v2.RecordProcessorBase) 16 | 17 | 18 | @pytest.fixture 19 | def processor(delegate): 20 | return v3.V2toV3Processor(delegate) 21 | 22 | 23 | def test_initialization_delegate(delegate, processor): 24 | initialization_input = mock.Mock(spec=messages.InitializeInput) 25 | processor.initialize(initialization_input) 26 | 27 | delegate.initialize.assert_called_with(initialization_input) 28 | 29 | 30 | def test_process_records_delegate(delegate, processor): 31 | process_records_input = mock.Mock(spec=messages.ProcessRecordsInput) 32 | processor.process_records(process_records_input) 33 | 34 | delegate.process_records.assert_called_with(process_records_input) 35 
| 36 | 37 | def test_shutdown_requested_delegate(delegate, processor): 38 | shutdown_requested_input = mock.Mock(spec=messages.ShutdownRequestedInput) 39 | processor.shutdown_requested(shutdown_requested_input) 40 | 41 | delegate.shutdown_requested.assert_called_with(shutdown_requested_input) 42 | 43 | 44 | def test_lease_lost_to_shutdown_delegate(delegate, processor): 45 | lease_lost_input = messages.LeaseLostInput({ 46 | "action": "leaseLost" 47 | }) 48 | 49 | processor.lease_lost(lease_lost_input) 50 | delegate.shutdown.assert_called() 51 | 52 | actual = delegate.shutdown.call_args[0][0] 53 | 54 | assert actual.reason == "ZOMBIE" 55 | assert actual.action == "shutdown" 56 | assert isinstance(actual.checkpointer, messages.LeaseLostCheckpointer) 57 | 58 | 59 | def test_lease_lost_checkpoint_triggers_exception(delegate, processor): 60 | lease_lost_input = mock.Mock(spec=messages.LeaseLostInput) 61 | delegate.shutdown = lambda s: s.checkpointer.checkpoint() 62 | 63 | with pytest.raises(CheckpointError): 64 | processor.lease_lost(lease_lost_input) 65 | 66 | 67 | def test_shard_ended_to_shutdown_delegate(delegate, processor): 68 | shard_ended_input = messages.ShardEndedInput({ 69 | "action": "shardEnded" 70 | }) 71 | checkpointer = mock.Mock(spec=Checkpointer) 72 | shard_ended_input._checkpointer = checkpointer 73 | 74 | processor.shard_ended(shard_ended_input) 75 | delegate.shutdown.assert_called() 76 | 77 | actual = delegate.shutdown.call_args[0][0] 78 | 79 | assert actual.reason == "TERMINATE" 80 | assert actual.action == "shutdown" 81 | assert actual.checkpointer == checkpointer 82 | 83 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | mock 3 | --------------------------------------------------------------------------------