├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ └── privileged-run.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── NOTICE.txt ├── README.md ├── amazon_kclpy ├── __init__.py ├── checkpoint_error.py ├── dispatch.py ├── jars │ └── __init__.py ├── kcl.py ├── messages.py ├── v2 │ ├── __init__.py │ └── processor.py └── v3 │ ├── __init__.py │ └── processor.py ├── docs ├── Makefile ├── conf.py ├── guide │ ├── quickstart.rst │ ├── record_processor_v1.rst │ ├── record_processor_v2.rst │ └── sample.rst ├── index.rst └── make.bat ├── pom.xml ├── requirements.txt ├── samples ├── __init__.py ├── amazon_kclpy_helper.py ├── sample.properties ├── sample_kclpy_app.py └── sample_kinesis_wordputter.py ├── scripts └── build_deps.py ├── setup.cfg ├── setup.py ├── test ├── __init__.py ├── conftest.py ├── test_amazon_kclpy.py ├── test_amazon_kclpy_input_output_integration.py ├── utils.py └── v3 │ ├── __init__.py │ └── delegate_test.py └── test_requirements.txt /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "maven" 9 | directory: "/" 10 | open-pull-requests-limit: 4 11 | schedule: 12 | interval: "daily" 13 | -------------------------------------------------------------------------------- /.github/workflows/privileged-run.yml: -------------------------------------------------------------------------------- 1 | # This workflow will trigger on pushes, pull requests (to master branch), and manually from the GitHub Actions tab (when requested) 2 | # sample_run uses matrix to create 12 unique combinations of operating systems and python versions 3 | # each of the 12 runs download the jars needed to run the KCL, run the sample_kinesis_wordputter.py, and use a timeout command to run the sample_kclpy_app.py 4 | # auto_merge uses GitHub events to check if dependabot is the pull requester, and if the request fits the criteria the PR is automatically merged 5 | 6 | name: Sample Run and Dependabot Auto-merge 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request_target: 11 | branches: [ master ] 12 | workflow_dispatch: 13 | 14 | permissions: 15 | id-token: write 16 | contents: write 17 | pull-requests: write 18 | statuses: write 19 | 20 | jobs: 21 | sample-run: 22 | timeout-minutes: 8 23 | runs-on: ${{ matrix.os }} 24 | defaults: 25 | run: 26 | shell: bash 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | python-version: [ "3.9", "3.10", "3.11" ] 32 | jdk-version: [ "8", "11", "17", "21", "24" ] 33 | os: [ ubuntu-latest, macOS-latest, windows-latest ] 34 | 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | with: 39 | ref: ${{ github.event.pull_request.head.sha }} 40 | 41 | - name: Configure AWS Credentials 42 | uses: 
aws-actions/configure-aws-credentials@v4 43 | with: 44 | aws-region: us-east-1 45 | role-to-assume: arn:aws:iam::751999266872:role/GitHubPython 46 | role-session-name: myGitHubActionsPython 47 | 48 | - name: Set up JDK ${{ matrix.jdk-version }} 49 | uses: actions/setup-java@v4 50 | with: 51 | java-version: ${{ matrix.jdk-version }} 52 | distribution: 'corretto' 53 | 54 | - name: Set up Python ${{ matrix.python-version }} 55 | uses: actions/setup-python@v2 56 | with: 57 | python-version: ${{ matrix.python-version }} 58 | 59 | - name: Install Python and required pips 60 | run: | 61 | python -m pip install --upgrade pip 62 | pip install -r requirements.txt 63 | pip install -r test_requirements.txt 64 | pip install build 65 | 66 | - name: Test with Pytest 67 | run: | 68 | python -m pytest 69 | 70 | - name: Install .jar files 71 | run: | 72 | python -m build 73 | python setup.py download_jars 74 | python setup.py install 75 | env: 76 | KCL_MVN_REPO_SEARCH_URL: https://repo1.maven.org/maven2/ 77 | 78 | - name: Put words to sample stream 79 | run: | 80 | sample_kinesis_wordputter.py --stream kclpysample -w cat -w dog -w bird -w lobster -w octopus 81 | 82 | - name: Start KCL application (windows or ubuntu) 83 | if: matrix.os != 'macOS-latest' 84 | run: | 85 | timeout 45 $(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 86 | 87 | - name: Start KCL application (macOS) 88 | if: matrix.os == 'macOS-latest' 89 | run: | 90 | brew install coreutils 91 | gtimeout 45 $(amazon_kclpy_helper.py --print_command --java $(which java) --properties samples/sample.properties) || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 92 | 93 | auto-merge-dependabot: 94 | needs: [sample-run] 95 | runs-on: ubuntu-latest 96 | if: github.actor == 'dependabot[bot]' && github.event.pull_request.user.login == 'dependabot[bot]' 97 | steps: 98 | - name: Fetch 
Dependabot metadata 99 | id: metadata 100 | uses: dependabot/fetch-metadata@v2 101 | with: 102 | alert-lookup: true 103 | github-token: "${{ secrets.GITHUB_TOKEN }}" 104 | 105 | - name: Approve PR 106 | if: steps.metadata.outputs.update-type != 'version-update:semver-major' 107 | run: gh pr review --approve "$PR_URL" 108 | env: 109 | PR_URL: ${{github.event.pull_request.html_url}} 110 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 111 | 112 | # - name: Enable auto-merge for Dependabot PRs 113 | # if: steps.metadata.outputs.update-type != 'version-update:semver-major' 114 | # run: gh pr merge --auto --merge "$PR_URL" 115 | # env: 116 | # PR_URL: ${{github.event.pull_request.html_url}} 117 | # GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.cache/ 2 | /amazon_kclpy.egg-info/ 3 | /amazon_kclpy/jars/ 4 | *.pyc 5 | /build/ 6 | /dist/ 7 | /docs/_build/ 8 | /.eggs/ 9 | 10 | # IntelliJ idea stuff 11 | .idea 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. 
Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-kinesis-client-python/issues), or [recently closed](https://github.com/awslabs/amazon-kinesis-client-python/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. 
Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/amazon-kinesis-client-python/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/amazon-kinesis-client-python/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
62 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | include README.md 4 | include pom.xml -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | AmazonKinesisClientLibraryForPython 2 | Copyright 2012-2014 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Kinesis Client Library for Python 2 | 3 | [![Version](https://img.shields.io/pypi/v/amazon-kclpy.svg?style=flat)](https://pypi.org/project/amazon-kclpy/) [![UnitTestCoverage](https://github.com/awslabs/amazon-kinesis-client-python/actions/workflows/run-unit-tests.yml/badge.svg)](https://github.com/awslabs/amazon-kinesis-client-python/actions/workflows/run-unit-tests.yml) 4 | 5 | This package provides an interface to the Amazon Kinesis Client Library (KCL) MultiLangDaemon, 6 | which is part of the [Amazon KCL for Java][kinesis-github]. 7 | Developers can use the [Amazon KCL][amazon-kcl] to build distributed applications that 8 | process streaming data reliably at scale. The [Amazon KCL][amazon-kcl] takes care of 9 | many of the complex tasks associated with distributed computing, such as load-balancing 10 | across multiple instances, responding to instance failures, checkpointing processed records, 11 | and reacting to changes in stream volume. 12 | This interface manages the interaction with the MultiLangDaemon so that developers can focus on 13 | implementing their record processor executable. 
A record processor executable 14 | typically looks something like: 15 | 16 | ```python 17 | #!env python 18 | from amazon_kclpy import kcl 19 | import json, base64 20 | 21 | class RecordProcessor(kcl.RecordProcessorBase): 22 | 23 | def initialize(self, initialiation_input): 24 | pass 25 | 26 | def process_records(self, process_records_input): 27 | pass 28 | 29 | def lease_lost(self, lease_lost_input): 30 | pass 31 | 32 | def shard_ended(self, shard_ended_input): 33 | pass 34 | 35 | def shutdown_requested(self, shutdown_requested_input): 36 | pass 37 | 38 | if __name__ == "__main__": 39 | kclprocess = kcl.KCLProcess(RecordProcessor()) 40 | kclprocess.run() 41 | ``` 42 | 43 | ## Before You Get Started 44 | 45 | Before running the samples, you'll want to make sure that your environment is 46 | configured to allow the samples to use your 47 | [AWS Security Credentials](http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html). 48 | 49 | By default the samples use the [DefaultCredentialsProvider][DefaultCredentialsProvider] 50 | so you'll want to make your credentials available to one of the credentials providers in that 51 | provider chain. There are several ways to do this such as providing a ~/.aws/credentials file, 52 | or if you're running on EC2, you can associate an IAM role with your instance with appropriate 53 | access. 54 | 55 | For questions regarding Amazon Kinesis Service and the client libraries please visit the 56 | [Amazon Kinesis Forums][kinesis-forum] 57 | 58 | ## Running the Sample 59 | 60 | Using the `amazon_kclpy` package requires the MultiLangDaemon which is provided 61 | by the [Amazon KCL for Java][kinesis-github]. These jars will be downloaded automatically 62 | by the `install` command, but you can explicitly download them with the `download_jars` command. 
63 | From the root of this repo, run: 64 | 65 | python setup.py download_jars 66 | python setup.py install 67 | 68 | If you'd like to override the default search location for the jars, you can set the `KCL_MVN_REPO_SEARCH_URL` 69 | environment variable to the location of the maven repository you'd like to use. 70 | 71 | export KCL_MVN_REPO_SEARCH_URL=https://path/to/maven/repo 72 | 73 | Now the `amazon_kclpy` and [boto][boto] (used by the sample putter script) and required 74 | jars should be installed in your environment. To start the sample putter, run: 75 | 76 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird -w lobster 77 | 78 | This will create an Amazon Kinesis stream called words and put the words 79 | specified by the -w options into the stream once each. Use -p SECONDS to 80 | indicate a period over which to repeatedly put these words. 81 | 82 | Now we would like to run an Amazon KCL for Python application that reads records 83 | from the stream we just created, but first take a look in the samples directory, 84 | you'll find a file called sample.properties, cat that file: 85 | 86 | cat samples/sample.properties 87 | 88 | You'll see several properties defined there. `executableName` indicates the 89 | executable for the MultiLangDaemon to run, `streamName` is the Kinesis stream 90 | to read from, `appName` is the Amazon KCL application name to use which will be the 91 | name of an Amazon DynamoDB table that gets created by the Amazon KCL, 92 | `initialPositionInStream` tells the Amazon KCL how to start reading from shards upon 93 | a fresh startup. To run the sample application you can use a helper script 94 | included in this package. Note you must provide a path to java (version 1.7 95 | or greater) to run the Amazon KCL. 
96 | 97 | amazon_kclpy_helper.py --print_command \ 98 | --java --properties samples/sample.properties 99 | 100 | This will print the command needed to run the sample which you can copy paste, 101 | or surround the command with back ticks to run it. 102 | 103 | `amazon_kclpy_helper.py --print_command \ 104 | --java --properties samples/sample.properties` 105 | 106 | Alternatively, if you don't have the source on hand, but want to run the sample 107 | app you can use the `--sample` argument to indicate you'd like to get the 108 | sample.properties file from the installation location. 109 | 110 | amazon_kclpy_helper.py --print_command --java --sample 111 | 112 | ## Running on EC2 113 | 114 | Running on EC2 is simple. Assuming you are already logged into an EC2 instance running 115 | Amazon Linux, the following steps will prepare your environment for running the sample 116 | app. Note the version of java that ships with Amazon Linux can be found at 117 | `/usr/bin/java` and should be 1.7 or greater. 118 | 119 | sudo yum install python-pip 120 | 121 | sudo pip install virtualenv 122 | 123 | virtualenv /tmp/kclpy-sample-env 124 | 125 | source /tmp/kclpy-sample-env/bin/activate 126 | 127 | pip install amazon_kclpy 128 | 129 | ## Under the Hood - What You Should Know about Amazon KCL's [MultiLangDaemon][multi-lang-daemon] 130 | Amazon KCL for Python uses [Amazon KCL for Java][kinesis-github] internally. We have implemented 131 | a Java-based daemon, called the *MultiLangDaemon* that does all the heavy lifting. Our approach 132 | has the daemon spawn the user-defined record processor script/program as a sub-process. The 133 | *MultiLangDaemon* communicates with this sub-process over standard input/output using a simple 134 | protocol, and therefore the record processor script/program can be written in any language. 
135 | 136 | At runtime, there will always be a one-to-one correspondence between a record processor, a child process, 137 | and an [Amazon Kinesis Shard][amazon-kinesis-shard]. The *MultiLangDaemon* will make sure of 138 | that, without any need for the developer to intervene. 139 | 140 | In this release, we have abstracted these implementation details away and exposed an interface that enables 141 | you to focus on writing record processing logic in Python. This approach enables [Amazon KCL][amazon-kcl] to 142 | be language agnostic, while providing identical features and similar parallel processing model across 143 | all languages. 144 | 145 | ## See Also 146 | * [Developing Consumer Applications for Amazon Kinesis Using the Amazon Kinesis Client Library][amazon-kcl] 147 | * The [Amazon KCL for Java][kinesis-github] 148 | * The [Amazon KCL for Ruby][amazon-kinesis-ruby-github] 149 | * The [Amazon Kinesis Documentation][amazon-kinesis-docs] 150 | * The [Amazon Kinesis Forum][kinesis-forum] 151 | 152 | ## Release Notes 153 | ### Release 3.0.3 (March 25, 2025) 154 | * Downgrade logback from 1.5.16 to 1.3.15 to maintain JDK 8 compatability 155 | 156 | ### Release 3.0.2 (March 24, 2025) 157 | #### :warning: [BREAKING CHANGES] - Release 3.0.2 contains a dependency version that is not compatible with JDK 8. Please upgrade to a later version if your KCL application requires JDK 8. 
158 | * [KCL 3.0.2 Changelog](https://github.com/awslabs/amazon-kinesis-client/blob/5263b4227ce7210d52bec6817191d43f047cd1b2/CHANGELOG.md) Upgrade KCL and KCL-Multilang dependencies from 3.0.0 to 3.0.2 159 | * [#266](https://github.com/awslabs/amazon-kinesis-client-python/pull/266) Upgrade netty.version from 4.1.108.Final to 4.1.118.Final 160 | * [#265](https://github.com/awslabs/amazon-kinesis-client-python/pull/265) Upgrade logback.version from 1.3.14 to 1.5.16 161 | 162 | ### Release 3.0.1 (November 6, 2024) 163 | * New lease assignment / load balancing algorithm 164 | * KCL 3.x introduces a new lease assignment and load balancing algorithm. It assigns leases among workers based on worker utilization metrics and throughput on each lease, replacing the previous lease count-based lease assignment algorithm. 165 | * When KCL detects higher variance in CPU utilization among workers, it proactively reassigns leases from over-utilized workers to under-utilized workers for even load balancing. This ensures even CPU utilization across workers and removes the need to over-provision the stream processing compute hosts. 166 | * Optimized DynamoDB RCU usage 167 | * KCL 3.x optimizes DynamoDB read capacity unit (RCU) usage on the lease table by implementing a global secondary index with leaseOwner as the partition key. This index mirrors the leaseKey attribute from the base lease table, allowing workers to efficiently discover their assigned leases by querying the index instead of scanning the entire table. 168 | * This approach significantly reduces read operations compared to earlier KCL versions, where workers performed full table scans, resulting in higher RCU consumption. 169 | * Graceful lease handoff 170 | * KCL 3.x introduces a feature called "graceful lease handoff" to minimize data reprocessing during lease reassignments. Graceful lease handoff allows the current worker to complete checkpointing of processed records before transferring the lease to another worker. 
For graceful lease handoff, you should implement checkpointing logic within the existing `shutdownRequested()` method. 171 | * This feature is enabled by default in KCL 3.x, but you can turn off this feature by adjusting the configuration property `isGracefulLeaseHandoffEnabled`. 172 | * While this approach significantly reduces the probability of data reprocessing during lease transfers, it doesn't completely eliminate the possibility. To maintain data integrity and consistency, it's crucial to design your downstream consumer applications to be idempotent. This ensures that the application can handle potential duplicate record processing without adverse effects. 173 | * New DynamoDB metadata management artifacts 174 | * KCL 3.x introduces two new DynamoDB tables for improved lease management: 175 | * Worker metrics table: Records CPU utilization metrics from each worker. KCL uses these metrics for optimal lease assignments, balancing resource utilization across workers. If CPU utilization metric is not available, KCL assigns leases to balance the total sum of shard throughput per worker instead. 176 | * Coordinator state table: Stores internal state information for workers. Used to coordinate in-place migration from KCL 2.x to KCL 3.x and leader election among workers. 177 | * Follow this [documentation](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html#kcl-migration-from-2-3-IAM-permissions) to add required IAM permissions for your KCL application. 178 | * Other improvements and changes 179 | * Dependency on the AWS SDK for Java 1.x has been fully removed. 180 | * The Glue Schema Registry integration functionality no longer depends on AWS SDK for Java 1.x. Previously, it required this as a transient dependency. 181 | * Multilangdaemon has been upgraded to use AWS SDK for Java 2.x. It no longer depends on AWS SDK for Java 1.x. 182 | * `idleTimeBetweenReadsInMillis` (PollingConfig) now has a minimum default value of 200. 
* This polling configuration property determines the [publisher's](https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java) wait time between GetRecords calls in both success and failure cases. Previously, setting this value below 200 caused unnecessary throttling. This is because Amazon Kinesis Data Streams supports up to five read transactions per second per shard for shared-throughput consumers.
The following operations are newly added in KCL 3.x: 192 | * `LeaseAssignmentManager` 193 | * `WorkerMetricStatsReporter` 194 | * `LeaseDiscovery` 195 | ### Release 3.0.0 (November 6, 2024) 196 | 197 | **We found an issue with the release 3.0.0 regarding the build failure. Please use the release 3.0.1 to use KCL 3.0.** 198 | 199 | --- 200 | For **2.x** and **1.x** release notes, please see [v2.x/README.md](https://github.com/awslabs/amazon-kinesis-client-python/blob/v2.x/README.md#release-notes) 201 | 202 | [amazon-kinesis-shard]: http://docs.aws.amazon.com/kinesis/latest/dev/key-concepts.html 203 | [amazon-kinesis-docs]: http://aws.amazon.com/documentation/kinesis/ 204 | [amazon-kcl]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html 205 | [multi-lang-daemon]: https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java 206 | [kinesis]: http://aws.amazon.com/kinesis 207 | [amazon-kinesis-ruby-github]: https://github.com/awslabs/amazon-kinesis-client-ruby 208 | [kinesis-github]: https://github.com/awslabs/amazon-kinesis-client 209 | [boto]: http://boto.readthedocs.org/en/latest/ 210 | [DefaultCredentialsProvider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 211 | [kinesis-forum]: http://developer.amazonwebservices.com/connect/forum.jspa?forumID=169 212 | 213 | ## License 214 | 215 | This library is licensed under the Apache 2.0 License. 216 | -------------------------------------------------------------------------------- /amazon_kclpy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | """ 5 | This package provides an interface to the KCL MultiLangDaemon. 
class CheckpointError(Exception):
    """
    Wraps the name of an exception that the MultiLangDaemon reported through the input file
    in response to a checkpoint action.
    """

    def __init__(self, value):
        """
        :type value: str
        :param value: The name of the exception that was received while checkpointing. For more details see
            https://github.com/awslabs/amazon-kinesis-client/tree/master/src/main/java/com/amazonaws/services/kinesis/clientlibrary/exceptions
            Any of those exceptions' names could be returned by the MultiLangDaemon as a response to a
            checkpoint action.
        """
        self.value = value

    def __str__(self):
        # Mirror the stored exception name, quoted via repr, for readable error output.
        return repr(self.value)
class MalformedAction(Exception):
    """
    Raised when an action given by the MultiLangDaemon doesn't have all the appropriate attributes.
    """
    pass


# Maps each supported "action" value to the message class that deserializes it.
_serializers = {
    "initialize": messages.InitializeInput,
    "processRecords": messages.ProcessRecordsInput,
    "shutdown": messages.ShutdownInput,
    "checkpoint": messages.CheckpointInput,
    "record": messages.Record,
    "shutdownRequested": messages.ShutdownRequestedInput,
    "leaseLost": messages.LeaseLostInput,
    "shardEnded": messages.ShardEndedInput,
}


def _format_serializer_names():
    """Return the supported action names as a quoted, comma-separated string for error messages."""
    return ", ".join('"{k}"'.format(k=k) for k in _serializers.keys())


def message_decode(json_dict):
    """
    Translates incoming JSON commands into MessageDispatch classes

    :param dict json_dict: a dictionary of JSON data

    :return: an object that can be used to dispatch the received JSON command
    :rtype: amazon_kclpy.messages.MessageDispatcher

    :raises MalformedAction: if the JSON object is missing action, or an appropriate serializer for that
        action can't be found
    """
    try:
        action = json_dict["action"]
    except KeyError as key_error:
        # Chain the original KeyError (PEP 3134) so the root cause stays visible in tracebacks.
        raise MalformedAction("Action {json_dict} was expected to have key {key!s}".format(json_dict=json_dict,
                                                                                           key=key_error)) from key_error
    try:
        serializer = _serializers[action]
    except KeyError as key_error:
        raise MalformedAction("Received an action which couldn't be understood. Action was '{action}' -- Allowed {keys}"
                              .format(action=action, keys=_format_serializer_names())) from key_error

    return serializer(json_dict)
class Checkpointer(object):
    """
    Issues checkpoint requests to the MultiLangDaemon. A checkpoint records how far into a shard
    processing has successfully progressed; if this processor fails or loses its lease to the
    shard, the next processor (started by this MultiLangDaemon or another instance) resumes from
    the most recent checkpoint.
    """

    def __init__(self, io_handler):
        """
        :type io_handler: amazon_kclpy.kcl._IOHandler
        :param io_handler: Handles writing checkpoint requests to, and reading checkpoint
            responses from, the MultiLangDaemon.
        """
        self.io_handler = io_handler

    def _get_action(self):
        """
        Reads and decodes the next JSON message from STDIN.

        :rtype: object
        :return: Either a child of MessageDispatcher, or a housekeeping object type
        """
        raw_line = self.io_handler.read_line()
        return self.io_handler.load_action(raw_line)

    def checkpoint(self, sequence_number=None, sub_sequence_number=None):
        """
        Checkpoints at the given position, or at the end of the most recently delivered list of
        records when no sequence number is supplied.

        :param str or None sequence_number: The sequence number to checkpoint at, or None to
            checkpoint at the farthest record
        :param int or None sub_sequence_number: The sub-sequence number to checkpoint at, or None
            to checkpoint at the farthest sub_sequence_number

        :raises CheckpointError: when the daemon reports a checkpoint failure, or when an
            unexpected reply type is received
        """
        self.io_handler.write_action({
            "action": "checkpoint",
            "sequenceNumber": sequence_number,
            "subSequenceNumber": sub_sequence_number,
        })
        reply = self._get_action()
        if not isinstance(reply, messages.CheckpointInput):
            # Unexpected reply: the KCL (or KCLpy) is in an invalid state. Per the KCL
            # documentation this exception is NOT retryable, so client code should exit.
            raise CheckpointError('InvalidStateException')
        if reply.error is not None:
            raise CheckpointError(reply.error)
    @abc.abstractmethod
    def shutdown(self, checkpointer, reason):
        """
        Called by a KCLProcess instance to indicate that this record processor should shutdown. After this is called,
        there will be no more calls to any other methods of this record processor.

        :type checkpointer: amazon_kclpy.kcl.Checkpointer
        :param checkpointer: A checkpointer which accepts a sequence number or no parameters.

        :type reason: str
        :param reason: The reason this record processor is being shutdown, either TERMINATE or ZOMBIE. If ZOMBIE,
            clients should not checkpoint because there is possibly another record processor which has acquired the
            lease for this shard. If TERMINATE then checkpointer.checkpoint() should be called to checkpoint at the
            end of the shard so that this processor will be shutdown and new processor(s) will be created for the
            child(ren) of this shard.
        """
        raise NotImplementedError
class KCLProcess(object):
    """
    Drives the record-processing loop: reads action messages from the MultiLangDaemon,
    dispatches them to the record processor, and reports completion status back.
    """

    def __init__(self, record_processor, input_file=sys.stdin, output_file=sys.stdout, error_file=sys.stderr):
        """
        :type record_processor: RecordProcessorBase or amazon_kclpy.v2.processor.RecordProcessorBase
        :param record_processor: A record processor to use for processing a shard.

        :param file input_file: A file to read action messages from. Typically STDIN.

        :param file output_file: A file to write action messages to. Typically STDOUT.

        :param file error_file: A file to write error messages to. Typically STDERR.
        """
        self.io_handler = _IOHandler(input_file, output_file, error_file)
        self.checkpointer = Checkpointer(self.io_handler)
        # Older processor versions are wrapped in delegates so the dispatch loop only ever
        # talks to the v3 interface.
        if record_processor.version == 2:
            self.processor = v3processor.V2toV3Processor(record_processor)
        elif record_processor.version == 1:
            self.processor = v3processor.V2toV3Processor(v2processor.V1toV2Processor(record_processor))
        else:
            self.processor = record_processor

    def _perform_action(self, action):
        """
        Routes an input action to the appropriate method of the record processor.

        :type action:
        :param MessageDispatcher action: A derivative of MessageDispatcher that will handle the provided input

        :raises MalformedAction: Raised if the action is missing attributes.
        """
        try:
            action.dispatch(self.checkpointer, self.processor)
        except SystemExit:
            # A deliberate exit must not be swallowed below.
            raise
        except Exception as ex:
            # Client code may raise anything, and there is no way to recover if we let it
            # propagate. Like the KCL, swallow it but surface the stack trace on STDERR so the
            # failure is visible and debuggable.
            self.io_handler.error_file.write("Caught exception from action dispatch: {ex}".format(ex=str(ex)))
            traceback.print_exc(file=self.io_handler.error_file)
            self.io_handler.error_file.flush()

    def _report_done(self, response_for=None):
        """
        Writes a status message to the output file.

        :param response_for: Required parameter; the action that this status message is confirming completed.
        """
        self.io_handler.write_action({"action": "status", "responseFor": response_for})

    def _handle_a_line(self, line):
        """
        Parses one JSON action line, invokes the record processor, then acknowledges the action
        back to the MultiLangDaemon.

        :type line: str
        :param line: A line read from STDIN, expected to be a JSON encoded dictionary describing
            what action to take.
        """
        decoded = self.io_handler.load_action(line)
        self._perform_action(decoded)
        self._report_done(decoded.action)

    def run(self):
        """
        Starts this KCL processor's main loop.

        No attempt is made to stop errors from propagating and exiting the program, since there
        is nothing KCLpy can do to recover from e.g. an I/O error or a JSON decoding failure
        (the MultiLangDaemon should never write a non-JSON string to this process).
        """
        while True:
            line = self.io_handler.read_line()
            if not line:
                break
            self._handle_a_line(line)
class MessageDispatcher(object):
    """
    Base class for routing incoming actions to record processors. Each subclass knows which
    record-processor method it must invoke, and generally carries the action's parameters
    inside itself.
    """
    # NOTE(review): `__metaclass__` is the Python 2 spelling and has no effect under Python 3,
    # so this class is not actually abstract there; kept as-is to avoid a behavior change.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def dispatch(self, checkpointer, record_processor):
        """
        Dispatches the current message to the record processor.

        :param amazon_kclpy.kcl.Checkpointer checkpointer: The checkpointer that can be used by the record
            process to record its progress

        :param amazon_kclpy.v3.processor.RecordProcessorBase record_processor: The record processor that will
            receive, and process the message.

        :return: Nothing
        """
        raise NotImplementedError

    @abc.abstractmethod
    def action(self):
        """
        Retrieves the name of the action that caused the creation of this dispatcher.

        :return str: The name of the action e.g. initialize, or processRecords
        """
        raise NotImplementedError
class ProcessRecordsInput(MessageDispatcher):
    """
    Carries a batch of records, plus associated metadata, for calls to process_records.
    """

    def __init__(self, json_dict):
        self._records = json_dict["records"]
        self._millis_behind_latest = json_dict["millisBehindLatest"]
        self._action = json_dict['action']
        # Populated with the active checkpointer when this message is dispatched.
        self._checkpointer = None

    @property
    def records(self):
        """
        The records that are part of this request.

        :return: records that are part of this request
        :rtype: list[amazon_kclpy.messages.Record]
        """
        return self._records

    @property
    def millis_behind_latest(self):
        """
        Approximately how many milliseconds behind the tip of the stream this batch of records is.

        :return: the number of milliseconds
        :rtype: int
        """
        return self._millis_behind_latest

    @property
    def checkpointer(self):
        """
        The checkpointer that will confirm all records up to, and including, this batch of records.

        :return: the checkpointer for this request
        :rtype: amazon_kclpy.kcl.Checkpointer
        """
        return self._checkpointer

    @property
    def action(self):
        """
        The action that spawned this message

        :return: the original action value
        :rtype: str
        """
        return self._action

    def dispatch(self, checkpointer, record_processor):
        """Attach the checkpointer, then hand this batch to the record processor."""
        self._checkpointer = checkpointer
        record_processor.process_records(self)
174 | """ 175 | 176 | @staticmethod 177 | def zombie(): 178 | return ShutdownInput("ZOMBIE", LeaseLostCheckpointer()) 179 | 180 | @staticmethod 181 | def terminate(checkpointer): 182 | return ShutdownInput("TERMINATE", checkpointer) 183 | 184 | def __init__(self, reason, checkpointer): 185 | self._action = 'shutdown' 186 | self._reason = reason 187 | self._checkpointer = checkpointer 188 | 189 | @property 190 | def reason(self): 191 | """ 192 | The reason that this record processor is being shutdown, will be one of 193 | 194 | * TERMINATE 195 | * ZOMBIE 196 | 197 | :return: the reason for the shutdown 198 | :rtype: str 199 | """ 200 | return self._reason 201 | 202 | @property 203 | def checkpointer(self): 204 | """ 205 | The checkpointer that can be used to checkpoint this shutdown. 206 | 207 | :return: the checkpointer 208 | :rtype: amazon_kclpy.kcl.Checkpointer 209 | """ 210 | return self._checkpointer 211 | 212 | @property 213 | def action(self): 214 | """ 215 | The action that spawned this message 216 | 217 | :return: the original action value 218 | :rtype: str 219 | """ 220 | return self._action 221 | 222 | 223 | class LeaseLostInput(MessageDispatcher): 224 | """ 225 | Message, and input that is sent when the client has lost the lease for this shard. 
class ShardEndedInput(MessageDispatcher):
    """
    Message and input delivered to the record processor when the client has reached the end of
    the shard.
    """

    def __init__(self, json_dict):
        # The checkpointer is attached at dispatch time.
        self._checkpointer = None
        self._action = json_dict['action']

    @property
    def action(self):
        """
        The action that caused the creation of this message

        :return: the action name
        :rtype: str
        """
        return self._action

    @property
    def checkpointer(self):
        """
        The checkpointer that the record processor will use to checkpoint the end of the shard
        :return: the checkpointer
        :rtype: Checkpointer
        """
        return self._checkpointer

    def dispatch(self, checkpointer, record_processor):
        """
        Attaches the checkpointer and notifies the record processor that the shard has ended.

        :param checkpointer: the checkpointer to be used to officially end processing on the shard
        :param record_processor: the record processor that will handle the shard end message
        """
        self._checkpointer = checkpointer
        record_processor.shard_ended(self)
294 | """ 295 | def __init__(self, json_dict): 296 | self._checkpointer = None 297 | self._action = json_dict['action'] 298 | 299 | @property 300 | def checkpointer(self): 301 | """ 302 | The checkpointer that can be used to checkpoint before actual shutdown. 303 | 304 | :return: the checkpointer 305 | :rtype: amazon_kclpy.kcl.Checkpointer 306 | """ 307 | return self._checkpointer 308 | 309 | @property 310 | def action(self): 311 | """ 312 | The action that spawned this message 313 | 314 | :return: the original action value 315 | :rtype: str 316 | """ 317 | return self._action 318 | 319 | def dispatch(self, checkpointer, record_processor): 320 | self._checkpointer = checkpointer 321 | record_processor.shutdown_requested(self) 322 | 323 | 324 | class CheckpointInput(object): 325 | """ 326 | Used in preparing the response back during the checkpoint process. This shouldn't be used by record processors. 327 | """ 328 | def __init__(self, json_dict): 329 | """ 330 | Creates a new CheckpointInput object with the given sequence number, and sub-sequence number. 331 | The provided dictionary must contain: 332 | * sequenceNumber 333 | * subSequenceNumber 334 | 335 | The provided dictionary can optionally contain: 336 | * error 337 | 338 | :param dict json_dict: 339 | """ 340 | self._sequence_number = json_dict["sequenceNumber"] 341 | self._sub_sequence_number = json_dict["subSequenceNumber"] 342 | self._error = json_dict.get("error", None) 343 | 344 | @property 345 | def sequence_number(self): 346 | """ 347 | The sequence number that record processor intends to checkpoint at. Can be None if the default 348 | checkpoint behavior is desired. 349 | 350 | :return: the sequence number 351 | :rtype: str or None 352 | """ 353 | return self._sequence_number 354 | 355 | @property 356 | def sub_sequence_number(self): 357 | """ 358 | The sub-sequence number that the record processor intends to checkpoint at. Can be None if 359 | the default checkpoint behavior is desired. 
360 | 361 | :return: the sub-sequence number 362 | :rtype: int or None 363 | """ 364 | return self._sub_sequence_number 365 | 366 | @property 367 | def error(self): 368 | """ 369 | The error message that may have resulted from checkpointing. This will be None if no error occurred. 370 | 371 | :return: the error message 372 | :rtype: str or None 373 | """ 374 | return self._error 375 | 376 | 377 | class Record(object): 378 | """ 379 | Represents a single record as returned by Kinesis, or Disaggregated from the Kinesis Producer Library 380 | """ 381 | def __init__(self, json_dict): 382 | """ 383 | Creates a new Record object that represent a single record in Kinesis. Construction for the provided 384 | dictionary requires that the following fields are present: 385 | * sequenceNumber 386 | * subSequenceNumber 387 | * approximateArrivalTimestamp 388 | * partitionKey 389 | * data 390 | 391 | :param dict json_dict: 392 | """ 393 | self._sequence_number = json_dict["sequenceNumber"] 394 | self._sub_sequence_number = json_dict["subSequenceNumber"] 395 | 396 | self._timestamp_millis = int(json_dict["approximateArrivalTimestamp"]) 397 | self._approximate_arrival_timestamp = datetime.fromtimestamp(self._timestamp_millis / 1000.0) 398 | 399 | self._partition_key = json_dict["partitionKey"] 400 | self._data = json_dict["data"] 401 | self._json_dict = json_dict 402 | 403 | @property 404 | def binary_data(self): 405 | """ 406 | The raw binary data automatically decoded from the Base 64 representation provided by 407 | 408 | :py:attr:`data` the original source of the data 409 | 410 | :return: a string representing the raw bytes from 411 | :rtype: str 412 | """ 413 | return base64.b64decode(self._data) 414 | 415 | @property 416 | def sequence_number(self): 417 | """ 418 | The sequence number for this record. This number maybe the same for other records, if they're 419 | all part of an aggregated record. 
record produced by the Amazon Kinesis Producer Library
Base class for implementing a record processor. A RecordProcessor processes a shard in a stream.
10 | Its methods will be called with this pattern: 11 | 12 | - initialize will be called once 13 | - process_records will be called zero or more times 14 | - shutdown will be called if this MultiLangDaemon instance loses the lease to this shard 15 | """ 16 | __metaclass__ = abc.ABCMeta 17 | 18 | @abc.abstractmethod 19 | def initialize(self, initialize_input): 20 | """ 21 | Called once by a KCLProcess before any calls to process_records 22 | 23 | :param amazon_kclpy.messages.InitializeInput initialize_input: Information about the 24 | initialization request for the record processor 25 | """ 26 | raise NotImplementedError 27 | 28 | @abc.abstractmethod 29 | def process_records(self, process_records_input): 30 | """ 31 | Called by a KCLProcess with a list of records to be processed and a checkpointer which accepts sequence numbers 32 | from the records to indicate where in the stream to checkpoint. 33 | 34 | :param amazon_kclpy.messages.ProcessRecordsInput process_records_input: the records, and metadata about the 35 | records. 36 | 37 | """ 38 | raise NotImplementedError 39 | 40 | @abc.abstractmethod 41 | def shutdown(self, shutdown_input): 42 | """ 43 | Called by a KCLProcess instance to indicate that this record processor should shutdown. After this is called, 44 | there will be no more calls to any other methods of this record processor. 45 | 46 | As part of the shutdown process you must inspect :attr:`amazon_kclpy.messages.ShutdownInput.reason` to 47 | determine the steps to take. 48 | 49 | * Shutdown Reason ZOMBIE: 50 | **ATTEMPTING TO CHECKPOINT ONCE A LEASE IS LOST WILL FAIL** 51 | 52 | A record processor will be shutdown if it loses its lease. In this case the KCL will terminate the 53 | record processor. It is not possible to checkpoint once a record processor has lost its lease. 
54 | * Shutdown Reason TERMINATE: 55 | **THE RECORD PROCESSOR MUST CHECKPOINT OR THE KCL WILL BE UNABLE TO PROGRESS** 56 | 57 | A record processor will be shutdown once it reaches the end of a shard. A shard ending indicates that 58 | it has been either split into multiple shards or merged with another shard. To begin processing the new 59 | shard(s) it's required that a final checkpoint occurs. 60 | 61 | 62 | :param amazon_kclpy.messages.ShutdownInput shutdown_input: Information related to the shutdown request 63 | """ 64 | raise NotImplementedError 65 | 66 | def shutdown_requested(self, shutdown_requested_input): 67 | """ 68 | Called by a KCLProcess instance to indicate that this record processor is about to be be shutdown. This gives 69 | the record processor a chance to checkpoint, before the lease is terminated. 70 | 71 | :param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: 72 | Information related to shutdown requested. 73 | """ 74 | pass 75 | 76 | version = 2 77 | 78 | 79 | class V1toV2Processor(RecordProcessorBase): 80 | """ 81 | Provides a bridge between the new v2 RecordProcessorBase, and the original RecordProcessorBase. 82 | 83 | This handles the conversion of the new input types to the older expected forms. This normally shouldn't be used 84 | directly by record processors, since it's just a compatibility layer. 85 | 86 | The delegate should be a :py:class:`amazon_kclpy.kcl.RecordProcessorBase`: 87 | 88 | """ 89 | def __init__(self, delegate): 90 | """ 91 | Creates a new V1 to V2 record processor. 
:param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: information related to the shutdown request
Called once by the KCL to allow the record processor to configure itself before starting to process records.
Provides a bridge between the new v3 RecordProcessorBase, and the v2 RecordProcessorBase.
:param amazon_kclpy.v2.processor.RecordProcessorBase delegate: the delegate where requests will be forwarded to
:param amazon_kclpy.messages.ShutdownRequestedInput shutdown_requested_input: information related to the record processor shutdown
16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/AmazonKinesisClientLibraryforPython.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/AmazonKinesisClientLibraryforPython.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 
112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/AmazonKinesisClientLibraryforPython" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/AmazonKinesisClientLibraryforPython" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 
214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Amazon Kinesis Client Library for Python documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Oct 24 12:24:53 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('../')) 22 | 23 | import amazon_kclpy 24 | import amazon_kclpy.v2 25 | import samples.sample_kclpy_app 26 | import samples.amazon_kclpy_helper 27 | 28 | # -- General configuration ------------------------------------------------ 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | # 32 | # needs_sphinx = '1.0' 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 
37 | extensions = [ 38 | 'sphinx.ext.autodoc', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.intersphinx', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # 49 | # source_suffix = ['.rst', '.md'] 50 | source_suffix = '.rst' 51 | 52 | # The encoding of source files. 53 | # 54 | # source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = u'Amazon Kinesis Client Library for Python' 61 | copyright = u'2016, Amazon.com, Inc.' 62 | author = 'Amazon.com, Inc.' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = u'1.3.1' 70 | # The full version, including alpha/beta/rc tags. 71 | release = u'1.3.1' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | # 83 | # today = '' 84 | # 85 | # Else, today_fmt is used as the format for a strftime call. 86 | # 87 | # today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 
91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | # 97 | # default_role = None 98 | 99 | # If true, '()' will be appended to :func: etc. cross-reference text. 100 | # 101 | # add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | # 106 | # add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | # 111 | # show_authors = False 112 | 113 | # The name of the Pygments (syntax highlighting) style to use. 114 | pygments_style = 'sphinx' 115 | 116 | # A list of ignored prefixes for module index sorting. 117 | # modindex_common_prefix = [] 118 | 119 | # If true, keep warnings as "system message" paragraphs in the built documents. 120 | # keep_warnings = False 121 | 122 | # If true, `todo` and `todoList` produce output, else they produce nothing. 123 | todo_include_todos = False 124 | 125 | 126 | # -- Options for HTML output ---------------------------------------------- 127 | 128 | # The theme to use for HTML and HTML Help pages. See the documentation for 129 | # a list of builtin themes. 130 | # 131 | html_theme = 'alabaster' 132 | 133 | # Theme options are theme-specific and customize the look and feel of a theme 134 | # further. For a list of options available for each theme, see the 135 | # documentation. 136 | # 137 | # html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | # html_theme_path = [] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 
144 | # 145 | # html_title = u'Amazon Kinesis Client Library for Python v1.3.1' 146 | 147 | # A shorter title for the navigation bar. Default is the same as html_title. 148 | # 149 | # html_short_title = None 150 | 151 | # The name of an image file (relative to this directory) to place at the top 152 | # of the sidebar. 153 | # 154 | # html_logo = None 155 | 156 | # The name of an image file (relative to this directory) to use as a favicon of 157 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 158 | # pixels large. 159 | # 160 | # html_favicon = None 161 | 162 | # Add any paths that contain custom static files (such as style sheets) here, 163 | # relative to this directory. They are copied after the builtin static files, 164 | # so a file named "default.css" will overwrite the builtin "default.css". 165 | html_static_path = ['_static'] 166 | 167 | # Add any extra paths that contain custom files (such as robots.txt or 168 | # .htaccess) here, relative to this directory. These files are copied 169 | # directly to the root of the documentation. 170 | # 171 | # html_extra_path = [] 172 | 173 | # If not None, a 'Last updated on:' timestamp is inserted at every page 174 | # bottom, using the given strftime format. 175 | # The empty string is equivalent to '%b %d, %Y'. 176 | # 177 | # html_last_updated_fmt = None 178 | 179 | # If true, SmartyPants will be used to convert quotes and dashes to 180 | # typographically correct entities. 181 | # 182 | # html_use_smartypants = True 183 | 184 | # Custom sidebar templates, maps document names to template names. 185 | # 186 | # html_sidebars = {} 187 | 188 | # Additional templates that should be rendered to pages, maps page names to 189 | # template names. 190 | # 191 | # html_additional_pages = {} 192 | 193 | # If false, no module index is generated. 194 | # 195 | # html_domain_indices = True 196 | 197 | # If false, no index is generated. 
198 | # 199 | # html_use_index = True 200 | 201 | # If true, the index is split into individual pages for each letter. 202 | # 203 | # html_split_index = False 204 | 205 | # If true, links to the reST sources are added to the pages. 206 | # 207 | # html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 210 | # 211 | # html_show_sphinx = True 212 | 213 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 214 | # 215 | # html_show_copyright = True 216 | 217 | # If true, an OpenSearch description file will be output, and all pages will 218 | # contain a tag referring to it. The value of this option must be the 219 | # base URL from which the finished HTML is served. 220 | # 221 | # html_use_opensearch = '' 222 | 223 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 224 | # html_file_suffix = None 225 | 226 | # Language to be used for generating the HTML full-text search index. 227 | # Sphinx supports the following languages: 228 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 229 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 230 | # 231 | # html_search_language = 'en' 232 | 233 | # A dictionary with options for the search language support, empty by default. 234 | # 'ja' uses this config value. 235 | # 'zh' user can custom change `jieba` dictionary path. 236 | # 237 | # html_search_options = {'type': 'default'} 238 | 239 | # The name of a javascript file (relative to the configuration directory) that 240 | # implements a search results scorer. If empty, the default will be used. 241 | # 242 | # html_search_scorer = 'scorer.js' 243 | 244 | # Output file base name for HTML help builder. 245 | htmlhelp_basename = 'AmazonKinesisClientLibraryforPythondoc' 246 | 247 | # -- Options for LaTeX output --------------------------------------------- 248 | 249 | latex_elements = { 250 | # The paper size ('letterpaper' or 'a4paper'). 
251 | # 252 | # 'papersize': 'letterpaper', 253 | 254 | # The font size ('10pt', '11pt' or '12pt'). 255 | # 256 | # 'pointsize': '10pt', 257 | 258 | # Additional stuff for the LaTeX preamble. 259 | # 260 | # 'preamble': '', 261 | 262 | # Latex figure (float) alignment 263 | # 264 | # 'figure_align': 'htbp', 265 | } 266 | 267 | # Grouping the document tree into LaTeX files. List of tuples 268 | # (source start file, target name, title, 269 | # author, documentclass [howto, manual, or own class]). 270 | latex_documents = [ 271 | (master_doc, 'AmazonKinesisClientLibraryforPython.tex', u'Amazon Kinesis Client Library for Python Documentation', 272 | u'Amazon.com', 'manual'), 273 | ] 274 | 275 | # The name of an image file (relative to this directory) to place at the top of 276 | # the title page. 277 | # 278 | # latex_logo = None 279 | 280 | # For "manual" documents, if this is true, then toplevel headings are parts, 281 | # not chapters. 282 | # 283 | # latex_use_parts = False 284 | 285 | # If true, show page references after internal links. 286 | # 287 | # latex_show_pagerefs = False 288 | 289 | # If true, show URL addresses after external links. 290 | # 291 | # latex_show_urls = False 292 | 293 | # Documents to append as an appendix to all manuals. 294 | # 295 | # latex_appendices = [] 296 | 297 | # It false, will not define \strong, \code, itleref, \crossref ... but only 298 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 299 | # packages. 300 | # 301 | # latex_keep_old_macro_names = True 302 | 303 | # If false, no module index is generated. 304 | # 305 | # latex_domain_indices = True 306 | 307 | 308 | # -- Options for manual page output --------------------------------------- 309 | 310 | # One entry per manual page. List of tuples 311 | # (source start file, name, description, authors, manual section). 
312 | man_pages = [ 313 | (master_doc, 'amazonkinesisclientlibraryforpython', u'Amazon Kinesis Client Library for Python Documentation', 314 | [author], 1) 315 | ] 316 | 317 | # If true, show URL addresses after external links. 318 | # 319 | # man_show_urls = False 320 | 321 | 322 | # -- Options for Texinfo output ------------------------------------------- 323 | 324 | # Grouping the document tree into Texinfo files. List of tuples 325 | # (source start file, target name, title, author, 326 | # dir menu entry, description, category) 327 | texinfo_documents = [ 328 | (master_doc, 'AmazonKinesisClientLibraryforPython', u'Amazon Kinesis Client Library for Python Documentation', 329 | author, 'AmazonKinesisClientLibraryforPython', 'One line description of project.', 330 | 'Miscellaneous'), 331 | ] 332 | 333 | # Documents to append as an appendix to all manuals. 334 | # 335 | # texinfo_appendices = [] 336 | 337 | # If false, no module index is generated. 338 | # 339 | # texinfo_domain_indices = True 340 | 341 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 342 | # 343 | # texinfo_show_urls = 'footnote' 344 | 345 | # If true, do not generate a @detailmenu in the "Top" node's menu. 346 | # 347 | # texinfo_no_detailmenu = False 348 | 349 | 350 | # Example configuration for intersphinx: refer to the Python standard library. 351 | intersphinx_mapping = {'https://docs.python.org/': None} 352 | -------------------------------------------------------------------------------- /docs/guide/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _guide_quickstart: 2 | 3 | Getting Started Using the Amazon Kinesis Client for Python 4 | ========================================================== 5 | This assumes you're already publishing data to Kinesis. If you're not publishing see :doc:`sample`. In 6 | addition you will need to ensure that you have a Java Runtime Environment (JRE) installed. 
The JRE must be version 7 | 1.7 or greater. 8 | 9 | Prerequisites 10 | ------------- 11 | There are a few prerequisites for using the Amazon Kinesis Client for Python. 12 | 13 | Publishing Data to Kinesis 14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | You must have an AWS Account, and be publishing some data to Kinesis that you intend to process. 17 | If you're not publishing you can use the sample publisher described in the :doc:`sample`. 18 | 19 | Install a Java Runtime Environment 20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | You must have a Java Runtime Environment (JRE) version 1.7 or greater installed. It's recommended you use the newest 23 | version of the JRE, which is currently 1.8 24 | 25 | To install the 1.8 version of the JRE on Amazon Linux you can run the following command:: 26 | 27 | sudo yum install java-1.8.0-openjdk.x86_64 28 | 29 | For other operating systems please refer to your system's documentation. 30 | 31 | It is also possible to download, and install a JRE from Oracle `Java SE Runtime Environment 8 `_ 32 | 33 | 34 | Installing the Amazon Kinesis Client for Python 35 | ----------------------------------------------- 36 | The first thing to do is install the Amazon Kinesis Client for Python (KCL). You can install the KCL from pip using:: 37 | 38 | pip install amazon_kclpy 39 | 40 | This should install the KCL, and automatically download the necessary jars. 41 | 42 | 43 | Create A Record Processor 44 | ------------------------- 45 | The record processor is how the KCL will communicate with your application. Create a file with a class that extends 46 | :class:`amazon_kclpy.v2.processor.RecordProcessorBase`. See the :doc:`sample` for an example of a record processor. 47 | 48 | Create A Properties File 49 | ------------------------ 50 | The KCL uses a Java properties file to configure itself. The Java process uses this file to configure the KCL, and 51 | determine which python script to run for record processing. 
See the 52 | :download:`sample.properties <../../samples/sample.properties>` for documentation, and required values. 53 | 54 | Create the Startup Command 55 | -------------------------- 56 | The KCL includes a script to help generate the command line to start the KCL application. TO create the startup 57 | command for your application use:: 58 | 59 | amazon_kclpy_helper.py --print_command \ 60 | --java --properties 61 | 62 | .. automodule:: samples.amazon_kclpy_helper 63 | :special-members: 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/guide/record_processor_v1.rst: -------------------------------------------------------------------------------- 1 | .. _guide_record_processor_v1 2 | 3 | Kinesis Client Record Process Version 1 4 | ======================================= 5 | The record processor is the central pillar of the Kinesis Client. This version of the record processor doesn't accept 6 | as much information, and so it's recommended that you don't use this version anymore. 7 | 8 | Record Processor API 9 | -------------------- 10 | 11 | .. autoclass:: amazon_kclpy.kcl.RecordProcessorBase 12 | :members: 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/guide/record_processor_v2.rst: -------------------------------------------------------------------------------- 1 | .. _guide_record_processor_v2 2 | 3 | Kinesis Client Record Process Version 2 4 | ======================================= 5 | The record processor is the central pillar of the Kinesis Client. This version of the record processor uses more 6 | complex objects to represent the inputs to the methods of the record processor. 7 | 8 | Record Processor API 9 | -------------------- 10 | 11 | .. autoclass:: amazon_kclpy.v2.processor.RecordProcessorBase 12 | :members: 13 | 14 | Record Processor Messages 15 | ------------------------- 16 | 17 | .. 
automodule:: amazon_kclpy.messages 18 | :members: 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/guide/sample.rst: -------------------------------------------------------------------------------- 1 | .. _guide_sample: 2 | 3 | Running the Sample Application 4 | ============================== 5 | The sample application provided with this module shows the basics of using the Amazon Kinesis Client for Python. 6 | 7 | Before Getting Started 8 | ---------------------- 9 | Before running the samples, you'll want to make sure that your environment is 10 | configured to allow the samples to use your 11 | `AWS Security Credentials `_. 12 | 13 | By default the samples use the `DefaultAWSCredentialsProviderChain `_ 14 | so you'll want to make your credentials available to one of the credentials providers in that 15 | provider chain. There are several ways to do this such as providing a ~/.aws/credentials file, 16 | or if you're running on EC2, you can associate an IAM role with your instance with appropriate 17 | access. 18 | 19 | For questions regarding Amazon Kinesis Service and the client libraries please visit the 20 | `Amazon Kinesis Forums `_ 21 | 22 | Running the Sample 23 | ------------------ 24 | 25 | Using the ``amazon_kclpy`` package requires the MultiLangDaemon which is provided 26 | by the `Amazon KCL for Java `. These jars will be downloaded automatically 27 | by the **install** command, but you can explicitly download them with the ``download_jars`` command. 28 | From the root of this repo, run:: 29 | 30 | python setup.py download_jars 31 | python setup.py install 32 | 33 | Now the ``amazon_kclpy`` and `boto < http://boto.readthedocs.org/en/latest/>`_ (used by the sample putter script) and required 34 | jars should be installed in your environment. 
To start the sample putter, run:: 35 | 36 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird -w lobster 37 | 38 | This will create an Amazon Kinesis stream called words and put the words 39 | specified by the -w options into the stream once each. Use -p SECONDS to 40 | indicate a period over which to repeatedly put these words. 41 | 42 | Now we would like to run an Amazon KCL for Python application that reads records 43 | from the stream we just created, but first take a look in the samples directory, 44 | you'll find a file called sample.properties, cat that file:: 45 | 46 | cat samples/sample.properties 47 | 48 | You'll see several properties defined there. ``executableName`` indicates the 49 | executable for the MultiLangDaemon to run, ``streamName`` is the Kinesis stream 50 | to read from, ``appName`` is the Amazon KCL application name to use which will be the 51 | name of an Amazon DynamoDB table that gets created by the Amazon KCL, 52 | ``initialPositionInStream`` tells the Amazon KCL how to start reading from shards upon 53 | a fresh startup. To run the sample application you can use a helper script 54 | included in this package. Note you must provide a path to java (version 1.7 55 | or greater) to run the Amazon KCL:: 56 | 57 | amazon_kclpy_helper.py --print_command \ 58 | --java --properties samples/sample.properties 59 | 60 | This will print the command needed to run the sample which you can copy paste, 61 | or surround the command with back ticks to run it:: 62 | 63 | `amazon_kclpy_helper.py --print_command \ 64 | --java --properties samples/sample.properties` 65 | 66 | Alternatively, if you don't have the source on hand, but want to run the sample 67 | app you can use the ``--sample`` argument to indicate you'd like to get the 68 | sample.properties file from the installation location:: 69 | 70 | amazon_kclpy_helper.py --print_command --java --sample 71 | 72 | The Sample Code 73 | --------------- 74 | .. 
autoclass:: samples.sample_kclpy_app.RecordProcessor 75 | :members: 76 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Amazon Kinesis Client Library for Python documentation master file, created by 2 | sphinx-quickstart on Mon Oct 24 12:24:53 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Amazon Kinesis Client Library for Python 7 | ======================================== 8 | This package provides an interface to the Amazon Kinesis Client Library (KCL) MultiLangDaemon, 9 | which is part of the `Amazon KCL for Java `_. 10 | Developers can use the `Amazon KCL `_ 11 | to build distributed applications that process streaming data reliably at scale. The 12 | `Amazon KCL `_ 13 | takes care of many of the complex tasks associated with distributed computing, such as load-balancing 14 | across multiple instances, responding to instance failures, checkpointing processed records, 15 | and reacting to changes in stream volume. 16 | This interface manages the interaction with the MultiLangDaemon so that developers can focus on 17 | implementing their record processor executable. A record processor executable 18 | typically looks something like: 19 | 20 | 21 | Guides 22 | ------ 23 | 24 | .. 
toctree:: 25 | :maxdepth: 2 26 | 27 | guide/quickstart 28 | guide/sample 29 | guide/record_processor_v1 30 | guide/record_processor_v2 31 | 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`modindex` 38 | * :ref:`search` 39 | 40 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. 
linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo. dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\AmazonKinesisClientLibraryforPython.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\AmazonKinesisClientLibraryforPython.ghc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. 
159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 
227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 
278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 2.25.64 6 | 3.0.2 7 | 4.2.1.Final 8 | 2.0.6 9 | 2.13.5 10 | 1.3.15 11 | 12 | 13 | 14 | software.amazon.kinesis 15 | amazon-kinesis-client-multilang 16 | ${kcl.version} 17 | 18 | 19 | software.amazon.kinesis 20 | amazon-kinesis-client 21 | ${kcl.version} 22 | 23 | 24 | software.amazon.awssdk 25 | kinesis 26 | ${awssdk.version} 27 | 28 | 29 | software.amazon.awssdk 30 | dynamodb 31 | ${awssdk.version} 32 | 33 | 34 | 35 | software.amazon.awssdk 36 | dynamodb-enhanced 37 | ${awssdk.version} 38 | 39 | 40 | 41 | com.amazonaws 42 | dynamodb-lock-client 43 | 1.3.0 44 | 45 | 46 | software.amazon.awssdk 47 | cloudwatch 48 | ${awssdk.version} 49 | 50 | 51 | software.amazon.awssdk 52 | netty-nio-client 53 | ${awssdk.version} 54 | 55 | 56 | software.amazon.awssdk 57 | metrics-spi 58 | ${awssdk.version} 59 | 60 | 61 | software.amazon.awssdk 62 | sts 63 | ${awssdk.version} 64 | 65 | 66 | software.amazon.awssdk 67 | protocol-core 68 | ${awssdk.version} 69 | 70 | 71 | software.amazon.awssdk 72 | aws-query-protocol 73 | ${awssdk.version} 74 | 75 | 76 | software.amazon.awssdk 77 | aws-cbor-protocol 78 | ${awssdk.version} 79 | 80 | 81 | software.amazon.awssdk 82 | aws-json-protocol 83 | ${awssdk.version} 84 | 85 | 86 | software.amazon.awssdk 87 | json-utils 88 | ${awssdk.version} 89 | 90 | 91 | software.amazon.awssdk 92 | third-party-jackson-core 93 | ${awssdk.version} 94 | 95 | 96 | software.amazon.awssdk 97 | third-party-jackson-dataformat-cbor 98 | ${awssdk.version} 99 | 100 | 101 | software.amazon.awssdk 102 | profiles 103 | ${awssdk.version} 104 | 105 | 106 | software.amazon.awssdk 107 | sdk-core 108 | ${awssdk.version} 109 | 110 | 111 | software.amazon.awssdk 112 | aws-core 113 | ${awssdk.version} 114 | 115 | 116 | software.amazon.awssdk 
117 | endpoints-spi 118 | ${awssdk.version} 119 | 120 | 121 | software.amazon.awssdk 122 | auth 123 | ${awssdk.version} 124 | 125 | 126 | software.amazon.awssdk 127 | http-client-spi 128 | ${awssdk.version} 129 | 130 | 131 | software.amazon.awssdk 132 | regions 133 | ${awssdk.version} 134 | 135 | 136 | software.amazon.awssdk 137 | annotations 138 | ${awssdk.version} 139 | 140 | 141 | software.amazon.awssdk 142 | utils 143 | ${awssdk.version} 144 | 145 | 146 | software.amazon.awssdk 147 | apache-client 148 | ${awssdk.version} 149 | 150 | 151 | software.amazon.awssdk 152 | arns 153 | ${awssdk.version} 154 | 155 | 156 | software.amazon.awssdk 157 | http-auth-spi 158 | ${awssdk.version} 159 | 160 | 161 | software.amazon.awssdk 162 | http-auth 163 | ${awssdk.version} 164 | 165 | 166 | software.amazon.awssdk 167 | http-auth-aws 168 | ${awssdk.version} 169 | 170 | 171 | software.amazon.awssdk 172 | checksums-spi 173 | ${awssdk.version} 174 | 175 | 176 | software.amazon.awssdk 177 | checksums 178 | ${awssdk.version} 179 | 180 | 181 | software.amazon.awssdk 182 | identity-spi 183 | ${awssdk.version} 184 | 185 | 186 | io.netty 187 | netty-codec-http 188 | ${netty.version} 189 | 190 | 191 | io.netty 192 | netty-codec-http2 193 | ${netty.version} 194 | 195 | 196 | io.netty 197 | netty-codec 198 | ${netty.version} 199 | 200 | 201 | io.netty 202 | netty-transport 203 | ${netty.version} 204 | 205 | 206 | io.netty 207 | netty-resolver 208 | ${netty.version} 209 | 210 | 211 | io.netty 212 | netty-common 213 | ${netty.version} 214 | 215 | 216 | io.netty 217 | netty-buffer 218 | ${netty.version} 219 | 220 | 221 | io.netty 222 | netty-handler 223 | ${netty.version} 224 | 225 | 226 | io.netty 227 | netty-transport-native-epoll 228 | ${netty.version} 229 | 230 | 231 | io.netty 232 | netty-transport-native-unix-common 233 | ${netty.version} 234 | 235 | 236 | com.typesafe.netty 237 | netty-reactive-streams-http 238 | ${netty-reactive.version} 239 | 240 | 241 | com.typesafe.netty 242 | 
netty-reactive-streams 243 | ${netty-reactive.version} 244 | 245 | 246 | org.reactivestreams 247 | reactive-streams 248 | 1.0.3 249 | 250 | 251 | com.google.guava 252 | guava 253 | 32.1.1-jre 254 | 255 | 256 | com.google.code.findbugs 257 | jsr305 258 | 3.0.2 259 | 260 | 261 | org.checkerframework 262 | checker-qual 263 | 3.49.4 264 | 265 | 266 | com.google.errorprone 267 | error_prone_annotations 268 | 2.7.1 269 | 270 | 271 | com.google.j2objc 272 | j2objc-annotations 273 | 1.3 274 | 275 | 276 | org.codehaus.mojo 277 | animal-sniffer-annotations 278 | 1.20 279 | 280 | 281 | com.google.protobuf 282 | protobuf-java 283 | 4.27.5 284 | 285 | 286 | org.apache.commons 287 | commons-lang3 288 | 3.14.0 289 | 290 | 291 | org.slf4j 292 | slf4j-api 293 | 2.0.13 294 | 295 | 296 | io.reactivex.rxjava3 297 | rxjava 298 | 3.1.8 299 | 300 | 301 | com.fasterxml.jackson.dataformat 302 | jackson-dataformat-cbor 303 | ${fasterxml-jackson.version} 304 | 305 | 306 | com.fasterxml.jackson.core 307 | jackson-core 308 | ${fasterxml-jackson.version} 309 | 310 | 311 | com.fasterxml.jackson.core 312 | jackson-databind 313 | ${fasterxml-jackson.version} 314 | 315 | 316 | com.fasterxml.jackson.core 317 | jackson-annotations 318 | ${fasterxml-jackson.version} 319 | 320 | 321 | software.amazon 322 | flow 323 | 1.7 324 | 325 | 326 | org.apache.httpcomponents 327 | httpclient 328 | 4.5.13 329 | 330 | 331 | commons-codec 332 | commons-codec 333 | 1.15 334 | 335 | 336 | org.apache.httpcomponents 337 | httpcore 338 | 4.4.15 339 | 340 | 341 | com.amazon.ion 342 | ion-java 343 | 1.11.4 344 | 345 | 346 | software.amazon.glue 347 | schema-registry-serde 348 | 1.1.19 349 | 350 | 351 | com.amazonaws 352 | aws-java-sdk-sts 353 | 354 | 355 | 356 | 357 | joda-time 358 | joda-time 359 | 2.10.13 360 | 361 | 362 | ch.qos.logback 363 | logback-classic 364 | ${logback.version} 365 | 366 | 367 | ch.qos.logback 368 | logback-core 369 | ${logback.version} 370 | 371 | 372 | com.beust 373 | jcommander 374 | 1.82 375 | 
376 | 377 | commons-io 378 | commons-io 379 | 2.16.1 380 | 381 | 382 | commons-logging 383 | commons-logging 384 | 1.1.3 385 | 386 | 387 | org.apache.commons 388 | commons-collections4 389 | 4.4 390 | 391 | 392 | commons-beanutils 393 | commons-beanutils 394 | 1.11.0 395 | 396 | 397 | commons-collections 398 | commons-collections 399 | 3.2.2 400 | 401 | 402 | 403 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | argparse ; python_version <= "3.1" 3 | -------------------------------------------------------------------------------- /samples/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | 5 | 6 | BEFORE YOU GET STARTED 7 | ====================== 8 | 9 | Before running the samples, you'll want to make sure that your environment is 10 | configured to allow the samples to use your AWS credentials. To familiarize 11 | yourself with AWS Credentials read this guide: 12 | 13 | http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html 14 | 15 | For the MultiLangDaemon and boto libs you'll want to make your credentials 16 | available to one of the credentials providers in the default credential 17 | providers chain such as providing a ~/.aws/credentials file 18 | 19 | http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html 20 | 21 | RUNNING THE SAMPLE 22 | ================== 23 | 24 | Navigate to the amazon_kclpy directory and install the package. Using the amazon_kclpy 25 | package requires the MultiLangDaemon which is provided by the java KCL. To get 26 | the necessary jars to this directory before installing, you'll want to run the 27 | "download_jars" command before running "install". 
If you just want the python 28 | KCL and plan to retrieve the necessary jars yourself, you can just do "install" 29 | 30 | python setup.py download_jars 31 | python setup.py install 32 | 33 | Now the amazon_kclpy and boto and required jars should be installed in your 34 | environment. To start the sample putter, run: 35 | 36 | sample_kinesis_wordputter.py --stream words -w cat -w dog -w bird 37 | 38 | This will create a Kinesis stream called words and put the words specified by 39 | the -w options into the stream once each. Use -p SECONDS to indicate a period 40 | over which to repeatedly put these words. 41 | 42 | Now we would like to run a python KCL application that reads records from 43 | the stream we just created, but first take a look in the samples directory, 44 | you'll find a file called sample.properties, cat that file: 45 | 46 | cat samples/sample.properties 47 | 48 | You'll see several properties defined there. "executableName" indicates the 49 | executable for the MultiLangDaemon to run, "streamName" is the Kinesis stream 50 | to read from, "appName" is the KCL application name to use which will be the 51 | name of a DynamoDB table that gets created by the KCL, "initialPositionInStream" 52 | tells the KCL how to start reading from shards upon a fresh startup. To run the 53 | sample application you can use a helper script included in the package. 54 | 55 | amazon_kclpy_helper.py --print_command \ 56 | --java --properties samples/sample.properties 57 | 58 | This will print the command needed to run the sample which you can copy paste, 59 | or surround the command with back ticks, e.g. 
60 | 61 | `amazon_kclpy_helper.py --print_command \ 62 | --java --properties samples/sample.properties` 63 | ''' 64 | -------------------------------------------------------------------------------- /samples/amazon_kclpy_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | """ 5 | This script provides two utility functions: 6 | 7 | ``--print_classpath`` 8 | which prints a java class path. It optionally takes --properties 9 | and any number of --path options. It will generate a java class path which will include 10 | the properties file and paths and the location of the KCL jars based on the location of 11 | the amazon_kclpy.kcl module. 12 | 13 | ``--print_command`` 14 | which prints a command to run an Amazon KCLpy application. It requires a --java 15 | and --properties argument and optionally takes any number of --path arguments to prepend 16 | to the classpath that it generates for the command. 17 | """ 18 | from __future__ import print_function 19 | from amazon_kclpy import kcl 20 | from glob import glob 21 | import os 22 | import argparse 23 | import sys 24 | import samples 25 | 26 | 27 | 28 | 29 | def get_dir_of_file(f): 30 | ''' 31 | Returns the absolute path to the directory containing the specified file. 32 | 33 | :type f: str 34 | :param f: A path to a file, either absolute or relative 35 | 36 | :rtype: str 37 | :return: The absolute path of the directory represented by the relative path provided. 38 | ''' 39 | return os.path.dirname(os.path.abspath(f)) 40 | 41 | def get_kcl_dir(): 42 | ''' 43 | Returns the absolute path to the dir containing the amazon_kclpy.kcl module. 44 | 45 | :rtype: str 46 | :return: The absolute path of the KCL package. 
47 | ''' 48 | return get_dir_of_file(kcl.__file__) 49 | 50 | def get_kcl_jar_path(): 51 | ''' 52 | Returns the absolute path to the KCL jars needed to run an Amazon KCLpy app. 53 | 54 | :rtype: str 55 | :return: The absolute path of the KCL jar files needed to run the MultiLangDaemon. 56 | ''' 57 | if os.name == 'posix': 58 | return ':'.join(glob(os.path.join(get_kcl_dir(), 'jars', '*jar'))) 59 | else: 60 | return ';'.join(glob(os.path.join(get_kcl_dir(), 'jars', '*jar'))) 61 | 62 | def get_kcl_classpath(properties=None, paths=[]): 63 | ''' 64 | Generates a classpath that includes the location of the kcl jars, the 65 | properties file and the optional paths. 66 | 67 | :type properties: str 68 | :param properties: Path to properties file. 69 | 70 | :type paths: list 71 | :param paths: List of strings. The paths that will be prepended to the classpath. 72 | 73 | :rtype: str 74 | :return: A java class path that will allow your properties to be found and the MultiLangDaemon and its deps and 75 | any custom paths you provided. 76 | ''' 77 | # First make all the user provided paths absolute 78 | paths = [os.path.abspath(p) for p in paths] 79 | # We add our paths after the user provided paths because this permits users to 80 | # potentially inject stuff before our paths (otherwise our stuff would always 81 | # take precedence). 82 | paths.append(get_kcl_jar_path()) 83 | if properties: 84 | # Add the dir that the props file is in 85 | dir_of_file = get_dir_of_file(properties) 86 | paths.append(dir_of_file) 87 | if os.name == 'posix': 88 | return ":".join([p for p in paths if p != '']) 89 | else: 90 | return ";".join([p for p in paths if p != '']) 91 | 92 | def get_kcl_app_command(args, multi_lang_daemon_class, properties, log_configuration, paths=[]): 93 | ''' 94 | Generates a command to run the MultiLangDaemon. 
95 | 96 | :type java: str 97 | :param java: Path to java 98 | 99 | :type multi_lang_daemon_class: str 100 | :param multi_lang_daemon_class: Name of multi language daemon class e.g. com.amazonaws.services.kinesis.multilang.MultiLangDaemon 101 | 102 | :type properties: str 103 | :param properties: Optional properties file to be included in the classpath. 104 | 105 | :type paths: list 106 | :param paths: List of strings. Additional paths to prepend to the classpath. 107 | 108 | :rtype: str 109 | :return: A command that will run the MultiLangDaemon with your properties and custom paths and java. 110 | ''' 111 | return "{java} -cp {cp} {daemon} {props} {log_config}".format(java=args.java, 112 | cp = get_kcl_classpath(args.properties, paths), 113 | daemon = multi_lang_daemon_class, 114 | # Just need the basename because the path is added to the classpath 115 | props = properties, 116 | log_config = log_configuration) 117 | 118 | if __name__ == '__main__': 119 | parser = argparse.ArgumentParser("A script for generating a command to run an Amazon KCLpy app") 120 | parser.add_argument("--print_classpath", dest="print_classpath", action="store_true", 121 | default=False, 122 | help="Print a java class path.\noptional arguments: --path") 123 | parser.add_argument("--print_command", dest="print_command", action="store_true", 124 | default=False, 125 | help="Print a command for running an Amazon KCLpy app.\nrequired " 126 | + "args: --java --properties\noptional args: --classpath") 127 | parser.add_argument("-j", "--java", dest="java", 128 | help="The path to the java executable e.g. 
/jdk/bin/java", 129 | metavar="PATH_TO_JAVA") 130 | parser.add_argument("-p", "--properties", "--props", "--prop", dest="properties", 131 | help="The path to a properties file (relative to where you are running this script)", 132 | metavar="PATH_TO_PROPERTIES") 133 | parser.add_argument("--sample", "--sample-props", "--use-sample-properties", dest="use_sample_props", 134 | help="This will use the sample.properties file included in this package as the properties file.", 135 | action="store_true", default=False) 136 | parser.add_argument("-c", "--classpath", "--path", dest="paths", action="append", default=[], 137 | help="Additional path to add to java class path. May be specified any number of times", 138 | metavar="PATH") 139 | parser.add_argument("-l", "--log-configuration", dest="log_configuration", 140 | help="This will use the logback.xml which will be used by the KCL to log.", 141 | metavar="PATH_TO_LOG_CONFIGURATION") 142 | args = parser.parse_args() 143 | # Possibly replace the properties with the sample. Useful if they just want to run the sample app. 
144 | if args.use_sample_props: 145 | if args.properties: 146 | sys.stderr.write('Replacing provided properties with sample properties due to arg --sample\n') 147 | args.properties = os.path.join(get_dir_of_file(samples.__file__), 'sample.properties') 148 | 149 | # Print what the asked for 150 | if args.print_classpath: 151 | print(get_kcl_classpath(args.properties, args.paths)) 152 | elif args.print_command: 153 | if args.java and args.properties: 154 | multi_lang_daemon_class = 'software.amazon.kinesis.multilang.MultiLangDaemon' 155 | properties_argument = "--properties-file {props}".format(props = args.properties) 156 | log_argument = '' 157 | if args.log_configuration is not None: 158 | log_argument = "--log-configuration {log}".format(log = args.log_configuration) 159 | print(get_kcl_app_command(args, multi_lang_daemon_class, properties_argument, log_argument, paths=args.paths)) 160 | else: 161 | sys.stderr.write("Must provide arguments: --java and --properties\n") 162 | parser.print_usage() 163 | else: 164 | parser.print_usage() 165 | -------------------------------------------------------------------------------- /samples/sample.properties: -------------------------------------------------------------------------------- 1 | # The script that abides by the multi-language protocol. This script will 2 | # be executed by the MultiLangDaemon, which will communicate with this script 3 | # over STDIN and STDOUT according to the multi-language protocol. 4 | executableName = sample_kclpy_app.py 5 | 6 | # The Stream arn: arn:aws:kinesis:::stream/ 7 | # Important: streamArn takes precedence over streamName if both are set 8 | streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample 9 | 10 | # The name of an Amazon Kinesis stream to process. 11 | # Important: streamArn takes precedence over streamName if both are set 12 | streamName = kclpysample 13 | 14 | # Used by the KCL as the name of this application. 
Will be used as the name 15 | # of an Amazon DynamoDB table which will store the lease and checkpoint 16 | # information for workers with this application name 17 | applicationName = PythonKCLSample 18 | 19 | # Users can change the credentials provider the KCL will use to retrieve credentials. 20 | # Expected key name (case-sensitive): 21 | # AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch 22 | # The DefaultCredentialsProvider checks several other providers, which is 23 | # described here: 24 | # https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 25 | AwsCredentialsProvider = DefaultCredentialsProvider 26 | 27 | # Appended to the user agent of the KCL. Does not impact the functionality of the 28 | # KCL in any other way. 29 | processingLanguage = python/3.8 30 | 31 | # Valid options are TRIM_HORIZON or LATEST. 32 | # See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 33 | initialPositionInStream = TRIM_HORIZON 34 | 35 | # To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initialPositionInStreamExtended', 36 | # and uncomment the line below with the right timestamp value. 37 | # See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 38 | #initialPositionInStreamExtended = 1636609142 39 | 40 | # The following properties are also available for configuring the KCL Worker that is created 41 | # by the MultiLangDaemon. 42 | 43 | # The KCL defaults to us-east-1 44 | regionName = us-east-1 45 | 46 | # Fail over time in milliseconds. A worker which does not renew its lease within this time interval 47 | # will be regarded as having problems and its shards will be assigned to other workers.
48 | # For applications that have a large number of shards, this may be set to a higher number to reduce 49 | # the number of DynamoDB IOPS required for tracking leases 50 | #failoverTimeMillis = 10000 51 | 52 | # A worker id that uniquely identifies this worker among all workers using the same applicationName 53 | # If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself. 54 | #workerId = 55 | 56 | # Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks. 57 | #shardSyncIntervalMillis = 60000 58 | 59 | # Max records to fetch from Kinesis in a single GetRecords call. 60 | #maxRecords = 10000 61 | 62 | # Idle time between record reads in milliseconds. 63 | #idleTimeBetweenReadsInMillis = 1000 64 | 65 | # Enables applications to flush/checkpoint (if they have some data "in progress", but don't get new data for a while) 66 | #callProcessRecordsEvenForEmptyRecordList = false 67 | 68 | # Interval in milliseconds between polling to check for parent shard completion. 69 | # Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on 70 | # completion of parent shards). 71 | #parentShardPollIntervalMillis = 10000 72 | 73 | # Cleanup leases upon shard completion (don't wait until they expire in Kinesis). 74 | # Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try 75 | # to delete the ones we don't need any longer. 76 | #cleanupLeasesUponShardCompletion = true 77 | 78 | # Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures). 79 | #taskBackoffTimeMillis = 500 80 | 81 | # Buffer metrics for at most this long before publishing to CloudWatch. 82 | #metricsBufferTimeMillis = 10000 83 | 84 | # Buffer at most this many metrics before publishing to CloudWatch.
85 | #metricsMaxQueueSize = 10000 86 | 87 | # KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls 88 | # to RecordProcessorCheckpointer#checkpoint(String) by default. 89 | #validateSequenceNumberBeforeCheckpointing = true 90 | 91 | # The maximum number of active threads for the MultiLangDaemon to permit. 92 | # If a value is provided then a FixedThreadPool is used with the maximum 93 | # active threads set to the provided value. If a non-positive integer or no 94 | # value is provided a CachedThreadPool is used. 95 | #maxActiveThreads = 0 96 | 97 | ################### KclV3 configurations ################### 98 | # NOTE : These are just test configurations to show how to customize 99 | # all possible KCLv3 configurations. They are not necessarily the best 100 | # default values to use for production. 101 | 102 | # Coordinator config 103 | # Version the KCL needs to operate in. For more details check the KCLv3 migration 104 | # documentation. 
Default is CLIENT_VERSION_CONFIG_3X 105 | # clientVersionConfig = 106 | # Configurations to control how the CoordinatorState DDB table is created 107 | # Default name is applicationName-CoordinatorState in PAY_PER_REQUEST, 108 | # with PITR and deletion protection disabled and no tags 109 | # coordinatorStateTableName = 110 | # coordinatorStateBillingMode = 111 | # coordinatorStateReadCapacity = 112 | # coordinatorStateWriteCapacity = 113 | # coordinatorStatePointInTimeRecoveryEnabled = 114 | # coordinatorStateDeletionProtectionEnabled = 115 | # coordinatorStateTags = 116 | 117 | # Graceful handoff config - tuning of the shutdown behavior during lease transfers 118 | # default values are 30000 and true respectively 119 | # gracefulLeaseHandoffTimeoutMillis = 120 | # isGracefulLeaseHandoffEnabled = 121 | 122 | # WorkerMetricStats table config - control how the DDB table is created 123 | # Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST, 124 | # with PITR and deletion protection disabled and no tags 125 | # workerMetricsTableName = 126 | # workerMetricsBillingMode = 127 | # workerMetricsReadCapacity = 128 | # workerMetricsWriteCapacity = 129 | # workerMetricsPointInTimeRecoveryEnabled = 130 | # workerMetricsDeletionProtectionEnabled = 131 | # workerMetricsTags = 132 | 133 | # WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm 134 | # 135 | # frequency of capturing worker metrics in memory. Default is 1s 136 | # inMemoryWorkerMetricsCaptureFrequencyMillis = 137 | 138 | # frequency of reporting worker metric stats to storage. Default is 30s 139 | # workerMetricsReporterFreqInMillis = 140 | 141 | # No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10 142 | # noOfPersistedMetricsPerWorkerMetrics = 143 | 144 | # Disable use of worker metrics to balance lease, default is false. 145 | # If it is true, the algorithm balances lease based on worker's processing throughput. 
146 | # disableWorkerMetrics = 147 | 148 | # Max throughput per host 10 MBps, to limit processing to the given value 149 | # Default is unlimited. 150 | # maxThroughputPerHostKBps = 151 | 152 | # Dampen the load that is rebalanced during lease re-balancing, default is 60% 153 | # dampeningPercentage = 154 | 155 | # Configures the allowed variance range for worker utilization. The upper 156 | # limit is calculated as average * (1 + reBalanceThresholdPercentage/100). 157 | # The lower limit is average * (1 - reBalanceThresholdPercentage/100). If 158 | # any worker's utilization falls outside this range, lease re-balancing is 159 | # triggered. The re-balancing algorithm aims to bring variance within the 160 | # specified range. It also avoids thrashing by ensuring the utilization of 161 | # the worker receiving the load after re-balancing doesn't exceed the fleet 162 | # average. This might cause no re-balancing action even if the utilization is 163 | # out of the variance range. The default value is 10, representing +/-10% 164 | # variance from the average value. 165 | # reBalanceThresholdPercentage = 166 | 167 | # Whether at least one lease must be taken from a high utilization worker 168 | # during re-balancing when there is no lease assigned to that worker whose 169 | # throughput is less than or equal to the minimum throughput that needs to be 170 | # moved away from that worker to bring the worker back into the allowed variance. 171 | # Default is true. 172 | # allowThroughputOvershoot = 173 | 174 | # Lease assignment is performed every failoverTimeMillis but re-balance will 175 | # be attempted only once in 5 times based on the below config. Default is 3. 176 | # varianceBalancingFrequency = 177 | 178 | # Alpha value used for calculating exponential moving average of worker's metricStats. 179 | # workerMetricsEMAAlpha = 180 | # Duration after which workerMetricStats entry from WorkerMetricStats table will 181 | # be cleaned up.
182 | # Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days) 183 | # Refer to Duration.parse javadocs for more details 184 | # staleWorkerMetricsEntryCleanupDuration = 185 | -------------------------------------------------------------------------------- /samples/sample_kclpy_app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import print_function 7 | 8 | import sys 9 | import time 10 | 11 | from amazon_kclpy import kcl 12 | from amazon_kclpy.v3 import processor 13 | 14 | 15 | class RecordProcessor(processor.RecordProcessorBase): 16 | """ 17 | A RecordProcessor processes data from a shard in a stream. Its methods will be called with this pattern: 18 | 19 | * initialize will be called once 20 | * process_records will be called zero or more times 21 | * shutdown will be called if this MultiLangDaemon instance loses the lease to this shard, or the shard ends due 22 | a scaling change. 23 | """ 24 | def __init__(self): 25 | self._SLEEP_SECONDS = 5 26 | self._CHECKPOINT_RETRIES = 5 27 | self._CHECKPOINT_FREQ_SECONDS = 60 28 | self._largest_seq = (None, None) 29 | self._largest_sub_seq = None 30 | self._last_checkpoint_time = None 31 | 32 | def log(self, message): 33 | sys.stderr.write(message) 34 | 35 | def initialize(self, initialize_input): 36 | """ 37 | Called once by a KCLProcess before any calls to process_records 38 | 39 | :param amazon_kclpy.messages.InitializeInput initialize_input: Information about the lease that this record 40 | processor has been assigned. 41 | """ 42 | self._largest_seq = (None, None) 43 | self._last_checkpoint_time = time.time() 44 | 45 | def checkpoint(self, checkpointer, sequence_number=None, sub_sequence_number=None): 46 | """ 47 | Checkpoints with retries on retryable exceptions. 
48 | 49 | :param amazon_kclpy.kcl.Checkpointer checkpointer: the checkpointer provided to either process_records 50 | or shutdown 51 | :param str or None sequence_number: the sequence number to checkpoint at. 52 | :param int or None sub_sequence_number: the sub sequence number to checkpoint at. 53 | """ 54 | for n in range(0, self._CHECKPOINT_RETRIES): 55 | try: 56 | checkpointer.checkpoint(sequence_number, sub_sequence_number) 57 | return 58 | except kcl.CheckpointError as e: 59 | if 'ShutdownException' == e.value: 60 | # 61 | # A ShutdownException indicates that this record processor should be shutdown. This is due to 62 | # some failover event, e.g. another MultiLangDaemon has taken the lease for this shard. 63 | # 64 | print('Encountered shutdown exception, skipping checkpoint') 65 | return 66 | elif 'ThrottlingException' == e.value: 67 | # 68 | # A ThrottlingException indicates that one of our dependencies is is over burdened, e.g. too many 69 | # dynamo writes. We will sleep temporarily to let it recover. 70 | # 71 | if self._CHECKPOINT_RETRIES - 1 == n: 72 | sys.stderr.write('Failed to checkpoint after {n} attempts, giving up.\n'.format(n=n)) 73 | return 74 | else: 75 | print('Was throttled while checkpointing, will attempt again in {s} seconds' 76 | .format(s=self._SLEEP_SECONDS)) 77 | elif 'InvalidStateException' == e.value: 78 | sys.stderr.write('MultiLangDaemon reported an invalid state while checkpointing.\n') 79 | else: # Some other error 80 | sys.stderr.write('Encountered an error while checkpointing, error was {e}.\n'.format(e=e)) 81 | time.sleep(self._SLEEP_SECONDS) 82 | 83 | def process_record(self, data, partition_key, sequence_number, sub_sequence_number): 84 | """ 85 | Called for each record that is passed to process_records. 86 | 87 | :param str data: The blob of data that was contained in the record. 88 | :param str partition_key: The key associated with this record. 
89 | :param int sequence_number: The sequence number associated with this record. 90 | :param int sub_sequence_number: the sub sequence number associated with this record. 91 | """ 92 | #################################### 93 | # Insert your processing logic here 94 | #################################### 95 | self.log("Record (Partition Key: {pk}, Sequence Number: {seq}, Subsequence Number: {sseq}, Data Size: {ds}" 96 | .format(pk=partition_key, seq=sequence_number, sseq=sub_sequence_number, ds=len(data))) 97 | 98 | def should_update_sequence(self, sequence_number, sub_sequence_number): 99 | """ 100 | Determines whether a new larger sequence number is available 101 | 102 | :param int sequence_number: the sequence number from the current record 103 | :param int sub_sequence_number: the sub sequence number from the current record 104 | :return boolean: true if the largest sequence should be updated, false otherwise 105 | """ 106 | return self._largest_seq == (None, None) or sequence_number > self._largest_seq[0] or \ 107 | (sequence_number == self._largest_seq[0] and sub_sequence_number > self._largest_seq[1]) 108 | 109 | def process_records(self, process_records_input): 110 | """ 111 | Called by a KCLProcess with a list of records to be processed and a checkpointer which accepts sequence numbers 112 | from the records to indicate where in the stream to checkpoint. 113 | 114 | :param amazon_kclpy.messages.ProcessRecordsInput process_records_input: the records, and metadata about the 115 | records. 
116 | """ 117 | try: 118 | for record in process_records_input.records: 119 | data = record.binary_data 120 | seq = int(record.sequence_number) 121 | sub_seq = record.sub_sequence_number 122 | key = record.partition_key 123 | self.process_record(data, key, seq, sub_seq) 124 | if self.should_update_sequence(seq, sub_seq): 125 | self._largest_seq = (seq, sub_seq) 126 | 127 | # 128 | # Checkpoints every self._CHECKPOINT_FREQ_SECONDS seconds 129 | # 130 | if time.time() - self._last_checkpoint_time > self._CHECKPOINT_FREQ_SECONDS: 131 | self.checkpoint(process_records_input.checkpointer, str(self._largest_seq[0]), self._largest_seq[1]) 132 | self._last_checkpoint_time = time.time() 133 | 134 | except Exception as e: 135 | self.log("Encountered an exception while processing records. Exception was {e}\n".format(e=e)) 136 | 137 | def lease_lost(self, lease_lost_input): 138 | self.log("Lease has been lost") 139 | 140 | def shard_ended(self, shard_ended_input): 141 | self.log("Shard has ended checkpointing") 142 | shard_ended_input.checkpointer.checkpoint() 143 | 144 | def shutdown_requested(self, shutdown_requested_input): 145 | self.log("Shutdown has been requested, checkpointing.") 146 | shutdown_requested_input.checkpointer.checkpoint() 147 | 148 | 149 | if __name__ == "__main__": 150 | kcl_process = kcl.KCLProcess(RecordProcessor()) 151 | kcl_process.run() 152 | -------------------------------------------------------------------------------- /samples/sample_kinesis_wordputter.py: -------------------------------------------------------------------------------- 1 | #!env python 2 | ''' 3 | Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | SPDX-License-Identifier: Apache-2.0 5 | ''' 6 | from __future__ import print_function 7 | 8 | import argparse 9 | import sys 10 | import time 11 | 12 | import boto3 13 | 14 | def get_stream_status(kinesis, stream_name): 15 | ''' 16 | Query this provided connection object for the provided stream's status. 
17 | :type conn: Kinesis.Client 18 | :param conn: A connection to Amazon Kinesis 19 | :type stream_name: str 20 | :param stream_name: The name of a stream. 21 | :rtype: str 22 | :return: The stream's status 23 | ''' 24 | r = kinesis.describe_stream(StreamName=stream_name) 25 | description = r.get('StreamDescription') 26 | return description.get('StreamStatus') 27 | 28 | def wait_for_stream(kinesis, stream_name): 29 | ''' 30 | Wait for the provided stream to become active. 31 | :type kinesis: Kinesis.Client 32 | :param kinesis: A low-level client representing Amazon Kinesis 33 | :type stream_name: str 34 | :param stream_name: The name of a stream. 35 | ''' 36 | SLEEP_TIME_SECONDS = 3 37 | status = get_stream_status(kinesis, stream_name) 38 | while status != 'ACTIVE': 39 | print('{stream_name} has status: {status}, sleeping for {secs} seconds'.format( 40 | stream_name = stream_name, 41 | status = status, 42 | secs = SLEEP_TIME_SECONDS)) 43 | time.sleep(SLEEP_TIME_SECONDS) # sleep for 3 seconds 44 | status = get_stream_status(kinesis, stream_name) 45 | 46 | def put_words_in_stream(kinesis, stream_name, words): 47 | ''' 48 | Put each word in the provided list of words into the stream. 49 | :type kinesis: Kinesis.Client 50 | :param kinesis: A connection to Amazon Kinesis 51 | :type stream_name: str 52 | :param stream_name: The name of a stream. 53 | :type words: list 54 | :param words: A list of strings to put into the stream. 55 | ''' 56 | for w in words: 57 | try: 58 | kinesis.put_record(StreamName=stream_name, Data=w, PartitionKey=w) 59 | print("Put word: " + w + " into stream: " + stream_name) 60 | except Exception as e: 61 | sys.stderr.write("Encountered an exception while trying to put a word: " 62 | + w + " into stream: " + stream_name + " exception was: " + str(e)) 63 | 64 | def put_words_in_stream_periodically(conn, stream_name, words, period_seconds): 65 | ''' 66 | Puts words into a stream, then waits for the period to elapse then puts the words in again. 
There is no strict 67 | guarantee about how frequently we put each word into the stream, just that we will wait between iterations. 68 | :type conn: boto.kinesis.layer1.KinesisConnection 69 | :param conn: A connection to Amazon Kinesis 70 | :type stream_name: str 71 | :param stream_name: The name of a stream. 72 | :type words: list 73 | :param words: A list of strings to put into the stream. 74 | :type period_seconds: int 75 | :param period_seconds: How long to wait, in seconds, between iterations over the list of words. 76 | ''' 77 | while True: 78 | put_words_in_stream(conn, stream_name, words) 79 | print("Sleeping for {period_seconds} seconds".format(period_seconds=period_seconds)) 80 | time.sleep(period_seconds) 81 | 82 | if __name__ == '__main__': 83 | parser = argparse.ArgumentParser(''' 84 | Puts words into a stream. 85 | # Using the -w option multiple times 86 | sample_wordputter.py -s STREAM_NAME -w WORD1 -w WORD2 -w WORD3 -p 3 87 | # Passing input from STDIN 88 | echo "WORD1\\nWORD2\\nWORD3" | sample_wordputter.py -s STREAM_NAME -p 3 89 | ''') 90 | parser.add_argument("-s", "--stream", dest="stream_name", required=True, 91 | help="The stream you'd like to create.", metavar="STREAM_NAME",) 92 | parser.add_argument("-r", "--regionName", "--region", dest="region", default="us-east-1", 93 | help="The region you'd like to make this stream in. Default is 'us-east-1'", metavar="REGION_NAME",) 94 | parser.add_argument("-w", "--word", dest="words", default=[], action="append", 95 | help="A word to add to the stream. Can be specified multiple times to add multiple words.", metavar="WORD",) 96 | parser.add_argument("-p", "--period", dest="period", type=int, 97 | help="If you'd like to repeatedly put words into the stream, this option provides the period for putting " 98 | + "words into the stream in SECONDS. 
If no period is given then the words are put once.", 99 | metavar="SECONDS",) 100 | args = parser.parse_args() 101 | stream_name = args.stream_name 102 | 103 | ''' 104 | Getting a connection to Amazon Kinesis will require that you have your credentials available to 105 | one of the standard credentials providers. 106 | ''' 107 | print("Connecting to stream: {s} in {r}".format(s=stream_name, r=args.region)) 108 | kinesis = boto3.client('kinesis', region_name=args.region) 109 | 110 | try: 111 | status = get_stream_status(kinesis, stream_name) 112 | if 'DELETING' == status: 113 | print('The stream: {s} is being deleted, please rerun the script.'.format(s=stream_name)) 114 | sys.exit(1) 115 | elif 'ACTIVE' != status: 116 | wait_for_stream(kinesis, stream_name) 117 | except: 118 | # We'll assume the stream didn't exist so we will try to create it with just one shard 119 | kinesis.create_stream(StreamName=stream_name, ShardCount=1) 120 | wait_for_stream(kinesis, stream_name) 121 | # Now the stream should exist 122 | if len(args.words) == 0: 123 | print('No -w options provided. Waiting on input from STDIN') 124 | words = [l.strip() for l in sys.stdin.readlines() if l.strip() != ''] 125 | else: 126 | words = args.words 127 | if args.period != None: 128 | put_words_in_stream_periodically(kinesis, stream_name, words, args.period) 129 | else: 130 | put_words_in_stream(kinesis, stream_name, words) -------------------------------------------------------------------------------- /scripts/build_deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | """ 5 | Builds the dependency list used by setup.py from the maven dependency tree. This script must be run in the 6 | amazon-kinesis-client or amazon-kinesis-client-multilang directory, or where the pom.xml for the libraries are present. 
7 | """ 8 | import subprocess 9 | from tempfile import mkstemp 10 | from os import close 11 | import re 12 | 13 | 14 | def format_dependency(line): 15 | """ 16 | This attempts to extract Maven dependencies and versions from a line of output from mvn dependency:tree 17 | 18 | An example line without specifiers: 19 | 20 | ``[INFO] +- software.amazon.kinesis:amazon-kinesis-client:jar:2.1.2:compile`` 21 | 22 | This fields in the line in order are: 23 | 1. Group Id: software.amazon.kinesis 24 | 2. Artifact Id: amazon-kinesis-client 25 | 3. Packaging: jar (not used) 26 | 4. Version: 2.1.2 27 | 5. Dependency type: compile (this will be runtime or compile) 28 | 29 | An example line with specifiers: 30 | 31 | ``[INFO] | | +- io.netty:netty-transport-native-epoll:jar:linux-x86_64:4.1.32.Final:compile`` 32 | 33 | The fields in order are: 34 | 1. Group Id: io.netty 35 | 2. Artifact Id: netty-transport-native-epoll 36 | 3. Packaging: jar (not used) 37 | 4. Specifier: linux-x86_64 (not used) 38 | 5. Version: 4.1.32.Final 39 | 6. 
Dependency type: compile (this will be runtime or compile) 40 | 41 | :param str line: the line to extract version information from 42 | :return: the version information needed to retrieve the jars from Maven Central 43 | """ 44 | match = re.match(r'^[\\\s+|-]*(?P[^\s]+)', line) 45 | assert match is not None 46 | items = match.groupdict()['dep_line'].split(":") 47 | version_idx = 3 48 | if len(items) > 5: 49 | version_idx = 4 50 | 51 | return "('{group_id}', '{artifact_id}', '{version}')".format(group_id=items[0], 52 | artifact_id=items[1], 53 | version=items[version_idx]) 54 | 55 | 56 | def build_deps(): 57 | """ 58 | Extracts all the dependencies from the pom.xml and formats them into a form usable for setup.py or other 59 | multilang daemon implementations 60 | """ 61 | (fh, filename) = mkstemp() 62 | close(fh) 63 | output_command = '-Doutput={temp}'.format(temp=filename) 64 | subprocess.check_call(['mvn', 'dependency:tree', '-Dscope=runtime', output_command]) 65 | 66 | dependency_file = open(filename) 67 | 68 | dependencies = [format_dependency(line) for line in dependency_file] 69 | 70 | print(",\n".join(dependencies)) 71 | 72 | 73 | if __name__ == '__main__': 74 | build_deps() 75 | 76 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | [aliases] 4 | test=pytest -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | from __future__ import print_function 4 | 5 | import glob 6 | import sys 7 | 8 | import os 9 | import shutil 10 | import xml.etree.ElementTree as ET 11 | 12 | from setuptools import Command 13 | from setuptools import setup 14 | from setuptools.command.install import install 15 | 16 | if sys.version_info[0] >= 3: 17 | # Python 3 18 | from urllib.request import urlopen 19 | else: 20 | # Python 2 21 | from urllib2 import urlopen 22 | 23 | # 24 | # This script modifies the basic setuptools by adding some functionality to the standard 25 | # "install" command and by adding an additional command "download_jars" which 26 | # simplifies retrieval of the jars required to run the KCL multi-language daemon 27 | # which is required to run the sample app included in this package. 28 | # 29 | # If a user runs the basic install: 30 | # 31 | # python setup.py install 32 | # 33 | # They will be notified of any jars that are downloaded for this package. Those jars 34 | # will go in amazon_kclpy/jars so that they can be installed as part of this package's 35 | # data. 36 | # 37 | # python setup.py download_jars 38 | # 39 | # Will retrieve the configured jars from maven and then advise the user 40 | # to rerun the install command. 
41 | # 42 | 43 | PACKAGE_NAME = 'amazon_kclpy' 44 | JAR_DIRECTORY = os.path.join(PACKAGE_NAME, 'jars') 45 | PACKAGE_VERSION = '3.0.3' 46 | PYTHON_REQUIREMENTS = [ 47 | 'boto3', 48 | # argparse is part of python2.7 but must be declared for python2.6 49 | 'argparse', 50 | ] 51 | REMOTE_MAVEN_PACKAGES_FILE = 'pom.xml' 52 | 53 | class MavenJarDownloader: 54 | 55 | def __init__(self, on_completion, destdir=JAR_DIRECTORY, packages_file=REMOTE_MAVEN_PACKAGES_FILE): 56 | self.on_completion = on_completion 57 | self.destdir = destdir 58 | self.packages_file = packages_file 59 | self.packages = self.parse_packages_from_pom() 60 | 61 | def warning_string(self, missing_jars=[]): 62 | s = '''The following jars were not installed because they were not 63 | present in this package at the time of installation:''' 64 | for jar in missing_jars: 65 | s += '\n {jar}'.format(jar=jar) 66 | s += ''' 67 | This doesn't affect the rest of the installation, but may make it more 68 | difficult for you to run the sample app and get started. 69 | 70 | You should consider running: 71 | 72 | python setup.py download_jars 73 | python setup.py install 74 | 75 | Which will download the required jars and rerun the install. 
76 | ''' 77 | return s 78 | 79 | def parse_packages_from_pom(self): 80 | maven_root = ET.parse(self.packages_file).getroot() 81 | maven_version = '{http://maven.apache.org/POM/4.0.0}' 82 | # dictionary of common package versions encoded in `properties` section 83 | properties = {f"${{{child.tag.replace(maven_version, '')}}}": child.text 84 | for child in maven_root.find(f'{maven_version}properties').iter() if 'version' in child.tag} 85 | 86 | packages = [] 87 | for dep in maven_root.iter(f'{maven_version}dependency'): 88 | dependency = [] 89 | for attr in ['groupId', 'artifactId', 'version']: 90 | val = dep.find(maven_version + attr).text 91 | if val in properties: 92 | dependency.append(properties[val]) 93 | else: 94 | dependency.append(val) 95 | packages.append(tuple(dependency)) 96 | 97 | return packages 98 | 99 | def download_and_check(self): 100 | self.download_files() 101 | self.on_completion() 102 | missing_jars = self.missing_jars() 103 | if len(missing_jars) > 0: 104 | raise RuntimeError(self.warning_string(missing_jars)) 105 | 106 | def package_destination(self, artifact_id, version): 107 | return '{artifact_id}-{version}.jar'.format(artifact_id=artifact_id, version=version) 108 | 109 | def missing_jars(self): 110 | file_list = [os.path.join(self.destdir, self.package_destination(p[1], p[2])) for p in self.packages] 111 | return [f for f in file_list if not os.path.isfile(f)] # The missing files 112 | 113 | def package_url(self, group_id, artifact_id, version): 114 | # 115 | # Sample url: 116 | # https://search.maven.org/remotecontent?filepath=org/apache/httpcomponents/httpclient/4.2/httpclient-4.2.jar 117 | # https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.2/httpclient-4.2.jar 118 | # 119 | prefix = os.getenv("KCL_MVN_REPO_SEARCH_URL", 'https://repo1.maven.org/maven2/') 120 | return '{prefix}{path}/{artifact_id}/{version}/{dest}'.format( 121 | prefix=prefix, 122 | path='/'.join(group_id.split('.')), 123 | artifact_id=artifact_id, 
124 | version=version, 125 | dest=self.package_destination(artifact_id, version)) 126 | 127 | def download_file(self, url, dest): 128 | """ 129 | Downloads a file at the url to the destination. 130 | """ 131 | print('Attempting to retrieve remote jar {url}'.format(url=url)) 132 | try: 133 | response = urlopen(url) 134 | with open(dest, 'wb') as dest_file: 135 | shutil.copyfileobj(response, dest_file) 136 | print('Saving {url} -> {dest}'.format(url=url, dest=dest)) 137 | except Exception as e: 138 | print('Failed to retrieve {url}: {e}'.format(url=url, e=e)) 139 | return 140 | 141 | def download_files(self): 142 | for package in self.packages: 143 | dest = os.path.join(self.destdir, self.package_destination(package[1], package[2])) 144 | if os.path.isfile(dest): 145 | print('Skipping download of {dest}'.format(dest=dest)) 146 | else: 147 | url = self.package_url(package[0], package[1], package[2]) 148 | self.download_file(url, dest) 149 | 150 | 151 | class DownloadJarsCommand(Command): 152 | description = "Download the jar files needed to run the sample application" 153 | user_options = [] 154 | 155 | def initialize_options(self): 156 | pass 157 | 158 | def finalize_options(self): 159 | pass 160 | 161 | def run(self): 162 | """ 163 | Runs when this command is given to setup.py 164 | """ 165 | downloader = MavenJarDownloader(on_completion=lambda : None) 166 | downloader.download_files() 167 | print(''' 168 | Now you should run: 169 | 170 | python setup.py install 171 | 172 | Which will finish the installation. 173 | ''') 174 | 175 | 176 | class InstallThenCheckForJars(install): 177 | 178 | def do_install(self): 179 | install.run(self) 180 | 181 | def run(self): 182 | """ 183 | We override the basic install command. First we download jars then 184 | we run the basic install then we check whether the jars are present 185 | in this package. If they aren't present we warn the user and give 186 | them some advice on how to retry getting the jars. 
187 | """ 188 | downloader = MavenJarDownloader(self.do_install) 189 | downloader.download_and_check() 190 | 191 | 192 | try: 193 | from wheel.bdist_wheel import bdist_wheel 194 | 195 | 196 | class BdistWheelWithJars(bdist_wheel): 197 | """ 198 | This overrides the bdist_wheel command, that handles building a binary wheel of the package. 199 | Currently, as far as I can tell, binary wheel creation only occurs during the virtual environment creation. 200 | The package that bdist_wheel comes from isn't a modeled dependency of this package, but is required for virtual 201 | environment creation. 202 | """ 203 | 204 | def do_run(self): 205 | bdist_wheel.run(self) 206 | 207 | def run(self): 208 | downloader = MavenJarDownloader(self.do_run) 209 | downloader.download_and_check() 210 | 211 | except ImportError: 212 | pass 213 | 214 | if __name__ == '__main__': 215 | commands = { 216 | 'download_jars': DownloadJarsCommand, 217 | 'install': InstallThenCheckForJars, 218 | } 219 | try: 220 | # 221 | # BdistWheelWithJars will only be present if the wheel package is present, and that is present during 222 | # virtual environment creation. 223 | # It's important to note this is a hack. There doesn't appear to be a way to execute hooks around wheel 224 | # creation by design. See https://github.com/pypa/packaging-problems/issues/64 for more information. 
225 | # 226 | commands['bdist_wheel'] = BdistWheelWithJars 227 | except NameError: 228 | pass 229 | 230 | setup( 231 | name=PACKAGE_NAME, 232 | version=PACKAGE_VERSION, 233 | description='A python interface for the Amazon Kinesis Client Library MultiLangDaemon', 234 | license='Apache-2.0', 235 | packages=[PACKAGE_NAME, PACKAGE_NAME + "/v2", PACKAGE_NAME + "/v3", 'samples'], 236 | scripts=glob.glob('samples/*py'), 237 | package_data={ 238 | '': ['*.txt', '*.md'], 239 | PACKAGE_NAME: ['jars/*'], 240 | 'samples': ['sample.properties'], 241 | }, 242 | install_requires=PYTHON_REQUIREMENTS, 243 | setup_requires=["pytest-runner"], 244 | tests_require=["pytest", "mock"], 245 | cmdclass=commands, 246 | url="https://github.com/awslabs/amazon-kinesis-client-python", 247 | keywords="amazon kinesis client library python", 248 | zip_safe=False, 249 | ) 250 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import sys 5 | import os 6 | 7 | sys.path.append(os.path.dirname(__file__)) 8 | -------------------------------------------------------------------------------- /test/test_amazon_kclpy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import json 5 | from mock import Mock 6 | from amazon_kclpy import kcl, dispatch 7 | from utils import make_io_obj 8 | 9 | 10 | def build_basic_io_handler_mock(read_line_side_effects): 11 | """ 12 | 13 | :param read_line_side_effects: 14 | :rtype: kcl._IOHandler 15 | """ 16 | io_handler = Mock() 17 | io_handler.read_line.side_effect = read_line_side_effects 18 | io_handler.load_action.side_effect = lambda x: json.loads(x, object_hook=dispatch.message_decode) 19 | return io_handler 20 | 21 | 22 | def test_checkpointer_exception(): 23 | exception_name = 'ThisIsATestException' 24 | checkpointer = kcl.Checkpointer( 25 | build_basic_io_handler_mock(['{"action": "checkpoint",' 26 | '"checkpoint":"456", "sequenceNumber": "1234", "subSequenceNumber": 0, ' 27 | '"error" : "' + exception_name + '"}'])) 28 | try: 29 | checkpointer.checkpoint() 30 | assert 0, "Checkpointing should have raised an exception" 31 | except kcl.CheckpointError as e: 32 | assert e.value == exception_name 33 | 34 | 35 | def test_checkpointer_unexpected_message_after_checkpointing(): 36 | io_handler = Mock() 37 | io_handler.read_line.side_effect = ['{"action":"initialize", "shardId" : "shardid-123", ' 38 | '"sequenceNumber": "1234", "subSequenceNumber": 1}', ] 39 | io_handler.load_action.side_effect = lambda x: json.loads(x, object_hook=dispatch.message_decode) 40 | checkpointer = kcl.Checkpointer( 41 | build_basic_io_handler_mock( 42 | ['{"action":"initialize", "shardId" : "shardid-123", "sequenceNumber": "1234", "subSequenceNumber": 1}'])) 43 | 44 | try: 45 | checkpointer.checkpoint() 46 | assert 0, "Checkpointing should have raised an exception" 47 | except kcl.CheckpointError as e: 48 | assert e.value == 'InvalidStateException' 49 | 50 | 51 | def test_kcl_process_exits_on_record_processor_exception(): 52 | unique_string = "Super uniqe statement we can look for" 53 | errorFile = make_io_obj() 54 | class ClientException(Exception): 55 | pass 56 | 
mock_rp = Mock() # type: kcl.RecordProcessorBase 57 | # Our record processor will just fail during initialization 58 | mock_rp.initialize.side_effect = [ClientException(unique_string)] 59 | kcl_process = kcl.KCLProcess(mock_rp, 60 | input_file=make_io_obj('{"action":"initialize", "shardId" : "shardid-123", ' 61 | '"sequenceNumber": "1234", "subSequenceNumber": 1}'), 62 | output_file=make_io_obj(), 63 | error_file=errorFile) 64 | try: 65 | kcl_process.run() 66 | except ClientException: 67 | assert 0, "Should not have seen the ClientException propagate up the call stack." 68 | assert errorFile.getvalue().count(unique_string) > 0, 'We should see our error message printed to the error file' 69 | 70 | 71 | def test_kcl_process_exits_on_action_message_exception(): 72 | mock_rp = Mock() # type: kcl.RecordProcessorBase 73 | # Our record processor will just fail during initialization 74 | kcl_process = kcl.KCLProcess(mock_rp, 75 | # This will suffice because a checkpoint message won't be understood by 76 | # the KCLProcessor (only the Checkpointer understands them) 77 | input_file=make_io_obj('{"action":"invalid", "error" : "badstuff", ' 78 | '"sequenceNumber": "1234", "subSequenceNumber": 1}'), 79 | output_file=make_io_obj(), 80 | error_file=make_io_obj()) 81 | try: 82 | kcl_process.run() 83 | assert 0, 'Should have received an exception here' 84 | except dispatch.MalformedAction: 85 | pass 86 | 87 | -------------------------------------------------------------------------------- /test/test_amazon_kclpy_input_output_integration.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from amazon_kclpy import kcl 5 | from utils import make_io_obj 6 | 7 | 8 | # Dummy record processor 9 | class RecordProcessor(kcl.RecordProcessorBase): 10 | 11 | def __init__(self, expected_shard_id, expected_sequence_number): 12 | self.expected_shard_id = expected_shard_id 13 | self.expected_sequence_number = 
expected_sequence_number 14 | pass 15 | 16 | def initialize(self, shard_id): 17 | assert shard_id == self.expected_shard_id 18 | pass 19 | 20 | def process_records(self, records, checkpointer): 21 | seq = records[0].get('sequenceNumber') 22 | assert seq == self.expected_sequence_number 23 | try: 24 | checkpointer.checkpoint(seq) 25 | assert 0, "First checkpoint should fail" 26 | except Exception: 27 | # Try it one more time (this time it'll work) 28 | checkpointer.checkpoint(seq) 29 | 30 | def shutdown(self, checkpointer, reason): 31 | if 'TERMINATE' == reason: 32 | checkpointer.checkpoint() 33 | 34 | 35 | ''' 36 | An input string which we'll feed to a file for kcl.py to read from. 37 | ''' 38 | 39 | ''' 40 | This string is approximately what the output should look like. We remove whitespace when comparing this to what is 41 | written to the outputfile. 42 | ''' 43 | test_output_string = """ 44 | {"action": "status", "responseFor": "initialize"} 45 | {"action": "checkpoint", "checkpoint": "456"} 46 | {"action": "checkpoint", "checkpoint": "456"} 47 | {"action": "status", "responseFor": "processRecords"} 48 | {"action": "checkpoint", "checkpoint": null} 49 | {"action": "status", "responseFor": "shutdown"} 50 | """ 51 | 52 | test_output_messages = [ 53 | {"action": "status", "responseFor": "initialize"}, 54 | {"action": "checkpoint", "sequenceNumber": "456", "subSequenceNumber": None}, 55 | {"action": "checkpoint", "sequenceNumber": "456", "subSequenceNumber": None}, 56 | {"action": "status", "responseFor": "processRecords"}, 57 | {"action": "checkpoint", "sequenceNumber": None, "subSequenceNumber": None}, 58 | {"action": "status", "responseFor": "shardEnded"} 59 | ] 60 | 61 | 62 | def _strip_all_whitespace(s): 63 | return re.sub('\\s*', '', s) 64 | 65 | 66 | test_shard_id = "shardId-123" 67 | test_sequence_number = "456" 68 | 69 | test_input_messages = [ 70 | {"action": "initialize", "shardId": test_shard_id, "sequenceNumber": test_sequence_number, 
"subSequenceNumber": 0}, 71 | {"action": "processRecords", "millisBehindLatest": 1476889708000, "records": 72 | [ 73 | { 74 | "action": "record", "data": "bWVvdw==", "partitionKey": "cat", "sequenceNumber": test_sequence_number, 75 | "subSequenceNumber": 0, "approximateArrivalTimestamp": 1476889707000 76 | } 77 | ] 78 | }, 79 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0, "error": "Exception"}, 80 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0}, 81 | {"action": "shardEnded"}, 82 | {"action": "checkpoint", "sequenceNumber": test_sequence_number, "subSequenceNumber": 0} 83 | ] 84 | 85 | 86 | def test_kcl_py_integration_test_perfect_input(): 87 | test_input_json = "\n".join(map(lambda j: json.dumps(j), test_input_messages)) 88 | input_file = make_io_obj(test_input_json) 89 | output_file = make_io_obj() 90 | error_file = make_io_obj() 91 | process = kcl.KCLProcess(RecordProcessor(test_shard_id, test_sequence_number), 92 | input_file=input_file, output_file=output_file, error_file=error_file) 93 | process.run() 94 | ''' 95 | The strings are approximately the same, modulo whitespace. 96 | ''' 97 | output_message_list = filter(lambda s: s != "", output_file.getvalue().split("\n")) 98 | responses = [json.loads(s) for s in output_message_list] 99 | assert len(responses) == len(test_output_messages) 100 | for i in range(len(responses)): 101 | assert responses[i] == test_output_messages[i] 102 | 103 | ''' 104 | There should be some error output but it seems like overly specific to make sure that a particular message is printed. 105 | ''' 106 | error_output = error_file.getvalue() 107 | assert error_output == "" 108 | -------------------------------------------------------------------------------- /test/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import sys 5 | import io 6 | 7 | 8 | def make_io_obj(json_text=None): 9 | if sys.version_info[0] >= 3: 10 | create_method = io.StringIO 11 | else: 12 | create_method = io.BytesIO 13 | 14 | if json_text is not None: 15 | return create_method(json_text) 16 | else: 17 | return create_method() -------------------------------------------------------------------------------- /test/v3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import pytest 5 | pytestmark = pytest.mark.webtests 6 | -------------------------------------------------------------------------------- /test/v3/delegate_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import mock 5 | import pytest 6 | 7 | from amazon_kclpy.v2 import processor as v2 8 | from amazon_kclpy.v3 import processor as v3 9 | from amazon_kclpy import messages 10 | from amazon_kclpy.kcl import Checkpointer, CheckpointError 11 | 12 | 13 | @pytest.fixture 14 | def delegate(): 15 | return mock.Mock(spec=v2.RecordProcessorBase) 16 | 17 | 18 | @pytest.fixture 19 | def processor(delegate): 20 | return v3.V2toV3Processor(delegate) 21 | 22 | 23 | def test_initialization_delegate(delegate, processor): 24 | initialization_input = mock.Mock(spec=messages.InitializeInput) 25 | processor.initialize(initialization_input) 26 | 27 | delegate.initialize.assert_called_with(initialization_input) 28 | 29 | 30 | def test_process_records_delegate(delegate, processor): 31 | process_records_input = mock.Mock(spec=messages.ProcessRecordsInput) 32 | processor.process_records(process_records_input) 33 | 34 | delegate.process_records.assert_called_with(process_records_input) 35 
| 36 | 37 | def test_shutdown_requested_delegate(delegate, processor): 38 | shutdown_requested_input = mock.Mock(spec=messages.ShutdownRequestedInput) 39 | processor.shutdown_requested(shutdown_requested_input) 40 | 41 | delegate.shutdown_requested.assert_called_with(shutdown_requested_input) 42 | 43 | 44 | def test_lease_lost_to_shutdown_delegate(delegate, processor): 45 | lease_lost_input = messages.LeaseLostInput({ 46 | "action": "leaseLost" 47 | }) 48 | 49 | processor.lease_lost(lease_lost_input) 50 | delegate.shutdown.assert_called() 51 | 52 | actual = delegate.shutdown.call_args[0][0] 53 | 54 | assert actual.reason == "ZOMBIE" 55 | assert actual.action == "shutdown" 56 | assert isinstance(actual.checkpointer, messages.LeaseLostCheckpointer) 57 | 58 | 59 | def test_lease_lost_checkpoint_triggers_exception(delegate, processor): 60 | lease_lost_input = mock.Mock(spec=messages.LeaseLostInput) 61 | delegate.shutdown = lambda s: s.checkpointer.checkpoint() 62 | 63 | with pytest.raises(CheckpointError): 64 | processor.lease_lost(lease_lost_input) 65 | 66 | 67 | def test_shard_ended_to_shutdown_delegate(delegate, processor): 68 | shard_ended_input = messages.ShardEndedInput({ 69 | "action": "shardEnded" 70 | }) 71 | checkpointer = mock.Mock(spec=Checkpointer) 72 | shard_ended_input._checkpointer = checkpointer 73 | 74 | processor.shard_ended(shard_ended_input) 75 | delegate.shutdown.assert_called() 76 | 77 | actual = delegate.shutdown.call_args[0][0] 78 | 79 | assert actual.reason == "TERMINATE" 80 | assert actual.action == "shutdown" 81 | assert actual.checkpointer == checkpointer 82 | 83 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | mock 3 | --------------------------------------------------------------------------------