├── .DS_Store ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ └── node.js.yml ├── .gitignore ├── .npmignore ├── Gruntfile.js ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── bin ├── kcl-bootstrap └── kcl-bootstrap.bat ├── conf ├── .jshintrc └── jsdoc.conf.json ├── index.js ├── lib ├── .DS_Store └── kcl │ ├── action_handler.js │ ├── checkpointer.js │ ├── io_handler.js │ ├── kcl_manager.js │ └── kcl_process.js ├── package-lock.json ├── package.json ├── pom.xml ├── samples ├── basic_sample │ ├── consumer │ │ ├── sample.properties │ │ └── sample_kcl_app.js │ └── producer │ │ ├── config.js │ │ ├── sample_kinesis_producer_app.js │ │ └── sample_producer.js ├── click_stream_sample │ ├── README.md │ ├── cloudformation │ │ └── nodejs-kcl-clickstream.template │ ├── consumer │ │ ├── click_stream_consumer.js │ │ ├── config.js │ │ ├── record_buffer.js │ │ ├── s3_emitter.js │ │ └── sample.properties │ └── producer │ │ ├── click_stream_generator.js │ │ ├── click_stream_producer.js │ │ ├── click_stream_producer_app.js │ │ └── config.js └── util │ └── logger.js └── test └── kcl ├── action_handler_tests.js ├── checkpointer_tests.js ├── io_handler_tests.js └── kcl_process_tests.js /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-kinesis-client-nodejs/1ed080ce080d901f418e11892c6839502e0cdd76/.DS_Store -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "maven" 4 | directory: "/" 5 | open-pull-requests-limit: 4 6 | schedule: 7 | interval: "daily" 8 | 9 | - package-ecosystem: "npm" 10 | directory: "/" 11 | open-pull-requests-limit: 4 12 | schedule: 13 | interval: "daily" 14 | -------------------------------------------------------------------------------- /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs 3 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 4 | 5 | name: Sample Run and Dependabot Auto-merge 6 | on: 7 | push: 8 | branches: [ master ] 9 | 10 | permissions: 11 | id-token: write 12 | contents: write 13 | pull-requests: write 14 | statuses: write 15 | 16 | jobs: 17 | sample-run: 18 | timeout-minutes: 8 19 | runs-on: ${{ matrix.os }} 20 | defaults: 21 | run: 22 | shell: bash 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | node-version: [ '18.x', '20.x', '21.x' ] 28 | jdk-version: [ "8", "11", "17", "21", "24" ] 29 | os: [ ubuntu-latest, macOS-latest, windows-latest ] 30 | 31 | steps: 32 | - name: Checkout working directory 33 | uses: actions/checkout@v4 34 | 35 | - name: Configure AWS Credentials 36 | uses: aws-actions/configure-aws-credentials@v4 37 | with: 38 | aws-region: us-east-1 39 | role-to-assume: arn:aws:iam::751999266872:role/GitHubNodejs 40 | role-session-name: myGitHubActionsNodejs 41 | 42 | - name: Set up JDK ${{ matrix.jdk-version }} 43 | uses: actions/setup-java@v4 44 | with: 45 | java-version: ${{ matrix.jdk-version }} 46 | distribution: 'corretto' 47 | 48 | - name: Set up Node.js ${{ matrix.node-version }} 49 | uses: actions/setup-node@v3 50 | with: 51 | node-version: ${{ matrix.node-version }} 52 | cache: 'npm' 53 | 54 | - name: npm clean install, build, and test 55 | run: | 56 | npm ci 57 | npm run build --if-present 58 | npm test 59 | 60 | - name: Install dependencies 61 | run: | 62 | npm install 63 | 64 | - name: Running data producer 65 | run: | 66 | cd samples/basic_sample/producer 67 | node sample_kinesis_producer_app.js 68 | 69 | - name: Running KCL consumer (windows or ubuntu) 70 | if: matrix.os != 'macOS-latest' 71 | run: | 72 | cd samples/basic_sample/consumer 73 | timeout 45 ../../../bin/kcl-bootstrap -e -p ./sample.properties || status="$?"; if (( status == 124 )); then exit 0; else exit 1; fi; exit "$status" 74 | 75 | - name: Running KCL consumer (macOS) 76 | if: matrix.os == 'macOS-latest' 77 | run: | 78 | brew install coreutils 79 | cd samples/basic_sample/consumer 80 | gtimeout 45 ../../../bin/kcl-bootstrap --java /usr/bin/java -e -p ./sample.properties || status="$?"; if (( status == 124 )); then exit 0; else exit 1; fi; exit "$status" 81 | 82 | auto-merge-dependabot: 83 | needs: [ sample-run ] 84 | runs-on: ubuntu-latest 85 | if: github.actor == 'dependabot[bot]' && github.event.pull_request.user.login == 'dependabot[bot]' 86 | steps: 87 | - name: Fetch Dependabot metadata 88 | id: metadata 89 | uses: dependabot/fetch-metadata@v2 90 | with: 91 | alert-lookup: true 92 | 
github-token: "${{ secrets.GITHUB_TOKEN }}" 93 | 94 | - name: Approve PR 95 | if: steps.metadata.outputs.update-type != 'version-update:semver-major' 96 | run: gh pr review --approve "$PR_URL" 97 | env: 98 | PR_URL: ${{github.event.pull_request.html_url}} 99 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 100 | 101 | # - name: Enable auto-merge for Dependabot PRs 102 | # if: steps.metadata.outputs.update-type != 'version-update:semver-major' 103 | # run: gh pr merge --auto --merge "$PR_URL" 104 | # env: 105 | # PR_URL: ${{github.event.pull_request.html_url}} 106 | # GH_TOKEN: ${{secrets.GITHUB_TOKEN}} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /coverage 2 | /doc 3 | /lib/jars 4 | /node_modules 5 | *.log 6 | .idea 7 | 8 | .DS_Store 9 | */.DS_Store -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | /coverage 2 | /doc 3 | /lib/jars 4 | /node_modules 5 | *.log 6 | 7 | Gruntfile.js 8 | /conf 9 | /samples 10 | /test 11 | .idea/* 12 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | function mochaCoverageOptions(reporterName, outFile) { 10 | return { 11 | options: { 12 | reporter: reporterName, 13 | quiet: true, 14 | captureFile: outFile 15 | }, 16 | src: ['test/**/*_tests.js'] 17 | }; 18 | } 19 | 20 | module.exports = function(grunt) { 21 | 22 | grunt.initConfig({ 23 | 24 | jshint: { 25 | options: { 26 | jshintrc: 'conf/.jshintrc' 27 | }, 28 | gruntfile: { 29 | src: 'Gruntfile.js' 30 | }, 31 | bin: { 32 | src: ['bin/kcl-bootstrap'] 33 | }, 34 | lib: { 35 | src: ['index.js', 'lib/**/*.js'] 36 | }, 37 | test: { 38 | src: ['test/**/*.js'] 39 | }, 40 | samples: { 41 | src: ['samples/**/*.js'] 42 | } 43 | }, 44 | 45 | clean: { 46 | build: { 47 | options: { 48 | force: true 49 | }, 50 | src: ['build'] 51 | }, 52 | doc: { 53 | options: { 54 | force: true 55 | }, 56 | src: ['doc'] 57 | } 58 | }, 59 | 60 | mochaTest: { 61 | test: { 62 | options: { 63 | reporter: 'spec', 64 | clearRequireCache: true, 65 | }, 66 | src: ['test/**/*_tests.js'] 67 | } 68 | }, 69 | 70 | jsdoc: { 71 | dist: { 72 | src: ['index.js', 'lib/**/*.js', 'README.md'], 73 | jsdoc: 'node_modules/grunt-jsdoc/node_modules/jsdoc/jsdoc', 74 | options: { 75 | destination: 'doc', 76 | configure: 'conf/jsdoc.conf.json', 77 | template: 'node_modules/ink-docstrap/template' 78 | } 79 | } 80 | } 81 | }); 82 | 83 | grunt.loadNpmTasks('grunt-contrib-clean'); 84 | grunt.loadNpmTasks('grunt-contrib-jshint'); 85 | grunt.loadNpmTasks('grunt-jsdoc'); 86 | grunt.loadNpmTasks('grunt-mocha-test'); 87 | 88 | grunt.registerTask('default', ['jshint', 'mochaTest']); 89 | grunt.registerTask('build', 'compile'); 90 | grunt.registerTask('compile', ['jshint']); 91 | // clean task already defined above. 
92 | grunt.registerTask('doc', 'jsdoc'); 93 | grunt.registerTask('test', ['jshint', 'mochaTest']); 94 | grunt.registerTask('release', ['jshint', 'mochaTest', 'jsdoc']); 95 | }; 96 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Amazon Kinesis Client Library for Node.js 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Kinesis Client Library for Node.js 2 | 3 | This package provides an interface to the [Amazon Kinesis Client Library][amazon-kcl] (KCL) [MultiLangDaemon][multi-lang-daemon] for the Node.js framework. 4 | 5 | Developers can use the KCL to build distributed applications that process streaming data reliably at scale. The KCL takes care of many of the complex tasks associated with distributed computing, such as load-balancing across multiple instances, responding to instance failures, checkpointing processed records, and reacting to changes in stream volume. 6 | 7 | This package wraps and manages the interaction with the [MultiLangDaemon][multi-lang-daemon], which is provided as part of the [Amazon KCL for Java][amazon-kcl-github] so that developers can focus on implementing their record processing logic. 8 | 9 | A record processor in Node.js typically looks like the following: 10 | 11 | ```javascript 12 | var kcl = require('aws-kcl'); 13 | var util = require('util'); 14 | 15 | /** 16 | * The record processor must provide three functions: 17 | * 18 | * * `initialize` - called once 19 | * * `processRecords` - called zero or more times 20 | * * `shutdown` - called if this KCL instance loses the lease to this shard 21 | * 22 | * Notes: 23 | * * All of the above functions take additional callback arguments. 
When one is 24 | * done initializing, processing records, or shutting down, callback must be 25 | * called (i.e., `completeCallback()`) in order to let the KCL know that the 26 | * associated operation is complete. Without the invocation of the callback 27 | * function, the KCL will not proceed further. 28 | * * The application will terminate if any error is thrown from any of the 29 | * record processor functions. Hence, if you would like to continue processing 30 | * on exception scenarios, exceptions should be handled appropriately in 31 | * record processor functions and should not be passed to the KCL library. The 32 | * callback must also be invoked in this case to let the KCL know that it can 33 | * proceed further. 34 | */ 35 | var recordProcessor = { 36 | /** 37 | * Called once by the KCL before any calls to processRecords. Any initialization 38 | * logic for record processing can go here. 39 | * 40 | * @param {object} initializeInput - Initialization related information. 41 | * Looks like - {"shardId":""} 42 | * @param {callback} completeCallback - The callback that must be invoked 43 | * once the initialization operation is complete. 44 | */ 45 | initialize: function(initializeInput, completeCallback) { 46 | // Initialization logic ... 47 | 48 | completeCallback(); 49 | }, 50 | 51 | /** 52 | * Called by KCL with a list of records to be processed and checkpointed. 53 | * A record looks like: 54 | * {"data":"","partitionKey":"someKey","sequenceNumber":"1234567890"} 55 | * 56 | * The checkpointer can optionally be used to checkpoint a particular sequence 57 | * number (from a record). If checkpointing, the checkpoint must always be 58 | * invoked before calling `completeCallback` for processRecords. Moreover, 59 | * `completeCallback` should only be invoked once the checkpoint operation 60 | * callback is received. 61 | * 62 | * @param {object} processRecordsInput - Process records information with 63 | * array of records that are to be processed. Looks like - 64 | * {"records":[, ], "checkpointer":} 65 | * where format is specified above. 66 | * @param {callback} completeCallback - The callback that must be invoked 67 | * once all records are processed and checkpoint (optional) is 68 | * complete. 69 | */ 70 | processRecords: function(processRecordsInput, completeCallback) { 71 | if (!processRecordsInput || !processRecordsInput.records) { 72 | // Must call completeCallback to proceed further. 73 | completeCallback(); 74 | return; 75 | } 76 | 77 | var records = processRecordsInput.records; 78 | var record, sequenceNumber, partitionKey, data; 79 | for (var i = 0 ; i < records.length ; ++i) { 80 | record = records[i]; 81 | sequenceNumber = record.sequenceNumber; 82 | partitionKey = record.partitionKey; 83 | // Note that "data" is a base64-encoded string. Buffer can be used to 84 | // decode the data into a string. 85 | data = new Buffer(record.data, 'base64').toString(); 86 | 87 | // Custom record processing logic ... 88 | } 89 | if (!sequenceNumber) { 90 | // Must call completeCallback to proceed further. 91 | completeCallback(); 92 | return; 93 | } 94 | // If checkpointing, only call completeCallback once checkpoint operation 95 | // is complete. 96 | processRecordsInput.checkpointer.checkpoint(sequenceNumber, 97 | function(err, checkpointedSequenceNumber) { 98 | // In this example, regardless of error, we mark processRecords 99 | // complete to proceed further with more records. 
100 | completeCallback(); 101 | } 102 | ); 103 | }, 104 | 105 | /** 106 | * Called by the KCL to indicate that this record processor should shut down. 107 | * After the lease lost operation is complete, there will not be any more calls to 108 | * any other functions of this record processor. Clients should not attempt to 109 | * checkpoint because the lease has been lost by this Worker. 110 | * 111 | * @param {object} leaseLostInput - Lease lost information. 112 | * @param {callback} completeCallback - The callback must be invoked once lease 113 | * lost operations are completed. 114 | */ 115 | leaseLost: function(leaseLostInput, completeCallback) { 116 | // Lease lost logic ... 117 | completeCallback(); 118 | }, 119 | 120 | /** 121 | * Called by the KCL to indicate that this record processor should shutdown. 122 | * After the shard ended operation is complete, there will not be any more calls to 123 | * any other functions of this record processor. Clients are required to checkpoint 124 | * at this time. This indicates that the current record processor has finished 125 | * processing and new record processors for the children will be created. 126 | * 127 | * @param {object} shardEndedInput - ShardEnded information. Looks like - 128 | * {"checkpointer": } 129 | * @param {callback} completeCallback - The callback must be invoked once shard 130 | * ended operations are completed. 131 | */ 132 | shardEnded: function(shardEndedInput, completeCallback) { 133 | // Shard end logic ... 134 | 135 | // Since you are checkpointing, only call completeCallback once the checkpoint 136 | // operation is complete. 137 | shardEndedInput.checkpointer.checkpoint(function(err) { 138 | // In this example, regardless of the error, we mark the shutdown operation 139 | // complete. 140 | completeCallback(); 141 | }); 142 | completeCallback(); 143 | } 144 | }; 145 | 146 | kcl(recordProcessor).run(); 147 | ``` 148 | 149 | ## Before You Get Started 150 | 151 | ### Prerequisite 152 | Before you begin, Node.js and NPM must be installed on your system. For download instructions for your platform, see http://nodejs.org/download/. 153 | 154 | To get the sample KCL application and bootstrap script, you need git. 155 | 156 | Amazon KCL for Node.js uses [MultiLangDaemon][multi-lang-daemon] provided by [Amazon KCL for Java][amazon-kcl-github]. You also need Java version 1.8 or higher installed. 157 | 158 | ### Setting Up the Environment 159 | Before running the samples, make sure that your environment is configured to allow the samples to use your [AWS Security Credentials](http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html), which are used by [MultiLangDaemon][multi-lang-daemon] to interact with AWS services. 160 | 161 | By default, the [MultiLangDaemon][multi-lang-daemon] uses the [DefaultCredentialsProvider][DefaultCredentialsProvider], so make your credentials available to one of the credentials providers in that provider chain. There are several ways to do this. You can provide credentials through a `~/.aws/credentials` file or through environment variables (**AWS\_ACCESS\_KEY\_ID** and **AWS\_SECRET\_ACCESS\_KEY**). If you're running on Amazon EC2, you can associate an IAM role with your instance with appropriate access. 162 | 163 | For more information about [Amazon Kinesis][amazon-kinesis] and the client libraries, see the 164 | [Amazon Kinesis documentation][amazon-kinesis-docs] as well as the [Amazon Kinesis forums][kinesis-forum]. 
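
Before starting the daemon, it can help to confirm that credentials actually resolve on the machine. The sketch below is illustrative only: it assumes the `aws-sdk` (v2) package that the samples already pull in via `npm install`, and the region shown is a placeholder. The [MultiLangDaemon][multi-lang-daemon] resolves credentials through its own Java provider chain, so this is just a quick local sanity check of the same sources (credentials file, environment variables, or instance role).

```javascript
// check_credentials.js -- optional sanity check before running the samples.
// Illustrative sketch; assumes the 'aws-sdk' (v2) module installed by `npm install`.
var AWS = require('aws-sdk');

// Region is a placeholder; STS is a global service, so any valid region works.
var sts = new AWS.STS({ region: 'us-east-1' });

sts.getCallerIdentity({}, function(err, data) {
  if (err) {
    // Nothing in the default chain (~/.aws/credentials, AWS_ACCESS_KEY_ID /
    // AWS_SECRET_ACCESS_KEY, or an attached IAM role) produced usable credentials.
    console.error('Credential check failed:', err.message);
    process.exit(1);
  } else {
    console.log('Credentials resolved for account:', data.Account);
  }
});
```
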
165 | 166 | ## Running the Sample 167 | 168 | The Amazon KCL for Node.js repository contains source code for the KCL, a sample data producer and data consumer (processor) application, and the bootstrap script. 169 | 170 | To run sample applications, you need to get all required NPM modules. **From the root of the repository**, execute the following command: 171 | 172 | `npm install` 173 | 174 | This downloads all dependencies for running the bootstrap script as well as the sample application. 175 | 176 | The sample application consists of two components: 177 | 178 | * A data producer (`samples/basic_sample/producer/sample_kinesis_producer_app.js`): this script creates an [Amazon Kinesis][amazon-kinesis] stream and starts putting 10 random records into it. 179 | * A data processor (`samples/basic_sample/consumer/sample_kcl_app.js`): this script is invoked by the [MultiLangDaemon][multi-lang-daemon], consumes the data from the [Amazon Kinesis][amazon-kinesis] stream, and stores received data into files (1 file per shard). 180 | 181 | The following defaults are used in the sample application: 182 | 183 | * *Stream name*: `kclnodejssample` 184 | * *Number of shards*: 2 185 | * *Amazon KCL application name*: `kclnodejssample` 186 | * *Amazon DynamoDB table for Amazon KCL application*: `kclnodejssample` 187 | 188 | ### Running the Data Producer 189 | To run the data producer, execute the following commands from the root of the repository: 190 | 191 | ```sh 192 | cd samples/basic_sample/producer 193 | node sample_kinesis_producer_app.js 194 | ``` 195 | 196 | #### Notes 197 | * The script `samples/basic_sample/producer/sample_kinesis_producer_app.js` takes several parameters that you can use to customize its behavior. To change default parameters, change values in the file `samples/basic_sample/producer/config.js`. 198 | 199 | ### Running the Data Processor 200 | To start the data processor, run the following command from the root of the repository: 201 | 202 | ```sh 203 | cd samples/basic_sample/consumer 204 | ../../../bin/kcl-bootstrap --java /usr/bin/java -e -p ./sample.properties 205 | ``` 206 | 207 | #### Notes 208 | * The Amazon KCL for Node.js uses stdin/stdout to interact with [MultiLangDaemon][multi-lang-daemon]. Do not point your application logs to stdout/stderr. If your logs point to stdout/stderr, log output gets mingled with [MultiLangDaemon][multi-lang-daemon], which makes it really difficult to find consumer-specific log events. This consumer uses a logging library to redirect all application logs to a file called application.log. Make sure to follow a similar pattern while developing consumer applications with the Amazon KCL for Node.js. For more information about the protocol between the MultiLangDaemon and the Amazon KCL for Node.js, go to [MultiLangDaemon][multi-lang-daemon]. 209 | * The bootstrap script downloads [MultiLangDaemon][multi-lang-daemon] and its dependencies. 210 | * The bootstrap script invokes the [MultiLangDaemon][multi-lang-daemon], which starts the Node.js consumer application as its child process. By default: 211 | * The file `samples/basic_sample/consumer/sample.properties` controls which Amazon KCL for Node.js application is run. You can specify your own properties file with the `-p` or `--properties` argument. 212 | * The bootstrap script uses `JAVA_HOME` to locate the java binary. To specify your own java home path, use the `-j` or `--java` argument when invoking the bootstrap script. 
213 | * To only print commands on the console to run the KCL application without actually running the KCL application, leave out the `-e` or `--execute` argument to the bootstrap script. 214 | * You can also add REPOSITORY_ROOT/bin to your PATH so you can access kcl-bootstrap from anywhere. 215 | * To find out all the options you can override when running the bootstrap script, run the following command: 216 | 217 | ```sh 218 | kcl-bootstrap --help 219 | ``` 220 | 221 | ### Cleaning Up 222 | This sample application creates an [Amazon Kinesis][amazon-kinesis] stream, sends data to it, and creates a DynamoDB table to track the KCL application state. This will incur nominal costs to your AWS account, and continue to do so even when the sample app is finished. To stop being charged, delete these resources. Specifically, the sample application creates following AWS resources: 223 | 224 | * An *Amazon Kinesis stream* named `kclnodejssample` 225 | * An *Amazon DynamoDB table* named `kclnodejssample` 226 | 227 | You can delete these using the AWS Management Console. 228 | 229 | ## Running on Amazon EC2 230 | Log into an Amazon EC2 instance running Amazon Linux, then perform the following steps to prepare your environment for running the sample application. Note the version of Java that ships with Amazon Linux can be found at `/usr/bin/java` and should be 1.8 or greater. 231 | 232 | ```sh 233 | # install node.js, npm and git 234 | sudo yum install nodejs npm --enablerepo=epel 235 | sudo yum install git 236 | # clone the git repository to work with the samples 237 | git clone https://github.com/awslabs/amazon-kinesis-client-nodejs.git kclnodejs 238 | cd kclnodejs/samples/basic_sample/producer/ 239 | # download dependencies 240 | npm install 241 | # run the sample producer 242 | node sample_kinesis_producer_app.js & 243 | 244 | # ...and in another terminal, run the sample consumer 245 | export PATH=$PATH:kclnodejs/bin 246 | cd kclnodejs/samples/basic_sample/consumer/ 247 | kcl-bootstrap --java /usr/bin/java -e -p ./sample.properties > consumer.out 2>&1 & 248 | ``` 249 | 250 | ## NPM module 251 | To get the Amazon KCL for Node.js module from NPM, use the following command: 252 | 253 | ```sh 254 | npm install aws-kcl 255 | ``` 256 | 257 | ## Under the Hood: Supplemental information about the MultiLangDaemon 258 | 259 | Amazon KCL for Node.js uses [Amazon KCL for Java][amazon-kcl-github] internally. We have implemented a Java-based daemon, called the *[MultiLangDaemon][multi-lang-daemon]* that does all the heavy lifting. The daemon launches the user-defined record processor script/program as a sub-process, and then communicates with this sub-process over standard input/output using a simple protocol. This allows support for any language. This approach enables the [Amazon KCL][amazon-kcl] to be language-agnostic, while providing identical features and similar parallel processing model across all languages. 260 | 261 | At runtime, there will always be a one-to-one correspondence between a record processor, a child process, and an [Amazon Kinesis shard][amazon-kinesis-shard]. The [MultiLangDaemon][multi-lang-daemon] ensures that, without any developer intervention. 262 | 263 | In this release, we have abstracted these implementation details away and exposed an interface that enables you to focus on writing record processing logic in Node.js. 
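
For readers curious about what that stdin/stdout exchange looks like, the sketch below is purely illustrative. Real applications should rely on `kcl(recordProcessor).run()` rather than speaking the protocol themselves; the `action`/`status`/`responseFor` field names mirror the line-oriented JSON messages this library's I/O handler exchanges with the daemon, but the exact schema should be treated as an internal detail of the [MultiLangDaemon][multi-lang-daemon].

```javascript
// protocol_sketch.js -- illustration only; use the aws-kcl module in real code.
var readline = require('readline');

var rl = readline.createInterface({ input: process.stdin, terminal: false });

rl.on('line', function(line) {
  // Each line from the daemon is a JSON action, e.g.
  // {"action":"initialize","shardId":"shardId-000000000000"}
  var action = JSON.parse(line);

  // ... perform the requested work (initialize, processRecords, etc.) ...

  // Reply with a status line so the daemon knows this action is complete.
  process.stdout.write(JSON.stringify({
    action: 'status',
    responseFor: action.action
  }) + '\n');
});
```

Because stdout carries these protocol messages, application logs must go to a file (as the sample consumer does with `application.log`) rather than to stdout/stderr, as noted in the sample instructions above.
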
264 | 265 | ## See Also 266 | 267 | * [Developing Processor Applications for Amazon Kinesis Using the Amazon Kinesis Client Library][amazon-kcl] 268 | * [Amazon KCL for Java][amazon-kcl-github] 269 | * [Amazon KCL for Python][amazon-kinesis-python-github] 270 | * [Amazon KCL for Ruby][amazon-kinesis-ruby-github] 271 | * [Amazon Kinesis documentation][amazon-kinesis-docs] 272 | * [Amazon Kinesis forum][kinesis-forum] 273 | 274 | 275 | ## Release Notes 276 | 277 | ### Release (v3.0.1 - May 28, 2025) 278 | * [#414](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/414) Bump commons-beanutils:commons-beanutils from 1.9.4 to 1.11.0 279 | * [#413](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/413) Bump mocha from 10.4.0 to 11.5.0 280 | * [#410](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/410) Bump commander from 12.0.0 to 14.0.0 281 | * [#403](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/403) Bump io.netty:netty-handler from 4.1.115.Final to 4.1.118.Final 282 | * [#384](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/384) Bump ch.qos.logback:logback-core from 1.3.14 to 1.3.15 283 | * [#368](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/368) Bump io.netty:netty-common from 4.1.108.Final to 4.1.115.Final 284 | * [#367](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/367) Bump braces from 3.0.2 to 3.0.3 285 | 286 | ### Release 3.0.0 (November 6, 2024) 287 | * New lease assignment / load balancing algorithm 288 | * KCL 3.x introduces a new lease assignment and load balancing algorithm. It assigns leases among workers based on worker utilization metrics and throughput on each lease, replacing the previous lease count-based lease assignment algorithm. 289 | * When KCL detects higher variance in CPU utilization among workers, it proactively reassigns leases from over-utilized workers to under-utilized workers for even load balancing. This ensures even CPU utilization across workers and removes the need to over-provision the stream processing compute hosts. 290 | * Optimized DynamoDB RCU usage 291 | * KCL 3.x optimizes DynamoDB read capacity unit (RCU) usage on the lease table by implementing a global secondary index with leaseOwner as the partition key. This index mirrors the leaseKey attribute from the base lease table, allowing workers to efficiently discover their assigned leases by querying the index instead of scanning the entire table. 292 | * This approach significantly reduces read operations compared to earlier KCL versions, where workers performed full table scans, resulting in higher RCU consumption. 293 | * Graceful lease handoff 294 | * KCL 3.x introduces a feature called "graceful lease handoff" to minimize data reprocessing during lease reassignments. Graceful lease handoff allows the current worker to complete checkpointing of processed records before transferring the lease to another worker. For graceful lease handoff, you should implement checkpointing logic within the existing `shutdownRequested()` method. 295 | * This feature is enabled by default in KCL 3.x, but you can turn off this feature by adjusting the configuration property `isGracefulLeaseHandoffEnabled`. 296 | * While this approach significantly reduces the probability of data reprocessing during lease transfers, it doesn't completely eliminate the possibility. To maintain data integrity and consistency, it's crucial to design your downstream consumer applications to be idempotent. 
This ensures that the application can handle potential duplicate record processing without adverse effects. 297 | * New DynamoDB metadata management artifacts 298 | * KCL 3.x introduces two new DynamoDB tables for improved lease management: 299 | * Worker metrics table: Records CPU utilization metrics from each worker. KCL uses these metrics for optimal lease assignments, balancing resource utilization across workers. If CPU utilization metric is not available, KCL assigns leases to balance the total sum of shard throughput per worker instead. 300 | * Coordinator state table: Stores internal state information for workers. Used to coordinate in-place migration from KCL 2.x to KCL 3.x and leader election among workers. 301 | * Follow this [documentation](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html#kcl-migration-from-2-3-IAM-permissions) to add required IAM permissions for your KCL application. 302 | * Other improvements and changes 303 | * Dependency on the AWS SDK for Java 1.x has been fully removed. 304 | * The Glue Schema Registry integration functionality no longer depends on AWS SDK for Java 1.x. Previously, it required this as a transient dependency. 305 | * Multilangdaemon has been upgraded to use AWS SDK for Java 2.x. It no longer depends on AWS SDK for Java 1.x. 306 | * `idleTimeBetweenReadsInMillis` (PollingConfig) now has a minimum default value of 200. 307 | * This polling configuration property determines the [publishers](https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java) wait time between GetRecords calls in both success and failure cases. Previously, setting this value below 200 caused unnecessary throttling. This is because Amazon Kinesis Data Streams supports up to five read transactions per second per shard for shared-throughput consumers. 308 | * Shard lifecycle management is improved to deal with edge cases around shard splits and merges to ensure records continue being processed as expected. 309 | * Migration 310 | * The programming interfaces of KCL 3.x remain identical with KCL 2.x for an easier migration. For detailed migration instructions, please refer to the [Migrate consumers from KCL 2.x to KCL 3.x](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html) page in the Amazon Kinesis Data Streams developer guide. 311 | * Configuration properties 312 | * New configuration properties introduced in KCL 3.x are listed in this [doc](https://github.com/awslabs/amazon-kinesis-client/blob/master/docs/kcl-configurations.md#new-configurations-in-kcl-3x). 313 | * Deprecated configuration properties in KCL 3.x are listed in this [doc](https://github.com/awslabs/amazon-kinesis-client/blob/master/docs/kcl-configurations.md#discontinued-configuration-properties-in-kcl-3x). You need to keep the deprecated configuration properties during the migration from any previous KCL version to KCL 3.x. 314 | * Metrics 315 | * New CloudWatch metrics introduced in KCL 3.x are explained in the [Monitor the Kinesis Client Library with Amazon CloudWatch](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-kcl.html) in the Amazon Kinesis Data Streams developer guide. 
The following operations are newly added in KCL 3.x: 316 | * `LeaseAssignmentManager` 317 | * `WorkerMetricStatsReporter` 318 | * `LeaseDiscovery` 319 | 320 | ### Release 2.2.6 (April 25, 2024) 321 | * [PR #327](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/327) Upgraded amazon-kinesis-client from 2.5.5 to 2.5.8 322 | * [PR #329](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/329) Upgraded aws-sdk from 2.1562.0 to 2.1603.0 323 | * [PR #95](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/95) Upgraded jcommander from 1.72 to 1.82 324 | * [PR #271](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/271) Upgraded org.codehaus.mojo:animal-sniffer-annotations from 1.20 to 1.23 325 | * [PR #266](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/266) Upgraded com.amazonaws:aws-java-sdk-core from 1.12.370 to 1.12.512 326 | * [PR #313](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/313) Upgraded logback.version from 1.3.12 to 1.5.3 327 | * [PR #305](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/305) Upgraded org.slf4j:slf4j-api from 2.0.5 to 2.0.12 328 | * [PR #325](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/325) Upgraded mocha from 9.2.2 to 10.4.0 329 | * [PR #307](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/307) Upgraded com.google.protobuf:protobuf-java from 3.21.7 to 3.25.3 330 | * [PR #262](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/262) Upgraded checker-qual from 2.5.2 to 3.36.0 331 | * [PR #303](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/303) Upgraded commander from 8.3.0 to 12.0.0 332 | * [PR #287](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/287) Upgraded sinon from 14.0.2 to 17.0.1 333 | * [PR #318](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/318) Upgraded awssdk.version from 2.20.43 to 2.25.11 334 | * [PR #319](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/319) Upgraded org.reactivestreams:reactive-streams from 1.0.3 to 1.0.4 335 | * [PR #320](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/320) Upgraded netty-reactive.version from 2.0.6 to 2.0.12 336 | * [PR #330](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/330) Upgraded io.netty:netty-codec-http from 4.1.100.Final to 4.1.108.Final 337 | * [PR #331](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/331) Upgraded ion-java from 1.5.1 to 1.11.4 338 | * [PR #211](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/211) Upgraded fasterxml-jackson.version from 2.13.4 to 2.14.1 339 | 340 | ### Release 2.2.5 (February 29, 2024) 341 | * [PR #309](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/309) Updated amazon-kinesis-client and amazon-kinesis-client multilang from 2.5.4 to 2.5.5 and updated awssdk.version to match amazon-kinesis-client from 2.19.2 to 2.20.43 342 | 343 | ### Release 2.2.4 (January 16, 2024) 344 | * [PR #298](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/298) Added dependency on aws-sdk arns 345 | * [PR #293](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/293) Updated logback-classic to 1.3.12 346 | 347 | ### Release 2.2.3 (December 18, 2023) 348 | * [PR #291](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/291) Updated KCL and KCL multilang to the latest version 2.5.4 349 | * [PR #284](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/284) Updated netty to 4.1.100.Final, fasterxml-jackson to 2.13.5, and 
guava to 32.1.1-jre 350 | * [PR #277](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/277) Updated com.google.protobuf:protobuf-java from 3.21.5 to 3.21.7 351 | 352 | ### Release 2.2.2 (January 4, 2023) 353 | * [PR #207](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/207) Add endpoints-spi dependency 354 | 355 | ### Release 2.2.1 (January 3, 2023) 356 | * [PR #202](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/202) Keep Java dependencies in sync with the KCL V2.4.4 357 | * Updated dependencies to match the v2.4.4 KCL Java release 358 | * Updated slfj to resolve the logger's incompatibility problem 359 | 360 | ### Release 2.2.0 (September 15, 2022) 361 | * [PR #165](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/165) Update Java dependencies 362 | * KCL and KCL-multilang are updated to the latest version 2.4.3 363 | 364 | ### Release 2.1.0 (January 31, 2020) 365 | [Milestone #4](https://github.com/awslabs/amazon-kinesis-client-nodejs/milestone/4) 366 | * Fixing bootstrap to use HTTPS 367 | * [PR #75](https://github.com/awslabs/amazon-kinesis-client/pull/679) 368 | * Adding support for Win32 platform 369 | * [PR #67](https://github.com/awslabs/amazon-kinesis-client/pull/668) 370 | * Relicensing to Apache-2.0 371 | * [PR #69](https://github.com/awslabs/amazon-kinesis-client/pull/667) 372 | 373 | ### Release 2.0.0 (March 6, 2019) 374 | * Added support for [Enhanced Fan-Out](https://aws.amazon.com/blogs/aws/kds-enhanced-fanout/). 375 | Enhanced Fan-Out provides dedicated throughput per stream consumer, and uses an HTTP/2 push API (SubscribeToShard) to deliver records with lower latency. 376 | * Updated the Amazon Kinesis Client Library for Java to version 2.1.2. 377 | * Version 2.1.2 uses 4 additional Kinesis API's 378 | __WARNING: These additional API's may require updating any explicit IAM policies__ 379 | * [`RegisterStreamConsumer`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_RegisterStreamConsumer.html) 380 | * [`SubscribeToShard`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_SubscribeToShard.html) 381 | * [`DescribeStreamConsumer`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_DescribeStreamConsumer.html) 382 | * [`DescribeStreamSummary`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_DescribeStreamSummary.html) 383 | * For more information about Enhanced Fan-Out with the Amazon Kinesis Client Library please see the [announcement](https://aws.amazon.com/blogs/aws/kds-enhanced-fanout/) and [developer documentation](https://docs.aws.amazon.com/streams/latest/dev/introduction-to-enhanced-consumers.html). 384 | * Added support for the newer methods to the [`KCLManager`](https://github.com/awslabs/amazon-kinesis-client-nodejs/blob/a2be81a3bd4ccca7f68b616ebc416192c3be9d0e/lib/kcl/kcl_manager.js). 385 | While the original `shutdown` method will continue to work it's recommended to upgrade to the newer interface. 386 | * The `shutdown` has been replaced by `leaseLost` and `shardEnded`. 387 | * Added the `leaseLost` method which is invoked when a lease is lost. 388 | `leaseLost` replaces `shutdown` where `shutdownInput.reason` was `ZOMBIE`. 389 | * Added the `shardEnded` method which is invoked when all records from a split or merge have been processed. 390 | `shardEnded` replaces `shutdown` where `shutdownInput.reason` was `TERMINATE`. 391 | * Updated the AWS Java SDK version to 2.4.0 392 | * MultiLangDaemon now provides logging using Logback. 
393 | * MultiLangDaemon supports custom configurations for logging via a Logback XML configuration file. 394 | * The `kcl-bootstrap` program was been updated to accept either `-l` or `--log-configuration` to provide a Logback XML configuration file. 395 | 396 | ### Release 0.8.0 (February 12, 2019) 397 | * Updated the dependency on [Amazon Kinesis Client for Java][amazon-kcl-github] to 1.9.3 398 | * This adds support for ListShards API. This API is used in place of DescribeStream API to provide more throughput during ShardSyncTask. Please consult the [AWS Documentation for ListShards](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_ListShards.html) for more information. 399 | * ListShards supports higher call rate, which should reduce instances of throttling when attempting to synchronize the shard list. 400 | * __WARNING: `ListShards` is a new API, and may require updating any explicit IAM policies__ 401 | * [PR #59](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/59) 402 | * Changed to now download jars from Maven using `https`. 403 | * [PR #59](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/59) 404 | 405 | ### Release 0.7.0 (August 2, 2017) 406 | * Updated the dependency on [Amazon Kinesis Client for Java][amazon-kcl-github] to 1.8.1. 407 | This adds support for setting a timeout when dispatching records to the node.js record processor. 408 | If the record processor doesn't respond in the given time the Java processor is terminated. 409 | The timeout for the this can be set by adding `timeoutInSeconds = `. The default for this is no timeout. 410 | __Setting this can cause the KCL to exit suddenly, before using this ensure that you have an automated restart for your application__ 411 | __Updating minimum requirement for the JDK version to 8__ 412 | * [Amazon Kinesis Client Issue #185](https://github.com/awslabs/amazon-kinesis-client/issues/185) 413 | * [PR #41](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/41) 414 | * Added support to handle graceful shutdown requests. 415 | * [PR #39](https://github.com/awslabs/amazon-kinesis-client-nodejs/pull/39) 416 | * [Issue #34](https://github.com/awslabs/amazon-kinesis-client-nodejs/issues/34) 417 | 418 | ### Release 0.6.0 (December 12, 2016) 419 | * Updated the dependency on [Amazon Kinesis Client for Java][amazon-kcl-github] to 1.7.2. 420 | * PR #23 421 | * PR #24 422 | 423 | ### Release 0.5.0 (March 26, 2015) 424 | * The `aws-kcl` npm module allows implementation of record processors in Node.js using the Amazon KCL [MultiLangDaemon][multi-lang-daemon]. 425 | * The `samples` directory contains a sample producer and processing applications using the Amazon KCL for Node.js. 
426 | 427 | [amazon-kinesis]: http://aws.amazon.com/kinesis 428 | [amazon-kinesis-docs]: http://aws.amazon.com/documentation/kinesis/ 429 | [amazon-kinesis-shard]: http://docs.aws.amazon.com/kinesis/latest/dev/key-concepts.html 430 | [amazon-kcl]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html 431 | [aws-sdk-node]: http://aws.amazon.com/sdk-for-node-js/ 432 | [amazon-kcl-github]: https://github.com/awslabs/amazon-kinesis-client 433 | [amazon-kinesis-python-github]: https://github.com/awslabs/amazon-kinesis-client-python 434 | [amazon-kinesis-ruby-github]: https://github.com/awslabs/amazon-kinesis-client-ruby 435 | [multi-lang-daemon]: https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java 436 | [DefaultCredentialsProvider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 437 | [kinesis-forum]: http://developer.amazonwebservices.com/connect/forum.jspa?forumID=169 438 | [aws-console]: http://aws.amazon.com/console/ 439 | [jvm]: http://java.com/en/download/ 440 | 441 | ## License 442 | 443 | This library is licensed under the Apache 2.0 License. 444 | -------------------------------------------------------------------------------- /bin/kcl-bootstrap: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /*** 4 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | SPDX-License-Identifier: Apache-2.0 6 | ***/ 7 | 8 | 'use strict'; 9 | 10 | 11 | var fs = require('fs'); 12 | var http = require('http'); 13 | var https = require('https'); 14 | var path = require('path'); 15 | var spawn = require('child_process').spawn; 16 | var convert = require('xml-js'); 17 | var url = require('url'); 18 | var util = require('util'); 19 | const { program } = require('commander'); 20 | 21 | const mavenFile = fs.readFileSync(path.join(__dirname, '..', 'pom.xml'), 'utf8'); 22 | const mavenJson = JSON.parse(convert.xml2json(mavenFile, { compact: true })); 23 | const dependencyArray = mavenJson.project.dependencies.dependency; 24 | const propertyDict = Object.fromEntries(Object.entries(mavenJson.project.properties).map(([k, v]) => [`$\{${k}\}`, v._text])); 25 | 26 | // Read Java dependencies from pom.xml 27 | const MAVEN_PACKAGE_LIST = dependencyArray.map( 28 | dep => getMavenPackageInfo(dep.groupId._text, dep.artifactId._text, propertyDict[dep.version._text] || dep.version._text) 29 | ); 30 | 31 | var DEFAULT_JAR_PATH = path.resolve(path.join(__dirname, '..', 'lib', 'jars')); 32 | var MULTI_LANG_DAEMON_CLASS = 'software.amazon.kinesis.multilang.MultiLangDaemon'; 33 | var MAX_HTTP_REDIRECT_FOLLOW = 3; 34 | 35 | function bootstrap() { 36 | var args = parseArguments(); 37 | downloadMavenPackages(MAVEN_PACKAGE_LIST, args.jarPath, function (err) { 38 | if (err) { 39 | errorExit(util.format('Unable to download MultiLangDaemon jar files from maven: %s', err)); 40 | } 41 | startKinesisClientLibraryApplication(args); 42 | }); 43 | } 44 | 45 | function createJavaHomeExecutablePath() { 46 | return path.join(process.env.JAVA_HOME, 'bin', process.platform !== 'win32' ? 
'java' : 'java.exe'); 47 | } 48 | 49 | function parseArguments() { 50 | program 51 | .option('-p, --properties [properties file]', 'properties file with multi-language daemon options') 52 | .option('-l, --log-configuration [logback.xml]', 'logback.xml to be used with MultiLangDaemon for logging (optional)') 53 | .option('-j, --java [java path]', 'path to java executable - defaults to using JAVA_HOME environment variable to get java path (optional)') 54 | .option('-c, --jar-path [jar path]', 'path where all multi-language daemon jar files will be downloaded (optional)') 55 | .option('-e, --execute', 'execute the KCL application') 56 | .parse(process.argv); 57 | 58 | const options = program.opts(); 59 | var args = { 60 | 'properties': options.properties, 61 | 'logConfiguration': options.logConfiguration ? options.logConfiguration : null, 62 | 'java': (options.java ? options.java : (process.env.JAVA_HOME ? createJavaHomeExecutablePath() : null)), 63 | 'jarPath': (options.jarPath ? options.jarPath : DEFAULT_JAR_PATH), 64 | 'execute': options.execute 65 | }; 66 | 67 | if (!args.properties) { 68 | invalidInvocationExit(program, 'Specify a valid --properties value.', true); 69 | } 70 | if (!isFile(args.properties)) { 71 | invalidInvocationExit(program, args.properties + ' file does not exist. Specify a valid --properties value.', true); 72 | } 73 | if (!isFile(args.java)) { 74 | invalidInvocationExit(program, 'Valid --java value is required or alternatively JAVA_HOME environment variable must be set.', true); 75 | } 76 | if (args.logCofiguration && !isFile(args.logConfiguration)) { 77 | invalidInvocationExit(program, args.logConfiguration + ' file does not exists. Specify a valid --log-configuration value', true); 78 | } 79 | if (args.jarPath === DEFAULT_JAR_PATH) { 80 | createDirectory(args.jarPath); 81 | } 82 | else if (!isDirectory(args.jarPath)) { 83 | invalidInvocationExit(program, 'Path specified with --jar-path must already exist and must be a directory.', false); 84 | } 85 | return args; 86 | } 87 | 88 | function startKinesisClientLibraryApplication(options) { 89 | var classpath = getClasspath(options).join(getPathDelimiter()); 90 | var java = options.java; 91 | var logConfiguration = options.logConfiguration ? 
['--log-configuration', options.logConfiguration] : []; 92 | var args = ['-cp', classpath, MULTI_LANG_DAEMON_CLASS, '--properties-file', options.properties, ...logConfiguration]; 93 | var cmd = java + ' ' + args.join(' '); 94 | 95 | console.log("=========================================================="); 96 | console.log(cmd); 97 | console.log("=========================================================="); 98 | if (options.execute) { 99 | console.log("Starting MultiLangDaemon ..."); 100 | spawn(java, args, { stdio: 'inherit' }); 101 | } 102 | } 103 | 104 | function getClasspath(options) { 105 | var classpath = []; 106 | fs.readdirSync(options.jarPath).map(function (file) { 107 | return path.join(options.jarPath, file); 108 | }).filter(function (file) { 109 | return isFile(file); 110 | }).forEach(function (file) { 111 | classpath.push(path.resolve(file)); 112 | }); 113 | classpath.push(path.resolve('.')); 114 | classpath.push(path.dirname(path.resolve(options.properties))); 115 | return classpath; 116 | } 117 | 118 | function downloadMavenPackages(mavenPackages, destinationDirectory, callback) { 119 | var remainingPackages = mavenPackages.length; 120 | var callbackInvoked = false; 121 | 122 | var downloadMavenPackageCallback = function (err, filePath) { 123 | remainingPackages = remainingPackages - 1; 124 | if (!callbackInvoked) { 125 | if (!err) { 126 | console.log(filePath + ' downloaded. ' + remainingPackages + ' files remain.'); 127 | } 128 | if (err || remainingPackages === 0) { 129 | callbackInvoked = true; 130 | callback(err); 131 | return; 132 | } 133 | } 134 | }; 135 | 136 | for (var i = 0; i < mavenPackages.length; ++i) { 137 | downloadMavenPackage(mavenPackages[i], destinationDirectory, downloadMavenPackageCallback); 138 | } 139 | } 140 | 141 | function downloadMavenPackage(mavenPackage, destinationDirectory, callback) { 142 | process.nextTick(function () { 143 | var mavenPackageUrlInfo = getMavenPackageUrlInfo(mavenPackage); 144 | var destinationFile = path.join(destinationDirectory, mavenPackageUrlInfo.fileName); 145 | if (fs.existsSync(destinationFile)) { 146 | callback(null, destinationFile); 147 | return; 148 | } 149 | httpDownloadFile(mavenPackageUrlInfo.url, destinationFile, 0, callback); 150 | }); 151 | } 152 | 153 | function httpDownloadFile(requestUrl, destinationFile, redirectCount, callback) { 154 | if (redirectCount >= MAX_HTTP_REDIRECT_FOLLOW) { 155 | callback('Reached maximum redirects. ' + requestUrl + ' could not be downloaded.'); 156 | return; 157 | } 158 | var protocol = (url.parse(requestUrl).protocol === 'https:' ? https : http); 159 | var options = { 160 | hostname: url.parse(requestUrl).hostname, 161 | path: url.parse(requestUrl).path, 162 | agent: false 163 | }; 164 | var request = protocol.get(options, function (response) { 165 | // Non-2XX response. 
166 | if (response.statusCode > 300) { 167 | if (response.statusCode > 300 && response.statusCode < 400 && response.headers.location) { 168 | httpDownloadFile(response.headers.location, destinationFile, redirectCount + 1, callback); 169 | return; 170 | } 171 | else { 172 | callback(requestUrl + ' could not be downloaded: ' + response.statusCode); 173 | return; 174 | } 175 | } 176 | else { 177 | var destinationFileStream = fs.createWriteStream(destinationFile); 178 | response.pipe(destinationFileStream); 179 | 180 | var callbackInvoked = false; 181 | var destinationFileStreamFinishCallback = function () { 182 | if (callbackInvoked) { 183 | return; 184 | } 185 | callbackInvoked = true; 186 | callback(null, destinationFile); 187 | }; 188 | destinationFileStream.on('finish', destinationFileStreamFinishCallback); 189 | // Older Node.js version may not support 'finish' event. 190 | destinationFileStream.on('close', destinationFileStreamFinishCallback); 191 | } 192 | }).on('error', function (err) { 193 | fs.unlink(destinationFile); 194 | callback(err); 195 | }); 196 | } 197 | 198 | function getMavenPackageUrlInfo(mavenPackage) { 199 | var urlParts = []; 200 | var fileName = util.format('%s-%s.jar', mavenPackage.artifactId, mavenPackage.version); 201 | mavenPackage.groupId.split('.').forEach(function (part) { 202 | urlParts.push(part); 203 | }); 204 | urlParts.push(mavenPackage.artifactId); 205 | urlParts.push(mavenPackage.version); 206 | urlParts.push(fileName); 207 | return { 208 | 'url': "https://repo1.maven.org/maven2/" + urlParts.join('/'), 209 | 'fileName': fileName 210 | }; 211 | } 212 | 213 | function getMavenPackageInfo(groupId, artifactId, version) { 214 | return { 215 | 'groupId': groupId, 216 | 'artifactId': artifactId, 217 | 'version': version 218 | }; 219 | } 220 | 221 | function isDirectory(path) { 222 | try { 223 | return fs.statSync(path).isDirectory(); 224 | } catch (e) { 225 | // Path does not exist. 226 | return false; 227 | } 228 | } 229 | 230 | function createDirectory(path) { 231 | try { 232 | fs.mkdirSync(path); 233 | } catch (e) { 234 | if (e.code !== 'EEXIST') { 235 | throw e; 236 | } 237 | } 238 | } 239 | 240 | function isFile(path) { 241 | try { 242 | return fs.statSync(path).isFile(); 243 | } catch (e) { 244 | // Path does not exist. 245 | return false; 246 | } 247 | } 248 | 249 | function getPathDelimiter() { 250 | if (path.delimiter) { 251 | return path.delimiter; 252 | } 253 | // Older Node.js version may not support path.delimiter. 254 | return (/^win/.test(process.platform) ? ';' : ':'); 255 | } 256 | 257 | function invalidInvocationExit(prog, err, showHelp) { 258 | console.error(''); 259 | console.error(util.format('ERROR: %s', err)); 260 | console.error(''); 261 | if (showHelp) { 262 | prog.outputHelp(); 263 | } 264 | process.exit(1); 265 | } 266 | 267 | function errorExit(err) { 268 | console.error(''); 269 | console.error(util.format('ERROR: %s', err)); 270 | console.error(''); 271 | process.exit(1); 272 | } 273 | 274 | bootstrap(); 275 | -------------------------------------------------------------------------------- /bin/kcl-bootstrap.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | REM Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | REM SPDX-License-Identifier: Apache-2.0 5 | 6 | 7 | node %~dp0\kcl-bootstrap %* 8 | -------------------------------------------------------------------------------- /conf/.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "bitwise" : false, 3 | "curly" : true, 4 | "eqeqeq" : true, 5 | "funcscope" : false, 6 | "immed" : true, 7 | "indent" : 2, 8 | "latedef" : "nofunc", 9 | "newcap" : true, 10 | "noarg" : true, 11 | "sub" : true, 12 | "undef" : true, 13 | "boss" : true, 14 | "eqnull" : true, 15 | "strict" : true, 16 | "node" : true, 17 | "esversion" : 6, 18 | "globals" : { 19 | /* MOCHA */ 20 | "describe" : false, 21 | "it" : false, 22 | "before" : false, 23 | "beforeEach" : false, 24 | "after" : false, 25 | "afterEach" : false 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /conf/jsdoc.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "tags" : { 3 | "allowUnknownTags" : true 4 | }, 5 | "plugins" : ["plugins/markdown"], 6 | "templates" : { 7 | "cleverLinks" : false, 8 | "monospaceLinks" : false, 9 | "dateFormat" : "ddd MMM Do YYYY", 10 | "outputSourceFiles" : false, 11 | "outputSourcePath" : false, 12 | "systemName" : "Amazon Kinesis Client Library in Node.js", 13 | "footer" : "Amazon Kinesis Client Library in Node.js", 14 | "copyright" : "Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.", 15 | "navType" : "vertical", 16 | "theme" : "cosmo", 17 | "linenums" : true, 18 | "collapseSymbols" : false, 19 | "inverseNav" : true, 20 | "highlightTutorialCode" : true, 21 | "protocol": "fred://" 22 | }, 23 | "markdown" : { 24 | "parser" : "gfm", 25 | "hardwrap" : true 26 | }, 27 | "opts" : { 28 | "private" : false, 29 | "recurse" : true, 30 | "lenient" : false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | /** 10 | * @fileoverview 11 | * The main KCL namespace exports user-facing modules and public functions. 12 | * Note that private modules and private functions may change at a future date without notice. 13 | */ 14 | 15 | module.exports = require("./lib/kcl/kcl_process"); 16 | -------------------------------------------------------------------------------- /lib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-kinesis-client-nodejs/1ed080ce080d901f418e11892c6839502e0cdd76/lib/.DS_Store -------------------------------------------------------------------------------- /lib/kcl/action_handler.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | /** 9 | * @fileoverview 10 | * Marshals and unmarshals actions and delegates them back and forth between the I/O handler 11 | * that talks to the MultiLangDaemon and the KCL manager. 12 | */ 13 | 14 | var EventEmitter = require('events').EventEmitter; 15 | var util = require('util'); 16 | 17 | /** 18 | * Creates an instance of the action handler. 
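 * Each line received from the I/O handler is parsed as a JSON action and re-emitted as an 'action' event; an 'end' event is emitted when the underlying I/O stream closes.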
19 | * @class ActionHandler 20 | * @param {IOHandler} ioHandler - I/O handler instance that communicates with the MultiLangDaemon. 21 | */ 22 | function ActionHandler(ioHandler) { 23 | this._ioHandler = ioHandler; 24 | this._onIOLineCallback = this._onIOLine.bind(this); 25 | this._onIOCloseCallback = this._onIOClose.bind(this); 26 | this._ioHandler.on('line', this._onIOLineCallback); 27 | this._ioHandler.on('close', this._onIOCloseCallback); 28 | } 29 | 30 | /** @extends EventEmitter */ 31 | util.inherits(ActionHandler, EventEmitter); 32 | 33 | /** 34 | * Frees up any resources held by this instance. 35 | */ 36 | ActionHandler.prototype.destroy = function() { 37 | this._ioHandler.removeListener('line', this._onIOLineCallback); 38 | this._ioHandler.removeListener('close', this._onIOCloseCallback); 39 | }; 40 | 41 | /** 42 | * Sends an action to the MultiLangDaemon. 43 | * @param {object} action - Action to send to the MultiLangDaemon. 44 | * @param {callback} callback - Callback that will be invoked when the action is sent to the MultiLangDaemon. 45 | */ 46 | ActionHandler.prototype.sendAction = function(action, callback) { 47 | this._ioHandler.writeLine(JSON.stringify(action), callback); 48 | }; 49 | 50 | /** 51 | * Event handler when a new line is received from the MultiLangDaemon through the I/O handler. 52 | * @param {string} line - New line received by IO handler. 53 | * @private 54 | */ 55 | ActionHandler.prototype._onIOLine = function(line) { 56 | if (line) { 57 | var action = JSON.parse(line); 58 | if (!action || !action.action) { 59 | this._ioHandler.writeError(util.format('Invalid action received: %s', line)); 60 | return; 61 | } 62 | this.emit('action', action); 63 | } 64 | }; 65 | 66 | /** 67 | * Event handler for the I/O close event. Following this event, no new lines will be received from the I/O handler. 68 | * @private 69 | */ 70 | ActionHandler.prototype._onIOClose = function() { 71 | this.emit('end'); 72 | }; 73 | 74 | 75 | /** @exports kcl/ActionHandler */ 76 | module.exports = ActionHandler; 77 | -------------------------------------------------------------------------------- /lib/kcl/checkpointer.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | /** 9 | * @fileoverview 10 | * Allows you to make checkpoint requests. A checkpoint marks a point in a shard until which all records are processed 11 | * successfully. If this MultiLangDaemon KCL application instance shuts down for whatever reason, then another instance 12 | * of the same KCL application resumes processing for this shard after the most recent checkpoint. 13 | */ 14 | 15 | var EventEmitter = require('events').EventEmitter; 16 | var util = require('util'); 17 | 18 | 19 | /** 20 | * Creates an instance of the checkpointer. 21 | * @class Checkpointer 22 | * @param {KCLManager} kclManager - Main KCL manager instance that keeps track of current state and dispatches all 23 | * processing functions. 24 | */ 25 | function Checkpointer(kclManager) { 26 | this._kclManager = kclManager; 27 | this._callback = null; 28 | } 29 | 30 | /** 31 | * Checkpoints at a given sequence number. If the sequence number is not provided, the checkpoint will be at the end of 32 | * the most recently-delivered list of records. 
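 * For example (illustrative, mirroring the basic sample in this repository): checkpointer.checkpoint(record.sequenceNumber, function(err, sequenceNumber) { ... }); only invoke the record processor's completeCallback after this callback fires.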
33 | * @param {string} [sequenceNumber] - Sequence number of the record to checkpoint; if this value is not provided, the 34 | * latest retrieved record is checkpointed. 35 | * @param {callback} callback - Function that is invoked after the checkpoint operation completes. 36 | */ 37 | Checkpointer.prototype.checkpoint = function(sequenceNumber, callback) { 38 | if (typeof sequenceNumber === 'function') { 39 | callback = sequenceNumber; 40 | sequenceNumber = null; 41 | } 42 | 43 | if (this._callback) { 44 | callback('Cannot checkpoint while another checkpoint is already in progress.'); 45 | return; 46 | } 47 | this._callback = callback; 48 | this._kclManager.checkpoint(sequenceNumber); 49 | }; 50 | 51 | /** 52 | * Gets called by the KCL manager when an outstanding checkpoint request completes either successfully or with 53 | * an error. This function then invokes the callback passed by the user when the checkpoint was requested. 54 | * @param {string} err - Error message if the checkpoint request was unsuccessful. 55 | * @param {string} sequenceNumber - Sequence number for which the checkpoint response is received. 56 | * @ignore 57 | */ 58 | Checkpointer.prototype.onCheckpointerResponse = function(err, sequenceNumber) { 59 | var callback = this._callback; 60 | this._callback = null; 61 | callback(err, sequenceNumber); 62 | }; 63 | 64 | /** @exports kcl/Checkpointer */ 65 | module.exports = Checkpointer; 66 | -------------------------------------------------------------------------------- /lib/kcl/io_handler.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | /** 9 | * @fileoverview 10 | * Communicates with the MultiLangDaemon through the input and output files. 11 | */ 12 | 13 | var EventEmitter = require('events').EventEmitter; 14 | var readline = require('readline'); 15 | var util = require('util'); 16 | 17 | /** 18 | * Creates an instance of the I/O handler. 19 | * @class IOHandler 20 | * @param {file} inputFile - A file to read input lines from. 21 | * @param {file} outputFile - A file to write output lines to. 22 | * @param {file} errorFile - A file to write error lines to. 23 | */ 24 | function IOHandler(inputFile, outputFile, errorFile) { 25 | this._inputFile = inputFile; 26 | this._outputFile = outputFile; 27 | this._errorFile = errorFile; 28 | 29 | this._readlineInterface = readline.createInterface(this._inputFile, this._outputFile); 30 | this._onInputLineCallback = this._onInputLine.bind(this); 31 | this._onInputCloseCallback = this._onInputClose.bind(this); 32 | this._readlineInterface.on('line', this._onInputLineCallback); 33 | this._readlineInterface.on('close', this._onInputCloseCallback); 34 | } 35 | 36 | /** @extends EventEmitter */ 37 | util.inherits(IOHandler, EventEmitter); 38 | 39 | /** 40 | * Frees up any resources held by this instance. 41 | */ 42 | IOHandler.prototype.destroy = function() { 43 | this._readlineInterface.removeListener('line', this._onInputLineCallback); 44 | this._readlineInterface.removeListener('close', this._onInputCloseCallback); 45 | this._readlineInterface.close(); 46 | }; 47 | 48 | /** 49 | * Sends the string message to the MultiLangDaemon using the output stream. 50 | * @param {string} line - Line to send to the MultiLangDaemon. 51 | * @param {callback} callback - Callback that gets invoked on completion. 
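 * The line is wrapped in newline characters before writing because the MultiLangDaemon protocol is line-oriented (one JSON action per line).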
52 | */ 53 | IOHandler.prototype.writeLine = function(line, callback) { 54 | var result = this._outputFile.write(util.format('\n%s\n', line), 'utf8', callback); 55 | if (!result) { 56 | callback(util.format('Unable to write %s to file.', line)); 57 | } 58 | }; 59 | 60 | /** 61 | * Logs an error. 62 | * @param {string} error - Error to log. 63 | */ 64 | IOHandler.prototype.writeError = function(error) { 65 | this._errorFile.write(util.format('%s\n', error)); 66 | }; 67 | 68 | /** 69 | * Event handler for when a new line is received from the MultiLangDaemon through the input stream. 70 | * @param {string} line - New line received. 71 | * @private 72 | */ 73 | IOHandler.prototype._onInputLine = function(line) { 74 | this.emit('line', line); 75 | }; 76 | 77 | /** 78 | * Event handler for when the input stream is closed. 79 | * @private 80 | */ 81 | IOHandler.prototype._onInputClose = function() { 82 | this.emit('close'); 83 | }; 84 | 85 | /** @exports kcl/IOHandler */ 86 | module.exports = IOHandler; 87 | -------------------------------------------------------------------------------- /lib/kcl/kcl_manager.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | /** 9 | * @fileoverview 10 | * Keeps track of the MultiLangDaemon protocol. It implements the logic to move record processing forward and to 11 | * manage interactions with the record processor and MultiLangDaemon. 12 | */ 13 | 14 | var util = require('util'); 15 | 16 | var ActionHandler = require('./action_handler'); 17 | var Checkpointer = require('./checkpointer'); 18 | var IOHandler = require('./io_handler'); 19 | 20 | KCLManager.VERSION1 = Symbol("version1"); 21 | KCLManager.VERSION2 = Symbol("version2"); 22 | 23 | /** 24 | * Creates an instance of the KCL manager. 25 | * @class KCLManager 26 | * @param {object} kclManagerInput - Object containing the record processor and the record processor version. 27 | * @param {file} inputFile - A file to read action messages from. 28 | * @param {file} outputFile - A file to write action messages to. 29 | * @param {file} errorFile - A file to write error messages to. 30 | */ 31 | function KCLManager(kclManagerInput, inputFile, outputFile, errorFile) { 32 | this._version = kclManagerInput.version; 33 | if (this._version === undefined) { 34 | this._version = KCLManager.VERSION2; 35 | } 36 | this._ioHandler = new IOHandler(inputFile, outputFile, errorFile); 37 | this._actionHandler = new ActionHandler(this._ioHandler); 38 | 39 | // this._stateMachine = new KCLStateMachine({}); 40 | this._context = { 41 | recordProcessor: kclManagerInput.recordProcessor, 42 | checkpointer: new Checkpointer(this) 43 | }; 44 | } 45 | 46 | /** 47 | * Event handler that gets invoked on a new action received from the MultiLangDaemon. 48 | * @param {object} action - Action received.
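 * Record-processor actions (initialize, processRecords, leaseLost, shardEnded) are forwarded to the record processor, checkpoint responses are routed to the checkpointer, and shutdownRequested is handled separately.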
49 | * @private 50 | */ 51 | KCLManager.prototype._onAction = function(action) { 52 | var actionType = action.action; 53 | switch (actionType) { 54 | case 'initialize': 55 | case 'processRecords': 56 | case 'leaseLost': 57 | case 'shardEnded': 58 | this._onRecordProcessorAction(action); 59 | break; 60 | case 'checkpoint': 61 | this._onCheckpointAction(action); 62 | break; 63 | case 'shutdownRequested': 64 | this._onShutdownRequested(action); 65 | break; 66 | default: 67 | this._reportError(util.format('Invalid action received: %j', action)); 68 | } 69 | }; 70 | 71 | /** 72 | * Event handler that gets invoked when action handler has ended and no more action will be received. 73 | * @private 74 | */ 75 | KCLManager.prototype._onActionEnd = function() { 76 | // No more actions, so cleanup. 77 | this._cleanup(); 78 | }; 79 | 80 | /** 81 | * Record processing related action handler. 82 | * @param {object} action - Record processor related action. 83 | * @private 84 | */ 85 | KCLManager.prototype._onRecordProcessorAction = function(action) { 86 | var actionType = action.action; 87 | var context = this._context; 88 | var checkpointer = context.checkpointer; 89 | var recordProcessor = context.recordProcessor; 90 | var recordProcessorFuncInput = cloneToInput(action); 91 | var recordProcessorFunc; 92 | 93 | switch (actionType) { 94 | case 'initialize': 95 | recordProcessorFunc = recordProcessor.initialize; 96 | break; 97 | case 'processRecords': 98 | recordProcessorFuncInput.checkpointer = checkpointer; 99 | recordProcessorFunc = recordProcessor.processRecords; 100 | break; 101 | case 'leaseLost': 102 | if (this._version === KCLManager.VERSION1) { 103 | recordProcessorFuncInput.reason = 'ZOMBIE'; 104 | recordProcessorFunc = recordProcessor.shutdown; 105 | } else { 106 | recordProcessorFunc = recordProcessor.leaseLost; 107 | } 108 | break; 109 | case 'shardEnded': 110 | recordProcessorFuncInput.checkpointer = checkpointer; 111 | if (this._version === KCLManager.VERSION1) { 112 | recordProcessorFuncInput.reason = 'TERMINATE'; 113 | recordProcessorFunc = recordProcessor.shutdown; 114 | } else { 115 | recordProcessorFunc = recordProcessor.shardEnded; 116 | } 117 | break; 118 | default: 119 | // Should not occur. 120 | throw new Error(util.format('Invalid action for record processor: %j', action)); 121 | } 122 | 123 | // Attach callback so user can mark that operation is complete, and KCL can proceed with new operation. 124 | var callbackFunc = function() { 125 | this._recordProcessorCallback(context, action); 126 | }.bind(this); 127 | 128 | recordProcessorFunc.apply(recordProcessor, [recordProcessorFuncInput, callbackFunc]); 129 | }; 130 | 131 | /** 132 | * Clones the JSON action object into an input object that will be passed to the record processor function. 133 | * Note that only shallow copy is performed for efficiency. 134 | * @param {object} action - Record processor-related action. 135 | * @return Returns the cloned action object without the "action" attribute. 136 | * @private 137 | */ 138 | function cloneToInput(action) { 139 | var input = {}; 140 | for (var attr in action) { 141 | if (attr !== 'action') { 142 | input[attr] = action[attr]; 143 | } 144 | } 145 | return input; 146 | } 147 | 148 | /** 149 | * Gets invoked when the callback is received from the record processor suggesting that the record processor action 150 | * is complete. 151 | * @param {object} context - Context for which the record processor action is complete. 152 | * @param {object} action - Completed action. 
153 | * @private 154 | */ 155 | KCLManager.prototype._recordProcessorCallback = function(context, action) { 156 | this._sendAction(context, {action : 'status', responseFor : action.action}); 157 | }; 158 | 159 | /** 160 | * Sends the given action to the MultiLangDaemon. 161 | * @param {object} context - Record processor context for which this action belongs to. 162 | * @param {object} action - Action to send. 163 | */ 164 | KCLManager.prototype._sendAction = function(context, action) { 165 | this._actionHandler.sendAction(action, function(err) { 166 | // If there is an error communicating with the MultiLangDaemon, then cannot proceed any further. 167 | if (err) { 168 | this._cleanup(); 169 | throw new Error('Kinesis Client Library is in an invalid state. Cannot proceed further.'); 170 | } 171 | }.bind(this)); 172 | }; 173 | 174 | /** 175 | * Checkpoint response action handler. 176 | * @param {object} action - Checkpoint response action. 177 | * @private 178 | */ 179 | KCLManager.prototype._onCheckpointAction = function(action) { 180 | var checkpointer = this._context.checkpointer; 181 | checkpointer.onCheckpointerResponse.apply(checkpointer, [action.error, action.sequenceNumber]); 182 | }; 183 | 184 | /** 185 | * Checkpoints with given sequence number. The request is sent to the MultiLangDaemon. 186 | * @param {string} sequenceNumber - Sequence number to checkpoint. 187 | */ 188 | KCLManager.prototype.checkpoint = function(sequenceNumber) { 189 | this._sendAction(this._context, {action : 'checkpoint', sequenceNumber : sequenceNumber}); 190 | }; 191 | 192 | /** 193 | * Gets invoked when shutdownRequested is called. 194 | * @param {Object} action - RecordProcessor related action 195 | * @private 196 | */ 197 | KCLManager.prototype._onShutdownRequested = function(action) { 198 | var context = this._context; 199 | var recordProcessor = context.recordProcessor; 200 | var recordProcessorFunc = recordProcessor.shutdownRequested; 201 | 202 | if (typeof recordProcessorFunc === 'function') { 203 | var recordProcessorFuncInput = cloneToInput(action); 204 | var checkpointer = context.checkpointer; 205 | 206 | var callbackFunc = function() { 207 | this._recordProcessorCallback(context, action); 208 | }.bind(this); 209 | 210 | recordProcessorFuncInput.checkpointer = checkpointer; 211 | recordProcessorFunc.apply(recordProcessor, [recordProcessorFuncInput, callbackFunc]); 212 | } 213 | else { 214 | this._recordProcessorCallback(context, action); 215 | } 216 | }; 217 | 218 | /** 219 | * Frees up any resources held by this instance. 220 | * @private 221 | */ 222 | KCLManager.prototype._cleanup = function() { 223 | this._actionHandler.removeListener('action', this._onActionCallback); 224 | this._actionHandler.removeListener('end', this._onActionEndCallback); 225 | this._actionHandler.destroy(); 226 | this._ioHandler.destroy(); 227 | }; 228 | 229 | /** 230 | * Initiates the KCL processing. 
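 * Calling run() again after the first call is a no-op; the action listeners are registered only once.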
231 | */ 232 | KCLManager.prototype.run = function() { 233 | if (!this._running) { 234 | this._running = true; 235 | this._onActionCallback = this._onAction.bind(this); 236 | this._onActionEndCallback = this._onActionEnd.bind(this); 237 | this._actionHandler.on('action', this._onActionCallback); 238 | this._actionHandler.on('end', this._onActionEndCallback); 239 | } 240 | }; 241 | 242 | /** @exports kcl/KCLManager */ 243 | module.exports = KCLManager; 244 | -------------------------------------------------------------------------------- /lib/kcl/kcl_process.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | /** 9 | * @fileoverview 10 | * Initializes record processing with the MultiLangDaemon. 11 | * 12 | * This KCL class takes the record processor that is responsible for processing a shard from an Amazon Kinesis stream. 13 | * The record processor must provide the following three methods: 14 | * 15 | * * `initialize` - Called once. 16 | * * `processRecords` - Called zero or more times. 17 | * * `shutdown` - Called if this MultiLangDaemon instance loses the lease to this shard. 18 | * 19 | * @example 20 | * var recordProcessor = { 21 | * 22 | * initialize: function(initializeInput, completeCallback) { 23 | * // Initialization logic here... 24 | * 25 | * // Must call completeCallback when finished initializing in order to proceed further. 26 | * completeCallback(); 27 | * }, 28 | * 29 | * processRecords: function(processRecordsInput, completeCallback) { 30 | * // Record processing logic here... 31 | * 32 | * // Note that if a checkpoint is invoked, only call completeCallback after the checkpoint operation is complete. 33 | * completeCallback(); 34 | * }, 35 | * 36 | * leaseLost: function(leaseLostInput, completeCallback) { 37 | * // Lease lost logic here... 38 | * 39 | * // Application can't checkpoint at this time as the lease was lost. 40 | * completeCallback(); 41 | * }, 42 | * 43 | * shardEnded: function(shardEndedInput, completeCallback) { 44 | * // Shard end logic here... 45 | * 46 | * // Application needs to checkpoint at this time. Only call completeCallback after the checkpoint operation is 47 | * // complete. 48 | * completeCallback(); 49 | * } 50 | * }; 51 | * 52 | * kcl(recordProcessor).run(); 53 | * 54 | */ 55 | 56 | var KCLManager = require('./kcl_manager'); 57 | 58 | /** 59 | * Creates an instance of the KCL process. 60 | * @param {object} recordProcessor - A record processor to use for processing a shard. 61 | * @param {file} inputFile - A file to read action messages from. Defaults to STDIN. 62 | * @param {file} outputFile - A file to write action messages to. Defaults to STDOUT. 63 | * @param {file} errorFile - A file to write error messages to. Defaults to STDERR.
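 * If the record processor implements a shutdown function it is treated as a version 1 processor; otherwise it must implement both leaseLost and shardEnded (version 2).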
64 | */ 65 | function KCLProcess(recordProcessor, inputFile, outputFile, errorFile) { 66 | var allMethodsPresent = typeof recordProcessor.initialize === 'function' && 67 | typeof recordProcessor.processRecords === 'function'; 68 | allMethodsPresent = allMethodsPresent && ((typeof recordProcessor.leaseLost === 'function' && 69 | typeof recordProcessor.shardEnded === 'function') || (typeof recordProcessor.shutdown === 'function')); 70 | if (!allMethodsPresent) { 71 | throw new Error('Record processor must implement initialize, processRecords, and either shutdown or both leaseLost and shardEnded.'); 72 | } 73 | inputFile = typeof inputFile !== 'undefined' ? inputFile : process.stdin; 74 | outputFile = typeof outputFile !== 'undefined' ? outputFile : process.stdout; 75 | errorFile = typeof errorFile !== 'undefined' ? errorFile : process.stderr; 76 | 77 | var version = KCLManager.VERSION2; 78 | if (typeof recordProcessor.shutdown === 'function') { 79 | version = KCLManager.VERSION1; 80 | } 81 | 82 | var kclManagerInput = { 83 | recordProcessor: recordProcessor, 84 | version: version 85 | }; 86 | 87 | var kclManager = new KCLManager(kclManagerInput, inputFile, outputFile, errorFile, version); 88 | 89 | return { 90 | // For testing only. 91 | _kclManager: kclManager, 92 | 93 | /** 94 | * Starts this KCL process's main loop. 95 | */ 96 | run: function () { 97 | kclManager.run(); 98 | }, 99 | 100 | cleanup: function() { 101 | kclManager._cleanup(); 102 | } 103 | }; 104 | } 105 | 106 | 107 | /** @exports kcl/KCLProcess */ 108 | module.exports = KCLProcess; 109 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "aws-kcl", 3 | "description": "Kinesis Client Library (KCL) in Node.js.", 4 | "version": "3.0.1", 5 | "author": { 6 | "name": "Amazon Web Services", 7 | "url": "http://aws.amazon.com/" 8 | }, 9 | "main": "index.js", 10 | "engines": { 11 | "node": ">= 0.8.0" 12 | }, 13 | "bin": { 14 | "kcl-bootstrap": "bin/kcl-bootstrap", 15 | "kcl-bootstrap.bat": "bin/kcl-bootstrap.bat" 16 | }, 17 | "scripts": { 18 | "build": "grunt build", 19 | "compile": "grunt compile", 20 | "clean": "grunt clean", 21 | "test": "grunt test", 22 | "release": "grunt release", 23 | "doc": "grunt jsdoc" 24 | }, 25 | "dependencies": { 26 | "commander": "~14.0.0", 27 | "xml-js": "^1.6.11" 28 | }, 29 | "devDependencies": { 30 | "async": "^3.2.2", 31 | "aws-sdk": "^2.390.0", 32 | "chai": "^4.3.4", 33 | "grunt": "^1.0.3", 34 | "grunt-cli": "^1.3.2", 35 | "grunt-contrib-clean": "^2.0.0", 36 | "grunt-contrib-jshint": "^3.1.1", 37 | "grunt-jsdoc": "^2.4.1", 38 | "grunt-mocha-test": "^0.13.3", 39 | "ink-docstrap": "^1.3.2", 40 | "log4js": "^6.3.0", 41 | "mocha": "^11.5.0", 42 | "sinon": "^17.0.1" 43 | }, 44 | "homepage": "https://github.com/awslabs/amazon-kinesis-client-nodejs", 45 | "repository": { 46 | "type": "git", 47 | "url": "git://github.com/awslabs/amazon-kinesis-client-nodejs.git" 48 | }, 49 | "bugs": { 50 | "url": "https://github.com/awslabs/amazon-kinesis-client-nodejs/issues" 51 | }, 52 | "license": "Apache-2.0", 53 | "keywords": [ 54 | "api", 55 | "amazon", 56 | "aws", 57 | "big data", 58 | "kinesis", 59 | "kinesis client library", 60 | "kcl", 61 | "node.js" 62 | ] 63 | } 64 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 2.25.64 6 | 3.0.0 7 | 4.1.118.Final 8 |
2.0.6 9 | 2.13.5 10 | 1.3.15 11 | 12 | 13 | 14 | software.amazon.kinesis 15 | amazon-kinesis-client-multilang 16 | ${kcl.version} 17 | 18 | 19 | software.amazon.kinesis 20 | amazon-kinesis-client 21 | ${kcl.version} 22 | 23 | 24 | software.amazon.awssdk 25 | kinesis 26 | ${awssdk.version} 27 | 28 | 29 | software.amazon.awssdk 30 | dynamodb 31 | ${awssdk.version} 32 | 33 | 34 | 35 | software.amazon.awssdk 36 | dynamodb-enhanced 37 | ${awssdk.version} 38 | 39 | 40 | 41 | com.amazonaws 42 | dynamodb-lock-client 43 | 1.3.0 44 | 45 | 46 | software.amazon.awssdk 47 | cloudwatch 48 | ${awssdk.version} 49 | 50 | 51 | software.amazon.awssdk 52 | netty-nio-client 53 | ${awssdk.version} 54 | 55 | 56 | software.amazon.awssdk 57 | metrics-spi 58 | ${awssdk.version} 59 | 60 | 61 | software.amazon.awssdk 62 | sts 63 | ${awssdk.version} 64 | 65 | 66 | software.amazon.awssdk 67 | protocol-core 68 | ${awssdk.version} 69 | 70 | 71 | software.amazon.awssdk 72 | aws-query-protocol 73 | ${awssdk.version} 74 | 75 | 76 | software.amazon.awssdk 77 | aws-cbor-protocol 78 | ${awssdk.version} 79 | 80 | 81 | software.amazon.awssdk 82 | aws-json-protocol 83 | ${awssdk.version} 84 | 85 | 86 | software.amazon.awssdk 87 | json-utils 88 | ${awssdk.version} 89 | 90 | 91 | software.amazon.awssdk 92 | third-party-jackson-core 93 | ${awssdk.version} 94 | 95 | 96 | software.amazon.awssdk 97 | third-party-jackson-dataformat-cbor 98 | ${awssdk.version} 99 | 100 | 101 | software.amazon.awssdk 102 | profiles 103 | ${awssdk.version} 104 | 105 | 106 | software.amazon.awssdk 107 | sdk-core 108 | ${awssdk.version} 109 | 110 | 111 | software.amazon.awssdk 112 | aws-core 113 | ${awssdk.version} 114 | 115 | 116 | software.amazon.awssdk 117 | endpoints-spi 118 | ${awssdk.version} 119 | 120 | 121 | software.amazon.awssdk 122 | auth 123 | ${awssdk.version} 124 | 125 | 126 | software.amazon.awssdk 127 | http-client-spi 128 | ${awssdk.version} 129 | 130 | 131 | software.amazon.awssdk 132 | regions 133 | ${awssdk.version} 134 | 135 | 136 | software.amazon.awssdk 137 | annotations 138 | ${awssdk.version} 139 | 140 | 141 | software.amazon.awssdk 142 | utils 143 | ${awssdk.version} 144 | 145 | 146 | software.amazon.awssdk 147 | apache-client 148 | ${awssdk.version} 149 | 150 | 151 | software.amazon.awssdk 152 | arns 153 | ${awssdk.version} 154 | 155 | 156 | software.amazon.awssdk 157 | http-auth-spi 158 | ${awssdk.version} 159 | 160 | 161 | software.amazon.awssdk 162 | http-auth 163 | ${awssdk.version} 164 | 165 | 166 | software.amazon.awssdk 167 | http-auth-aws 168 | ${awssdk.version} 169 | 170 | 171 | software.amazon.awssdk 172 | checksums-spi 173 | ${awssdk.version} 174 | 175 | 176 | software.amazon.awssdk 177 | checksums 178 | ${awssdk.version} 179 | 180 | 181 | software.amazon.awssdk 182 | identity-spi 183 | ${awssdk.version} 184 | 185 | 186 | io.netty 187 | netty-codec-http 188 | ${netty.version} 189 | 190 | 191 | io.netty 192 | netty-codec-http2 193 | ${netty.version} 194 | 195 | 196 | io.netty 197 | netty-codec 198 | ${netty.version} 199 | 200 | 201 | io.netty 202 | netty-transport 203 | ${netty.version} 204 | 205 | 206 | io.netty 207 | netty-resolver 208 | ${netty.version} 209 | 210 | 211 | io.netty 212 | netty-common 213 | ${netty.version} 214 | 215 | 216 | io.netty 217 | netty-buffer 218 | ${netty.version} 219 | 220 | 221 | io.netty 222 | netty-handler 223 | ${netty.version} 224 | 225 | 226 | io.netty 227 | netty-transport-native-epoll 228 | ${netty.version} 229 | 230 | 231 | io.netty 232 | netty-transport-native-unix-common 233 | 
${netty.version} 234 | 235 | 236 | com.typesafe.netty 237 | netty-reactive-streams-http 238 | ${netty-reactive.version} 239 | 240 | 241 | com.typesafe.netty 242 | netty-reactive-streams 243 | ${netty-reactive.version} 244 | 245 | 246 | org.reactivestreams 247 | reactive-streams 248 | 1.0.3 249 | 250 | 251 | com.google.guava 252 | guava 253 | 32.1.1-jre 254 | 255 | 256 | com.google.code.findbugs 257 | jsr305 258 | 3.0.2 259 | 260 | 261 | org.checkerframework 262 | checker-qual 263 | 2.5.2 264 | 265 | 266 | com.google.errorprone 267 | error_prone_annotations 268 | 2.7.1 269 | 270 | 271 | com.google.j2objc 272 | j2objc-annotations 273 | 1.3 274 | 275 | 276 | org.codehaus.mojo 277 | animal-sniffer-annotations 278 | 1.20 279 | 280 | 281 | com.google.protobuf 282 | protobuf-java 283 | 4.27.5 284 | 285 | 286 | org.apache.commons 287 | commons-lang3 288 | 3.14.0 289 | 290 | 291 | org.slf4j 292 | slf4j-api 293 | 2.0.13 294 | 295 | 296 | io.reactivex.rxjava3 297 | rxjava 298 | 3.1.8 299 | 300 | 301 | com.fasterxml.jackson.dataformat 302 | jackson-dataformat-cbor 303 | ${fasterxml-jackson.version} 304 | 305 | 306 | com.fasterxml.jackson.core 307 | jackson-core 308 | ${fasterxml-jackson.version} 309 | 310 | 311 | com.fasterxml.jackson.core 312 | jackson-databind 313 | ${fasterxml-jackson.version} 314 | 315 | 316 | com.fasterxml.jackson.core 317 | jackson-annotations 318 | ${fasterxml-jackson.version} 319 | 320 | 321 | software.amazon 322 | flow 323 | 1.7 324 | 325 | 326 | org.apache.httpcomponents 327 | httpclient 328 | 4.5.13 329 | 330 | 331 | commons-codec 332 | commons-codec 333 | 1.15 334 | 335 | 336 | org.apache.httpcomponents 337 | httpcore 338 | 4.4.15 339 | 340 | 341 | com.amazon.ion 342 | ion-java 343 | 1.11.4 344 | 345 | 346 | software.amazon.glue 347 | schema-registry-serde 348 | 1.1.19 349 | 350 | 351 | com.amazonaws 352 | aws-java-sdk-sts 353 | 354 | 355 | 356 | 357 | joda-time 358 | joda-time 359 | 2.10.13 360 | 361 | 362 | ch.qos.logback 363 | logback-classic 364 | ${logback.version} 365 | 366 | 367 | ch.qos.logback 368 | logback-core 369 | ${logback.version} 370 | 371 | 372 | com.beust 373 | jcommander 374 | 1.82 375 | 376 | 377 | commons-io 378 | commons-io 379 | 2.16.1 380 | 381 | 382 | commons-logging 383 | commons-logging 384 | 1.1.3 385 | 386 | 387 | org.apache.commons 388 | commons-collections4 389 | 4.4 390 | 391 | 392 | commons-beanutils 393 | commons-beanutils 394 | 1.11.0 395 | 396 | 397 | commons-collections 398 | commons-collections 399 | 3.2.2 400 | 401 | 402 | 403 | -------------------------------------------------------------------------------- /samples/basic_sample/consumer/sample.properties: -------------------------------------------------------------------------------- 1 | # The script that abides by the multi-language protocol. This script will 2 | # be executed by the MultiLangDaemon, which will communicate with this script 3 | # over STDIN and STDOUT according to the multi-language protocol. 4 | executableName = node sample_kcl_app.js 5 | 6 | # The name of an Amazon Kinesis stream to process. 7 | streamName = kclnodejssample 8 | 9 | # Used by the KCL as the name of this application. Will be used as the name 10 | # of an Amazon DynamoDB table which will store the lease and checkpoint 11 | # information for workers with this application name 12 | applicationName = kclnodejssample 13 | 14 | # Users can change the credentials provider the KCL will use to retrieve credentials. 
15 | # Expected key name (case-sensitive): 16 | # AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch 17 | # The DefaultCredentialsProvider checks several other providers, which is 18 | # described here: 19 | # https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html 20 | AwsCredentialsProvider = DefaultCredentialsProvider 21 | 22 | # Appended to the user agent of the KCL. Does not impact the functionality of the 23 | # KCL in any other way. 24 | processingLanguage = nodejs/0.10 25 | 26 | # Valid options are TRIM_HORIZON or LATEST. 27 | # See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 28 | initialPositionInStream = TRIM_HORIZON 29 | 30 | # The following properties are also available for configuring the KCL Worker that is created 31 | # by the MultiLangDaemon. 32 | 33 | # Region of the stream for the KCL. 34 | regionName = us-east-1 35 | 36 | # Failover time in milliseconds. A worker that does not renew its lease within this time interval 37 | # will be regarded as having problems and its shards will be assigned to other workers. 38 | # For applications that have a large number of shards, this may be set to a higher number to reduce 39 | # the number of DynamoDB IOPS required for tracking leases. 40 | #failoverTimeMillis = 10000 41 | 42 | # A worker id that uniquely identifies this worker among all workers using the same applicationName. 43 | # If this isn't provided, a MultiLangDaemon instance will assign a unique workerId to itself. 44 | #workerId = 45 | 46 | # Shard sync interval in milliseconds - i.e. wait for this long between shard sync tasks. 47 | #shardSyncIntervalMillis = 60000 48 | 49 | # Max records to fetch from Kinesis in a single GetRecords call. 50 | #maxRecords = 10000 51 | 52 | # Idle time between record reads in milliseconds. 53 | #idleTimeBetweenReadsInMillis = 1000 54 | 55 | # Enables applications to flush/checkpoint (if they have some data "in progress" but don't get new data for a while). 56 | #callProcessRecordsEvenForEmptyRecordList = false 57 | 58 | # Interval in milliseconds between polling to check for parent shard completion. 59 | # Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on 60 | # completion of parent shards). 61 | #parentShardPollIntervalMillis = 10000 62 | 63 | # Clean up leases upon shard completion (don't wait until they expire in Kinesis). 64 | # Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try 65 | # to delete the ones we don't need any longer. 66 | #cleanupLeasesUponShardCompletion = true 67 | 68 | # Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures). 69 | #taskBackoffTimeMillis = 500 70 | 71 | # Buffer metrics for at most this long before publishing to CloudWatch. 72 | #metricsBufferTimeMillis = 10000 73 | 74 | # Buffer at most this many metrics before publishing to CloudWatch. 75 | #metricsMaxQueueSize = 10000 76 | 77 | # KCL will validate client-provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls 78 | # to RecordProcessorCheckpointer#checkpoint(String) by default. 79 | #validateSequenceNumberBeforeCheckpointing = true 80 | 81 | # The maximum number of active threads for the MultiLangDaemon to permit.
82 | # If a value is provided then a FixedThreadPool is used with the maximum 83 | # active threads set to the provided value. If a non-positive integer or no 84 | # value is provided a CachedThreadPool is used. 85 | #maxActiveThreads = 0 86 | 87 | # By default, KCL will emit metrics for Operation, ShardId, and WorkerIdentifier dimensions 88 | # Specify the specific dimensions to emit metrics for 89 | #metricsEnabledDimensions = Operation,ShardId 90 | 91 | ################### KclV3 configurations ################### 92 | # NOTE : These are just test configurations to show how to customize 93 | # all possible KCLv3 configurations. They are not necessarily the best 94 | # default values to use for production. 95 | 96 | # Coordinator config 97 | # Version the KCL needs to operate in. For more details check the KCLv3 migration 98 | # documentation. Default is CLIENT_VERSION_CONFIG_3X 99 | # clientVersionConfig = 100 | # Configurations to control how the CoordinatorState DDB table is created 101 | # Default name is applicationName-CoordinatorState in PAY_PER_REQUEST, 102 | # with PITR and deletion protection disabled and no tags 103 | # coordinatorStateTableName = 104 | # coordinatorStateBillingMode = 105 | # coordinatorStateReadCapacity = 106 | # coordinatorStateWriteCapacity = 107 | # coordinatorStatePointInTimeRecoveryEnabled = 108 | # coordinatorStateDeletionProtectionEnabled = 109 | # coordinatorStateTags = 110 | 111 | # Graceful handoff config - tuning of the shutdown behavior during lease transfers 112 | # default values are 30000 and true respectively 113 | # gracefulLeaseHandoffTimeoutMillis = 114 | # isGracefulLeaseHandoffEnabled = 115 | 116 | # WorkerMetricStats table config - control how the DDB table is created 117 | # Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST, 118 | # with PITR and deletion protection disabled and no tags 119 | # workerMetricsTableName = 120 | # workerMetricsBillingMode = 121 | # workerMetricsReadCapacity = 122 | # workerMetricsWriteCapacity = 123 | # workerMetricsPointInTimeRecoveryEnabled = 124 | # workerMetricsDeletionProtectionEnabled = 125 | # workerMetricsTags = 126 | 127 | # WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm 128 | # 129 | # frequency of capturing worker metrics in memory. Default is 1s 130 | # inMemoryWorkerMetricsCaptureFrequencyMillis = 131 | 132 | # frequency of reporting worker metric stats to storage. Default is 30s 133 | # workerMetricsReporterFreqInMillis = 134 | 135 | # No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10 136 | # noOfPersistedMetricsPerWorkerMetrics = 137 | 138 | # Disable use of worker metrics to balance lease, default is false. 139 | # If it is true, the algorithm balances lease based on worker's processing throughput. 140 | # disableWorkerMetrics = 141 | 142 | # Max throughput per host 10 MBps, to limit processing to the given value 143 | # Default is unlimited. 144 | 145 | # maxThroughputPerHostKBps = 146 | # Dampen the load that is rebalanced during lease re-balancing, default is 60% 147 | # dampeningPercentage = 148 | # Configures the allowed variance range for worker utilization. The upper 149 | # limit is calculated as average * (1 + reBalanceThresholdPercentage/100). 150 | # The lower limit is average * (1 - reBalanceThresholdPercentage/100). If 151 | # any worker's utilization falls outside this range, lease re-balancing is 152 | # triggered. 
The re-balancing algorithm aims to bring variance within the 153 | # specified range. It also avoids thrashing by ensuring the utilization of 154 | # the worker receiving the load after re-balancing doesn't exceed the fleet 155 | # average. This might cause no re-balancing action even if the utilization is 156 | # out of the variance range. The default value is 10, representing +/-10% 157 | # variance from the average value. 158 | # reBalanceThresholdPercentage = 159 | 160 | # Whether at least one lease must be taken from a high-utilization worker 161 | # during re-balancing when that worker has no lease whose 162 | # throughput is less than or equal to the minimum throughput that needs to be 163 | # moved away from that worker to bring the worker back into the allowed variance. 164 | # Default is true. 165 | # allowThroughputOvershoot = 166 | 167 | # Lease assignment is performed every failoverTimeMillis, but re-balancing will 168 | # be attempted only once in 5 times based on the config below. Default is 3. 169 | # varianceBalancingFrequency = 170 | 171 | # Alpha value used for calculating exponential moving average of worker's metricStats. 172 | # workerMetricsEMAAlpha = 173 | # Duration after which workerMetricStats entry from WorkerMetricStats table will 174 | # be cleaned up. 175 | # Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days) 176 | # Refer to the Duration.parse javadocs for more details. 177 | # staleWorkerMetricsEntryCleanupDuration = 178 | -------------------------------------------------------------------------------- /samples/basic_sample/consumer/sample_kcl_app.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var util = require('util'); 9 | var kcl = require('../../..'); 10 | var logger = require('../../util/logger'); 11 | 12 | /** 13 | * A simple implementation of the record processor (consumer) that writes the data to a log file. 14 | * 15 | * Be careful not to use 'stderr'/'stdout'/'console' as the log destination since they are used to communicate with the 16 | * {https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java MultiLangDaemon}. 17 | */ 18 | 19 | function recordProcessor() { 20 | var log = logger().getLogger('recordProcessor'); 21 | var shardId; 22 | 23 | return { 24 | 25 | initialize: function(initializeInput, completeCallback) { 26 | shardId = initializeInput.shardId; 27 | 28 | completeCallback(); 29 | }, 30 | 31 | processRecords: function(processRecordsInput, completeCallback) { 32 | if (!processRecordsInput || !processRecordsInput.records) { 33 | completeCallback(); 34 | return; 35 | } 36 | var records = processRecordsInput.records; 37 | var record, data, sequenceNumber, partitionKey; 38 | for (var i = 0 ; i < records.length ; ++i) { 39 | record = records[i]; 40 | data = Buffer.from(record.data, 'base64').toString(); 41 | sequenceNumber = record.sequenceNumber; 42 | partitionKey = record.partitionKey; 43 | log.info(util.format('ShardID: %s, Record: %s, SequenceNumber: %s, PartitionKey: %s', shardId, data, sequenceNumber, partitionKey)); 44 | } 45 | if (!sequenceNumber) { 46 | completeCallback(); 47 | return; 48 | } 49 | // If checkpointing, completeCallback should only be called once checkpoint is complete.
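// The checkpoint is persisted through the MultiLangDaemon in the application's DynamoDB table, so another worker can resume from this sequence number if this one stops.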
50 | processRecordsInput.checkpointer.checkpoint(sequenceNumber, function(err, sequenceNumber) { 51 | log.info(util.format('Checkpoint successful. ShardID: %s, SequenceNumber: %s', shardId, sequenceNumber)); 52 | completeCallback(); 53 | }); 54 | }, 55 | 56 | leaseLost: function(leaseLostInput, completeCallback) { 57 | log.info(util.format('Lease was lost for ShardId: %s', shardId)); 58 | completeCallback(); 59 | }, 60 | 61 | shardEnded: function(shardEndedInput, completeCallback) { 62 | log.info(util.format('ShardId: %s has ended. Will checkpoint now.', shardId)); 63 | shardEndedInput.checkpointer.checkpoint(function(err) { 64 | completeCallback(); 65 | }); 66 | }, 67 | 68 | shutdownRequested: function(shutdownRequestedInput, completeCallback) { 69 | shutdownRequestedInput.checkpointer.checkpoint(function (err) { 70 | completeCallback(); 71 | }); 72 | } 73 | }; 74 | } 75 | 76 | kcl(recordProcessor()).run(); 77 | -------------------------------------------------------------------------------- /samples/basic_sample/producer/config.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var config = module.exports = { 9 | kinesis : { 10 | region : 'us-east-1' 11 | }, 12 | 13 | sampleProducer : { 14 | stream : 'kclnodejssample', 15 | shards : 2, 16 | waitBetweenDescribeCallsInSeconds : 5 17 | } 18 | }; 19 | -------------------------------------------------------------------------------- /samples/basic_sample/producer/sample_kinesis_producer_app.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var AWS = require('aws-sdk'); 9 | var config = require('./config'); 10 | var producer = require('./sample_producer'); 11 | 12 | var kinesis = new AWS.Kinesis({region : config.kinesis.region}); 13 | producer(kinesis, config.sampleProducer).run(); 14 | -------------------------------------------------------------------------------- /samples/basic_sample/producer/sample_producer.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var util = require('util'); 9 | var logger = require('../../util/logger'); 10 | 11 | function sampleProducer(kinesis, config) { 12 | var log = logger().getLogger('sampleProducer'); 13 | 14 | function _createStreamIfNotCreated(callback) { 15 | var params = { 16 | ShardCount : config.shards, 17 | StreamName : config.stream 18 | }; 19 | 20 | kinesis.createStream(params, function(err, data) { 21 | if (err) { 22 | if (err.code !== 'ResourceInUseException') { 23 | callback(err); 24 | return; 25 | } 26 | else { 27 | log.info(util.format('%s stream is already created. Re-using it.', config.stream)); 28 | } 29 | } 30 | else { 31 | log.info(util.format("%s stream doesn't exist. Created a new stream with that name.", config.stream)); 32 | } 33 | 34 | // Poll to make sure the stream is in ACTIVE state before starting to push data.
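// _waitForStreamToBecomeActive polls describeStream every config.waitBetweenDescribeCallsInSeconds until the stream status is ACTIVE.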
35 | _waitForStreamToBecomeActive(callback); 36 | }); 37 | } 38 | 39 | function _waitForStreamToBecomeActive(callback) { 40 | kinesis.describeStream({StreamName : config.stream}, function(err, data) { 41 | if (!err) { 42 | log.info(util.format('Current status of the stream is %s.', data.StreamDescription.StreamStatus)); 43 | if (data.StreamDescription.StreamStatus === 'ACTIVE') { 44 | callback(null); 45 | } 46 | else { 47 | setTimeout(function() { 48 | _waitForStreamToBecomeActive(callback); 49 | }, 1000 * config.waitBetweenDescribeCallsInSeconds); 50 | } 51 | } 52 | }); 53 | } 54 | 55 | function _writeToKinesis() { 56 | var currTime = new Date().getMilliseconds(); 57 | var sensor = 'sensor-' + Math.floor(Math.random() * 100000); 58 | var reading = Math.floor(Math.random() * 1000000); 59 | 60 | var record = JSON.stringify({ 61 | time : currTime, 62 | sensor : sensor, 63 | reading : reading 64 | }); 65 | 66 | var recordParams = { 67 | Data : record, 68 | PartitionKey : sensor, 69 | StreamName : config.stream 70 | }; 71 | 72 | kinesis.putRecord(recordParams, function(err, data) { 73 | if (err) { 74 | log.error(err); 75 | } 76 | else { 77 | log.info('Successfully sent data to Kinesis.'); 78 | } 79 | }); 80 | } 81 | 82 | return { 83 | run: function() { 84 | _createStreamIfNotCreated(function(err) { 85 | if (err) { 86 | log.error(util.format('Error creating stream: %s', err)); 87 | return; 88 | } 89 | var count = 0; 90 | while (count < 10) { 91 | setTimeout(_writeToKinesis, 1000); 92 | count++; 93 | } 94 | }); 95 | } 96 | }; 97 | } 98 | 99 | module.exports = sampleProducer; 100 | -------------------------------------------------------------------------------- /samples/click_stream_sample/README.md: -------------------------------------------------------------------------------- 1 | # How to Process Clickstream Data Using Amazon Kinesis for Node.js 2 | 3 | This README shows how to send a stream of records to [Amazon Kinesis][amazon-kinesis] through the implementation of an application that consumes and processes the records in near real time using the [Amazon Kinesis Client Library][amazon-kcl](KCL) for Node.js. The scenario for this README is to show how to ingest a stream of clickstream data and write a simple consumer using the KCL to process, batch, and upload data to Amazon S3 for further processing. This is a common use case for using Amazon Kinesis. 4 | 5 | You can work through this README on your desktop or laptop and run both the producer and consumer code on the same machine. You can also run this sample on Amazon EC2 using the Amazon CloudFormation template provided. 6 | 7 | Clickstream data is simulated in the sample code, and the clickstream data is evenly spread across all the shards of the Amazon Kinesis stream. 8 | 9 | **Note:** 10 | 11 | After you create a stream, your account incurs nominal charges for Amazon Kinesis usage because Amazon Kinesis is not eligible for the AWS free tier. After the consumer application starts, it also incurs nominal charges for Amazon DynamoDB usage. DynamoDB is used by the consumer application to track the processing state. When you are finished with this tutorial, delete your AWS resources to stop incurring charges. If you use the provided CloudFormation template to run this sample on Amazon EC2, the template takes care of cleaning up resources when you delete the associated CloudFormation stack. 12 | 13 | ## Before you start 14 | 15 | * Before you begin, you need an AWS account. 
For more information about creating an AWS account and retrieving your AWS credentials, go to [AWS Security Credentials](http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html). 16 | * Familiarize yourself with Amazon Kinesis concepts such as streams, shards, producers, and consumers. For more information, see [Amazon Kinesis concepts](http://docs.aws.amazon.com/kinesis/latest/dev/key-concepts.html) and the tutorials. 17 | * To run the sample code, you need Node.js and NPM installed on your computer. The Amazon KCL for Node.js uses the [MultiLangDaemon][multi-lang-daemon] provided by [Amazon KCL for Java][amazon-kcl-github]. To run the Amazon KCL for Node.js samples, you also need to install the [Java JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html). 18 | 19 | ## Producer 20 | 21 | This section explains how to implement an application to ingest a continuous stream of clickstream data into Amazon Kinesis. This role is known as the Amazon Kinesis producer. You need to create an Amazon Kinesis stream to allow the producer to ingest data into Amazon Kinesis. The producer application creates a stream based on the configuration values in the file producer/config.js, or you can create your own from the [Amazon Kinesis console](https://console.aws.amazon.com/kinesis). If you create your own stream with a different name than the default in the sample code, edit the stream name in producer/config.js and the producer application will pick up that change. 22 | 23 | ### Clickstream producer 24 | 25 | * Reads configuration and creates an Amazon Kinesis stream if the specified stream doesn't exist in the specified region. 26 | * Waits for the stream to become ACTIVE by polling Amazon Kinesis using the describeStream operation. 27 | * Continuously retrieves random clickstream data records, batches them up to a value specified by config.clickStreamProducer.recordsToWritePerBatch, and makes a [PutRecords][nodejs-kinesis-putrecords] call to write all records to the Amazon Kinesis stream. 28 | 29 | ```javascript 30 | // Use putRecords API to batch more than one record. 31 | for (var i = 0; i < totalRecords; i++) { 32 | data = clickStreamGen.getRandomClickStreamData(); 33 | 34 | record = { 35 | Data: JSON.stringify(data), 36 | PartitionKey: data.resource 37 | }; 38 | 39 | records.push(record); 40 | } 41 | 42 | var recordsParams = { 43 | Records: records, 44 | StreamName: config.stream 45 | }; 46 | 47 | kinesis.putRecords(recordsParams, function(err, data) { 48 | if (err) { 49 | console.log(err); 50 | } 51 | else { 52 | console.log(util.format("Sent %d records with %d failures ..", records.length, data.FailedRecordCount)); 53 | } 54 | }); 55 | ``` 56 | 57 | ### Clickstream records 58 | 59 | A clickstream record consists of a resource and a referrer. 60 | 61 | ```javascript 62 | var data = { 63 | "resource": "resource-1", 64 | "referrer": "http://www.amazon.com/" 65 | }; 66 | ``` 67 | 68 | ### Clickstream producer configuration 69 | 70 | The producer/config.js file contains the configurations supported by the producer application, which are shown below. You can change any configuration values in producer/config.js as needed. 71 | 72 | ```javascript 73 | var config = module.exports = { 74 | kinesis : { 75 | // Region for the Amazon Kinesis stream. 76 | region : 'us-east-1' 77 | }, 78 | 79 | clickStreamProducer : { 80 | // The Amazon Kinesis stream to ingest clickstream data into.
If the specified 81 | // stream doesn't exist, the producer application creates a new stream. 82 | stream : 'kclnodejsclickstreamsample', 83 | 84 | // Total shards in the specified Amazon Kinesis stream. 85 | shards : 2, 86 | 87 | // The producer application batches clickstream records in to the size specified 88 | // here, and makes a single PutRecords API call to ingest all records to the 89 | // stream. 90 | recordsToWritePerBatch : 5, 91 | 92 | // If the producer application creates a stream, it has to wait for the stream to 93 | // transition to ACTIVE state before it can start putting data in it. This 94 | // specifies the wait time between consecutive describeStream calls. 95 | waitBetweenDescribeCallsInSeconds : 5, 96 | 97 | // Transactions per second for the PutRecords call to make sure the producer 98 | // doesn't hit throughput limits enforced by Amazon Kinesis. 99 | putRecordsTps : 20 100 | } 101 | }; 102 | ``` 103 | For more information about throughput limits, see [Amazon Kinesis Limits](http://docs.aws.amazon.com/kinesis/latest/dev/service-sizes-and-limits.html). 104 | 105 | ### Run producer on a local computer 106 | 107 | To run the data producer, execute the following commands from the root of the repository: 108 | 109 | ```sh 110 | cd samples/click_stream_sample/producer 111 | node click_stream_producer_app.js 112 | ``` 113 | 114 | **Note:** 115 | 116 | To run a sample application on Amazon EC2, see the section 'Running on Amazon EC2' later in this README. 117 | 118 | ## Implement a basic processing application using the Amazon KCL for Node.js 119 | 120 | This basic application processes records from an Amazon Kinesis stream using [nodejs-kcl][nodejs-kcl], batching records up to 1 MB (configurable) and sends them to a specified Amazon S3 bucket for further offline processing. You can extend this application to perform some processing on the data (e.g., a rolling window count) before sending data to S3. For more information, see [developing-consumer-applications-with-kcl][amazon-kcl]. 121 | 122 | ### Clickstream consumer configuration 123 | The consumer/config.js file contains configurations supported by the consumer application. It exposes The following configurations. You can change any configuration values in consumer/config.js as needed. 124 | 125 | ```javascript 126 | var config = module.exports = { 127 | s3 : { 128 | // Region for Amazon S3. Defaults to us-east-1. 129 | // region : '', 130 | 131 | // Amazon S3 bucket to store batched clickstream data. The consumer application 132 | // may create a new bucket (based on S3.createBucketIfNotPresent value), 133 | // if the specified bucket doesn't exist. 134 | bucket : 'kinesis-clickstream-batchdata', 135 | 136 | // Enables the consumer application to create a new S3 bucket if the specified 137 | // bucket doesn't exist. 138 | createBucketIfNotPresent : true 139 | }, 140 | 141 | clickStreamProcessor : { 142 | // Maximum batch size in bytes before sending data to S3. 143 | maxBufferSize : 1024 * 1024 144 | } 145 | }; 146 | ``` 147 | 148 | ### The consumer/Amazon KCL interface 149 | The Amazon KCL for Node.js expects applications to pass an object that implements the following three functions: 150 | 151 | * initialize 152 | * processRecords 153 | * shutdown 154 | 155 | **Note:** 156 | 157 | The Amazon KCL for Node.js uses stdin/stdout to interact with the [MultiLangDaemon][multi-lang-daemon]. Do not point your application logs to stdout/stderr. 
If your logs point to stdout/stderr, the log output will get mingled with [MultiLangDaemon][multi-lang-daemon], which makes it really difficult to find consumer-specific log events. This consumer uses a logging library to redirect all application logs to a file called application.log. Make sure to follow a similar pattern while developing consumer applications with the Amazon KCL for Node.js. For more information about the protocol between the MultiLangDaemon and Amazon KCL for Node.js, see [MultiLangDaemon][multi-lang-daemon]. 158 | 159 | ```javascript 160 | /** 161 | * A simple implementation of RecordProcessor that accepts records from an Amazon 162 | * Kinesis stream and batches them into 1 MB (configurable) datasets, then puts 163 | * them in a configured S3 bucket for further offline processing. The object 164 | * returned should implement the functions initialize, processRecords, and shutdown 165 | * in order to enable the KCL to interact with MultiLangDaemon. 166 | * MultiLangDaemon would create one child process (hence one RecordProcessor instance) 167 | * per shard. A single shard will never be accessed by more than one 168 | * RecordProcessor instance; e.g., if you run this sample on a single machine, 169 | * against a stream with 2 shards, MultiLangDaemon would create 2 child 170 | * Node.js processes (RecordProcessor), one for each shard. 171 | */ 172 | function clickStreamProcessor(emitter, cfg) { 173 | // return an object that implements the initialize, processRecords, and shutdown functions. 174 | } 175 | ``` 176 | 177 | #### initialize(initializeInput, completeCallback) 178 | 179 | ```javascript 180 | /** 181 | * This function is called by the KCL to allow application initialization before it 182 | * starts processing Amazon Kinesis records. The KCL won't start processing records until the 183 | * application is successfully initialized and completeCallback is called. 184 | */ 185 | initialize: function(initializeInput, completeCallback) { 186 | shardId = initializeInput.shardId; 187 | // The KCL for Node.js does not allow more than one outstanding checkpoint. So checkpoint must 188 | // be done sequentially. Async queue with 1 concurrency will allow executing checkpoints 189 | // one after another. 190 | commitQueue = async.queue(_commit, 1); 191 | 192 | emitter.initialize(function(err) { 193 | if (err) { 194 | log.error(util.format('Error initializing emitter: %s', err)); 195 | process.exit(1); 196 | } 197 | else { 198 | log.info('Click stream processor successfully initialized.'); 199 | completeCallback(); 200 | } 201 | }); 202 | } 203 | ``` 204 | 205 | #### processRecords(processRecordsInput, completeCallback) 206 | 207 | ```javascript 208 | /** 209 | * Called by the KCL with a list of records to be processed and a checkpointer. 210 | * A record looks like - 211 | * '{"data":"","partitionKey":"someKey","sequenceNumber":"1234567890"}' 212 | * Note that "data" is a base64-encoded string. You can use the Buffer class to decode the data 213 | * into a string. The checkpointer can be used to checkpoint a particular sequence number. 214 | * Any checkpoint call should be made before calling completeCallback. The KCL ingests the next 215 | * batch of records only after completeCallback is called. 216 | */ 217 | processRecords: function(processRecordsInput, completeCallback) { 218 | // Record processing... 219 | // Checkpoint if you need to. 220 | // call completeCallback() to allow the KCL to ingest the next batch of records. 
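    // The lines below are an illustrative sketch only (not this sample's real
    // implementation, which buffers records and uploads them to S3 and is shown
    // later with click_stream_consumer.js). They show the typical shape of a
    // processRecords body: decode each base64 payload, remember the last
    // sequence number, checkpoint, then signal completion.
    var records = processRecordsInput.records;
    if (!records || records.length === 0) {
      completeCallback();
      return;
    }
    var lastSequenceNumber = null;
    for (var i = 0; i < records.length; i++) {
      // "data" arrives base64-encoded; decode it before processing.
      var payload = Buffer.from(records[i].data, 'base64').toString();
      lastSequenceNumber = records[i].sequenceNumber;
      // ... process or buffer `payload` here ...
    }
    processRecordsInput.checkpointer.checkpoint(lastSequenceNumber, function(err) {
      // A real application would log checkpoint errors; either way, the KCL only
      // sends the next batch after completeCallback() is called.
      completeCallback();
    });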
221 | } 222 | ``` 223 | 224 | In this sample, processRecords performs the following tasks: 225 | 226 | * Receives one or more records from the KCL. 227 | * Stores them in a local buffer 228 | * Checks if the buffer has reached maxBufferSize; if yes, sends batched data to S3. 229 | * Checkpoints after the data is successfully uploaded to S3. 230 | * Calls completeCallback() after all records are stored in the buffer. 231 | * Each call to processRecords may or may not call the checkpoint depending on whether the data was uploaded to S3. It checkpoints only after successfully uploading data to S3. This would be the most basic example of when an application should checkpoint after a unit of data is processed or persisted. 232 | 233 | #### shutdown(shutdownInput, completeCallback) 234 | 235 | ```javascript 236 | /** 237 | * Called by the KCL to indicate that this record processor should shut down. 238 | * After the shutdown operation is complete, there will not be any more calls to 239 | * any other functions of this record processor. Note that the shutdown reason 240 | * could be either TERMINATE or ZOMBIE. If ZOMBIE, clients should not 241 | * checkpoint because there is possibly another record processor which has 242 | * acquired the lease for this shard. If TERMINATE, then 243 | * checkpointer.checkpoint() should be called to checkpoint at the end of 244 | * the shard so that this processor will be shut down and new processors 245 | * will be created for the children of this shard. 246 | */ 247 | shutdown: function(shutdownInput, completeCallback) { 248 | if (shutdownInput.reason !== 'TERMINATE') { 249 | completeCallback(); 250 | return; 251 | } 252 | // Make sure to emit all remaining buffered data to S3 before shutting down. 253 | commitQueue.push({ 254 | key: shardId + '/' + buffer.getFirstSequenceNumber() + '-' + buffer.getLastSequenceNumber(), 255 | sequenceNumber: buffer.getLastSequenceNumber(), 256 | data: buffer.readAndClearRecords(), 257 | checkpointer: shutdownInput.checkpointer 258 | }, function(error) { 259 | if (error) { 260 | log.error(util.format('Received error while shutting down: %s', error)); 261 | } 262 | completeCallback(); 263 | }); 264 | } 265 | ``` 266 | 267 | ### Run the consumer on a local computer 268 | Amazon KCL for Node.js uses the [MultiLangDaemon][multi-lang-daemon] provided by [Amazon KCL for Java][amazon-kcl-github]. For more information about how MultiLangDaemon interacts with the Amazon KCL for Node.js, see [MultiLangDaemon][multi-lang-daemon]. 269 | 270 | * By default, the MultiLangDaemon uses the [DefaultAWSCredentialsProviderChain][DefaultAWSCredentialsProviderChain], so you'll want to make your credentials available to one of the credentials providers in that provider chain. There are several ways to do this. You can provide credentials through a '~/.aws/credentials' file or through environment variables (**AWS\_ACCESS\_KEY\_ID** and **AWS\_SECRET\_ACCESS\_KEY**). If you're running on Amazon EC2, you can associate an IAM role with your instance with appropriate access to Amazon Kinesis. If you use the CloudFormation template provided with sample application, it takes care of creating and associating the IAM role to your EC2 instances with the appropriate IAM policy. 271 | * The kcl-bootstrap script at /bin/kcl-bootstrap downloads [MultiLangDaemon][multi-lang-daemon] and its dependencies. This bootstrap script invokes the [MultiLangDaemon][multi-lang-daemon], which starts the Node.js consumer application as its child process. 
By default, [MultiLangDaemon][multi-lang-daemon] uses a properties file to specify configurations for accessing the Amazon Kinesis stream. Take a look at the consumer/sample.properties file provided for a list of options. Use the '-p' or '--properties' option to specify the properties file to use. 272 | * The kcl-bootstrap script uses "JAVA_HOME" to locate the java binary. To specify your own java path, use the '-j' or '--java' argument when invoking the bootstrap script. 273 | * Skip the '-e' or '--execute' argument to the bootstrap script, and it will only print the commands needed to run the KCL application on the console without actually running it. 274 | * Add REPOSITORY_ROOT/bin to your PATH to access kcl-bootstrap from anywhere. 275 | * Run the following command to find out all the options you can override when running the bootstrap script: 276 | 277 | ```sh 278 | kcl-bootstrap --help 279 | ``` 280 | 281 | * Run the following command to start the consumer application: 282 | 283 | ```sh 284 | cd samples/click_stream_sample/consumer 285 | kcl-bootstrap -p ./sample.properties -e 286 | ``` 287 | 288 | **Note:** 289 | 290 | To run a sample application on Amazon EC2, see the section 'Running on Amazon EC2' later in this README. 291 | 292 | ### Cleaning up 293 | This sample application creates an Amazon Kinesis stream, ingests data into it, and creates an Amazon DynamoDB table to track the KCL application state. It may also create an S3 bucket to store batched clickstream data. Your AWS account will incur nominal costs for these resources. After you are done, you can log in to the AWS Management Console and delete these resources. Specifically, the sample application creates the following AWS resources: 294 | 295 | * An *Amazon Kinesis stream* with the name provided in the config.js file. 296 | * An *Amazon DynamoDB table* with the same name as the applicationName provided in sample.properties. 297 | * An *Amazon S3 bucket* with the name provided in the config.js file. 298 | 299 | ## Running on Amazon EC2 300 | To make running this sample on Amazon EC2 easier, we have provided an Amazon CloudFormation template that creates an Amazon Kinesis stream, an S3 bucket, an appropriate IAM role and policy, and Auto Scaling groups for consumers and producers. You can use this template to create a CloudFormation stack. Make sure to use the same AWS region that you have specified in the config.js file (the region defaults to us-east-1, but you can use any region that supports Amazon Kinesis). This CloudFormation template also takes care of downloading and starting producer and consumer applications on EC2 instances. 301 | 302 | After the stack is created, you can: 303 | 304 | * Log on to producer instances, go to samples/click_stream_sample/producer, and look at logs/application.log for logs. 305 | * Log on to consumer instances, go to samples/click_stream_sample/consumer, and look at consumer.out for multi-lang-daemon logs and logs/application.log for consumer logs. 306 | * View batched clickstream data in S3 under the specified S3 bucket. 307 | 308 | After you are done testing the sample application, you can delete the CloudFormation stack, and it will take care of cleaning up the AWS resources it created. Keep in mind the following: 309 | 310 | * Just as with the manually-run scenario, this stack ingests data into Amazon Kinesis, stores metadata in DynamoDB, and stores clickstream data in S3, all of which will result in a nominal AWS resource cost.
This is especially important if you are planning to run the CloudFormation script for a longer duration. 311 | * You can use ProducerPutRecordsBatchSize and ProducerPutRecordsTps to decide how fast to ingest data into Amazon Kinesis. A lower number for both of these parameters will result in a slower data ingestion rate. 312 | * You must delete all files in the S3 bucket before deleting the CloudFormation script because CloudFormation only deletes empty S3 buckets. 313 | 314 | ## Summary 315 | 316 | Processing a large amount of data in near real time does not require writing any complex code or developing a huge infrastructure. It is as simple as writing logic to process a small amount of data (like writing processRecord(Record)) and letting Amazon Kinesis scale for you so that it works for a large amount of streaming data. You don’t have to worry about how your processing would scale because Amazon Kinesis handles it for you. Spend your time designing and developing the logic for your ingestion (producer) and processing (consumer), and let Amazon Kinesis do the rest. 317 | 318 | ## Next steps 319 | * For more information about the KCL, see [Developing Consumer Applications for Amazon Kinesis using the Amazon Kinesis Client Library][amazon-kcl]. 320 | * For more information about how to optimize your application, see [Advanced Topics for Amazon Kinesis Applications][advanced-kcl-topics]. 321 | 322 | [amazon-kinesis]: http://aws.amazon.com/kinesis 323 | [amazon-kcl-github]: https://github.com/awslabs/amazon-kinesis-client 324 | [amazon-kinesis-docs]: http://aws.amazon.com/documentation/kinesis/ 325 | [amazon-kinesis-shard]: http://docs.aws.amazon.com/kinesis/latest/dev/key-concepts.html 326 | [amazon-kcl]: http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumer-apps-with-kcl.html 327 | [nodejs-kinesis-putrecords]: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Kinesis.html#putRecords-property 328 | [nodejs-kcl]: https://github.com/awslabs/amazon-kinesis-client-nodejs 329 | [advanced-kcl-topics]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-advanced.html 330 | [aws-sdk-node]: http://aws.amazon.com/sdk-for-node-js/ 331 | [multi-lang-daemon]: https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java 332 | [DefaultAWSCredentialsProviderChain]: http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html 333 | [kinesis-forum]: http://developer.amazonwebservices.com/connect/forum.jspa?forumID=169 334 | [aws-console]: http://aws.amazon.com/console/ 335 | -------------------------------------------------------------------------------- /samples/click_stream_sample/cloudformation/nodejs-kcl-clickstream.template: -------------------------------------------------------------------------------- 1 | { 2 | "AWSTemplateFormatVersion" : "2010-09-09", 3 | 4 | "Description" : "The Amazon Kinesis click stream sample for Node.js KCL.", 5 | 6 | "Parameters" : { 7 | "ProducerInstanceType" : { 8 | "Description" : "EC2 instance type for producer node", 9 | "Type" : "String", 10 | "Default" : "t2.micro", 11 | "AllowedValues" : [ "t2.micro", "t2.small", "t2.medium", "m3.medium", "m3.large", "m3.xlarge", "m3.2xlarge", "c3.large", "c3.xlarge", "c3.2xlarge", "c3.4xlarge", "c3.8xlarge" ], 12 | "ConstraintDescription" : "must be a supported EC2 instance type for this template." 
13 | }, 14 | 15 | "ConsumerInstanceType" : { 16 | "Description" : "EC2 instance type for consumer node", 17 | "Type" : "String", 18 | "Default" : "t2.micro", 19 | "AllowedValues" : [ "t2.micro", "t2.small", "t2.medium", "m3.medium", "m3.large", "m3.xlarge", "m3.2xlarge", "c3.large", "c3.xlarge", "c3.2xlarge", "c3.4xlarge", "c3.8xlarge" ], 20 | "ConstraintDescription" : "must be a supported EC2 instance type for this template." 21 | }, 22 | 23 | "ProducerClusterSize" : { 24 | "Description" : "Total producer instances", 25 | "Type" : "Number", 26 | "MinValue" : "1", 27 | "Default" : "1" 28 | }, 29 | 30 | "ConsumerClusterSize" : { 31 | "Description" : "Total consumer instances", 32 | "Type" : "Number", 33 | "MinValue" : "1", 34 | "Default" : "1" 35 | }, 36 | 37 | "NumberOfShards" : { 38 | "Description" : "Total shards for the kinesis stream this stack creates.", 39 | "Type" : "Number", 40 | "MinValue" : "1", 41 | "Default" : "2" 42 | }, 43 | 44 | "ProducerPutRecordsBatchSize" : { 45 | "Description" : "Total number of records to batch per putRecords API call to Kinesis.", 46 | "Type" : "Number", 47 | "MinValue" : "1", 48 | "Default" : "5" 49 | }, 50 | 51 | "ProducerPutRecordsTps" : { 52 | "Description" : "Transactions per second for PutRecords API.", 53 | "Type" : "Number", 54 | "MinValue" : "1", 55 | "Default" : "20" 56 | }, 57 | 58 | "S3BufferSizeInBytes" : { 59 | "Description" : "Maximum buffer size on consumer before putting data in S3. Defaults to 1MB.", 60 | "Type" : "Number", 61 | "MinValue" : "1", 62 | "Default" : "1048576" 63 | }, 64 | 65 | "S3BucketName" : { 66 | "Description" : "S3 bucket to send batch of processed click-stream data from consumer.", 67 | "Type" : "String", 68 | "Default" : "", 69 | "MinLength" : "0", 70 | "MaxLength" : "255", 71 | "AllowedPattern" : "[\\x20-\\x7E]*", 72 | "ConstraintDescription" : "can contain only ASCII characters." 73 | }, 74 | 75 | "KeyName" : { 76 | "Description" : "(Optional) Name of an existing EC2 KeyPair to enable SSH access to the instance. If this is not provided you will not be able to SSH on to the EC2 instance.", 77 | "Type" : "String", 78 | "Default" : "", 79 | "MinLength" : "0", 80 | "MaxLength" : "255", 81 | "AllowedPattern" : "[\\x20-\\x7E]*", 82 | "ConstraintDescription" : "can contain only ASCII characters." 83 | }, 84 | 85 | "SSHLocation" : { 86 | "Description" : "The IP address range that can be used to SSH to the EC2 instances", 87 | "Type" : "String", 88 | "MinLength" : "9", 89 | "MaxLength" : "18", 90 | "Default" : "0.0.0.0/0", 91 | "AllowedPattern" : "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})", 92 | "ConstraintDescription" : "must be a valid IP CIDR range of the form x.x.x.x/x." 
93 | } 94 | }, 95 | 96 | "Conditions": { 97 | "UseEC2KeyName": {"Fn::Not": [{"Fn::Equals" : [{"Ref" : "KeyName"}, ""]}]} 98 | }, 99 | 100 | "Mappings" : { 101 | "AWSInstanceType2Arch" : { 102 | "t2.micro" : { "Arch" : "HVM64" }, 103 | "t2.small" : { "Arch" : "HVM64" }, 104 | "t2.medium" : { "Arch" : "HVM64" }, 105 | "m3.medium" : { "Arch" : "HVM64" }, 106 | "m3.large" : { "Arch" : "HVM64" }, 107 | "m3.xlarge" : { "Arch" : "HVM64" }, 108 | "m3.2xlarge" : { "Arch" : "HVM64" }, 109 | "c3.large" : { "Arch" : "HVM64" }, 110 | "c3.xlarge" : { "Arch" : "HVM64" }, 111 | "c3.2xlarge" : { "Arch" : "HVM64" }, 112 | "c3.4xlarge" : { "Arch" : "HVM64" }, 113 | "c3.8xlarge" : { "Arch" : "HVM64" } 114 | }, 115 | 116 | "AWSRegionArch2AMI" : { 117 | "us-east-1" : { "HVM64" : "ami-146e2a7c" }, 118 | "us-west-1" : { "HVM64" : "ami-42908907" }, 119 | "us-west-2" : { "HVM64" : "ami-dfc39aef" }, 120 | "eu-west-1" : { "HVM64" : "ami-9d23aeea" }, 121 | "eu-central-1" : { "HVM64" : "ami-04003319" }, 122 | "ap-southeast-1" : { "HVM64" : "ami-96bb90c4" }, 123 | "ap-southeast-2" : { "HVM64" : "ami-d50773ef" }, 124 | "ap-northeast-1" : { "HVM64" : "ami-18869819" } 125 | } 126 | }, 127 | 128 | "Resources" : { 129 | "Ec2SecurityGroup" : { 130 | "Type" : "AWS::EC2::SecurityGroup", 131 | "Properties" : { 132 | "GroupDescription" : "Enable SSH access and HTTP access on the inbound port", 133 | "SecurityGroupIngress" : 134 | [{ "IpProtocol" : "tcp", "FromPort" : "22", "ToPort" : "22", "CidrIp" : { "Ref" : "SSHLocation"} }, 135 | { "IpProtocol" : "tcp", "FromPort" : "80", "ToPort" : "80", "CidrIp" : "0.0.0.0/0"}] 136 | } 137 | }, 138 | 139 | "KinesisStream" : { 140 | "Type" : "AWS::Kinesis::Stream", 141 | "Properties" : { 142 | "ShardCount" : { "Ref" : "NumberOfShards" } 143 | } 144 | }, 145 | 146 | "KCLDynamoDBTable" : { 147 | "Type" : "AWS::DynamoDB::Table", 148 | "Properties" : { 149 | "AttributeDefinitions" : [ 150 | { 151 | "AttributeName" : "leaseKey", 152 | "AttributeType" : "S" 153 | } 154 | ], 155 | "KeySchema" : [ 156 | { 157 | "AttributeName" : "leaseKey", 158 | "KeyType" : "HASH" 159 | } 160 | ], 161 | "ProvisionedThroughput" : { 162 | "ReadCapacityUnits" : "10", 163 | "WriteCapacityUnits" : "5" 164 | } 165 | } 166 | }, 167 | 168 | "S3Bucket": { 169 | "Type" : "AWS::S3::Bucket", 170 | "Properties" : { 171 | "BucketName" : {"Ref":"S3BucketName"} 172 | }, 173 | "DeletionPolicy" : "Delete" 174 | }, 175 | 176 | "RootRole": { 177 | "Type" : "AWS::IAM::Role", 178 | "Properties" : { 179 | "AssumeRolePolicyDocument": { 180 | "Version" : "2012-10-17", 181 | "Statement" : [ { 182 | "Effect" : "Allow", 183 | "Principal" : { 184 | "Service" : [ "ec2.amazonaws.com" ] 185 | }, 186 | "Action" : [ "sts:AssumeRole" ] 187 | } ] 188 | }, 189 | "Path" : "/" 190 | } 191 | }, 192 | 193 | "RolePolicies" : { 194 | "Type" : "AWS::IAM::Policy", 195 | "Properties" : { 196 | "PolicyName" : "root", 197 | "PolicyDocument" : { 198 | "Version" : "2012-10-17", 199 | "Statement" : [ { 200 | "Effect" : "Allow", 201 | "Action" : "kinesis:*", 202 | "Resource" : { "Fn::Join" : [ "", [ "arn:aws:kinesis:", { "Ref" : "AWS::Region" }, ":", { "Ref" : "AWS::AccountId" }, ":stream/", { "Ref" : "KinesisStream" } ]]} 203 | }, { 204 | "Effect" : "Allow", 205 | "Action" : "dynamodb:*", 206 | "Resource" : { "Fn::Join" : [ "", [ "arn:aws:dynamodb:", { "Ref" : "AWS::Region" }, ":", { "Ref" : "AWS::AccountId" }, ":table/", { "Ref" : "KCLDynamoDBTable" } ]]} 207 | }, { 208 | "Effect" : "Allow", 209 | "Action" : "cloudwatch:*", 210 | "Resource" : "*" 211 | }, { 212 | 
"Effect" : "Allow", 213 | "Action" : "s3:*", 214 | "Resource" : "*" 215 | } 216 | ] 217 | }, 218 | "Roles" : [ { "Ref": "RootRole" } ] 219 | } 220 | }, 221 | 222 | "RootInstanceProfile" : { 223 | "Type" : "AWS::IAM::InstanceProfile", 224 | "Properties" : { 225 | "Path" : "/", 226 | "Roles" : [ { "Ref": "RootRole" } ] 227 | } 228 | }, 229 | 230 | "ConsumerCluster" : { 231 | "Type" : "AWS::AutoScaling::AutoScalingGroup", 232 | "Properties" : { 233 | "AvailabilityZones" : { "Fn::GetAZs" : { "Ref" : "AWS::Region" } }, 234 | "LaunchConfigurationName" : { "Ref" : "ConsumerLaunchConfig" }, 235 | "MinSize" : { "Ref" : "ConsumerClusterSize" }, 236 | "MaxSize" : { "Ref" : "ConsumerClusterSize" }, 237 | "DesiredCapacity" : { "Ref" : "ConsumerClusterSize" }, 238 | "Tags" : [ 239 | { "Key" : "ApplicationRole", "Value" : "NodeJSClickStreamConsumer", "PropagateAtLaunch" : "true" } 240 | ] 241 | } 242 | }, 243 | 244 | "ConsumerLaunchConfig" : { 245 | "Type" : "AWS::AutoScaling::LaunchConfiguration", 246 | "Metadata" : { 247 | "Comment:" : "Run consumer for kinesis NodeJS-KCL ClickStream example.", 248 | "AWS::CloudFormation::Init" : { 249 | "config" : { 250 | } 251 | } 252 | }, 253 | "Properties" : { 254 | "KeyName" : { "Fn::If" : [ "UseEC2KeyName", { "Ref" : "KeyName" }, { "Ref" : "AWS::NoValue" } ]}, 255 | "ImageId" : { "Fn::FindInMap" : [ "AWSRegionArch2AMI", { "Ref" : "AWS::Region" }, 256 | { "Fn::FindInMap" : [ "AWSInstanceType2Arch", { "Ref" : "ConsumerInstanceType" }, 257 | "Arch" ] } ] }, 258 | "InstanceType" : { "Ref" : "ConsumerInstanceType" }, 259 | "SecurityGroups" : [{ "Ref" : "Ec2SecurityGroup" }], 260 | "IamInstanceProfile": { "Ref": "RootInstanceProfile" }, 261 | "UserData" : { "Fn::Base64" : { "Fn::Join" : ["", [ 262 | "#!/bin/bash\n", 263 | "cd /home/ec2-user\n", 264 | "yum install -y nodejs npm --enablerepo=epel\n", 265 | "yum install -y git\n", 266 | "git clone https://github.com/awslabs/amazon-kinesis-client-nodejs\n", 267 | "chown -R ec2-user:ec2-user /home/ec2-user/amazon-kinesis-client-nodejs\n", 268 | "cd amazon-kinesis-client-nodejs\n", 269 | "npm install\n", 270 | "cd samples/click_stream_sample/consumer\n", 271 | "export NODE_LOG_DIR='logs'\n", 272 | "mkdir logs\n", 273 | "sed 's/streamName = kclnodejsclickstreamsample/streamName = ", { "Ref" : "KinesisStream"}, "/g' -i sample.properties\n", 274 | "sed 's/applicationName = kclnodejsclickstreamsample/applicationName = ", { "Ref" : "KCLDynamoDBTable"}, "/g' -i sample.properties\n", 275 | "sed 's/regionName = us-east-1/regionName = ", { "Ref" : "AWS::Region"}, "/g' -i sample.properties\n", 276 | "sed 's/kinesis-clickstream-batchdata/", { "Ref" : "S3Bucket" }, "/g' -i config.js\n", 277 | "sed 's/maxBufferSize : 1024 \\* 1024/maxBufferSize : ", { "Ref" : "S3BufferSizeInBytes" }, "/g' -i config.js\n", 278 | "../../../bin/kcl-bootstrap -e --java /usr/bin/java --properties ./sample.properties > consumer.out 2>&1 &\n" 279 | ]]}} 280 | } 281 | }, 282 | 283 | "ProducerCluster" : { 284 | "Type" : "AWS::AutoScaling::AutoScalingGroup", 285 | "Properties" : { 286 | "AvailabilityZones" : { "Fn::GetAZs" : { "Ref" : "AWS::Region" } }, 287 | "LaunchConfigurationName" : { "Ref" : "ProducerLaunchConfig" }, 288 | "MinSize" : { "Ref" : "ProducerClusterSize" }, 289 | "MaxSize" : { "Ref" : "ProducerClusterSize" }, 290 | "DesiredCapacity" : { "Ref" : "ProducerClusterSize" }, 291 | "Tags" : [ 292 | { "Key" : "ApplicationRole", "Value" : "NodeJSClickStreamProducer", "PropagateAtLaunch" : "true" } 293 | ] 294 | } 295 | }, 296 | 297 | "ProducerLaunchConfig" 
: { 298 | "Type" : "AWS::AutoScaling::LaunchConfiguration", 299 | "Metadata" : { 300 | "Comment:" : "Run producer for kinesis NodeJS-KCL ClickStream example.", 301 | "AWS::CloudFormation::Init" : { 302 | "config" : { 303 | } 304 | } 305 | }, 306 | "Properties" : { 307 | "KeyName" : { "Fn::If" : [ "UseEC2KeyName", { "Ref" : "KeyName" }, { "Ref" : "AWS::NoValue" } ]}, 308 | "ImageId" : { "Fn::FindInMap" : [ "AWSRegionArch2AMI", { "Ref" : "AWS::Region" }, 309 | { "Fn::FindInMap" : [ "AWSInstanceType2Arch", { "Ref" : "ProducerInstanceType" }, 310 | "Arch" ] } ] }, 311 | "InstanceType" : { "Ref" : "ProducerInstanceType" }, 312 | "SecurityGroups" : [{ "Ref" : "Ec2SecurityGroup" }], 313 | "IamInstanceProfile": { "Ref": "RootInstanceProfile" }, 314 | "UserData" : { "Fn::Base64" : { "Fn::Join" : ["", [ 315 | "#!/bin/bash\n", 316 | "cd /home/ec2-user\n", 317 | "yum install -y nodejs npm --enablerepo=epel\n", 318 | "yum install -y git\n", 319 | "git clone https://github.com/awslabs/amazon-kinesis-client-nodejs\n", 320 | "chown -R ec2-user:ec2-user /home/ec2-user/amazon-kinesis-client-nodejs\n", 321 | "cd amazon-kinesis-client-nodejs\n", 322 | "npm install\n", 323 | "cd samples/click_stream_sample/producer\n", 324 | "export NODE_LOG_DIR='logs'\n", 325 | "mkdir logs\n", 326 | "sed 's/kclnodejsclickstreamsample/", { "Ref" : "KinesisStream" }, "/g' -i config.js\n", 327 | "sed 's/us-east-1/", { "Ref" : "AWS::Region" }, "/g' -i config.js\n", 328 | "sed 's/recordsToWritePerBatch : 5/recordsToWritePerBatch : ", { "Ref" : "ProducerPutRecordsBatchSize" }, "/g' -i config.js\n", 329 | "sed 's/shards : 2/shards : ", { "Ref" : "NumberOfShards" }, "/g' -i config.js\n", 330 | "sed 's/putRecordsTps : 20/putRecordsTps : ", { "Ref" : "ProducerPutRecordsTps" }, "/g' -i config.js\n", 331 | "node click_stream_producer_app.js &\n" 332 | ]]}} 333 | } 334 | } 335 | }, 336 | "Outputs" : { 337 | "StreamName" : { 338 | "Description" : "The name of the Kinesis Stream. This was autogenerated by the Kinesis Resource named 'KinesisStream'", 339 | "Value" : { "Ref" : "KinesisStream" } 340 | }, 341 | "DynamoDBTableForKCL" : { 342 | "Description" : "The DynamoDB table name to store KCL metadata. This was autogenerated by the DynamoDB Resource named 'KCLDynamoDBTable'", 343 | "Value" : { "Ref" : "KCLDynamoDBTable" } 344 | }, 345 | "S3Bucket" : { 346 | "Description" : "The name of the bucket where click stream data is stored from consumer. This was autogenerated by the S3 Resource named 'S3Bucket'", 347 | "Value" : { "Ref" : "S3Bucket" } 348 | } 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /samples/click_stream_sample/consumer/click_stream_consumer.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var async = require('async'); 10 | var util = require('util'); 11 | var config = require('./config'); 12 | var kcl = require('../../..'); 13 | var logger = require('../../util/logger'); 14 | var recordBuffer = require('./record_buffer'); 15 | var s3Emitter = require('./s3_emitter'); 16 | 17 | 18 | /** 19 | * A simple implementation of RecordProcessor that accepts records from an Amazon 20 | * Kinesis stream and batches them into 1 MB (configurable) datasets, then puts 21 | * them in a configured S3 bucket for further offline processing. 
The object 22 | * returned should implement the functions initialize, processRecords, and shutdown 23 | * in order to enable the KCL to interact with MultiLangDaemon. 24 | * MultiLangDaemon would create one child process (hence one RecordProcessor instance) 25 | * per shard. A single shard will never be accessed by more than one 26 | * RecordProcessor instance; e.g., if you run this sample on a single machine, 27 | * against a stream with 2 shards, MultiLangDaemon would create 2 child 28 | * Node.js processes (RecordProcessor), one for each shard. 29 | */ 30 | function clickStreamProcessor(emitter, cfg) { 31 | var buffer = recordBuffer(cfg.maxBufferSize); 32 | var log = logger().getLogger('clickStreamProcessor'); 33 | var shardId = null; 34 | var commitQueue = null; 35 | 36 | function _commit(commitInfo, callback) { 37 | var key = commitInfo.key; 38 | var sequenceNumber = commitInfo.sequenceNumber; 39 | var data = commitInfo.data; 40 | var checkpointer = commitInfo.checkpointer; 41 | emitter.emit(key, data, function(error) { 42 | if (error) { 43 | callback(error); 44 | return; 45 | } 46 | log.info(util.format('Successfully uploaeded data to s3 file: %s', key)); 47 | checkpointer.checkpoint(sequenceNumber, function(e, seq) { 48 | if (!e) { 49 | log.info('Successful checkpoint at sequence number: %s', sequenceNumber); 50 | } 51 | callback(e); 52 | }); 53 | }); 54 | } 55 | 56 | function _processRecord(record, checkpointer, callback) { 57 | var data = new Buffer(record.data, 'base64').toString(); 58 | var sequenceNumber = record.sequenceNumber; 59 | 60 | // Add data to buffer until maxBufferSize. 61 | buffer.putRecord(data, sequenceNumber); 62 | 63 | if (!buffer.shouldFlush()) { 64 | callback(null); 65 | return; 66 | } 67 | // Buffer is full. Add commit to the queue. 68 | commitQueue.push({ 69 | key: shardId + '/' + buffer.getFirstSequenceNumber() + '-' + buffer.getLastSequenceNumber(), 70 | sequenceNumber: buffer.getLastSequenceNumber(), 71 | data: buffer.readAndClearRecords(), 72 | checkpointer: checkpointer 73 | }, callback); 74 | } 75 | 76 | return { 77 | /** 78 | * This function is called by the KCL to allow application initialization before it 79 | * starts processing Amazon Kinesis records. The KCL won't start processing records until the 80 | * application is successfully initialized and completeCallback is called. 81 | */ 82 | initialize: function(initializeInput, completeCallback) { 83 | shardId = initializeInput.shardId; 84 | // The KCL for Node.js does not allow more than one outstanding checkpoint. So checkpoint must 85 | // be done sequentially. Async queue with 1 concurrency will allow executing checkpoints 86 | // one after another. 87 | commitQueue = async.queue(_commit, 1); 88 | 89 | emitter.initialize(function(err) { 90 | if (err) { 91 | log.error(util.format('Error initializing emitter: %s', err)); 92 | process.exit(1); 93 | } 94 | else { 95 | log.info('Click stream processor successfully initialized.'); 96 | completeCallback(); 97 | } 98 | }); 99 | }, 100 | 101 | /** 102 | * Called by the KCL with a list of records to be processed and a checkpointer. 103 | * A record looks like - 104 | * '{"data":"","partitionKey":"someKey","sequenceNumber":"1234567890"}' 105 | * Note that "data" is a base64-encoded string. You can use the Buffer class to decode the data 106 | * into a string. The checkpointer can be used to checkpoint a particular sequence number. 107 | * Any checkpoint call should be made before calling completeCallback. 
The KCL ingests the next 108 | * batch of records only after completeCallback is called. 109 | */ 110 | processRecords: function(processRecordsInput, completeCallback) { 111 | if (!processRecordsInput || !processRecordsInput.records) { 112 | completeCallback(); 113 | return; 114 | } 115 | 116 | var records = processRecordsInput.records; 117 | // Call completeCallback only after we have processed all records. 118 | async.series([ 119 | function(done) { 120 | var record; 121 | var processedCount = 0; 122 | var errorCount = 0; 123 | var errors; 124 | 125 | var callback = function (err) { 126 | if (err) { 127 | log.error(util.format('Received error while processing record: %s', err)); 128 | errorCount++; 129 | errors = errors + '\n' + err; 130 | } 131 | 132 | processedCount++; 133 | if (processedCount === records.length) { 134 | done(errors, errorCount); 135 | } 136 | }; 137 | 138 | for (var i = 0 ; i < records.length ; ++i) { 139 | record = records[i]; 140 | _processRecord(record, processRecordsInput.checkpointer, callback); 141 | } 142 | } 143 | ], 144 | function(err, errCount) { 145 | if (err) { 146 | log.info(util.format('%d records processed with %d errors.', records.length, errCount)); 147 | } 148 | completeCallback(); 149 | }); 150 | }, 151 | 152 | /** 153 | * Called by the KCL to indicate that this record processor should shut down. 154 | * After the shutdown operation is complete, there will not be any more calls to 155 | * any other functions of this record processor. If lease is lost, clients should not 156 | * checkpoint because there is possibly another record processor which has 157 | * acquired the lease for this shard. 158 | */ 159 | leaseLost: function(leaseLostInput, completeCallback) { 160 | completeCallback(); 161 | }, 162 | 163 | /** 164 | * Called by the KCL to indicate that this record processor should shut down. 165 | * After the shutdown operation is complete, there will not be any more calls to 166 | * any other functions of this record processor. If shard has ended, then 167 | * checkpointer.checkpoint() should be called to checkpoint at the end of 168 | * the shard so that this processor will be shut down and new processors 169 | * will be created for the children of this shard. 170 | */ 171 | shardEnded: function(shardEndedInput, completeCallback) { 172 | // Make sure to emit all remaining buffered data to S3 before shutting down. 173 | commitQueue.push({ 174 | key: shardId + '/' + buffer.getFirstSequenceNumber() + '-' + buffer.getLastSequenceNumber(), 175 | sequenceNumber: buffer.getLastSequenceNumber(), 176 | data: buffer.readAndClearRecords(), 177 | checkpointer: shardEndedInput.checkpointer 178 | }, function(error) { 179 | if (error) { 180 | log.error(util.format('Received error while shutting down: %s', error)); 181 | } 182 | completeCallback(); 183 | }); 184 | } 185 | }; 186 | } 187 | 188 | kcl(clickStreamProcessor(s3Emitter(config.s3), config.clickStreamProcessor)).run(); 189 | -------------------------------------------------------------------------------- /samples/click_stream_sample/consumer/config.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var config = module.exports = { 9 | s3 : { 10 | // Region for Amazon S3. Defaults to us-east-1. 11 | // region : '', 12 | 13 | // Amazon S3 bucket to store batched clickstream data. 
The consumer application 14 | // may create a new bucket (based on S3.createBucketIfNotPresent value), 15 | // if the specified bucket doesn't exist. 16 | bucket : 'kinesis-clickstream-batchdata', 17 | 18 | // Enables the consumer application to create a new S3 bucket if the specified 19 | // bucket doesn't exist. 20 | createBucketIfNotPresent : true 21 | }, 22 | 23 | clickStreamProcessor : { 24 | // Maximum batch size in bytes before sending data to S3. 25 | maxBufferSize : 1024 * 1024 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /samples/click_stream_sample/consumer/record_buffer.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | // In memory buffer for storing kinesis records. 9 | function recordBuffer(size) { 10 | var buffer = []; 11 | var firstSequenceNumber = 0; 12 | var lastSequenceNumber = 0; 13 | var totalRecords = 0; 14 | var currentSize = 0; 15 | var delimiter = '\n'; 16 | 17 | function _clear() { 18 | buffer.length = 0; 19 | firstSequenceNumber = 0; 20 | lastSequenceNumber = 0; 21 | totalRecords = 0; 22 | currentSize = 0; 23 | } 24 | 25 | return { 26 | 27 | // Stores a single record in memory. 28 | putRecord: function(data, seq, callback) { 29 | if (!data) { 30 | return; 31 | } 32 | 33 | var record = new Buffer(data + delimiter); 34 | if (firstSequenceNumber === 0) { 35 | firstSequenceNumber = seq; 36 | } 37 | 38 | lastSequenceNumber = seq; 39 | 40 | currentSize += record.length; 41 | buffer.push(record); 42 | }, 43 | 44 | // Bundles all records in a single buffer and clears local buffer. 45 | readAndClearRecords: function() { 46 | var buf = new Buffer.concat(buffer, currentSize); 47 | _clear(); 48 | return buf; 49 | }, 50 | 51 | setDelimiter: function(delimiter) { 52 | delimiter = delimiter; 53 | }, 54 | 55 | getFirstSequenceNumber: function() { 56 | return firstSequenceNumber; 57 | }, 58 | 59 | getLastSequenceNumber: function() { 60 | return lastSequenceNumber; 61 | }, 62 | 63 | shouldFlush: function() { 64 | if (currentSize >= size) { 65 | return true; 66 | } 67 | } 68 | }; 69 | } 70 | 71 | module.exports = recordBuffer; 72 | -------------------------------------------------------------------------------- /samples/click_stream_sample/consumer/s3_emitter.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var AWS = require('aws-sdk'); 10 | var util = require('util'); 11 | var logger = require('../../util/logger'); 12 | 13 | function s3Emitter(config) { 14 | var s3Client; 15 | var log = logger().getLogger('s3Emitter'); 16 | var initializeRetryCount = 0; 17 | 18 | var self = { 19 | initialize: function(callback) { 20 | ++initializeRetryCount; 21 | 22 | s3Client = new AWS.S3({region: config.region}); 23 | // Check if specified S3 bucket exists. If it does not, create one based one config. 24 | s3Client.headBucket({Bucket: config.bucket}, function(err, data) { 25 | if (!err) { 26 | log.info(util.format('Destination bucket: %s', config.bucket)); 27 | callback(null); 28 | return; 29 | } 30 | if (!config.createBucketIfNotPresent) { 31 | callback('Specified bucket does not exist in S3. 
Enable bucket creation by setting config.s3.createBucketIfNotPresent to true.'); 32 | return; 33 | } 34 | 35 | var params = { 36 | Bucket: config.bucket, 37 | CreateBucketConfiguration: { 38 | LocationConstraint: config.region 39 | } 40 | }; 41 | s3Client.createBucket(params, function(err, data) { 42 | if (err && initializeRetryCount < 3) { 43 | setTimeout(function() { 44 | self.initialize(callback); 45 | }, 1000); 46 | return; 47 | } 48 | callback(err); 49 | }); 50 | }); 51 | }, 52 | 53 | emit: function(key, value, callback) { 54 | var params = { 55 | Bucket: config.bucket, 56 | Key: key, 57 | Body: value 58 | }; 59 | s3Client.upload(params, callback); 60 | } 61 | }; 62 | return self; 63 | } 64 | 65 | module.exports = s3Emitter; 66 | -------------------------------------------------------------------------------- /samples/click_stream_sample/consumer/sample.properties: -------------------------------------------------------------------------------- 1 | # The script that abides by the multi-language protocol. This script will 2 | # be executed by the MultiLangDaemon, which will communicate with this script 3 | # over STDIN and STDOUT according to the multi-language protocol. 4 | executableName = node click_stream_consumer.js 5 | 6 | # The name of an Amazon Kinesis stream to process. 7 | streamName = kclnodejsclickstreamsample 8 | 9 | # Used by the KCL as the name of this application. Will be used as the name 10 | # of an Amazon DynamoDB table which will store the lease and checkpoint 11 | # information for workers with this application name 12 | applicationName = kclnodejsclickstreamsample 13 | 14 | # Users can change the credentials provider the KCL will use to retrieve credentials. 15 | # The DefaultAWSCredentialsProviderChain checks several other providers, which is 16 | # described here: 17 | # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html 18 | AWSCredentialsProvider = DefaultAWSCredentialsProviderChain 19 | 20 | # Appended to the user agent of the KCL. Does not impact the functionality of the 21 | # KCL in any other way. 22 | processingLanguage = nodejs/0.10 23 | 24 | # Valid options at TRIM_HORIZON or LATEST. 25 | # See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax 26 | initialPositionInStream = TRIM_HORIZON 27 | 28 | # The following properties are also available for configuring the KCL Worker that is created 29 | # by the MultiLangDaemon. 30 | 31 | # The KCL defaults to us-east-1 32 | regionName = us-east-1 33 | 34 | # Fail over time in milliseconds. A worker which does not renew it's lease within this time interval 35 | # will be regarded as having problems and it's shards will be assigned to other workers. 36 | # For applications that have a large number of shards, this msy be set to a higher number to reduce 37 | # the number of DynamoDB IOPS required for tracking leases 38 | #failoverTimeMillis = 10000 39 | 40 | # A worker id that uniquely identifies this worker among all workers using the same applicationName 41 | # If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself. 42 | #workerId = 43 | 44 | # Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks. 45 | #shardSyncIntervalMillis = 60000 46 | 47 | # Max records to fetch from Kinesis in a single GetRecords call. 48 | #maxRecords = 10000 49 | 50 | # Idle time between record reads in milliseconds. 
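# A smaller value makes the worker poll the shard more frequently, which lowers
# latency at the cost of more GetRecords calls.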
51 | #idleTimeBetweenReadsInMillis = 1000 52 | 53 | # Enables applications flush/checkpoint (if they have some data "in progress", but don't get new data for while) 54 | #callProcessRecordsEvenForEmptyRecordList = false 55 | 56 | # Interval in milliseconds between polling to check for parent shard completion. 57 | # Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on 58 | # completion of parent shards). 59 | #parentShardPollIntervalMillis = 10000 60 | 61 | # Cleanup leases upon shards completion (don't wait until they expire in Kinesis). 62 | # Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try 63 | # to delete the ones we don't need any longer. 64 | #cleanupLeasesUponShardCompletion = true 65 | 66 | # Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures). 67 | #taskBackoffTimeMillis = 500 68 | 69 | # Buffer metrics for at most this long before publishing to CloudWatch. 70 | #metricsBufferTimeMillis = 10000 71 | 72 | # Buffer at most this many metrics before publishing to CloudWatch. 73 | #metricsMaxQueueSize = 10000 74 | 75 | # KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls 76 | # to RecordProcessorCheckpointer#checkpoint(String) by default. 77 | #validateSequenceNumberBeforeCheckpointing = true 78 | 79 | # The maximum number of active threads for the MultiLangDaemon to permit. 80 | # If a value is provided then a FixedThreadPool is used with the maximum 81 | # active threads set to the provided value. If a non-positive integer or no 82 | # value is provided a CachedThreadPool is used. 83 | #maxActiveThreads = 0 84 | -------------------------------------------------------------------------------- /samples/click_stream_sample/producer/click_stream_generator.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | function clickStreamGenerator(totalResources) { 10 | var referrers = [ 11 | 'http://www.amazon.com', 12 | 'http://www.google.com', 13 | 'http://www.yahoo.com', 14 | 'http://bing/com', 15 | 'http://stackoverflow.com', 16 | 'http://reddit.com' 17 | ]; 18 | var resources = []; 19 | 20 | // List of resources and referrers to generate fake data. Resource names will also be used as partition-keys. 21 | for (var i = 0 ; i < totalResources ; i++) { 22 | resources.push('resource-' + i); 23 | } 24 | 25 | return { 26 | getRandomClickStreamData: function() { 27 | var referrer = referrers[Math.floor(Math.random() * referrers.length)]; 28 | var resource = resources[Math.floor(Math.random() * resources.length)]; 29 | 30 | var data = { 31 | resource: resource, 32 | referrer: referrer 33 | }; 34 | 35 | return data; 36 | } 37 | }; 38 | } 39 | 40 | module.exports = clickStreamGenerator; 41 | -------------------------------------------------------------------------------- /samples/click_stream_sample/producer/click_stream_producer.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var util = require('util'); 10 | var clickStreamGenerator = require('./click_stream_generator'); 11 | var logger = require('../../util/logger'); 12 | 13 | function clickStreamProducer(kinesis, config) { 14 | var clickStreamGen = clickStreamGenerator(config.shards); 15 | var log = logger().getLogger('producer'); 16 | var waitBetweenPutRecordsCallsInMilliseconds = config.putRecordsTps ? 1000 / config.putRecordsTps : 50; 17 | 18 | // Creates a new kinesis stream if one doesn't exist. 19 | function _createStreamIfNotCreated(callback) { 20 | var params = { 21 | ShardCount: config.shards, 22 | StreamName: config.stream 23 | }; 24 | 25 | kinesis.createStream(params, function(err, data) { 26 | if (err) { 27 | // ResourceInUseException is returned when the stream is already created. 28 | if (err.code !== 'ResourceInUseException') { 29 | callback(err); 30 | return; 31 | } 32 | else { 33 | log.info(util.format('%s stream is already created! Re-using it.', config.stream)); 34 | } 35 | } 36 | else { 37 | log.info(util.format('%s stream does not exist. Created a new stream with that name.', config.stream)); 38 | } 39 | 40 | // Poll to make sure stream is in ACTIVE state before start pushing data. 41 | _waitForStreamToBecomeActive(callback); 42 | }); 43 | } 44 | 45 | // Checks current status of the stream. 46 | function _waitForStreamToBecomeActive(callback) { 47 | kinesis.describeStream({StreamName: config.stream}, function(err, data) { 48 | if (!err) { 49 | if (data.StreamDescription.StreamStatus === 'ACTIVE') { 50 | log.info('Current status of the stream is ACTIVE.'); 51 | callback(null); 52 | } 53 | else { 54 | log.info(util.format('Current status of the stream is %s.', data.StreamDescription.StreamStatus)); 55 | setTimeout(function() { 56 | _waitForStreamToBecomeActive(callback); 57 | }, 1000 * config.waitBetweenDescribeCallsInSeconds); 58 | } 59 | } 60 | }); 61 | } 62 | 63 | // Sends batch of records to kinesis using putRecords API. 64 | function _sendToKinesis(totalRecords, done) { 65 | if (totalRecords <= 0) { 66 | return; 67 | } 68 | 69 | var data, record; 70 | var records = []; 71 | 72 | // Use putRecords API to batch more than one record. 
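    // Each record's partition key is the randomly chosen resource name, which is how
    // the generated clickstream data gets distributed across the shards of the stream.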
73 | for (var i = 0 ; i < totalRecords ; i++) { 74 | data = clickStreamGen.getRandomClickStreamData(); 75 | 76 | record = { 77 | Data: JSON.stringify(data), 78 | PartitionKey: data.resource 79 | }; 80 | 81 | records.push(record); 82 | } 83 | 84 | var recordsParams = { 85 | Records: records, 86 | StreamName: config.stream 87 | }; 88 | 89 | kinesis.putRecords(recordsParams, function(err, data) { 90 | if (err) { 91 | log.error(err); 92 | } 93 | else { 94 | log.info(util.format('Sent %d records with %d failures.', records.length, data.FailedRecordCount)); 95 | } 96 | }); 97 | 98 | done(); 99 | } 100 | 101 | function _sendToKinesisRecursively(totalRecords) { 102 | setTimeout(function() { 103 | _sendToKinesis(totalRecords, function() { 104 | _sendToKinesisRecursively(totalRecords); 105 | }); 106 | }, waitBetweenPutRecordsCallsInMilliseconds); 107 | } 108 | 109 | return { 110 | run: function() { 111 | log.info(util.format('Configured wait between consecutive PutRecords call in milliseconds: %d', 112 | waitBetweenPutRecordsCallsInMilliseconds)); 113 | _createStreamIfNotCreated(function(err) { 114 | if (err) { 115 | log.error(util.format('Error creating stream: %s', err)); 116 | return; 117 | } 118 | _sendToKinesisRecursively(config.recordsToWritePerBatch); 119 | }); 120 | } 121 | }; 122 | } 123 | 124 | module.exports = clickStreamProducer; 125 | -------------------------------------------------------------------------------- /samples/click_stream_sample/producer/click_stream_producer_app.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var AWS = require('aws-sdk'); 10 | var config = require('./config'); 11 | var producer = require('./click_stream_producer'); 12 | 13 | var kinesis = new AWS.Kinesis({region: config.kinesis.region}); 14 | producer(kinesis, config.clickStreamProducer).run(); 15 | -------------------------------------------------------------------------------- /samples/click_stream_sample/producer/config.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var config = module.exports = { 9 | kinesis : { 10 | // Region for the Amazon Kinesis stream. 11 | region : 'us-east-1' 12 | }, 13 | 14 | clickStreamProducer : { 15 | // The Amazon Kinesis stream to ingest clickstream data into. If the specified 16 | // stream doesn't exist, the producer application creates a new stream. 17 | stream : 'kclnodejsclickstreamsample', 18 | 19 | // Total shards in the specified Amazon Kinesis stream. 20 | shards : 2, 21 | 22 | // The producer application batches clickstream records in to the size specified 23 | // here, and makes a single PutRecords API call to ingest all records to the 24 | // stream. 25 | recordsToWritePerBatch : 5, 26 | 27 | // If the producer application creates a stream, it has to wait for the stream to 28 | // transition to ACTIVE state before it can start putting data in it. This 29 | // specifies the wait time between consecutive describeStream calls. 30 | waitBetweenDescribeCallsInSeconds : 5, 31 | 32 | // Transactions per second for the PutRecords call to make sure the producer 33 | // doesn't hit throughput limits enforced by Amazon Kinesis. 
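    // With the default recordsToWritePerBatch of 5, 20 calls per second works out
    // to roughly 100 records per second ingested into the stream.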
34 | // For more information about throughput limits, see: 35 | // http://docs.aws.amazon.com/kinesis/latest/dev/service-sizes-and-limits.html 36 | putRecordsTps : 20 37 | } 38 | }; 39 | -------------------------------------------------------------------------------- /samples/util/logger.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var log4js = require('log4js'); 9 | 10 | function logger() { 11 | var logDir = process.env.NODE_LOG_DIR !== undefined ? process.env.NODE_LOG_DIR : '.'; 12 | 13 | var config = { 14 | appenders: { 15 | default: { 16 | "type": "file", 17 | "filename": logDir + "/" + "application.log", 18 | "pattern": "-yyyy-MM-dd", 19 | "layout": { 20 | "type": "pattern", 21 | "pattern": "%d (PID: %x{pid}) %p %c - %m", 22 | "tokens": { 23 | "pid": function () { return process.pid; } 24 | } 25 | } 26 | }, 27 | }, 28 | categories: { 29 | default: { appenders: ['default'], level: 'info' }, 30 | } 31 | }; 32 | 33 | log4js.configure(config, {}); 34 | 35 | return { 36 | getLogger: function (category) { 37 | return log4js.getLogger(category); 38 | } 39 | }; 40 | } 41 | 42 | module.exports = logger; 43 | -------------------------------------------------------------------------------- /test/kcl/action_handler_tests.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var chai = require('chai'); 10 | var expect = chai.expect; 11 | var should = chai.should(); 12 | var sinon = require('sinon'); 13 | var util = require('util'); 14 | 15 | var IOHandler = require('../../lib/kcl/io_handler'); 16 | var ActionHandler = require('../../lib/kcl/action_handler'); 17 | 18 | // Local stub to capture stdout/stderr. 
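// It wraps stream.write so every chunk is appended to an in-memory buffer; unhook()
// restores the original writer, captured() returns everything written so far, and
// readLast() returns the last complete line.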
19 | function captureStream(stream) { 20 | var origWrite = stream.write; 21 | var buffer = ''; 22 | 23 | stream.write = function(chunk, encoding, callback) { 24 | buffer += chunk.toString(); 25 | origWrite.apply(stream, arguments); 26 | return true; 27 | }; 28 | 29 | return { 30 | unhook: function unhook() { 31 | stream.write = origWrite; 32 | }, 33 | captured: function() { 34 | return buffer; 35 | }, 36 | readLast: function() { 37 | var lines = buffer.split('\n'); 38 | return lines[lines.length - 2]; 39 | } 40 | }; 41 | } 42 | 43 | describe('action_handler_tests', function() { 44 | var stdoutHook = null; 45 | var stderrHook = null; 46 | var ioHandler = new IOHandler(process.stdin, process.stdout, process.stderr); 47 | var actionHandler = new ActionHandler(ioHandler); 48 | 49 | beforeEach(function() { 50 | stdoutHook = captureStream(process.stdout); 51 | stderrHook = captureStream(process.stderr); 52 | }); 53 | 54 | afterEach(function() { 55 | stdoutHook.unhook(); 56 | stderrHook.unhook(); 57 | }); 58 | 59 | after(function() { 60 | actionHandler.destroy(); 61 | ioHandler.destroy(); 62 | }); 63 | 64 | 65 | it('should not emit action for an invalid action', function(done) { 66 | ioHandler.emit('line', '{"shardId":"shardId-000001"}'); 67 | expect(stderrHook.captured()).to.equal('Invalid action received: {"shardId":"shardId-000001"}\n'); 68 | done(); 69 | }); 70 | 71 | it('should emit action event for a valid action', function(done) { 72 | actionHandler.on('action', function(action) { 73 | expect(action.action).to.equal('initialize'); 74 | expect(action.shardId).to.equal('shardId-000001'); 75 | done(); 76 | }); 77 | ioHandler.emit('line', '{"action":"initialize","shardId":"shardId-000001"}'); 78 | }); 79 | 80 | it('should write action to stdout', function(done) { 81 | actionHandler.sendAction({action : 'initialize', shardId : 'shardId-000001'}, function(err) { 82 | should.not.exist(err); 83 | expect(stdoutHook.readLast()).to.equal('{"action":"initialize","shardId":"shardId-000001"}'); 84 | done(); 85 | }); 86 | }); 87 | 88 | it('should emit end event when IO handler is closed', function(done) { 89 | actionHandler.on('end', function() { 90 | done(); 91 | }); 92 | ioHandler.emit('close'); 93 | }); 94 | }); 95 | -------------------------------------------------------------------------------- /test/kcl/checkpointer_tests.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var expect = require('chai').expect; 10 | var sinon = require('sinon'); 11 | 12 | var Checkpointer = require('../../lib/kcl/checkpointer'); 13 | var KCLManager = require('../../lib/kcl/kcl_manager'); 14 | 15 | describe('checkpointer_tests', function() { 16 | var sandbox = null; 17 | var kclManager = new KCLManager({}, process.stdin, process.stdout, process.stderr); 18 | var checkpointer = new Checkpointer(kclManager); 19 | 20 | before(function() { 21 | kclManager.run(); 22 | }); 23 | 24 | beforeEach(function() { 25 | sandbox = sinon.createSandbox(); 26 | }); 27 | 28 | afterEach(function() { 29 | sandbox.restore(); 30 | }); 31 | 32 | after(function() { 33 | kclManager._cleanup(); 34 | }); 35 | 36 | it('should emit a checkpoint action and consume response action', function(done) { 37 | var seq = Math.floor((Math.random() * 1000000)).toString(); 38 | // Mock KCLManager checkpoint and short-circuit dummy response. 
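    // The stub echoes the sequence number straight back through onCheckpointerResponse,
    // so the test completes without any round trip to the MultiLangDaemon.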
39 | sandbox.stub(kclManager, 'checkpoint').callsFake(function(seq) { 40 | checkpointer.onCheckpointerResponse(null, seq); 41 | }); 42 | 43 | checkpointer.checkpoint(seq, function(err, seq) { 44 | expect(err).to.be.equal(null); 45 | done(); 46 | }); 47 | }); 48 | 49 | it('should emit a checkpoint action and consume response when no sequence number', function(done) { 50 | sandbox.stub(kclManager, 'checkpoint').callsFake(function(seq) { 51 | expect(seq).to.be.equal(null); 52 | checkpointer.onCheckpointerResponse(null, seq); 53 | }); 54 | 55 | checkpointer.checkpoint(function(err) { 56 | expect(err).to.be.equal(null); 57 | done(); 58 | }); 59 | }); 60 | 61 | it('should raise an error when error is received from MultiLangDaemon', function(done) { 62 | var seq = Math.floor((Math.random() * 1000000)).toString(); 63 | // Mock KCLManager checkpoint and short-circuit dummy response. 64 | sandbox.stub(kclManager, 'checkpoint').callsFake(function(seq) { 65 | checkpointer.onCheckpointerResponse('ThrottlingException', seq); 66 | }); 67 | 68 | checkpointer.checkpoint(seq, function(err) { 69 | expect(err).not.to.be.equal(null); 70 | expect(err).to.equal('ThrottlingException'); 71 | done(); 72 | }); 73 | }); 74 | 75 | it('should raise an error on checkpoint when previous checkpoint is not complete', function(done) { 76 | var seq = Math.floor((Math.random() * 1000000)).toString(); 77 | // Mock KCLManager checkpoint to have outstanding checkpoint. 78 | sandbox.stub(kclManager, 'checkpoint').callsFake(function(seq) { 79 | }); 80 | 81 | checkpointer.checkpoint(seq, function(err) { 82 | }); 83 | 84 | checkpointer.checkpoint(seq, function(err) { 85 | expect(err).to.equal('Cannot checkpoint while another checkpoint is already in progress.'); 86 | done(); 87 | }); 88 | }); 89 | }); 90 | -------------------------------------------------------------------------------- /test/kcl/io_handler_tests.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | 9 | var chai = require('chai'); 10 | var expect = chai.expect; 11 | var sinon = require('sinon'); 12 | var Stream = require('stream'); 13 | 14 | var IOHandler = require('../../lib/kcl/io_handler'); 15 | 16 | // Local stub to capture stdout/stderr. 
17 | function captureStream(stream) { 18 | var origWrite = stream.write; 19 | var buffer = ''; 20 | 21 | stream.write = function(chunk, encoding, callback) { 22 | buffer += chunk.toString(); 23 | origWrite.apply(stream, arguments); 24 | return true; 25 | }; 26 | 27 | return { 28 | unhook: function unhook() { 29 | stream.write = origWrite; 30 | }, 31 | captured: function() { 32 | return buffer; 33 | }, 34 | readLast: function() { 35 | var lines = buffer.split('\n'); 36 | return lines[lines.length - 2]; 37 | } 38 | }; 39 | } 40 | 41 | describe('io_handler_tests', function() { 42 | var stdoutHook = null; 43 | var stderrHook = null; 44 | // Github workflows doesn't write to process.stdin for unknown reasons, so using a new Stream 45 | const readableStream = new Stream.Readable(); 46 | readableStream._read = () => {}; // _read is required but can noop it 47 | var ioHandler = new IOHandler(readableStream, process.stdout, process.stderr); 48 | 49 | beforeEach(function() { 50 | stdoutHook = captureStream(process.stdout); 51 | stderrHook = captureStream(process.stderr); 52 | }); 53 | 54 | afterEach(function() { 55 | stdoutHook.unhook(); 56 | stderrHook.unhook(); 57 | }); 58 | 59 | after(function() { 60 | ioHandler.destroy(); 61 | }); 62 | 63 | it('should read line', function(done) { 64 | ioHandler.on('line', function(line) { 65 | expect(line).to.equal('line1'); 66 | ioHandler.removeAllListeners('line'); 67 | done(); 68 | }); 69 | readableStream.emit('data', 'line1\n'); 70 | }); 71 | 72 | it('should write to stdout', function(done) { 73 | ioHandler.writeLine('{"action":"status","responseFor":"initialize"}', function(err) { 74 | expect(stdoutHook.readLast()).to.equal('{"action":"status","responseFor":"initialize"}'); 75 | done(); 76 | }); 77 | }); 78 | 79 | it('should write error messages to stderr', function(done) { 80 | ioHandler.writeError('an error message'); 81 | expect(stderrHook.captured()).to.equal('an error message\n'); 82 | done(); 83 | }); 84 | 85 | it('should not read line after IO handler is destroyed', function(done) { 86 | var callback = sinon.spy(); 87 | ioHandler.on('line', callback); 88 | readableStream.emit('data', 'line1\n'); 89 | expect(callback.calledOnce).to.be.equal(true); 90 | ioHandler.destroy(); 91 | readableStream.emit('data', 'line2\n'); 92 | expect(callback.calledTwice).to.be.equal(false); 93 | ioHandler.removeListener('line', callback); 94 | done(); 95 | }); 96 | }); 97 | -------------------------------------------------------------------------------- /test/kcl/kcl_process_tests.js: -------------------------------------------------------------------------------- 1 | /*** 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | SPDX-License-Identifier: Apache-2.0 4 | ***/ 5 | 6 | 'use strict'; 7 | 8 | var expect = require('chai').expect; 9 | var sinon = require('sinon'); 10 | 11 | var kcl = require('../..'); 12 | 13 | function RecordProcessor() { 14 | } 15 | 16 | RecordProcessor.prototype.initialize = function (initializeInput, completeCallback) { 17 | completeCallback(); 18 | }; 19 | 20 | RecordProcessor.prototype.processRecords = function (processRecordsInput, completeCallback) { 21 | if (!processRecordsInput || !processRecordsInput.records) { 22 | completeCallback(); 23 | return; 24 | } 25 | 26 | var records = processRecordsInput.records; 27 | var seq = records[0].sequenceNumber; 28 | var checkpointer = processRecordsInput.checkpointer; 29 | checkpointer.checkpoint(seq, function (err) { 30 | if (err) { 31 | checkpointer.checkpoint(seq, function (err) { 32 | completeCallback(); 33 | }); 34 | } else { 35 | completeCallback(); 36 | } 37 | }); 38 | }; 39 | 40 | RecordProcessor.prototype.leaseLost = function (shutdownInput, completeCallback) { 41 | completeCallback(); 42 | }; 43 | 44 | RecordProcessor.prototype.shardEnded = function (shardEndedInput, completeCallback) { 45 | completeCallback(); 46 | }; 47 | 48 | describe('kcl_process_tests', function () { 49 | var sandbox = null; 50 | var kclProcess = null; 51 | 52 | var initialize = { action: 'initialize', shardId: 'shardId-000000000001' }; 53 | var initializeString = JSON.stringify(initialize) + '\n'; 54 | var initializeResponse = JSON.stringify({ action: 'status', responseFor: initialize.action }); 55 | 56 | var processRecords = { 57 | action: 'processRecords', 58 | records: [{ 'data': 'bWVvdw==', 'partitionKey': 'cat', 'sequenceNumber': '456' }] 59 | }; 60 | var processRecordsString = JSON.stringify(processRecords) + '\n'; 61 | var processRecordsResponse = JSON.stringify({ action: 'status', responseFor: processRecords.action }); 62 | 63 | var checkpoint = { action: 'checkpoint', sequenceNumber: '456' }; 64 | var checkpointString = JSON.stringify(checkpoint); 65 | var checkpointResponse = JSON.stringify({ action: checkpoint.action, checkpoint: checkpoint.sequenceNumber }) + '\n'; 66 | 67 | var shardEnded = { action: 'shardEnded' }; 68 | var shardEndedString = JSON.stringify(shardEnded) + '\n'; 69 | var shardEndedResponse = JSON.stringify({ action: 'status', responseFor: shardEnded.action }); 70 | 71 | beforeEach(function () { 72 | kclProcess = kcl(new RecordProcessor()); 73 | sandbox = sinon.createSandbox(); 74 | }); 75 | 76 | afterEach(function () { 77 | kclProcess.cleanup(); 78 | sandbox.restore(); 79 | }); 80 | 81 | it('should initialize kcl and send back response', function (done) { 82 | // Since we can't know when run() would finish processing all inputs, creating a stub for last call in the chain to force verification. 83 | sandbox.stub(kclProcess._kclManager._actionHandler, 'sendAction').callsFake(function (data, callback) { 84 | let dataString = JSON.stringify(data); 85 | console.log('Got response: ' + dataString); 86 | callback(); 87 | 88 | // Verify that Initialize action was processed. 
89 | expect(dataString).to.equal(initializeResponse); 90 | done(); 91 | }); 92 | 93 | kclProcess.run(); 94 | process.stdin.emit('data', initializeString); 95 | }); 96 | 97 | it('should process records, checkpoint and then send back response', function (done) { 98 | sandbox.stub(kclProcess._kclManager._actionHandler, 'sendAction').callsFake(function (data, callback) { 99 | let dataString = JSON.stringify(data); 100 | console.log('Got response: ' + dataString); 101 | callback(); 102 | 103 | switch (data.action) { 104 | case 'status': 105 | expect(dataString).to.equal(processRecordsResponse); 106 | done(); 107 | break; 108 | case 'checkpoint': 109 | expect(dataString).to.equal(checkpointString); 110 | kclProcess._kclManager._ioHandler.emit('line', checkpointResponse); 111 | break; 112 | default: 113 | throw new Error('Should not happen ' + dataString); 114 | } 115 | }); 116 | 117 | kclProcess.run(); 118 | kclProcess._kclManager._ioHandler.emit('line', processRecordsString); 119 | }); 120 | 121 | it('should invoke shardEnd and send back response', function (done) { 122 | sandbox.stub(kclProcess._kclManager._actionHandler, 'sendAction').callsFake(function (data, callback) { 123 | let dataString = JSON.stringify(data); 124 | console.log('Got response: ' + dataString); 125 | expect(dataString).to.equal(shardEndedResponse); 126 | callback(); 127 | done(); 128 | }); 129 | 130 | kclProcess.run(); 131 | kclProcess._kclManager._ioHandler.emit('line', shardEndedString); 132 | }); 133 | 134 | it('should process Initialize, one or more processRecords and shutdown in order', function (done) { 135 | // Since we can't know when run() would finish processing all inputs, creating a stub for last call in the chain to force verification ! 136 | sandbox.stub(kclProcess._kclManager._actionHandler, 'sendAction').callsFake(function (data, callback) { 137 | let dataString = JSON.stringify(data); 138 | console.log('Got response: ' + dataString); 139 | callback(); 140 | // MultiLangDaemon never sends a message until it receives reply for previous operation 141 | // send next action based on previous response ! 142 | switch (data.action) { 143 | case 'status': 144 | switch (data.responseFor) { 145 | case 'initialize': 146 | expect(dataString).to.equal(initializeResponse); 147 | kclProcess._kclManager._ioHandler.emit('line', processRecordsString); 148 | break; 149 | case 'processRecords': 150 | expect(dataString).to.equal(processRecordsResponse); 151 | kclProcess._kclManager._ioHandler.emit('line', shardEndedString); 152 | break; 153 | case 'shardEnded': 154 | expect(dataString).to.equal(shardEndedResponse); 155 | done(); 156 | break; 157 | default: 158 | throw new Error('Should not happen ' + dataString); 159 | } 160 | break; 161 | case 'checkpoint': 162 | expect(dataString).to.equal(checkpointString); 163 | kclProcess._kclManager._ioHandler.emit('line', checkpointResponse); 164 | break; 165 | default: 166 | throw new Error('Should not happen ' + dataString); 167 | } 168 | }); 169 | 170 | kclProcess.run(); 171 | kclProcess._kclManager._ioHandler.emit('line', initializeString); 172 | }); 173 | 174 | it('should process checkpoint error from MultiLangDaemon', function (done) { 175 | // Since we can't know when run() would finish processing all inputs, creating a stub for last call in the chain to force verification ! 
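// The first checkpoint response injected below carries a ThrottlingException; the record
// processor retries once, so seen_checkpoint is expected to reach 2 by the time the shard ends.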
176 | sandbox.stub(kclProcess._kclManager._actionHandler, 'sendAction').callsFake(function (data, callback) { 177 | let dataString = JSON.stringify(data); 178 | console.log('Got response: ' + dataString); 179 | callback(); 180 | // MultiLangDaemon never sends a message until it receives reply for previous operation. 181 | // Send next action based on previous response. 182 | 183 | switch (data.action) { 184 | case 'status': 185 | switch (data.responseFor) { 186 | case 'initialize': 187 | expect(dataString).to.equal(initializeResponse); 188 | kclProcess._kclManager._ioHandler.emit('line', processRecordsString); 189 | console.log('Emitted ' + processRecordsString); 190 | break; 191 | case 'processRecords': 192 | expect(dataString).to.equal(processRecordsResponse); 193 | kclProcess._kclManager._ioHandler.emit('line', shardEndedString); 194 | break; 195 | case 'shardEnded': 196 | // Checkpoint should have been retried. 197 | expect(this.seen_checkpoint).to.equal(2); 198 | expect(dataString).to.equal(shardEndedResponse); 199 | done(); 200 | break; 201 | default: 202 | throw new Error('Should not happen ' + dataString); 203 | } 204 | break; 205 | case 'checkpoint': 206 | expect(dataString).to.equal(checkpointString); 207 | if (this.seen_checkpoint === undefined) { 208 | this.seen_checkpoint = 1; 209 | var errorResponse = JSON.parse(checkpointResponse); 210 | errorResponse.error = 'ThrottlingException'; 211 | kclProcess._kclManager._ioHandler.emit('line', JSON.stringify(errorResponse) + '\n'); 212 | } else { 213 | this.seen_checkpoint++; 214 | kclProcess._kclManager._ioHandler.emit('line', checkpointResponse); 215 | } 216 | break; 217 | default: 218 | throw new Error('Should not happen ' + dataString); 219 | } 220 | }); 221 | 222 | kclProcess.run(); 223 | kclProcess._kclManager._ioHandler.emit('line', initializeString); 224 | }); 225 | }); 226 | --------------------------------------------------------------------------------
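The tests above exercise the full record-processor contract (initialize, processRecords with checkpointing, leaseLost, shardEnded) against stubbed stdin/stdout framing. For orientation, a minimal standalone consumer wired through the same entry point might look like the sketch below. This is a hedged sketch, not a copy of the bundled sample: the aws-kcl package name and the decode/log lines are illustrative assumptions, and in a real deployment the MultiLangDaemon started by bin/kcl-bootstrap drives this process over stdin/stdout.

'use strict';

// Minimal sketch of a record processor, assuming the library is installed as "aws-kcl"
// (the tests above load the same module via require('../..')).
var kcl = require('aws-kcl');

var recordProcessor = {
  // Called once per shard before any records are delivered.
  initialize: function (initializeInput, completeCallback) {
    completeCallback();
  },

  processRecords: function (processRecordsInput, completeCallback) {
    if (!processRecordsInput || !processRecordsInput.records) {
      completeCallback();
      return;
    }
    var records = processRecordsInput.records;
    records.forEach(function (record) {
      // Record payloads arrive base64-encoded, as in the 'bWVvdw==' fixture above.
      var data = Buffer.from(record.data, 'base64').toString();
      // stdout carries the protocol with the MultiLangDaemon, so log to stderr (or to a
      // file, as samples/util/logger.js does) rather than with console.log.
      process.stderr.write('Processing record ' + record.sequenceNumber + ': ' + data + '\n');
    });
    // Checkpoint the last sequence number; retry once on error, mirroring the test processor.
    var seq = records[records.length - 1].sequenceNumber;
    processRecordsInput.checkpointer.checkpoint(seq, function (err) {
      if (err) {
        processRecordsInput.checkpointer.checkpoint(seq, function () {
          completeCallback();
        });
      } else {
        completeCallback();
      }
    });
  },

  // The lease moved to another worker; checkpointing is no longer possible for this shard.
  leaseLost: function (leaseLostInput, completeCallback) {
    completeCallback();
  },

  // The shard is closed; a production consumer would typically checkpoint here before completing.
  shardEnded: function (shardEndedInput, completeCallback) {
    completeCallback();
  }
};

kcl(recordProcessor).run();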