├── .gitignore ├── .rspec ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTORS ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── VERSION ├── docs └── index.asciidoc ├── lib └── logstash │ └── inputs │ ├── kinesis.rb │ └── kinesis │ └── worker.rb ├── logstash-input-kinesis.gemspec └── spec ├── inputs ├── kinesis │ └── worker_spec.rb └── kinesis_spec.rb └── spec_helper.rb /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | *.bundle 11 | *.so 12 | *.o 13 | *.a 14 | mkmf.log 15 | vendor/ 16 | /*.gem 17 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | import: 2 | - logstash-plugins/.ci:travis/defaults.yml@1.x 3 | - logstash-plugins/.ci:travis/exec.yml@1.x 4 | 5 | env: 6 | jobs: 7 | # lock on version 8.x because use of Jackson data bind 2.15.2 available from 8.9.0 8 | - ELASTIC_STACK_VERSION=8.x DOCKER_ENV=dockerjdk17.env 9 | - SNAPSHOT=true ELASTIC_STACK_VERSION=8.x DOCKER_ENV=dockerjdk17.env -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 2.3.0 2 | - Updated Kinesis client to `1.15.0`, now requires Logstash `>=8.9.0` [#102](https://github.com/logstash-plugins/logstash-input-kinesis/pull/102) 3 | 4 | ## 2.2.2 5 | - Change `http_proxy` config type to `password` for better protection from leaks in debug logs [#101](https://github.com/logstash-plugins/logstash-input-kinesis/pull/101) 6 | 7 | ## 2.2.1 8 | - Fix: Remove usage of 
`java_kind_of?` to support jruby-9.3.4.0 [#91](https://github.com/logstash-plugins/logstash-input-kinesis/pull/91) 9 | 10 | ## 2.2.0 11 | - Proxy support for Kinesis, DynamoDB, and CloudWatch clients [#84](https://github.com/logstash-plugins/logstash-input-kinesis/issues/84) 12 | - Upgrade AWS SDK for WebIdentityTokenCredentialsProvider Support [#82](https://github.com/logstash-plugins/logstash-input-kinesis/issues/82) 13 | - Don't overwrite existing event metadata [#69](https://github.com/logstash-plugins/logstash-input-kinesis/issues/69) 14 | - [Docs] additional_settings does not need comma [#75](https://github.com/logstash-plugins/logstash-input-kinesis/issues/75) 15 | 16 | ## 2.1.2 17 | - Fixed logging level setup to use JUL and JCL loggers backend [#87](https://github.com/logstash-plugins/logstash-input-kinesis/issues/87) 18 | 19 | ## 2.1.1 20 | - Added helpful content from readme to doc file for publishing [#63](https://github.com/logstash-plugins/logstash-input-kinesis/pull/63) 21 | 22 | ## 2.1.0 23 | - Changed role assumption to also assume role for interactions with dynamodb and cloudwatch [#66](https://github.com/logstash-plugins/logstash-input-kinesis/pull/66) 24 | 25 | ## 2.0.11 26 | - Added the ability to assume a role [#40](https://github.com/logstash-plugins/logstash-input-kinesis/pull/40) 27 | 28 | ## 2.0.10 29 | - Added the ability to set additional settings exposed through KinesisClientLibConfiguration [#51](https://github.com/logstash-plugins/logstash-input-kinesis/pull/51) 30 | 31 | ## 2.0.9 32 | - Changed the 'workerid' to also include the host of the Logstash node [#48](https://github.com/logstash-plugins/logstash-input-kinesis/pull/48) 33 | 34 | ## 2.0.8 35 | - Changed plugin to use more recent versions of Kinesis Client library and AWS SDK[#45](https://github.com/logstash-plugins/logstash-input-kinesis/pull/45) 36 | 37 | ## 2.0.7 38 | - Docs: Set the default_codec doc attribute. 
39 | 40 | ## 2.0.7 41 | - Update gemspec summary 42 | 43 | ## 2.0.6 44 | - Fix some documentation issues 45 | - Add support for `initial_position_in_stream` config parameter. `TRIM_HORIZON` and `LATEST` are supported. 46 | 47 | ## 2.0.5 48 | - Docs: Add CHANGELOG.md 49 | - Support for specifying an AWS credentials profile with the `profile` config parameter 50 | - Docs: Remove extraneous text added during doc extract 51 | 52 | ## 2.0.4 53 | - Docs: Bump version for automated doc build 54 | 55 | ## 2.0.3 56 | - Fix error about failed to coerce java.util.logging.Level to org.apache.log4j.Level with logstash 5.1.1 57 | 58 | ## 2.0.2 59 | - Fix error with Logstash 5.0 60 | 61 | ## 2.0.1 62 | - Add partition_key, approximate_arrival_timestamp and sequence_number fields in the @metadata sub-hash 63 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | The following is a list of people who have contributed ideas, code, bug 2 | reports, or in general have helped logstash along its way. 3 | 4 | Maintainers: 5 | * Brian Palmer (codekitchen) 6 | 7 | Contributors: 8 | * Brian Palmer (codekitchen) 9 | * Samuel García Martínez (samuelgmartinez) 10 | 11 | Note: If you've sent us patches, bug reports, or otherwise contributed to 12 | Logstash, and you aren't on the list above and want to be, please let us know 13 | and we'll make sure you're here. Contributions from folks like you are what make 14 | open source awesome.
15 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash" 6 | use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1" 7 | 8 | if Dir.exist?(logstash_path) && use_logstash_source 9 | gem 'logstash-core', :path => "#{logstash_path}/logstash-core" 10 | gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" 11 | end 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2020 Elastic and contributors 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Logstash AWS Kinesis Input Plugin 2 | 3 | [![Build Status](https://travis-ci.com/logstash-plugins/logstash-input-kinesis.svg)](https://travis-ci.com/logstash-plugins/logstash-input-kinesis) 4 | 5 | This is an [AWS Kinesis](http://docs.aws.amazon.com/kinesis/latest/dev/introduction.html) input plugin for [Logstash](https://github.com/elasticsearch/logstash). Under the hood, it uses the [Kinesis Client Library](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html). 6 | 7 | ## Installation 8 | 9 | This plugin requires Logstash >= 2.0, and can be installed by Logstash 10 | itself.
11 | 12 | ```sh 13 | bin/logstash-plugin install logstash-input-kinesis 14 | ``` 15 | 16 | ## Usage 17 | 18 | ``` 19 | input { 20 | kinesis { 21 | kinesis_stream_name => "my-logging-stream" 22 | codec => json { } 23 | } 24 | } 25 | ``` 26 | 27 | ### Using with CloudWatch Logs 28 | 29 | If you are looking to read a CloudWatch Logs subscription stream, you'll also want to install and configure the [CloudWatch Logs Codec](https://github.com/threadwaste/logstash-codec-cloudwatch_logs). 30 | 31 | ## Configuration 32 | 33 | These are the properties you can configure, along with their default values: 34 | 35 | * `application_name`: The name of the application used in DynamoDB for coordination. Only one worker per unique stream partition and application will be actively consuming messages. 36 | * **required**: false 37 | * **default value**: `logstash` 38 | * `kinesis_stream_name`: The Kinesis stream name. 39 | * **required**: true 40 | * `region`: The AWS region name for Kinesis, DynamoDB and Cloudwatch (if enabled) 41 | * **required**: false 42 | * **default value**: `us-east-1` 43 | * `checkpoint_interval_seconds`: How many seconds between worker checkpoints to DynamoDB. A low value usually means lower message replay in case of node failure/restart but it increases CPU+network usage (which increases the AWS costs). 44 | * **required**: false 45 | * **default value**: `60` 46 | * `metrics`: Worker metric tracking. By default this is disabled, set it to "cloudwatch" to enable the cloudwatch integration in the Kinesis Client Library. 47 | * **required**: false 48 | * **default value**: `nil` 49 | * `profile`: The AWS profile name for authentication. This ensures that the `~/.aws/credentials` AWS auth provider is used. By default this is empty and the default chain will be used. 50 | * **required**: false 51 | * `role_arn`: The AWS role to assume. This can be used, for example, to access a Kinesis stream in a different AWS 52 | account.
This role will be assumed after the default credentials or profile credentials are created. By default 53 | this is empty and a role will not be assumed. 54 | * **required**: false 55 | * `role_session_name`: Session name to use when assuming an IAM role. This is recorded in CloudTrail logs for example. 56 | * **required**: false 57 | * **default value**: `"logstash"` 58 | * `initial_position_in_stream`: The value for initialPositionInStream. Accepts "TRIM_HORIZON" or "LATEST". 59 | * **required**: false 60 | * **default value**: `"TRIM_HORIZON"` 61 | 62 | ### Additional KCL Settings 63 | * `additional_settings`: The KCL provides several configuration options which can be set in [KinesisClientLibConfiguration](https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/coordinator/KinesisClientLibConfiguration.java). These options are configured via various function calls that all begin with `with`. Some of these functions take complex types, which are not supported. However, you may invoke any one of the `withX()` functions that take a primitive by providing key-value pairs in `snake_case`. For example, to set the dynamodb read and write capacity values, two functions exist, withInitialLeaseTableReadCapacity and withInitialLeaseTableWriteCapacity. 
To set a value for these, provide a hash of `additional_settings => {"initial_lease_table_read_capacity" => 25, "initial_lease_table_write_capacity" => 100}` 64 | * **required**: false 65 | * **default value**: `{}` 66 | 67 | ## Authentication 68 | 69 | This plugin uses the default AWS SDK auth chain, [DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html), to determine which credentials the client will use, unless `profile` is set, in which case [ProfileCredentialsProvider](http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/profile/ProfileCredentialsProvider.html) is used. 70 | 71 | The default chain follows this order trying to read the credentials: 72 | * `AWS_ACCESS_KEY_ID` / `AWS_SECRET_KEY` environment variables 73 | * `~/.aws/credentials` credentials file 74 | * EC2 instance profile 75 | 76 | The credentials will need access to the following services: 77 | * AWS Kinesis 78 | * AWS DynamoDB: the client library stores information for worker coordination in DynamoDB (offsets and active worker per partition) 79 | * AWS CloudWatch: if the metrics are enabled the credentials need CloudWatch update permissions granted. 80 | 81 | Look at the [documentation](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) for deeper information on the default chain. 82 | 83 | ## Contributing 84 | 85 | 0. https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md#contribution-steps 86 | 1. Fork it ( https://github.com/logstash-plugins/logstash-input-kinesis/fork ) 87 | 2. Create your feature branch (`git checkout -b my-new-feature`) 88 | 3. Commit your changes (`git commit -am 'Add some feature'`) 89 | 4. Push to the branch (`git push origin my-new-feature`) 90 | 5.
Create a new Pull Request 91 | 92 | 93 | ## Development 94 | 95 | To download all jars: 96 | `bundler exec rake install_jars` 97 | 98 | To run all specs: 99 | `bundler exec rspec` -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require 'jars/version' 3 | 4 | begin 5 | require 'rspec/core/rake_task' 6 | RSpec::Core::RakeTask.new(:spec) 7 | rescue LoadError 8 | end 9 | 10 | task default: "spec" 11 | 12 | require 'jars/installer' 13 | desc 'Install the JAR dependencies to vendor/' 14 | task :install_jars do 15 | # We want jar-dependencies to download the jars and place them in 16 | # vendor/jar-dependencies/runtime-jars 17 | Jars::Installer.new.vendor_jars!(false, 'vendor/jar-dependencies/runtime-jars') 18 | end 19 | 20 | task build: :install_jars 21 | require "logstash/devutils/rake" 22 | task vendor: :install_jars 23 | 24 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 2.3.0 2 | -------------------------------------------------------------------------------- /docs/index.asciidoc: -------------------------------------------------------------------------------- 1 | :plugin: kinesis 2 | :type: input 3 | :default_codec: plain 4 | 5 | /////////////////////////////////////////// 6 | START - GENERATED VARIABLES, DO NOT EDIT! 7 | /////////////////////////////////////////// 8 | :version: %VERSION% 9 | :release_date: %RELEASE_DATE% 10 | :changelog_url: %CHANGELOG_URL% 11 | :include_path: ../../../../logstash/docs/include 12 | /////////////////////////////////////////// 13 | END - GENERATED VARIABLES, DO NOT EDIT!
14 | /////////////////////////////////////////// 15 | 16 | [id="plugins-{type}s-{plugin}"] 17 | 18 | === Kinesis input plugin 19 | 20 | include::{include_path}/plugin_header.asciidoc[] 21 | 22 | ==== Description 23 | 24 | You can use this plugin to receive events through 25 | http://docs.aws.amazon.com/kinesis/latest/dev/introduction.html[AWS Kinesis]. 26 | This plugin uses the http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html[Java Kinesis Client 27 | Library]. The documentation at 28 | https://github.com/awslabs/amazon-kinesis-client will be useful. 29 | 30 | AWS credentials can be specified either through environment variables, or an 31 | IAM instance role. The library uses a DynamoDB table for worker coordination, 32 | so you'll need to grant access to that as well as to the Kinesis stream. The 33 | DynamoDB table has the same name as the `application_name` configuration 34 | option, which defaults to "logstash". 35 | 36 | The library can optionally also send worker statistics to CloudWatch. 37 | 38 | [id="plugins-{type}s-{plugin}-usage"] 39 | ==== Usage 40 | 41 | [source,ruby] 42 | ----- 43 | input { 44 | kinesis { 45 | kinesis_stream_name => "my-logging-stream" 46 | codec => json { } 47 | } 48 | } 49 | ----- 50 | 51 | [id="plugins-{type}s-{plugin}-cloudwatch"] 52 | ==== Using with CloudWatch Logs 53 | 54 | If you want to read a CloudWatch Logs subscription stream, you'll also 55 | need to install and configure the 56 | https://github.com/threadwaste/logstash-codec-cloudwatch_logs[CloudWatch Logs 57 | Codec]. 
58 | 59 | [id="plugins-{type}s-{plugin}-authentication"] 60 | ==== Authentication 61 | 62 | This plugin uses the default AWS SDK auth chain, 63 | https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html[DefaultAWSCredentialsProviderChain], 64 | to determine which credentials the client will use, unless `profile` is set, in 65 | which case 66 | http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/profile/ProfileCredentialsProvider.html[ProfileCredentialsProvider] 67 | is used. 68 | 69 | The default chain reads the credentials in this order: 70 | 71 | * `AWS_ACCESS_KEY_ID` / `AWS_SECRET_KEY` environment variables 72 | * `~/.aws/credentials` credentials file 73 | * EC2 instance profile 74 | 75 | The credentials need access to the following services: 76 | 77 | * AWS Kinesis 78 | * AWS DynamoDB. The client library stores information for worker coordination in DynamoDB (offsets and active worker per partition) 79 | * AWS CloudWatch. If the metrics are enabled the credentials need CloudWatch update permissions granted. 80 | 81 | See the 82 | https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html[AWS documentation] 83 | for more information on the default chain. 84 | 85 | [id="plugins-{type}s-{plugin}-options"] 86 | ==== Kinesis Input Configuration Options 87 | 88 | This plugin supports the following configuration options plus the <> described later. 
89 | 90 | [cols="<,<,<",options="header",] 91 | |======================================================================= 92 | |Setting |Input type|Required 93 | | <<plugins-{type}s-{plugin}-application_name>> |<<string,string>>|No 94 | | <<plugins-{type}s-{plugin}-checkpoint_interval_seconds>> |<<number,number>>|No 95 | | <<plugins-{type}s-{plugin}-http_proxy>> |<<password,password>>|No 96 | | <<plugins-{type}s-{plugin}-initial_position_in_stream>> |<<string,string>>|No 97 | | <<plugins-{type}s-{plugin}-kinesis_stream_name>> |<<string,string>>|Yes 98 | | <<plugins-{type}s-{plugin}-metrics>> |<<string,string>>, one of `[nil, "cloudwatch"]`|No 99 | | <<plugins-{type}s-{plugin}-non_proxy_hosts>> |<<string,string>>|No 100 | | <<plugins-{type}s-{plugin}-profile>> |<<string,string>>|No 101 | | <<plugins-{type}s-{plugin}-region>> |<<string,string>>|No 102 | | <<plugins-{type}s-{plugin}-role_arn>> |<<string,string>>|No 103 | | <<plugins-{type}s-{plugin}-role_session_name>> |<<string,string>>|No 104 | | <<plugins-{type}s-{plugin}-additional_settings>> |<<hash,hash>>|No 105 | |======================================================================= 106 | 107 | Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all 108 | input plugins. 109 | 110 | &nbsp; 111 | 112 | [id="plugins-{type}s-{plugin}-application_name"] 113 | ===== `application_name` 114 | 115 | * Value type is <<string,string>> 116 | * Default value is `"logstash"` 117 | 118 | The application name used for the dynamodb coordination table. Must be 119 | unique for this kinesis stream. 120 | 121 | [id="plugins-{type}s-{plugin}-checkpoint_interval_seconds"] 122 | ===== `checkpoint_interval_seconds` 123 | 124 | * Value type is <<number,number>> 125 | * Default value is `60` 126 | 127 | How many seconds between worker checkpoints to dynamodb. 128 | 129 | [id="plugins-{type}s-{plugin}-http_proxy"] 130 | ===== `http_proxy` 131 | 132 | * Value type is <<password,password>> 133 | * There is no default value for this setting. 134 | 135 | Proxy support for Kinesis, DynamoDB, and CloudWatch (if enabled). 136 | 137 | [id="plugins-{type}s-{plugin}-initial_position_in_stream"] 138 | ===== `initial_position_in_stream` 139 | 140 | * Value type is <<string,string>> 141 | * Default value is `"TRIM_HORIZON"` 142 | 143 | The value for initialPositionInStream. Accepts "TRIM_HORIZON" or "LATEST". 144 | 145 | [id="plugins-{type}s-{plugin}-kinesis_stream_name"] 146 | ===== `kinesis_stream_name` 147 | 148 | * This is a required setting. 149 | * Value type is <<string,string>> 150 | * There is no default value for this setting. 151 | 152 | The kinesis stream name.
153 | 154 | [id="plugins-{type}s-{plugin}-metrics"] 155 | ===== `metrics` 156 | 157 | * Value can be any of: ``, `cloudwatch` 158 | * Default value is `nil` 159 | 160 | Worker metric tracking. By default this is disabled, set it to "cloudwatch" 161 | to enable the cloudwatch integration in the Kinesis Client Library. 162 | 163 | [id="plugins-{type}s-{plugin}-non_proxy_hosts"] 164 | ===== `non_proxy_hosts` 165 | 166 | * Value type is <<string,string>> 167 | * There is no default value for this setting. 168 | 169 | Hosts that should be excluded from proxying, separated by the "|" (pipe) character. 170 | 171 | [id="plugins-{type}s-{plugin}-profile"] 172 | ===== `profile` 173 | 174 | * Value type is <<string,string>> 175 | * There is no default value for this setting. 176 | 177 | The AWS profile name for authentication. 178 | This ensures that the `~/.aws/credentials` AWS auth provider is used. 179 | By default this is empty and the default chain will be used. 180 | 181 | [id="plugins-{type}s-{plugin}-region"] 182 | ===== `region` 183 | 184 | * Value type is <<string,string>> 185 | * Default value is `"us-east-1"` 186 | 187 | The AWS region for Kinesis, DynamoDB, and CloudWatch (if enabled) 188 | 189 | [id="plugins-{type}s-{plugin}-role_arn"] 190 | ===== `role_arn` 191 | 192 | * Value type is <<string,string>> 193 | * There is no default value for this setting. 194 | 195 | The AWS role to assume. This can be used, for example, to access a Kinesis stream in a different AWS 196 | account. This role will be assumed after the default credentials or profile credentials are created. By default 197 | this is empty and a role will not be assumed. 198 | 199 | [id="plugins-{type}s-{plugin}-role_session_name"] 200 | ===== `role_session_name` 201 | 202 | * Value type is <<string,string>> 203 | * Default value is `logstash` 204 | 205 | Session name to use when assuming an IAM role. This is recorded in CloudTrail logs for example.
206 | 207 | [id="plugins-{type}s-{plugin}-additional_settings"] 208 | ===== `additional_settings` 209 | 210 | * Value type is <<hash,hash>> 211 | * Default value is `{}` 212 | 213 | The KCL provides several configuration options which can be set in 214 | https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/coordinator/KinesisClientLibConfiguration.java[KinesisClientLibConfiguration]. 215 | These options are configured via various function calls that all begin with 216 | `with`. Some of these functions take complex types, which are not supported. 217 | However, you may invoke any one of the `withX()` functions that take a primitive 218 | by providing key-value pairs in `snake_case`. 219 | 220 | Example: 221 | 222 | To set the dynamodb read and write capacity values, use these functions: 223 | `withInitialLeaseTableReadCapacity` and `withInitialLeaseTableWriteCapacity`. 224 | 225 | [source,text] 226 | ---- 227 | additional_settings => {"initial_lease_table_read_capacity" => 25 "initial_lease_table_write_capacity" => 100} 228 | ---- 229 | 230 | 231 | [id="plugins-{type}s-{plugin}-common-options"] 232 | include::{include_path}/{type}.asciidoc[] 233 | 234 | :default_codec!: 235 | -------------------------------------------------------------------------------- /lib/logstash/inputs/kinesis.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require "socket" 4 | require "uri" 5 | require "logstash/inputs/base" 6 | require "logstash/errors" 7 | require "logstash/environment" 8 | require "logstash/namespace" 9 | 10 | require 'logstash-input-kinesis_jars' 11 | 12 | 13 | # Receive events through an AWS Kinesis stream. 14 | # 15 | # This input plugin uses the Java Kinesis Client Library underneath, so the 16 | # documentation at https://github.com/awslabs/amazon-kinesis-client will be 17 | # useful.
18 | # 19 | # AWS credentials can be specified either through environment variables, or an 20 | # IAM instance role. The library uses a DynamoDB table for worker coordination, 21 | # so you'll need to grant access to that as well as to the Kinesis stream. The 22 | # DynamoDB table has the same name as the `application_name` configuration 23 | # option, which defaults to "logstash". 24 | # 25 | # The library can optionally also send worker statistics to CloudWatch. 26 | class LogStash::Inputs::Kinesis < LogStash::Inputs::Base 27 | KCL = com.amazonaws.services.kinesis.clientlibrary.lib.worker 28 | KCL_PROCESSOR_FACTORY_CLASS = com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory 29 | require "logstash/inputs/kinesis/worker" 30 | 31 | config_name 'kinesis' 32 | 33 | attr_reader( 34 | :kcl_config, 35 | :kcl_worker, 36 | ) 37 | 38 | # The application name used for the dynamodb coordination table. Must be 39 | # unique for this kinesis stream. 40 | config :application_name, :validate => :string, :default => "logstash" 41 | 42 | # The kinesis stream name. 43 | config :kinesis_stream_name, :validate => :string, :required => true 44 | 45 | # The AWS region for Kinesis, DynamoDB, and CloudWatch (if enabled) 46 | config :region, :validate => :string, :default => "us-east-1" 47 | 48 | # How many seconds between worker checkpoints to dynamodb. 49 | config :checkpoint_interval_seconds, :validate => :number, :default => 60 50 | 51 | # Worker metric tracking. By default this is disabled, set it to "cloudwatch" 52 | # to enable the cloudwatch integration in the Kinesis Client Library. 53 | config :metrics, :validate => [nil, "cloudwatch"], :default => nil 54 | 55 | # Select AWS profile for input 56 | config :profile, :validate => :string 57 | 58 | # The AWS IAM Role to assume, if any. 59 | # This is used to generate temporary credentials typically for cross-account access. 
60 | # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information. 61 | config :role_arn, :validate => :string 62 | 63 | # Session name to use when assuming an IAM role 64 | config :role_session_name, :validate => :string, :default => "logstash" 65 | 66 | # Select initial_position_in_stream. Accepts TRIM_HORIZON or LATEST 67 | config :initial_position_in_stream, :validate => ["TRIM_HORIZON", "LATEST"], :default => "TRIM_HORIZON" 68 | 69 | # Any additional arbitrary kcl options configurable in the KinesisClientLibConfiguration 70 | config :additional_settings, :validate => :hash, :default => {} 71 | 72 | # Proxy for Kinesis, DynamoDB, and CloudWatch (if enabled) 73 | config :http_proxy, :validate => :password, :default => nil 74 | 75 | # Hosts that should be excluded from proxying 76 | config :non_proxy_hosts, :validate => :string, :default => nil 77 | 78 | def initialize(params = {}) 79 | super(params) 80 | end 81 | 82 | def register 83 | # the INFO log level is extremely noisy in KCL 84 | lg = org.apache.commons.logging::LogFactory.getLog("com.amazonaws.services.kinesis") 85 | if lg.kind_of?(org.apache.commons.logging.impl::Jdk14Logger) 86 | kinesis_logger = lg.logger 87 | if kinesis_logger.kind_of?(java.util.logging::Logger) 88 | kinesis_logger.setLevel(java.util.logging::Level::WARNING) 89 | else 90 | kinesis_logger.setLevel(org.apache.log4j::Level::WARN) 91 | end 92 | elsif lg.kind_of?(org.apache.logging.log4jJcl::Log4jLog) 93 | logContext = org.apache.logging.log4j::LogManager.getContext(false) 94 | config = logContext.getConfiguration() 95 | config.getLoggerConfig("com.amazonaws.services.kinesis").setLevel(org.apache.logging.log4j::Level::WARN) 96 | else 97 | raise "Can't configure WARN log level for logger wrapper class #{lg.class}" 98 | end 99 | 100 | @logger.info("Registering logstash-input-kinesis") 101 | 102 | hostname = Socket.gethostname 103 | uuid = java.util::UUID.randomUUID.to_s 104 | worker_id = 
"#{hostname}:#{uuid}" 105 | 106 | # If the AWS profile is set, use the profile credentials provider. 107 | # Otherwise fall back to the default chain. 108 | unless @profile.nil? 109 | creds = com.amazonaws.auth.profile::ProfileCredentialsProvider.new(@profile) 110 | else 111 | creds = com.amazonaws.auth::DefaultAWSCredentialsProviderChain.new 112 | end 113 | 114 | # If a role ARN is set then assume the role as a new layer over the credentials already created 115 | unless @role_arn.nil? 116 | kinesis_creds = com.amazonaws.auth::STSAssumeRoleSessionCredentialsProvider.new(creds, @role_arn, @role_session_name) 117 | else 118 | kinesis_creds = creds 119 | end 120 | 121 | initial_position_in_stream = if @initial_position_in_stream == "TRIM_HORIZON" 122 | KCL::InitialPositionInStream::TRIM_HORIZON 123 | else 124 | KCL::InitialPositionInStream::LATEST 125 | end 126 | 127 | @kcl_config = KCL::KinesisClientLibConfiguration.new( 128 | @application_name, 129 | @kinesis_stream_name, 130 | kinesis_creds, # credential provider for Kinesis, DynamoDB and Cloudwatch access 131 | worker_id). 132 | withInitialPositionInStream(initial_position_in_stream). 133 | withRegionName(@region) 134 | 135 | # Call arbitrary "withX()" functions 136 | # snake_case => withCamelCase happens automatically 137 | @additional_settings.each do |key, value| 138 | fn = "with_#{key}" 139 | @kcl_config.send(fn, value) 140 | end 141 | 142 | if @http_proxy && !@http_proxy.value.to_s.strip.empty? 
143 | proxy_uri = URI(@http_proxy.value) 144 | @logger.info("Using proxy #{proxy_uri.scheme}://#{proxy_uri.user}:*****@#{proxy_uri.host}:#{proxy_uri.port}") 145 | clnt_cfg = @kcl_config.get_kinesis_client_configuration 146 | set_client_proxy_settings(clnt_cfg, proxy_uri) 147 | clnt_cfg = @kcl_config.get_dynamo_db_client_configuration 148 | set_client_proxy_settings(clnt_cfg, proxy_uri) 149 | clnt_cfg = @kcl_config.get_cloud_watch_client_configuration 150 | set_client_proxy_settings(clnt_cfg, proxy_uri) 151 | end 152 | 153 | @logger.info("Registered logstash-input-kinesis") 154 | end 155 | 156 | def run(output_queue) 157 | @kcl_worker = kcl_builder(output_queue).build 158 | @kcl_worker.run 159 | end 160 | 161 | def kcl_builder(output_queue) 162 | KCL::Worker::Builder.new.tap do |builder| 163 | builder.java_send(:recordProcessorFactory, [KCL_PROCESSOR_FACTORY_CLASS.java_class], worker_factory(output_queue)) 164 | builder.config(@kcl_config) 165 | 166 | if metrics_factory 167 | builder.metricsFactory(metrics_factory) 168 | end 169 | end 170 | end 171 | 172 | def stop 173 | @kcl_worker.shutdown if @kcl_worker 174 | end 175 | 176 | def worker_factory(output_queue) 177 | proc { Worker.new(@codec.clone, output_queue, method(:decorate), @checkpoint_interval_seconds, @logger) } 178 | end 179 | 180 | protected 181 | 182 | def metrics_factory 183 | case @metrics 184 | when nil 185 | com.amazonaws.services.kinesis.metrics.impl::NullMetricsFactory.new 186 | when 'cloudwatch' 187 | nil # default in the underlying library 188 | end 189 | end 190 | 191 | def set_client_proxy_settings(clnt_cfg, proxy_uri) 192 | protocol = nil 193 | case proxy_uri.scheme 194 | when "http" 195 | protocol = com.amazonaws.Protocol::HTTP 196 | when "https" 197 | protocol = com.amazonaws.Protocol::HTTPS 198 | end 199 | clnt_cfg.set_proxy_protocol(protocol) if protocol 200 | clnt_cfg.set_proxy_username(proxy_uri.user) 201 | clnt_cfg.set_proxy_password(proxy_uri.password) 202 | 
clnt_cfg.set_proxy_host(proxy_uri.host) 203 | clnt_cfg.set_proxy_port(proxy_uri.port) 204 | clnt_cfg.set_non_proxy_hosts(@non_proxy_hosts) unless @non_proxy_hosts.to_s.empty? 205 | end 206 | end 207 | -------------------------------------------------------------------------------- /lib/logstash/inputs/kinesis/worker.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | class LogStash::Inputs::Kinesis::Worker 3 | include com.amazonaws.services.kinesis.clientlibrary.interfaces.v2::IRecordProcessor 4 | 5 | attr_reader( 6 | :checkpoint_interval, 7 | :codec, 8 | :decorator, 9 | :logger, 10 | :output_queue, 11 | ) 12 | 13 | def initialize(*args) 14 | # nasty hack, because this is the name of a method on IRecordProcessor, but also ruby's constructor 15 | if !@constructed 16 | @codec, @output_queue, @decorator, @checkpoint_interval, @logger = args 17 | @next_checkpoint = Time.now - 600 18 | @constructed = true 19 | else 20 | _shard_id = args[0].shardId 21 | end 22 | end 23 | public :initialize 24 | 25 | def processRecords(records_input) 26 | records_input.records.each { |record| process_record(record) } 27 | if Time.now >= @next_checkpoint 28 | checkpoint(records_input.checkpointer) 29 | @next_checkpoint = Time.now + @checkpoint_interval 30 | end 31 | end 32 | 33 | def shutdown(shutdown_input) 34 | if shutdown_input.shutdown_reason == com.amazonaws.services.kinesis.clientlibrary.lib.worker::ShutdownReason::TERMINATE 35 | checkpoint(shutdown_input.checkpointer) 36 | end 37 | end 38 | 39 | protected 40 | 41 | def checkpoint(checkpointer) 42 | checkpointer.checkpoint() 43 | rescue => error 44 | @logger.error("Kinesis worker failed checkpointing: #{error}") 45 | end 46 | 47 | def process_record(record) 48 | raw = String.from_java_bytes(record.getData.array) 49 | metadata = build_metadata(record) 50 | @codec.decode(raw) do |event| 51 | @decorator.call(event) 52 | event.set('@metadata', 
event.get('@metadata').merge(metadata)) 53 | @output_queue << event 54 | end 55 | rescue => error 56 | @logger.error("Error processing record: #{error}") 57 | end 58 | 59 | def build_metadata(record) 60 | metadata = Hash.new 61 | metadata['approximate_arrival_timestamp'] = record.getApproximateArrivalTimestamp.getTime 62 | metadata['partition_key'] = record.getPartitionKey 63 | metadata['sequence_number'] = record.getSequenceNumber 64 | metadata 65 | end 66 | 67 | end 68 | -------------------------------------------------------------------------------- /logstash-input-kinesis.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | version = File.read(File.expand_path(File.join(File.dirname(__FILE__), "VERSION"))).strip 3 | 4 | Gem::Specification.new do |spec| 5 | spec.name = "logstash-input-kinesis" 6 | spec.version = version 7 | spec.authors = ["Brian Palmer"] 8 | spec.email = ["brian@codekitchen.net"] 9 | spec.summary = "Receives events through an AWS Kinesis stream" 10 | spec.description = %q{This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program} 11 | spec.homepage = "https://github.com/logstash-plugins/logstash-input-kinesis" 12 | spec.licenses = ['Apache-2.0'] 13 | 14 | spec.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] 15 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 16 | spec.require_paths = ['lib', 'vendor/jar-dependencies/runtime-jars'] 17 | 18 | # Special flag to let us know this is actually a logstash plugin 19 | spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } 20 | 21 | spec.platform = 'java' 22 | 23 | spec.add_runtime_dependency 'logstash-core', '>= 8.9.0' 24 | 25 | spec.requirements << "jar 'com.amazonaws:amazon-kinesis-client', '1.15.0'" 26 | spec.requirements << "jar 'com.amazonaws:aws-java-sdk-core', '1.12.408'" 27 | spec.requirements << "jar 'com.amazonaws:aws-java-sdk-sts', '1.12.408'" 28 | 29 | spec.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" 30 | 31 | spec.add_development_dependency 'logstash-devutils' 32 | spec.add_development_dependency 'jar-dependencies', '~> 0.4' 33 | spec.add_development_dependency "logstash-codec-json" 34 | end 35 | -------------------------------------------------------------------------------- /spec/inputs/kinesis/worker_spec.rb: -------------------------------------------------------------------------------- 1 | require 'logstash-core/logstash-core' 2 | require 'logstash-input-kinesis_jars' 3 | require "logstash/plugin" 4 | require "logstash/inputs/kinesis" 5 | require "logstash/codecs/json" 6 | require "json" 7 | 8 | RSpec.describe "LogStash::Inputs::Kinesis::Worker" do 9 | KCL_TYPES = com.amazonaws.services.kinesis.clientlibrary.types 10 | 11 | subject!(:worker) { LogStash::Inputs::Kinesis::Worker.new(codec, queue, decorator, checkpoint_interval) } 12 | let(:codec) { LogStash::Codecs::JSON.new() } 13 | let(:queue) { Queue.new } 14 | let(:decorator) { proc { |x| x.set('decorated', 
true); x } } 15 | let(:checkpoint_interval) { 120 } 16 | let(:checkpointer) { double('checkpointer', checkpoint: nil) } 17 | let(:init_input) { KCL_TYPES::InitializationInput.new().withShardId("xyz") } 18 | 19 | it "honors the initialize java interface method contract" do 20 | expect { worker.initialize(init_input) }.to_not raise_error 21 | end 22 | 23 | def record(hash = { "message" => "test" }, arrival_timestamp, partition_key, sequence_number) 24 | encoder = java.nio.charset::Charset.forName("UTF-8").newEncoder() 25 | data = encoder.encode(java.nio.CharBuffer.wrap(JSON.generate(hash))) 26 | double( 27 | getData: data, 28 | getApproximateArrivalTimestamp: java.util.Date.new(arrival_timestamp.to_f * 1000), 29 | getPartitionKey: partition_key, 30 | getSequenceNumber: sequence_number 31 | ) 32 | end 33 | 34 | let(:process_input) { 35 | KCL_TYPES::ProcessRecordsInput.new() 36 | .withRecords(java.util.Arrays.asList([ 37 | record( 38 | { 39 | id: "record1", 40 | message: "test1" 41 | }, 42 | '1.441215410867E9', 43 | 'partitionKey1', 44 | '21269319989652663814458848515492872191' 45 | ), 46 | record( 47 | { 48 | '@metadata' => { 49 | forwarded: 'record2' 50 | }, 51 | id: "record2", 52 | message: "test2" 53 | }, 54 | '1.441215410868E9', 55 | 'partitionKey2', 56 | '21269319989652663814458848515492872192' 57 | )].to_java) 58 | ) 59 | .withCheckpointer(checkpointer) 60 | } 61 | let(:collide_metadata_process_input) { 62 | KCL_TYPES::ProcessRecordsInput.new() 63 | .withRecords(java.util.Arrays.asList([ 64 | record( 65 | { 66 | '@metadata' => { 67 | forwarded: 'record3', 68 | partition_key: 'invalid_key' 69 | }, 70 | id: "record3", 71 | message: "test3" 72 | }, 73 | '1.441215410869E9', 74 | 'partitionKey3', 75 | '21269319989652663814458848515492872193' 76 | )].to_java) 77 | ) 78 | .withCheckpointer(checkpointer) 79 | } 80 | let(:empty_process_input) { 81 | KCL_TYPES::ProcessRecordsInput.new() 82 | .withRecords(java.util.Arrays.asList([].to_java)) 83 | 
.withCheckpointer(checkpointer) 84 | } 85 | 86 | context "initialized" do 87 | before do 88 | worker.initialize(init_input) 89 | end 90 | 91 | describe "#processRecords" do 92 | it "decodes and queues each record with decoration" do 93 | worker.processRecords(process_input) 94 | expect(queue.size).to eq(2) 95 | m1 = queue.pop 96 | m2 = queue.pop 97 | expect(m1).to be_kind_of(LogStash::Event) 98 | expect(m2).to be_kind_of(LogStash::Event) 99 | expect(m1.get('id')).to eq("record1") 100 | expect(m1.get('message')).to eq("test1") 101 | expect(m1.get('@metadata')['approximate_arrival_timestamp']).to eq(1441215410867) 102 | expect(m1.get('@metadata')['partition_key']).to eq('partitionKey1') 103 | expect(m1.get('@metadata')['sequence_number']).to eq('21269319989652663814458848515492872191') 104 | expect(m1.get('decorated')).to eq(true) 105 | end 106 | 107 | it "decodes and keeps submitted metadata" do 108 | worker.processRecords(process_input) 109 | expect(queue.size).to eq(2) 110 | m1 = queue.pop 111 | m2 = queue.pop 112 | expect(m1).to be_kind_of(LogStash::Event) 113 | expect(m2).to be_kind_of(LogStash::Event) 114 | expect(m1.get('@metadata')['forwarded']).to eq(nil) 115 | expect(m2.get('@metadata')['forwarded']).to eq('record2') 116 | end 117 | 118 | it "decodes and does not allow submitted metadata to overwrite internal keys" do 119 | worker.processRecords(collide_metadata_process_input) 120 | expect(queue.size).to eq(1) 121 | m1 = queue.pop 122 | expect(m1).to be_kind_of(LogStash::Event) 123 | expect(m1.get('@metadata')['forwarded']).to eq('record3') 124 | expect(m1.get('@metadata')['partition_key']).to eq('partitionKey3') 125 | end 126 | 127 | it "checkpoints on interval" do 128 | expect(checkpointer).to receive(:checkpoint).once 129 | worker.processRecords(empty_process_input) 130 | 131 | # not this time 132 | worker.processRecords(empty_process_input) 133 | 134 | allow(Time).to receive(:now).and_return(Time.now + 125) 135 | expect(checkpointer).to 
receive(:checkpoint).once 136 | worker.processRecords(empty_process_input) 137 | end 138 | end 139 | 140 | describe "#shutdown" do 141 | it "checkpoints on termination" do 142 | input = KCL_TYPES::ShutdownInput.new 143 | checkpointer = double('checkpointer') 144 | expect(checkpointer).to receive(:checkpoint) 145 | input. 146 | with_shutdown_reason(com.amazonaws.services.kinesis.clientlibrary.lib.worker::ShutdownReason::TERMINATE). 147 | with_checkpointer(checkpointer) 148 | worker.shutdown(input) 149 | end 150 | end 151 | end 152 | end 153 | -------------------------------------------------------------------------------- /spec/inputs/kinesis_spec.rb: -------------------------------------------------------------------------------- 1 | require "logstash/plugin" 2 | require "logstash/inputs/kinesis" 3 | require "logstash/codecs/json" 4 | 5 | RSpec.describe "inputs/kinesis" do 6 | KCL = com.amazonaws.services.kinesis.clientlibrary.lib.worker 7 | 8 | let(:config) {{ 9 | "application_name" => "my-processor", 10 | "kinesis_stream_name" => "run-specs", 11 | "codec" => codec, 12 | "metrics" => metrics, 13 | "checkpoint_interval_seconds" => 120, 14 | "region" => "ap-southeast-1", 15 | "profile" => nil 16 | }} 17 | 18 | # Config hash to test credentials provider to be used if profile is specified 19 | let(:config_with_profile) {{ 20 | "application_name" => "my-processor", 21 | "kinesis_stream_name" => "run-specs", 22 | "codec" => codec, 23 | "metrics" => metrics, 24 | "checkpoint_interval_seconds" => 120, 25 | "region" => "ap-southeast-1", 26 | "profile" => "my-aws-profile" 27 | }} 28 | 29 | # Config hash to test assume role provider if role_arn is specified 30 | let(:config_with_role_arn) {{ 31 | "application_name" => "my-processor", 32 | "kinesis_stream_name" => "run-specs", 33 | "codec" => codec, 34 | "metrics" => metrics, 35 | "checkpoint_interval_seconds" => 120, 36 | "region" => "ap-southeast-1", 37 | "role_arn" => "arn:aws:iam::???????????:role/my-role" 38 | }} 39 | 40 
| # other config with LATEST as initial_position_in_stream 41 | let(:config_with_latest) {{ 42 | "application_name" => "my-processor", 43 | "kinesis_stream_name" => "run-specs", 44 | "codec" => codec, 45 | "metrics" => metrics, 46 | "checkpoint_interval_seconds" => 120, 47 | "region" => "ap-southeast-1", 48 | "profile" => nil, 49 | "initial_position_in_stream" => "LATEST" 50 | }} 51 | 52 | # Config hash to test valid additional_settings 53 | let(:config_with_valid_additional_settings) {{ 54 | "application_name" => "my-processor", 55 | "kinesis_stream_name" => "run-specs", 56 | "codec" => codec, 57 | "metrics" => metrics, 58 | "checkpoint_interval_seconds" => 120, 59 | "region" => "ap-southeast-1", 60 | "profile" => nil, 61 | "additional_settings" => { 62 | "initial_lease_table_read_capacity" => 25, 63 | "initial_lease_table_write_capacity" => 100, 64 | "kinesis_endpoint" => "http://localhost" 65 | } 66 | }} 67 | 68 | # Config with proxy 69 | let(:config_with_proxy) {{ 70 | "application_name" => "my-processor", 71 | "kinesis_stream_name" => "run-specs", 72 | "codec" => codec, 73 | "metrics" => metrics, 74 | "checkpoint_interval_seconds" => 120, 75 | "region" => "ap-southeast-1", 76 | "profile" => nil, 77 | "http_proxy" => ::LogStash::Util::Password.new("http://user1:pwd1@proxy.example.com:3128/"), 78 | "non_proxy_hosts" => "127.0.0.5", 79 | }} 80 | 81 | # Config hash to test invalid additional_settings where the name is not found 82 | let(:config_with_invalid_additional_settings_name_not_found) {{ 83 | "application_name" => "my-processor", 84 | "kinesis_stream_name" => "run-specs", 85 | "codec" => codec, 86 | "metrics" => metrics, 87 | "checkpoint_interval_seconds" => 120, 88 | "region" => "ap-southeast-1", 89 | "profile" => nil, 90 | "additional_settings" => { 91 | "foo" => "bar" 92 | } 93 | }} 94 | 95 | # Config hash to test invalid additional_settings where the type is complex or wrong 96 | let(:config_with_invalid_additional_settings_wrong_type) {{ 97 | 
"application_name" => "my-processor", 98 | "kinesis_stream_name" => "run-specs", 99 | "codec" => codec, 100 | "metrics" => metrics, 101 | "checkpoint_interval_seconds" => 120, 102 | "region" => "ap-southeast-1", 103 | "profile" => nil, 104 | "additional_settings" => { 105 | "metrics_level" => "invalid_metrics_level" 106 | } 107 | }} 108 | 109 | subject!(:kinesis) { LogStash::Inputs::Kinesis.new(config) } 110 | let(:kcl_worker) { double('kcl_worker') } 111 | let(:stub_builder) { double('kcl_builder', build: kcl_worker) } 112 | let(:metrics) { nil } 113 | let(:codec) { LogStash::Codecs::JSON.new() } 114 | let(:queue) { Queue.new } 115 | 116 | it "registers without error" do 117 | input = LogStash::Plugin.lookup("input", "kinesis").new("kinesis_stream_name" => "specs", "codec" => codec) 118 | expect { input.register }.to_not raise_error 119 | end 120 | 121 | it "configures the KCL" do 122 | kinesis.register 123 | expect(kinesis.kcl_config.applicationName).to eq("my-processor") 124 | expect(kinesis.kcl_config.streamName).to eq("run-specs") 125 | expect(kinesis.kcl_config.regionName).to eq("ap-southeast-1") 126 | expect(kinesis.kcl_config.initialPositionInStream).to eq(KCL::InitialPositionInStream::TRIM_HORIZON) 127 | expect(kinesis.kcl_config.get_kinesis_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.DefaultAWSCredentialsProviderChain") 128 | end 129 | 130 | subject!(:kinesis_with_profile) { LogStash::Inputs::Kinesis.new(config_with_profile) } 131 | 132 | it "uses ProfileCredentialsProvider if profile is specified" do 133 | kinesis_with_profile.register 134 | expect(kinesis_with_profile.kcl_config.get_kinesis_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.profile.ProfileCredentialsProvider") 135 | end 136 | 137 | subject!(:kinesis_with_role_arn) { LogStash::Inputs::Kinesis.new(config_with_role_arn) } 138 | 139 | it "uses STS for accessing the kinesis stream if role_arn is specified" do 140 | kinesis_with_role_arn.register 141 | 
expect(kinesis_with_role_arn.kcl_config.get_kinesis_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider") 142 | expect(kinesis_with_role_arn.kcl_config.get_dynamo_db_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider") 143 | expect(kinesis_with_role_arn.kcl_config.get_cloud_watch_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider") 144 | end 145 | 146 | subject!(:kinesis_with_latest) { LogStash::Inputs::Kinesis.new(config_with_latest) } 147 | 148 | it "configures the KCL" do 149 | kinesis_with_latest.register 150 | expect(kinesis_with_latest.kcl_config.applicationName).to eq("my-processor") 151 | expect(kinesis_with_latest.kcl_config.streamName).to eq("run-specs") 152 | expect(kinesis_with_latest.kcl_config.regionName).to eq("ap-southeast-1") 153 | expect(kinesis_with_latest.kcl_config.initialPositionInStream).to eq(KCL::InitialPositionInStream::LATEST) 154 | expect(kinesis_with_latest.kcl_config.get_kinesis_credentials_provider.getClass.to_s).to eq("com.amazonaws.auth.DefaultAWSCredentialsProviderChain") 155 | end 156 | 157 | subject!(:kinesis_with_valid_additional_settings) { LogStash::Inputs::Kinesis.new(config_with_valid_additional_settings) } 158 | 159 | it "configures the KCL" do 160 | kinesis_with_valid_additional_settings.register 161 | expect(kinesis_with_valid_additional_settings.kcl_config.applicationName).to eq("my-processor") 162 | expect(kinesis_with_valid_additional_settings.kcl_config.streamName).to eq("run-specs") 163 | expect(kinesis_with_valid_additional_settings.kcl_config.regionName).to eq("ap-southeast-1") 164 | expect(kinesis_with_valid_additional_settings.kcl_config.initialLeaseTableReadCapacity).to eq(25) 165 | expect(kinesis_with_valid_additional_settings.kcl_config.initialLeaseTableWriteCapacity).to eq(100) 166 | expect(kinesis_with_valid_additional_settings.kcl_config.kinesisEndpoint).to 
eq("http://localhost") 167 | end 168 | 169 | subject!(:kinesis_with_proxy) { LogStash::Inputs::Kinesis.new(config_with_proxy) } 170 | 171 | it "configures the KCL with proxy settings" do 172 | kinesis_with_proxy.register 173 | clnt_config = kinesis_with_proxy.kcl_config.kinesis_client_configuration 174 | expect(clnt_config.get_proxy_username).to eq("user1") 175 | expect(clnt_config.get_proxy_host).to eq("proxy.example.com") 176 | expect(clnt_config.get_proxy_port).to eq(3128) 177 | expect(clnt_config.get_non_proxy_hosts).to eq("127.0.0.5") 178 | end 179 | 180 | subject!(:kinesis_with_invalid_additional_settings_name_not_found) { LogStash::Inputs::Kinesis.new(config_with_invalid_additional_settings_name_not_found) } 181 | 182 | it "raises NoMethodError for invalid configuration options" do 183 | expect{ kinesis_with_invalid_additional_settings_name_not_found.register }.to raise_error(NoMethodError) 184 | end 185 | 186 | 187 | subject!(:kinesis_with_invalid_additional_settings_wrong_type) { LogStash::Inputs::Kinesis.new(config_with_invalid_additional_settings_wrong_type) } 188 | 189 | it "raises an error for invalid configuration values such as the wrong type" do 190 | expect{ kinesis_with_invalid_additional_settings_wrong_type.register }.to raise_error(Java::JavaLang::IllegalArgumentException) 191 | end 192 | 193 | 194 | context "#run" do 195 | it "runs the KCL worker" do 196 | expect(kinesis).to receive(:kcl_builder).with(queue).and_return(stub_builder) 197 | expect(kcl_worker).to receive(:run).with(no_args) 198 | builder = kinesis.run(queue) 199 | end 200 | end 201 | 202 | context "#stop" do 203 | it "stops the KCL worker" do 204 | expect(kinesis).to receive(:kcl_builder).with(queue).and_return(stub_builder) 205 | expect(kcl_worker).to receive(:run).with(no_args) 206 | expect(kcl_worker).to receive(:shutdown).with(no_args) 207 | kinesis.run(queue) 208 | kinesis.do_stop # do_stop calls stop internally 209 | end 210 | end 211 | 212 | context "#worker_factory" do 213 
| it "clones the codec for each worker" do 214 | worker = kinesis.worker_factory(queue).call() 215 | expect(worker).to be_kind_of(LogStash::Inputs::Kinesis::Worker) 216 | expect(worker.codec).to_not eq(kinesis.codec) 217 | expect(worker.codec).to be_kind_of(codec.class) 218 | end 219 | 220 | it "generates a valid worker" do 221 | worker = kinesis.worker_factory(queue).call() 222 | 223 | expect(worker.codec).to be_kind_of(codec.class) 224 | expect(worker.checkpoint_interval).to eq(120) 225 | expect(worker.output_queue).to eq(queue) 226 | expect(worker.decorator).to eq(kinesis.method(:decorate)) 227 | expect(worker.logger).to eq(kinesis.logger) 228 | end 229 | end 230 | 231 | # these tests are heavily dependent on the current Worker::Builder 232 | # implementation because its state is all private 233 | context "#kcl_builder" do 234 | let(:builder) { kinesis.kcl_builder(queue) } 235 | 236 | it "sets the worker factory" do 237 | expect(field(builder, "recordProcessorFactory")).to_not eq(nil) 238 | end 239 | 240 | it "sets the config" do 241 | kinesis.register 242 | config = field(builder, "config") 243 | expect(config).to eq(kinesis.kcl_config) 244 | end 245 | 246 | it "disables metric tracking by default" do 247 | expect(field(builder, "metricsFactory")).to be_kind_of(com.amazonaws.services.kinesis.metrics.impl::NullMetricsFactory) 248 | end 249 | 250 | context "cloudwatch" do 251 | let(:metrics) { "cloudwatch" } 252 | it "uses cloudwatch metrics if specified" do 253 | # since the behaviour is enclosed on private methods it is not testable. 
So here 254 | # the expected value can be tested, not the result associated to set this value 255 | expect(field(builder, "metricsFactory")).to eq(nil) 256 | end 257 | end 258 | end 259 | 260 | def field(obj, name) 261 | field = obj.java_class.declared_field(name) 262 | field.accessible = true 263 | field.value(obj) 264 | end 265 | end 266 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | RSpec.configure do |config| 2 | config.expect_with :rspec do |expectations| 3 | expectations.include_chain_clauses_in_custom_matcher_descriptions = true 4 | end 5 | 6 | config.mock_with :rspec do |mocks| 7 | mocks.verify_partial_doubles = true 8 | end 9 | 10 | config.disable_monkey_patching! 11 | config.warnings = false 12 | if config.files_to_run.one? 13 | config.default_formatter = 'doc' 14 | end 15 | config.order = :random 16 | Kernel.srand config.seed 17 | end 18 | --------------------------------------------------------------------------------