├── NOTICE.txt ├── .travis.yml ├── CONTRIBUTORS.txt ├── .gitignore ├── kpl_aggregated_records.proto ├── Gemfile ├── Rakefile ├── test ├── helper.rb └── plugin │ └── test_out_kinesis-aggregation.rb ├── fluent-plugin-kinesis-aggregation.gemspec ├── CHANGELOG.md ├── lib └── fluent │ └── plugin │ └── out_kinesis-aggregation.rb ├── README.md └── LICENSE.txt /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Fluent Plugin for Amazon Kinesis 2 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 3 | rvm: 4 | - 2.7 5 | 6 | os: 7 | - linux 8 | 9 | gemfile: 10 | - Gemfile 11 | 12 | script: bundle exec rake 13 | 14 | sudo: false 15 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | This file contains a list of people who have made large or regular contributions 2 | to the Fluent Plugin for Amazon Kinesis. Give them a special thanks! 
3 | 4 | Genki Sugawara 5 | sgwr_dts@yahoo.co.jp 6 | https://github.com/winebarrel 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | coverage 9 | doc/ 10 | lib/bundler/man 11 | pkg 12 | rdoc 13 | spec/reports 14 | test/tmp 15 | test/version_tmp 16 | tmp 17 | *.swp 18 | config/ 19 | -------------------------------------------------------------------------------- /kpl_aggregated_records.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message AggregatedRecord { 4 | repeated string partition_key_table = 1; 5 | repeated string explicit_hash_key_table = 2; 6 | repeated Record records = 3; 7 | } 8 | 9 | message Tag { 10 | string key = 1; 11 | string value = 2; 12 | } 13 | 14 | message Record { 15 | uint64 partition_key_index = 1; 16 | uint64 explicit_hash_key_index = 2; 17 | bytes data = 3; 18 | repeated Tag tags = 4; 19 | } 20 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | source 'https://rubygems.org' 15 | gemspec 16 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | require "bundler/gem_tasks" 15 | 16 | require 'rake/testtask' 17 | Rake::TestTask.new(:test) do |test| 18 | test.libs << 'lib' << 'test' 19 | test.pattern = 'test/**/test_*.rb' 20 | test.verbose = true 21 | end 22 | 23 | task :default => :test 24 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | require 'rubygems' 15 | require 'bundler' 16 | require 'stringio' 17 | begin 18 | Bundler.setup(:default, :development) 19 | rescue Bundler::BundlerError => e 20 | $stderr.puts e.message 21 | $stderr.puts "Run `bundle install` to install missing gems" 22 | exit e.status_code 23 | end 24 | 25 | require 'test/unit' 26 | require 'test/unit/rr' 27 | 28 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 29 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 30 | require 'fluent/test' 31 | require 'fluent/test/helpers' 32 | require 'fluent/test/driver/output' 33 | require 'fluent/process' 34 | require 'fluent/plugin/out_kinesis-aggregation' 35 | -------------------------------------------------------------------------------- /fluent-plugin-kinesis-aggregation.gemspec: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | # coding: utf-8 15 | lib = File.expand_path('../lib', __FILE__) 16 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 17 | 18 | Gem::Specification.new do |spec| 19 | spec.name = "fluent-plugin-kinesis-aggregation" 20 | spec.version = '0.4.1' 21 | spec.author = 'Atlassian' 22 | spec.email = 'lgoolsbee@atlassian.com' 23 | spec.summary = %q{Fluentd output plugin that sends KPL style aggregated events to Amazon Kinesis.} 24 | spec.homepage = "https://github.com/atlassian/fluent-plugin-kinesis-aggregation" 25 | spec.license = "Apache-2.0" 26 | 27 | spec.files = `git ls-files`.split($/) 28 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 29 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 30 | spec.require_paths = ["lib"] 31 | spec.required_ruby_version = '>= 2.7' 32 | 33 | spec.add_development_dependency "bundler", "~> 2" 34 | spec.add_development_dependency "rake", "~> 13" 35 | spec.add_development_dependency "test-unit", "~> 3" 36 | spec.add_development_dependency "test-unit-rr", "~> 1" 37 | 38 | spec.add_dependency "fluentd", ["~> 1", "< 2"] 39 | spec.add_dependency "aws-sdk-kinesis", "~> 1", "!= 1.4", "!= 1.5", "!= 1.14", "!= 1.24" 40 | spec.add_dependency "google-protobuf", "~> 3", ">= 3.12.1" 41 | end 42 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 0.4.1 4 | 5 | - Update dependency google-protobuf to require versions newer than 3.12.0; older versions are incompatible with td-agent v4 6 | 7 | ## 0.4.0 8 | 9 | - Update dependencies to support td-agent v4 ([td-agent v3 is EOL](https://www.fluentd.org/blog/schedule-for-td-agent-3-eol)); if you need td-agent v3 support, use 0.3.x from rubygems 10 | - Drop testing and support for Ruby versions less than 2.7 (version embedded with td-agent v4) 11 | 12 | ## 0.3.4 13 | 14 | - aws-sdk-kinesis 1.24 is missing a 
dependency from a newer version of the aws-sdk-core gem; 1.24 has been yanked and 1.24.1 has been released with the fix, but just in case 1.24 has already been installed/cached anywhere, add it to the list of excluded versions. 15 | - Previously, we pinned google-protobuf to 3.11.x because 3.12 required Ruby >=2.5 (and td-agent ships with Ruby 2.4 embedded). google-protobuf 3.12.1 restores support for Ruby 2.3 and 2.4, so we can relax our pinning for this dependency a bit by requiring versions greater than 3.12. 16 | 17 | ## 0.3.3 18 | 19 | - Dependency google-protobuf 3.12.0 dropped support for Ruby <2.5; td-agent3 bundles Ruby 2.4, so google-protobuf is now pinned to 3.11.x. 20 | 21 | ## 0.3.2 22 | 23 | - Modify aws-sdk usage to require just the API/SDK resources for Kinesis 24 | - Drop support and testing for deprecated Ruby versions (<2.3) 25 | 26 | ## 0.3.1 27 | 28 | - Change aws-sdk usage to work with both v2 and v3 29 | (in particular, makes it possible to use latest td-agent which includes the s3 plugin 30 | and pulls in aws-sdk v3) 31 | 32 | ## 0.3.0 33 | 34 | - Update to use fluentd 0.14 API (stick to 0.2.3 if you need support for earlier versions of fluentd) 35 | Much thanks to cosmo0920 for doing this. 36 | 37 | ## 0.2.3 38 | 39 | - emit stream name in error 40 | 41 | ## 0.2.1 - 0.2.2 42 | 43 | - update documentation to refer to published gem 44 | - turn on testing for Ruby 2.1 45 | - allow running on Ruby 2.1 46 | 47 | ## 0.2.0 48 | 49 | - switch to google protobuf library (ruby native one uses too much memory) 50 | 51 | ## 0.1.1 52 | 53 | - fix up conflict with fluent-kinesis plugin 54 | - Changelog 55 | 56 | ## 0.1.0 57 | 58 | - Release on Github 59 | -------------------------------------------------------------------------------- /lib/fluent/plugin/out_kinesis-aggregation.rb: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | require 'aws-sdk-kinesis' 15 | require 'yajl' 16 | require 'logger' 17 | require 'securerandom' 18 | require 'digest' 19 | 20 | require 'google/protobuf' 21 | require 'fluent/plugin/output' 22 | 23 | Google::Protobuf::DescriptorPool.generated_pool.build do 24 | add_message "AggregatedRecord" do 25 | repeated :partition_key_table, :string, 1 26 | repeated :explicit_hash_key_table, :string, 2 27 | repeated :records, :message, 3, "Record" 28 | end 29 | add_message "Tag" do 30 | optional :key, :string, 1 31 | optional :value, :string, 2 32 | end 33 | add_message "Record" do 34 | optional :partition_key_index, :uint64, 1 35 | optional :explicit_hash_key_index, :uint64, 2 36 | optional :data, :bytes, 3 37 | repeated :tags, :message, 4, "Tag" 38 | end 39 | end 40 | 41 | AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass 42 | Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass 43 | Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass 44 | 45 | 46 | module FluentPluginKinesisAggregation 47 | class OutputFilter < Fluent::Plugin::Output 48 | 49 | helpers :compat_parameters, :inject 50 | 51 | DEFAULT_BUFFER_TYPE = "memory" 52 | NAME = 'kinesis-aggregation' 53 | PUT_RECORD_MAX_DATA_SIZE = 1024 * 1024 54 | # 200 is an arbitrary number more than the envelope overhead 55 | # and big enough to store partition/hash key table in 56 | # 
AggregatedRecords. Note that you shouldn't really ever have 57 | # the buffer this high, since you're likely to fail the write 58 | # if anyone else is writing to the shard at the time. 59 | FLUENTD_MAX_BUFFER_SIZE = PUT_RECORD_MAX_DATA_SIZE - 200 60 | 61 | Fluent::Plugin.register_output(NAME, self) 62 | 63 | config_set_default :include_time_key, true 64 | config_set_default :include_tag_key, true 65 | 66 | config_param :aws_key_id, :string, default: nil, :secret => true 67 | config_param :aws_sec_key, :string, default: nil, :secret => true 68 | # The 'region' parameter is optional because 69 | # it may be set as an environment variable. 70 | config_param :region, :string, default: nil 71 | 72 | config_param :profile, :string, :default => nil 73 | config_param :credentials_path, :string, :default => nil 74 | config_param :role_arn, :string, :default => nil 75 | config_param :external_id, :string, :default => nil 76 | 77 | config_param :stream_name, :string 78 | config_param :fixed_partition_key, :string, default: nil 79 | 80 | config_param :debug, :bool, default: false 81 | 82 | config_param :http_proxy, :string, default: nil 83 | 84 | config_section :buffer do 85 | config_set_default :@type, DEFAULT_BUFFER_TYPE 86 | end 87 | 88 | def configure(conf) 89 | compat_parameters_convert(conf, :buffer, :inject) 90 | super 91 | 92 | if @buffer.chunk_limit_size > FLUENTD_MAX_BUFFER_SIZE 93 | raise Fluent::ConfigError, "Kinesis buffer_chunk_limit is set to more than the 1mb shard limit (i.e. you won't be able to write your chunks!" 94 | end 95 | 96 | if @buffer.chunk_limit_size > FLUENTD_MAX_BUFFER_SIZE / 3 97 | log.warn 'Kinesis buffer_chunk_limit is set at more than 1/3 of the per second shard limit (1mb). This is not good if you have many producers.' 
98 | end 99 | end 100 | 101 | def start 102 | super 103 | load_client 104 | end 105 | 106 | def format(tag, time, record) 107 | record = inject_values_to_record(tag, time, record) 108 | 109 | return AggregatedRecord.encode(AggregatedRecord.new( 110 | records: [Record.new( 111 | partition_key_index: 1, 112 | data: Yajl.dump(record).b 113 | )] 114 | )) 115 | end 116 | 117 | def write(chunk) 118 | records = chunk.read 119 | if records.length > FLUENTD_MAX_BUFFER_SIZE 120 | log.error "Can't emit aggregated #{@stream_name} stream record of length #{records.length} (more than #{FLUENTD_MAX_BUFFER_SIZE})" 121 | return # do not throw, since we can't retry 122 | end 123 | 124 | partition_key = @fixed_partition_key || SecureRandom.uuid 125 | 126 | # confusing magic. Because of the format of protobuf records, 127 | # it's valid (in this case) to concatenate the AggregatedRecords 128 | # to form one AggregatedRecord, since we only have a repeated field 129 | # in records. 130 | # 131 | # ALSO, since we use google's protobuf stuff (much better 132 | # memory usage due to C extension), we're stuck on proto3. 133 | # Unfortunately, KPL uses proto2 form, and partition_key_index 134 | # is a required field. If we set it to 0 in proto3, though, 135 | # it's helpfully ignored in the serialisation (default!). 136 | # Therefore we have to pass a partition_key_index of 1, 137 | # and put two things in our partition_key_table. 
138 | message = AggregatedRecord.encode(AggregatedRecord.new( 139 | partition_key_table: ['a', partition_key] 140 | )) + records 141 | 142 | @client.put_record( 143 | stream_name: @stream_name, 144 | data: kpl_aggregation_pack(message), 145 | partition_key: partition_key 146 | ) 147 | end 148 | 149 | private 150 | 151 | # https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md 152 | KPL_MAGIC_NUMBER = "\xF3\x89\x9A\xC2" 153 | def kpl_aggregation_pack(message) 154 | [ 155 | KPL_MAGIC_NUMBER, message, Digest::MD5.digest(message) 156 | ].pack("A4A*A16") 157 | end 158 | 159 | # This code is unchanged from https://github.com/awslabs/aws-fluent-plugin-kinesis 160 | def load_client 161 | user_agent_suffix = "fluent-#{NAME}" 162 | 163 | options = { 164 | user_agent_suffix: user_agent_suffix 165 | } 166 | 167 | if @region 168 | options[:region] = @region 169 | end 170 | 171 | if @aws_key_id && @aws_sec_key 172 | options.update( 173 | access_key_id: @aws_key_id, 174 | secret_access_key: @aws_sec_key, 175 | ) 176 | elsif @profile 177 | credentials_opts = {:profile_name => @profile} 178 | credentials_opts[:path] = @credentials_path if @credentials_path 179 | credentials = Aws::SharedCredentials.new(credentials_opts) 180 | options[:credentials] = credentials 181 | elsif @role_arn 182 | credentials = Aws::AssumeRoleCredentials.new( 183 | client: Aws::STS::Client.new(options), 184 | role_arn: @role_arn, 185 | role_session_name: "fluent-plugin-kinesis-aggregation", 186 | external_id: @external_id, 187 | duration_seconds: 60 * 60 188 | ) 189 | options[:credentials] = credentials 190 | end 191 | 192 | if @debug 193 | options.update( 194 | logger: Logger.new(log.out), 195 | log_level: :debug 196 | ) 197 | end 198 | 199 | if @http_proxy 200 | options[:http_proxy] = @http_proxy 201 | end 202 | 203 | @client = Aws::Kinesis::Client.new(options) 204 | end 205 | end 206 | end 207 | -------------------------------------------------------------------------------- 
/test/plugin/test_out_kinesis-aggregation.rb:
--------------------------------------------------------------------------------
# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

require 'helper'

# Tests for FluentPluginKinesisAggregation::OutputFilter: configuration
# parsing, AWS client construction (rr stubs on Aws::Kinesis::Client),
# and the exact KPL-aggregated bytes produced for emitted events.
class KinesisOutputTest < Test::Unit::TestCase
  include Fluent::Test::Helpers

  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    aws_key_id test_key_id
    aws_sec_key test_sec_key
    stream_name test_stream
    region us-east-1
    fixed_partition_key test_partition_key
    buffer_chunk_limit 100k
  ]

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output
      .new(FluentPluginKinesisAggregation::OutputFilter).configure(conf)
  end

  # Replaces any Aws::Kinesis::Client instantiation with an rr mock.
  def create_mock_client
    client = mock(Object.new)
    stub(Aws::Kinesis::Client).new(anything) { client }
    return client
  end

  def test_configure
    d = create_driver
    assert_equal 'test_key_id', d.instance.aws_key_id
    assert_equal 'test_sec_key', d.instance.aws_sec_key
    assert_equal 'test_stream', d.instance.stream_name
    assert_equal 'us-east-1', d.instance.region
    assert_equal 'test_partition_key', d.instance.fixed_partition_key
  end

  def test_configure_with_credentials
    d = create_driver(<<-EOS)
      profile default
      credentials_path /home/scott/.aws/credentials
      stream_name test_stream
      region us-east-1
      fixed_partition_key test_partition_key
      buffer_chunk_limit 100k
    EOS

    assert_equal 'default', d.instance.profile
    assert_equal '/home/scott/.aws/credentials', d.instance.credentials_path
    assert_equal 'test_stream', d.instance.stream_name
    assert_equal 'us-east-1', d.instance.region
    assert_equal 'test_partition_key', d.instance.fixed_partition_key
  end

  def test_configure_with_more_options
    conf = %[
      stream_name test_stream
      region us-east-1
      http_proxy http://proxy:3333/
      fixed_partition_key test_partition_key
      buffer_chunk_limit 100k
    ]
    d = create_driver(conf)
    assert_equal 'test_stream', d.instance.stream_name
    assert_equal 'us-east-1', d.instance.region
    assert_equal 'http://proxy:3333/', d.instance.http_proxy
    assert_equal 'test_partition_key', d.instance.fixed_partition_key
  end

  # buffer_chunk_limit of 1m exceeds FLUENTD_MAX_BUFFER_SIZE, so
  # configure must refuse it.
  def test_configure_fails_on_big_chunk_limit
    conf = %[
      stream_name test_stream
      region us-east-1
      http_proxy http://proxy:3333/
      fixed_partition_key test_partition_key
      buffer_chunk_limit 1m
    ]
    assert_raise Fluent::ConfigError do
      create_driver(conf)
    end
  end

  # Static key credentials should be passed straight to the client options.
  def test_load_client
    client = stub(Object.new)
    client.put_record { {} }

    stub(Aws::Kinesis::Client).new do |options|
      assert_equal("test_key_id", options[:access_key_id])
      assert_equal("test_sec_key", options[:secret_access_key])
      assert_equal("us-east-1", options[:region])
      client
    end

    d = create_driver
    d.run(default_tag: "test")
  end

  # Shared-credentials profile should become an Aws::SharedCredentials
  # in options[:credentials] instead of raw keys.
  def test_load_client_with_credentials
    client = stub(Object.new)
    client.put_record { {} }

    stub(Aws::Kinesis::Client).new do |options|
      assert_equal(nil, options[:access_key_id])
      assert_equal(nil, options[:secret_access_key])
      assert_equal("us-east-1", options[:region])

      credentials = options[:credentials]
      assert_equal("default", credentials.profile_name)
      assert_equal("/home/scott/.aws/credentials", credentials.path)

      client
    end

    d = create_driver(<<-EOS)
      profile default
      credentials_path /home/scott/.aws/credentials
      stream_name test_stream
      region us-east-1
      fixed_partition_key test_partition_key
      buffer_chunk_limit 100k
    EOS

    begin
      d.run(default_tag: "test")
    rescue Aws::Errors::NoSuchProfileError
    end
  end

  # role_arn/external_id should construct AssumeRoleCredentials with a
  # one-hour session and pass the result through as the client credentials.
  def test_load_client_with_role_arn
    client = stub(Object.new)
    client.put_record { {} }

    stub(Aws::AssumeRoleCredentials).new do |options|
      assert_equal("arn:aws:iam::001234567890:role/my-role", options[:role_arn])
      assert_equal("fluent-plugin-kinesis-aggregation", options[:role_session_name])
      assert_equal("my_external_id", options[:external_id])
      assert_equal(3600, options[:duration_seconds])
      "sts_credentials"
    end

    stub(Aws::Kinesis::Client).new do |options|
      assert_equal("sts_credentials", options[:credentials])
      client
    end

    d = create_driver(<<-EOS)
      role_arn arn:aws:iam::001234567890:role/my-role
      external_id my_external_id
      stream_name test_stream
      region us-east-1
      fixed_partition_key test_partition_key
      buffer_chunk_limit 100k
    EOS
    d.run(default_tag: "test")
  end

  # Two events in one chunk must be shipped as a single PutRecord whose
  # payload is the exact KPL envelope (magic number + concatenated
  # AggregatedRecords + MD5 digest).
  def test_emitting
    d = create_driver

    data1 = {"a"=>1,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
    data2 = {"a"=>2,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}

    time = event_time("2011-01-02 13:14:15 UTC")

    d.run(default_tag: "test") do
      client = create_mock_client
      stub.instance_of(Aws::Kinesis::Client).put_record(
        stream_name: 'test_stream',
        data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1A6\b\x01\x1A2{\"a\":1,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\x1A6\b\x01\x1A2{\"a\":2,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\xA2\x0E y\x8B\x02\xDF\xAE\xAB\x93\x1C;\xCB\xAD\x1Fx".b,
        partition_key: 'test_partition_key'
      ) { {} }

      d.feed(time, data1)
      d.feed(time, data2)
    end
  end

  # Multibyte (UTF-8) record values must survive JSON encoding and KPL
  # packing byte-for-byte.
  def test_multibyte
    d = create_driver

    data1 = {"a"=>"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB","time"=>"2011-01-02T13:14:15Z".b,"tag"=>"test"}


    time = event_time("2011-01-02 13:14:15 UTC")
    d.run(default_tag: "test") do
      client = create_mock_client
      stub.instance_of(Aws::Kinesis::Client).put_record(
        stream_name: 'test_stream',
        data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1AI\b\x01\x1AE{\"a\":\"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB\",\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}_$\x9C\xF9v+pV:g7c\xE3\xF2$\xBA".b,
        partition_key: 'test_partition_key'
      ) { {} }

      d.feed(time, data1)
    end
  end

  # A single event bigger than the chunk limit must overflow the buffer
  # and never reach put_record.
  def test_fail_on_bigchunk
    d = create_driver

    assert_raise(Fluent::Plugin::Buffer::BufferChunkOverflowError) do
      d.run(default_tag: "test") do
        d.feed(
          event_time("2011-01-02 13:14:15 UTC"),
          {"msg" => "z" * 1024 * 1024})
        client = dont_allow(Object.new)
        client.put_record
        mock(Aws::Kinesis::Client).new(anything) { client }
      end
    end
  end
end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Fluent Plugin for Amazon Kinesis producing KPL records

[![Build
Status](https://travis-ci.org/atlassian/fluent-plugin-kinesis-aggregation.svg?branch=master)](https://travis-ci.org/atlassian/fluent-plugin-kinesis-aggregation) 4 | 5 | ## Before you start... 6 | 7 | This is a rewrite of [aws-fluent-plugin-kinesis](https://github.com/awslabs/aws-fluent-plugin-kinesis) to implement 8 | a different shipment method using the 9 | [KPL aggregation format](https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md). 10 | 11 | *Since this plugin was forked, aws-fluent-plugin-kinesis has undergone considerable development (and improvement). 12 | Most notably, the upcoming 2.0 release supports KPL aggregated records using google-protobuf without 13 | the overhead of using the KPL: 14 | https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/107* 15 | 16 | *However, it still uses msgpack for internal buffering and only uses protobuf when it ships the records, 17 | whereas this plugin processes each record as it comes in and ships the result by simple concatenation 18 | of the encoded records. This may not be faster, of course - could depend on the overhead of calling 19 | the protobuf methods - but most probably is. The discussion below is also still mostly valid, 20 | in that the awslabs plugin does not have PutRecord == chunk equivalency, but instead has its 21 | own internal retry method.* 22 | 23 | The basic idea is to have one PutRecord === one chunk. This has a number of advantages: 24 | 25 | - much less complexity in plugin (less CPU/memory) 26 | - by aggregating, we increase the throughput and decrease the cost 27 | - since a single chunk either succeeds or fails, 28 | we get to use fluentd's more complex/complete retry mechanism 29 | (which is also exposed by the monitor plugin; we view this in datadog). 
The existing retry mechanism 30 | had [unfortunate issues under heavy load](https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/42) 31 | - we get ordering within a chunk without having to rely on sequence 32 | numbers (though not overall ordering) 33 | 34 | However, there are drawbacks: 35 | 36 | - if you're using this as an aggregator, you will need to tune the 37 | buffer size on your sources fairly low such that it is less 38 | than the low buffer_chunk_limit on the aggregator 39 | - you have to use a KCL library to ingest 40 | - you can't use a calculated partition key (based on the record); 41 | essentially, you need to use a random partition key 42 | 43 | ## Overview 44 | 45 | [Fluentd](http://fluentd.org/) output plugin 46 | that sends events to [Amazon Kinesis](https://aws.amazon.com/kinesis/). 47 | 48 | ## Installation 49 | 50 | This plugin is available as the `fluent-plugin-kinesis-aggregation` gem from RubyGems: 51 | 52 | gem install fluent-plugin-kinesis-aggregation 53 | 54 | Or, if using td-agent: 55 | 56 | td-agent-gem install fluent-plugin-kinesis-aggregation 57 | 58 | To install from the source: 59 | 60 | git clone https://github.com/atlassian/fluent-plugin-kinesis-aggregation.git 61 | cd fluent-plugin-kinesis-aggregation 62 | bundle install 63 | rake build 64 | rake install 65 | 66 | Or, if using td-agent, replace rake install with: 67 | 68 | fluent-gem install pkg/fluent-plugin-kinesis-aggregation 69 | 70 | Alternatively, you can replace both the rake steps, and directly 71 | specify the library path via RUBYLIB: 72 | 73 | export RUBYLIB=$RUBYLIB:/path/to/fluent-plugin-kinesis-aggregation/lib 74 | 75 | ## Dependencies 76 | 77 | * Ruby 2.7+ 78 | * Fluentd 1+ 79 | 80 | If you need td-agent v3 support, use version 0.3.x on rubygems. If you need td-agent v2 support (or fluentd 0.10 or 0.12 support), use the fluentd-v0.12 branch or version 0.2.x on rubygems. 
81 | 82 | ## Basic Usage 83 | 84 | Here are general procedures for using this plugin: 85 | 86 | 1. Install. 87 | 1. Edit configuration 88 | 1. Run Fluentd or td-agent 89 | 90 | You can run this plugin with Fluentd as follows: 91 | 92 | 1. Install. 93 | 1. Edit configuration file and save it as 'fluentd.conf'. 94 | 1. Then, run `fluentd -c /path/to/fluentd.conf` 95 | 96 | To run with td-agent, it would be as follows: 97 | 98 | 1. Install. 99 | 1. Edit configuration file provided by td-agent. 100 | 1. Then, run or restart td-agent. 101 | 102 | ## Configuration 103 | 104 | Here are items for Fluentd configuration file. 105 | 106 | To put records into Amazon Kinesis, 107 | you need to provide AWS security credentials. 108 | If you provide aws_key_id and aws_sec_key in configuration file as below, 109 | we use it. You can also provide credentials via environment variables as 110 | AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY. Also we support IAM Role for 111 | authentication. Please find the [AWS SDK for Ruby Developer Guide](http://docs.aws.amazon.com/AWSSdkDocsRuby/latest/DeveloperGuide/ruby-dg-setup.html) 112 | for more information about authentication. 113 | We support all options which AWS SDK for Ruby supports. 114 | 115 | ### type 116 | 117 | Use the word 'kinesis-aggregation'. 118 | 119 | ### stream_name 120 | 121 | Name of the stream to put data. 122 | 123 | ### aws_key_id 124 | 125 | AWS access key id. 126 | 127 | ### aws_sec_key 128 | 129 | AWS secret key. 130 | 131 | ### role_arn 132 | 133 | IAM Role to be assumed with [AssumeRole](http://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). 134 | Use this option for cross account access. 135 | 136 | ### external_id 137 | 138 | A unique identifier that is used by third parties when 139 | [assuming roles](http://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html) in their customers' accounts. 140 | Use this option with `role_arn` for third party cross account access. 
141 | For details, please see [How to Use an External ID When Granting Access to Your AWS Resources to a Third Party](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html). 142 | 143 | ### region 144 | 145 | AWS region of your stream. 146 | It should be in form like "us-east-1", "us-west-2". 147 | Refer to [Regions and Endpoints in AWS General Reference](http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region) 148 | for supported regions. 149 | 150 | ### http_proxy 151 | 152 | Proxy server, if any. 153 | It should be in form like "http://squid:3128/" 154 | 155 | ### fixed_partition_key 156 | 157 | Instead of using a random partition key, used a fixed one. This 158 | forces all writes to a specific shard, and if you're using 159 | a single thread/process will probably keep event ordering 160 | (not recommended - watch out for hot shards!). 161 | 162 | ### num_threads 163 | 164 | Integer. The number of threads to flush the buffer. This plugin is based on 165 | Fluentd::Plugin::Output, so we buffer incoming records before emitting them to 166 | Amazon Kinesis. You can find the detail about buffering mechanism [here](http://docs.fluentd.org/articles/buffer-plugin-overview). 167 | Emitting records to Amazon Kinesis via network causes I/O Wait, so parallelizing 168 | emitting with threads will improve throughput. 169 | 170 | This option can be used to parallelize writes into the output(s) 171 | designated by the output plugin. The default is 1. 172 | Also you can use this option with *multi workers*. 173 | 174 | ### multi workers 175 | 176 | This feature is introduced in Fluentd v0.14. 177 | Instead of using *detach_process*, this feature can use as the following system directive. 178 | Note that *detach_process* parameter is removed after using v0.14 Output Plugin API. 179 | The default is 1. 180 | 181 | 182 | workers 5 183 | 184 | 185 | ### debug 186 | 187 | Boolean. Enable if you need to debug Amazon Kinesis API call. 
Default is false. 188 | 189 | ## Configuration examples 190 | 191 | Here are some configuration examples. 192 | Assume that the JSON object below arrives with the tag 'your_tag'. 193 | 194 | { 195 | "name":"foo", 196 | "action":"bar" 197 | } 198 | 199 | ### Simply putting events to Amazon Kinesis with a partition key 200 | 201 | In this example, a value 'foo' will be used as the partition key, 202 | then events will be sent to the stream specified in 'stream_name'. 203 | 204 | 205 | type kinesis-aggregation 206 | 207 | stream_name YOUR_STREAM_NAME 208 | 209 | aws_key_id YOUR_AWS_ACCESS_KEY 210 | aws_sec_key YOUR_SECRET_KEY 211 | 212 | region us-east-1 213 | 214 | fixed_partition_key foo 215 | 216 | # You should set the buffer_chunk_limit to substantially less 217 | # than the kinesis 1mb record limit, since we ship a chunk at once. 218 | buffer_chunk_limit 300k 219 | 220 | 221 | ### Improving throughput to Amazon Kinesis 222 | 223 | The achievable throughput to Amazon Kinesis is limited to single-threaded 224 | PutRecord calls, which should be at most around 300kb each. 225 | The plugin can also be configured to execute in parallel. 226 | The **detach_process** and **num_threads** configuration settings control 227 | parallelism. 228 | 229 | In the case of the configuration below, you will spawn 2 processes. 230 | 231 | 232 | type kinesis-aggregation 233 | 234 | stream_name YOUR_STREAM_NAME 235 | region us-east-1 236 | 237 | detach_process 2 238 | buffer_chunk_limit 300k 239 | 240 | 241 | You can also specify a number of threads to put. 242 | The number of threads is bound to each individual process. 243 | So in this case, you will spawn 1 process which has 50 threads. 244 | 245 | 246 | type kinesis-aggregation 247 | 248 | stream_name YOUR_STREAM_NAME 249 | region us-east-1 250 | 251 | num_threads 50 252 | buffer_chunk_limit 300k 253 | 254 | 255 | Both options can be used together; in the configuration below, 256 | you will spawn 2 processes and 50 threads per process.
257 | 258 | 259 | type kinesis 260 | 261 | stream_name YOUR_STREAM_NAME 262 | region us-east-1 263 | 264 | detach_process 2 265 | num_threads 50 266 | buffer_chunk_limit 300k 267 | 268 | 269 | ## Related Resources 270 | 271 | * [Amazon Kinesis Developer Guide](http://docs.aws.amazon.com/kinesis/latest/dev/introduction.html) 272 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | --------------------------------------------------------------------------------