├── NOTICE.txt
├── .travis.yml
├── CONTRIBUTORS.txt
├── .gitignore
├── kpl_aggregated_records.proto
├── Gemfile
├── Rakefile
├── test
├── helper.rb
└── plugin
│ └── test_out_kinesis-aggregation.rb
├── fluent-plugin-kinesis-aggregation.gemspec
├── CHANGELOG.md
├── lib
└── fluent
│ └── plugin
│ └── out_kinesis-aggregation.rb
├── README.md
└── LICENSE.txt
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Fluent Plugin for Amazon Kinesis
2 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 |
3 | rvm:
4 | - 2.7
5 |
6 | os:
7 | - linux
8 |
9 | gemfile:
10 | - Gemfile
11 |
12 | script: bundle exec rake
13 |
14 | sudo: false
15 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | This file contains a list of people who have made large or regular contributions
2 | to the Fluent Plugin for Amazon Kinesis. Give them a special thanks!
3 |
4 | Genki Sugawara
5 | sgwr_dts@yahoo.co.jp
6 | https://github.com/winebarrel
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | .bundle
4 | .config
5 | .yardoc
6 | Gemfile.lock
7 | InstalledFiles
8 | coverage
9 | doc/
10 | lib/bundler/man
11 | pkg
12 | rdoc
13 | spec/reports
14 | test/tmp
15 | test/version_tmp
16 | tmp
17 | *.swp
18 | config/
19 |
--------------------------------------------------------------------------------
/kpl_aggregated_records.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

// Container for multiple user records aggregated into one Kinesis record.
// Field numbers and types define the KPL aggregation wire format and must
// stay in sync with the inline message definitions in
// lib/fluent/plugin/out_kinesis-aggregation.rb.
message AggregatedRecord {
  repeated string partition_key_table = 1;
  repeated string explicit_hash_key_table = 2;
  repeated Record records = 3;
}

// Optional key/value metadata attached to a record (not emitted by this plugin).
message Tag {
  string key = 1;
  string value = 2;
}

// One user record plus indexes into the key tables above.
message Record {
  uint64 partition_key_index = 1;
  uint64 explicit_hash_key_index = 2;
  bytes data = 3;
  repeated Tag tags = 4;
}
20 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
14 | source 'https://rubygems.org'
15 | gemspec
16 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
require "bundler/gem_tasks"

require 'rake/testtask'
# Runs the test-unit suite (test/**/test_*.rb) via `rake test`.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Bare `rake` runs the tests (CI invokes `bundle exec rake`).
task :default => :test
24 |
--------------------------------------------------------------------------------
/test/helper.rb:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
require 'rubygems'
require 'bundler'
require 'stringio'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'test/unit'
require 'test/unit/rr'

# Make lib/ and test/ requirable without installing the gem.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'fluent/test'
require 'fluent/test/helpers'
require 'fluent/test/driver/output'
# NOTE(review): fluent/process backed the removed detach_process API —
# confirm this require still loads on current fluentd 1.x releases.
require 'fluent/process'
require 'fluent/plugin/out_kinesis-aggregation'
--------------------------------------------------------------------------------
/fluent-plugin-kinesis-aggregation.gemspec:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name = "fluent-plugin-kinesis-aggregation"
  spec.version = '0.4.1'
  spec.author = 'Atlassian'
  spec.email = 'lgoolsbee@atlassian.com'
  spec.summary = %q{Fluentd output plugin that sends KPL style aggregated events to Amazon Kinesis.}
  spec.homepage = "https://github.com/atlassian/fluent-plugin-kinesis-aggregation"
  spec.license = "Apache-2.0"

  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
  # Matches the Ruby embedded with td-agent v4 (see CHANGELOG 0.4.0).
  spec.required_ruby_version = '>= 2.7'

  spec.add_development_dependency "bundler", "~> 2"
  spec.add_development_dependency "rake", "~> 13"
  spec.add_development_dependency "test-unit", "~> 3"
  spec.add_development_dependency "test-unit-rr", "~> 1"

  spec.add_dependency "fluentd", ["~> 1", "< 2"]
  # Excluded aws-sdk-kinesis versions are known-broken releases
  # (e.g. 1.24 shipped missing an aws-sdk-core dependency; see CHANGELOG 0.3.4).
  spec.add_dependency "aws-sdk-kinesis", "~> 1", "!= 1.4", "!= 1.5", "!= 1.14", "!= 1.24"
  # td-agent v4 needs google-protobuf >= 3.12.1 (see CHANGELOG 0.4.1).
  spec.add_dependency "google-protobuf", "~> 3", ">= 3.12.1"
end
42 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | ## 0.4.1
4 |
5 | - Update dependency google-protobuf to require versions newer than 3.12.0; older versions are incompatible with td-agent v4
6 |
7 | ## 0.4.0
8 |
9 | - Update dependencies to support td-agent v4 ([td-agent v3 is EOL](https://www.fluentd.org/blog/schedule-for-td-agent-3-eol)); if you need td-agent v3 support, use 0.3.x from rubygems
10 | - Drop testing and support for Ruby versions less than 2.7 (version embedded with td-agent v4)
11 |
12 | ## 0.3.4
13 |
14 | - aws-sdk-kinesis 1.24 is missing a dependency from a newer version of the aws-sdk-core gem; 1.24 has been yanked and 1.24.1 has been released with the fix, but just in case 1.24 has already been installed/cached anywhere, add it to the list of excluded versions.
15 | - Previously, we pinned google-protobuf to 3.11.x because 3.12 required Ruby >=2.5 (and td-agent ships with Ruby 2.4 embedded). google-protobuf 3.12.1 restores support for Ruby 2.3 and 2.4, so we can relax our pinning for this dependency a bit by requiring versions greater than 3.12.
16 |
17 | ## 0.3.3
18 |
19 | - Dependency google-protobuf 3.12.0 dropped support for Ruby <2.5; td-agent3 bundles Ruby 2.4, so google-protobuf is now pinned to 3.11.x.
20 |
21 | ## 0.3.2
22 |
23 | - Modify aws-sdk usage to require just the API/SDK resources for Kinesis
24 | - Drop support and testing for deprecated Ruby versions (<2.3)
25 |
26 | ## 0.3.1
27 |
28 | - Change aws-sdk usage to work with both v2 and v3
29 | (in particular, makes it possible to use latest td-agent which includes the s3 plugin
30 | and pulls in aws-sdk v3)
31 |
32 | ## 0.3.0
33 |
34 | - Update to use fluentd 0.14 API (stick to 0.2.3 if you need support for earlier versions of fluentd)
35 | Much thanks to cosmo0920 for doing this.
36 |
37 | ## 0.2.3
38 |
39 | - emit stream name in error
40 |
41 | ## 0.2.1 - 0.2.2
42 |
43 | - update documentation to refer to published gem
44 | - turn on testing for Ruby 2.1
45 | - allow running on Ruby 2.1
46 |
47 | ## 0.2.0
48 |
49 | - switch to google protobuf library (ruby native one uses too much memory)
50 |
51 | ## 0.1.1
52 |
53 | - fix up conflict with fluent-kinesis plugin
54 | - Changelog
55 |
56 | ## 0.1.0
57 |
58 | - Release on Github
59 |
--------------------------------------------------------------------------------
/lib/fluent/plugin/out_kinesis-aggregation.rb:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
14 | require 'aws-sdk-kinesis'
15 | require 'yajl'
16 | require 'logger'
17 | require 'securerandom'
18 | require 'digest'
19 |
20 | require 'google/protobuf'
21 | require 'fluent/plugin/output'
22 |
# Define the KPL aggregation protobuf messages at load time. This mirrors
# kpl_aggregated_records.proto; field numbers and types must not change,
# since they define the wire format consumed by KCL deaggregation libraries.
#
# NOTE(review): the DescriptorPool#build / add_message DSL was removed in
# newer google-protobuf releases (the gem moved to serialized descriptors);
# the gemspec's "~> 3" pin allows such versions — confirm compatibility
# before upgrading the protobuf dependency.
Google::Protobuf::DescriptorPool.generated_pool.build do
  add_message "AggregatedRecord" do
    repeated :partition_key_table, :string, 1
    repeated :explicit_hash_key_table, :string, 2
    repeated :records, :message, 3, "Record"
  end
  add_message "Tag" do
    optional :key, :string, 1
    optional :value, :string, 2
  end
  add_message "Record" do
    optional :partition_key_index, :uint64, 1
    optional :explicit_hash_key_index, :uint64, 2
    optional :data, :bytes, 3
    repeated :tags, :message, 4, "Tag"
  end
end

# Top-level message classes generated from the pool above.
AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
44 |
45 |
module FluentPluginKinesisAggregation
  # Buffered fluentd output that ships each chunk as a single KPL-aggregated
  # Kinesis record: events are protobuf-encoded as they arrive (#format) and
  # #write concatenates them into one PutRecord per chunk, so fluentd's own
  # retry mechanism applies to the whole chunk.
  class OutputFilter < Fluent::Plugin::Output

    helpers :compat_parameters, :inject

    DEFAULT_BUFFER_TYPE = "memory"
    NAME = 'kinesis-aggregation'
    PUT_RECORD_MAX_DATA_SIZE = 1024 * 1024
    # 200 is an arbitrary number more than the envelope overhead
    # and big enough to store partition/hash key table in
    # AggregatedRecords. Note that you shouldn't really ever have
    # the buffer this high, since you're likely to fail the write
    # if anyone else is writing to the shard at the time.
    FLUENTD_MAX_BUFFER_SIZE = PUT_RECORD_MAX_DATA_SIZE - 200

    Fluent::Plugin.register_output(NAME, self)

    config_set_default :include_time_key, true
    config_set_default :include_tag_key, true

    # Static AWS credentials; when unset, the SDK's default provider chain
    # is used (see load_client for the full resolution order).
    config_param :aws_key_id, :string, default: nil, secret: true
    config_param :aws_sec_key, :string, default: nil, secret: true
    # The 'region' parameter is optional because
    # it may be set as an environment variable.
    config_param :region, :string, default: nil

    # Shared-credentials-file and STS AssumeRole alternatives.
    config_param :profile, :string, default: nil
    config_param :credentials_path, :string, default: nil
    config_param :role_arn, :string, default: nil
    config_param :external_id, :string, default: nil

    config_param :stream_name, :string
    # Forces all writes onto one partition key (hot-shard risk; see README).
    config_param :fixed_partition_key, :string, default: nil

    # Enables AWS SDK request logging.
    config_param :debug, :bool, default: false

    config_param :http_proxy, :string, default: nil

    config_section :buffer do
      config_set_default :@type, DEFAULT_BUFFER_TYPE
    end

    # Validates that a whole chunk can fit into one Kinesis PutRecord.
    # Raises Fluent::ConfigError when buffer_chunk_limit exceeds the 1mb
    # record limit (minus envelope headroom); warns above 1/3 of it.
    def configure(conf)
      compat_parameters_convert(conf, :buffer, :inject)
      super

      if @buffer.chunk_limit_size > FLUENTD_MAX_BUFFER_SIZE
        raise Fluent::ConfigError, "Kinesis buffer_chunk_limit is set to more than the 1mb shard limit (i.e. you won't be able to write your chunks!)"
      end

      if @buffer.chunk_limit_size > FLUENTD_MAX_BUFFER_SIZE / 3
        log.warn 'Kinesis buffer_chunk_limit is set at more than 1/3 of the per second shard limit (1mb). This is not good if you have many producers.'
      end
    end

    def start
      super
      load_client
    end

    # Encodes one event as a single-record AggregatedRecord fragment.
    # Fragments are later concatenated in #write, which is valid because
    # AggregatedRecord only carries repeated fields at the top level.
    def format(tag, time, record)
      record = inject_values_to_record(tag, time, record)

      return AggregatedRecord.encode(AggregatedRecord.new(
        records: [Record.new(
          partition_key_index: 1,
          data: Yajl.dump(record).b
        )]
      ))
    end

    # Ships one chunk as one PutRecord. Oversized chunks are logged and
    # dropped (not raised), since retrying cannot make them smaller.
    def write(chunk)
      records = chunk.read
      if records.length > FLUENTD_MAX_BUFFER_SIZE
        log.error "Can't emit aggregated #{@stream_name} stream record of length #{records.length} (more than #{FLUENTD_MAX_BUFFER_SIZE})"
        return # do not throw, since we can't retry
      end

      partition_key = @fixed_partition_key || SecureRandom.uuid

      # confusing magic. Because of the format of protobuf records,
      # it's valid (in this case) to concatenate the AggregatedRecords
      # to form one AggregatedRecord, since we only have a repeated field
      # in records.
      #
      # ALSO, since we use google's protobuf stuff (much better
      # memory usage due to C extension), we're stuck on proto3.
      # Unfortunately, KPL uses proto2 form, and partition_key_index
      # is a required field. If we set it to 0 in proto3, though,
      # it's helpfully ignored in the serialisation (default!).
      # Therefore we have to pass a partition_key_index of 1,
      # and put two things in our partition_key_table.
      message = AggregatedRecord.encode(AggregatedRecord.new(
        partition_key_table: ['a', partition_key]
      )) + records

      @client.put_record(
        stream_name: @stream_name,
        data: kpl_aggregation_pack(message),
        partition_key: partition_key
      )
    end

    private

    # KPL envelope: 4-byte magic number + payload + 16-byte MD5 checksum.
    # https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md
    KPL_MAGIC_NUMBER = "\xF3\x89\x9A\xC2"
    def kpl_aggregation_pack(message)
      [
        KPL_MAGIC_NUMBER, message, Digest::MD5.digest(message)
      ].pack("A4A*A16")
    end

    # This code is unchanged from https://github.com/awslabs/aws-fluent-plugin-kinesis
    #
    # Builds @client, resolving credentials in priority order:
    # static keys > shared-credentials profile > assumed role >
    # SDK default provider chain (env vars, instance profile, ...).
    def load_client
      user_agent_suffix = "fluent-#{NAME}"

      options = {
        user_agent_suffix: user_agent_suffix
      }

      if @region
        options[:region] = @region
      end

      if @aws_key_id && @aws_sec_key
        options.update(
          access_key_id: @aws_key_id,
          secret_access_key: @aws_sec_key,
        )
      elsif @profile
        credentials_opts = {:profile_name => @profile}
        credentials_opts[:path] = @credentials_path if @credentials_path
        credentials = Aws::SharedCredentials.new(credentials_opts)
        options[:credentials] = credentials
      elsif @role_arn
        credentials = Aws::AssumeRoleCredentials.new(
          client: Aws::STS::Client.new(options),
          role_arn: @role_arn,
          role_session_name: "fluent-plugin-kinesis-aggregation",
          external_id: @external_id,
          duration_seconds: 60 * 60
        )
        options[:credentials] = credentials
      end

      if @debug
        options.update(
          logger: Logger.new(log.out),
          log_level: :debug
        )
      end

      if @http_proxy
        options[:http_proxy] = @http_proxy
      end

      @client = Aws::Kinesis::Client.new(options)
    end
  end
end
207 |
--------------------------------------------------------------------------------
/test/plugin/test_out_kinesis-aggregation.rb:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"). You
4 | # may not use this file except in compliance with the License. A copy of
5 | # the License is located at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is
10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11 | # ANY KIND, either express or implied. See the License for the specific
12 | # language governing permissions and limitations under the License.
13 |
14 | require 'helper'
15 |
16 | class KinesisOutputTest < Test::Unit::TestCase
17 | include Fluent::Test::Helpers
18 |
19 | def setup
20 | Fluent::Test.setup
21 | end
22 |
23 | CONFIG = %[
24 | aws_key_id test_key_id
25 | aws_sec_key test_sec_key
26 | stream_name test_stream
27 | region us-east-1
28 | fixed_partition_key test_partition_key
29 | buffer_chunk_limit 100k
30 | ]
31 |
32 | def create_driver(conf = CONFIG)
33 | Fluent::Test::Driver::Output
34 | .new(FluentPluginKinesisAggregation::OutputFilter).configure(conf)
35 | end
36 |
37 | def create_mock_client
38 | client = mock(Object.new)
39 | stub(Aws::Kinesis::Client).new(anything) { client }
40 | return client
41 | end
42 |
43 | def test_configure
44 | d = create_driver
45 | assert_equal 'test_key_id', d.instance.aws_key_id
46 | assert_equal 'test_sec_key', d.instance.aws_sec_key
47 | assert_equal 'test_stream', d.instance.stream_name
48 | assert_equal 'us-east-1', d.instance.region
49 | assert_equal 'test_partition_key', d.instance.fixed_partition_key
50 | end
51 |
52 | def test_configure_with_credentials
53 | d = create_driver(<<-EOS)
54 | profile default
55 | credentials_path /home/scott/.aws/credentials
56 | stream_name test_stream
57 | region us-east-1
58 | fixed_partition_key test_partition_key
59 | buffer_chunk_limit 100k
60 | EOS
61 |
62 | assert_equal 'default', d.instance.profile
63 | assert_equal '/home/scott/.aws/credentials', d.instance.credentials_path
64 | assert_equal 'test_stream', d.instance.stream_name
65 | assert_equal 'us-east-1', d.instance.region
66 | assert_equal 'test_partition_key', d.instance.fixed_partition_key
67 | end
68 |
69 | def test_configure_with_more_options
70 | conf = %[
71 | stream_name test_stream
72 | region us-east-1
73 | http_proxy http://proxy:3333/
74 | fixed_partition_key test_partition_key
75 | buffer_chunk_limit 100k
76 | ]
77 | d = create_driver(conf)
78 | assert_equal 'test_stream', d.instance.stream_name
79 | assert_equal 'us-east-1', d.instance.region
80 | assert_equal 'http://proxy:3333/', d.instance.http_proxy
81 | assert_equal 'test_partition_key', d.instance.fixed_partition_key
82 | end
83 |
84 | def test_configure_fails_on_big_chunk_limit
85 | conf = %[
86 | stream_name test_stream
87 | region us-east-1
88 | http_proxy http://proxy:3333/
89 | fixed_partition_key test_partition_key
90 | buffer_chunk_limit 1m
91 | ]
92 | assert_raise Fluent::ConfigError do
93 | create_driver(conf)
94 | end
95 | end
96 |
97 | def test_load_client
98 | client = stub(Object.new)
99 | client.put_record { {} }
100 |
101 | stub(Aws::Kinesis::Client).new do |options|
102 | assert_equal("test_key_id", options[:access_key_id])
103 | assert_equal("test_sec_key", options[:secret_access_key])
104 | assert_equal("us-east-1", options[:region])
105 | client
106 | end
107 |
108 | d = create_driver
109 | d.run(default_tag: "test")
110 | end
111 |
112 | def test_load_client_with_credentials
113 | client = stub(Object.new)
114 | client.put_record { {} }
115 |
116 | stub(Aws::Kinesis::Client).new do |options|
117 | assert_equal(nil, options[:access_key_id])
118 | assert_equal(nil, options[:secret_access_key])
119 | assert_equal("us-east-1", options[:region])
120 |
121 | credentials = options[:credentials]
122 | assert_equal("default", credentials.profile_name)
123 | assert_equal("/home/scott/.aws/credentials", credentials.path)
124 |
125 | client
126 | end
127 |
128 | d = create_driver(<<-EOS)
129 | profile default
130 | credentials_path /home/scott/.aws/credentials
131 | stream_name test_stream
132 | region us-east-1
133 | fixed_partition_key test_partition_key
134 | buffer_chunk_limit 100k
135 | EOS
136 |
137 | begin
138 | d.run(default_tag: "test")
139 | rescue Aws::Errors::NoSuchProfileError
140 | end
141 | end
142 |
143 | def test_load_client_with_role_arn
144 | client = stub(Object.new)
145 | client.put_record { {} }
146 |
147 | stub(Aws::AssumeRoleCredentials).new do |options|
148 | assert_equal("arn:aws:iam::001234567890:role/my-role", options[:role_arn])
149 | assert_equal("fluent-plugin-kinesis-aggregation", options[:role_session_name])
150 | assert_equal("my_external_id", options[:external_id])
151 | assert_equal(3600, options[:duration_seconds])
152 | "sts_credentials"
153 | end
154 |
155 | stub(Aws::Kinesis::Client).new do |options|
156 | assert_equal("sts_credentials", options[:credentials])
157 | client
158 | end
159 |
160 | d = create_driver(<<-EOS)
161 | role_arn arn:aws:iam::001234567890:role/my-role
162 | external_id my_external_id
163 | stream_name test_stream
164 | region us-east-1
165 | fixed_partition_key test_partition_key
166 | buffer_chunk_limit 100k
167 | EOS
168 | d.run(default_tag: "test")
169 | end
170 |
171 | def test_emitting
172 | d = create_driver
173 |
174 | data1 = {"a"=>1,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
175 | data2 = {"a"=>2,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
176 |
177 | time = event_time("2011-01-02 13:14:15 UTC")
178 |
179 | d.run(default_tag: "test") do
180 | client = create_mock_client
181 | stub.instance_of(Aws::Kinesis::Client).put_record(
182 | stream_name: 'test_stream',
183 | data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1A6\b\x01\x1A2{\"a\":1,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\x1A6\b\x01\x1A2{\"a\":2,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\xA2\x0E y\x8B\x02\xDF\xAE\xAB\x93\x1C;\xCB\xAD\x1Fx".b,
184 | partition_key: 'test_partition_key'
185 | ) { {} }
186 |
187 | d.feed(time, data1)
188 | d.feed(time, data2)
189 | end
190 | end
191 |
192 | def test_multibyte
193 | d = create_driver
194 |
195 | data1 = {"a"=>"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB","time"=>"2011-01-02T13:14:15Z".b,"tag"=>"test"}
196 |
197 |
198 | time = event_time("2011-01-02 13:14:15 UTC")
199 | d.run(default_tag: "test") do
200 | client = create_mock_client
201 | stub.instance_of(Aws::Kinesis::Client).put_record(
202 | stream_name: 'test_stream',
203 | data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1AI\b\x01\x1AE{\"a\":\"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB\",\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}_$\x9C\xF9v+pV:g7c\xE3\xF2$\xBA".b,
204 | partition_key: 'test_partition_key'
205 | ) { {} }
206 |
207 | d.feed(time, data1)
208 | end
209 | end
210 |
211 | def test_fail_on_bigchunk
212 | d = create_driver
213 |
214 | assert_raise(Fluent::Plugin::Buffer::BufferChunkOverflowError) do
215 | d.run(default_tag: "test") do
216 | d.feed(
217 | event_time("2011-01-02 13:14:15 UTC"),
218 | {"msg" => "z" * 1024 * 1024})
219 | client = dont_allow(Object.new)
220 | client.put_record
221 | mock(Aws::Kinesis::Client).new(anything) { client }
222 | end
223 | end
224 | end
225 | end
226 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fluent Plugin for Amazon Kinesis producing KPL records
2 |
3 | [](https://travis-ci.org/atlassian/fluent-plugin-kinesis-aggregation)
4 |
5 | ## Before you start...
6 |
7 | This is a rewrite of [aws-fluent-plugin-kinesis](https://github.com/awslabs/aws-fluent-plugin-kinesis) to implement
8 | a different shipment method using the
9 | [KPL aggregation format](https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md).
10 |
11 | *Since this plugin was forked, aws-fluent-plugin-kinesis has undergone considerable development (and improvement).
12 | Most notably, the upcoming 2.0 release supports KPL aggregated records using google-protobuf without
13 | the overhead of using the KPL:
14 | https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/107*
15 |
16 | *However, it still uses msgpack for internal buffering and only uses protobuf when it ships the records,
17 | whereas this plugin processes each record as it comes in and ships the result by simple concatenation
18 | of the encoded records. This may not be faster, of course - could depend on the overhead of calling
19 | the protobuf methods - but most probably is. The discussion below is also still mostly valid,
20 | in that the awslabs plugin does not have PutRecord == chunk equivalency, but instead has its
21 | own internal retry method.*
22 |
23 | The basic idea is to have one PutRecord === one chunk. This has a number of advantages:
24 |
25 | - much less complexity in plugin (less CPU/memory)
26 | - by aggregating, we increase the throughput and decrease the cost
27 | - since a single chunk either succeeds or fails,
28 | we get to use fluentd's more complex/complete retry mechanism
29 | (which is also exposed by the monitor plugin; we view this in datadog). The existing retry mechanism
30 | had [unfortunate issues under heavy load](https://github.com/awslabs/aws-fluent-plugin-kinesis/issues/42)
31 | - we get ordering within a chunk without having to rely on sequence
32 | numbers (though not overall ordering)
33 |
34 | However, there are drawbacks:
35 |
36 | - if you're using this as an aggregator, you will need to tune the
37 | buffer size on your sources fairly low such that it is less
38 | than the low buffer_chunk_limit on the aggregator
39 | - you have to use a KCL library to ingest
40 | - you can't use a calculated partition key (based on the record);
41 | essentially, you need to use a random partition key
42 |
43 | ## Overview
44 |
45 | [Fluentd](http://fluentd.org/) output plugin
46 | that sends events to [Amazon Kinesis](https://aws.amazon.com/kinesis/).
47 |
48 | ## Installation
49 |
50 | This plugin is available as the `fluent-plugin-kinesis-aggregation` gem from RubyGems:
51 |
52 | gem install fluent-plugin-kinesis-aggregation
53 |
54 | Or, if using td-agent:
55 |
56 | td-agent-gem install fluent-plugin-kinesis-aggregation
57 |
58 | To install from the source:
59 |
60 | git clone https://github.com/atlassian/fluent-plugin-kinesis-aggregation.git
61 | cd fluent-plugin-kinesis-aggregation
62 | bundle install
63 | rake build
64 | rake install
65 |
66 | Or, if using td-agent, replace rake install with:
67 |
68 | fluent-gem install pkg/fluent-plugin-kinesis-aggregation
69 |
70 | Alternatively, you can replace both the rake steps, and directly
71 | specify the library path via RUBYLIB:
72 |
73 | export RUBYLIB=$RUBYLIB:/path/to/fluent-plugin-kinesis-aggregation/lib
74 |
75 | ## Dependencies
76 |
77 | * Ruby 2.7+
78 | * Fluentd 1+
79 |
80 | If you need td-agent v3 support, use version 0.3.x on rubygems. If you need td-agent v2 support (or fluentd 0.10 or 0.12 support), use the fluentd-v0.12 branch or version 0.2.x on rubygems.
81 |
82 | ## Basic Usage
83 |
84 | Here are general procedures for using this plugin:
85 |
86 | 1. Install.
87 | 1. Edit configuration
88 | 1. Run Fluentd or td-agent
89 |
90 | You can run this plugin with Fluentd as follows:
91 |
92 | 1. Install.
93 | 1. Edit configuration file and save it as 'fluentd.conf'.
94 | 1. Then, run `fluentd -c /path/to/fluentd.conf`
95 |
96 | To run with td-agent, it would be as follows:
97 |
98 | 1. Install.
99 | 1. Edit configuration file provided by td-agent.
100 | 1. Then, run or restart td-agent.
101 |
102 | ## Configuration
103 |
104 | Here are items for Fluentd configuration file.
105 |
106 | To put records into Amazon Kinesis,
107 | you need to provide AWS security credentials.
108 | If you provide aws_key_id and aws_sec_key in configuration file as below,
109 | we use it. You can also provide credentials via environment variables as
110 | AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY. Also we support IAM Role for
111 | authentication. Please find the [AWS SDK for Ruby Developer Guide](http://docs.aws.amazon.com/AWSSdkDocsRuby/latest/DeveloperGuide/ruby-dg-setup.html)
112 | for more information about authentication.
113 | We support all options which AWS SDK for Ruby supports.
114 |
115 | ### type
116 |
117 | Use the word 'kinesis-aggregation'.
118 |
119 | ### stream_name
120 |
121 | Name of the stream to put data.
122 |
123 | ### aws_key_id
124 |
125 | AWS access key id.
126 |
127 | ### aws_sec_key
128 |
129 | AWS secret key.
130 |
131 | ### role_arn
132 |
133 | IAM Role to be assumed with [AssumeRole](http://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html).
134 | Use this option for cross account access.
135 |
136 | ### external_id
137 |
138 | A unique identifier that is used by third parties when
139 | [assuming roles](http://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html) in their customers' accounts.
140 | Use this option with `role_arn` for third party cross account access.
141 | For details, please see [How to Use an External ID When Granting Access to Your AWS Resources to a Third Party](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html).
142 |
143 | ### region
144 |
145 | AWS region of your stream.
146 | It should be in form like "us-east-1", "us-west-2".
147 | Refer to [Regions and Endpoints in AWS General Reference](http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region)
148 | for supported regions.
149 |
150 | ### http_proxy
151 |
152 | Proxy server, if any.
153 | It should be in form like "http://squid:3128/"
154 |
155 | ### fixed_partition_key
156 |
157 | Instead of using a random partition key, use a fixed one. This
158 | forces all writes to a specific shard, and if you're using
159 | a single thread/process will probably keep event ordering
160 | (not recommended - watch out for hot shards!).
161 |
162 | ### num_threads
163 |
164 | Integer. The number of threads to flush the buffer. This plugin is based on
165 | Fluent::Plugin::Output, so we buffer incoming records before emitting them to
166 | Amazon Kinesis. You can find the detail about buffering mechanism [here](http://docs.fluentd.org/articles/buffer-plugin-overview).
167 | Emitting records to Amazon Kinesis via network causes I/O Wait, so parallelizing
168 | emitting with threads will improve throughput.
169 |
170 | This option can be used to parallelize writes into the output(s)
171 | designated by the output plugin. The default is 1.
172 | Also you can use this option with *multi workers*.
173 |
174 | ### multi workers
175 |
176 | This feature is introduced in Fluentd v0.14.
177 | Instead of using *detach_process*, this feature can be enabled via the following system directive.
178 | Note that the *detach_process* parameter was removed after migrating to the v0.14 Output Plugin API.
179 | The default is 1.
180 |
181 |
182 | workers 5
183 |
184 |
185 | ### debug
186 |
187 | Boolean. Enable if you need to debug Amazon Kinesis API call. Default is false.
188 |
189 | ## Configuration examples
190 |
191 | Here are some configuration examples.
192 | Assume that the JSON object below is coming in with the tag 'your_tag'.
193 |
194 | {
195 | "name":"foo",
196 | "action":"bar"
197 | }
198 |
199 | ### Simply putting events to Amazon Kinesis with a partition key
200 |
201 | In this example, a value 'foo' will be used as the partition key,
202 | then events will be sent to the stream specified in 'stream_name'.
203 |
204 |
205 | type kinesis-aggregation
206 |
207 | stream_name YOUR_STREAM_NAME
208 |
209 | aws_key_id YOUR_AWS_ACCESS_KEY
210 | aws_sec_key YOUR_SECRET_KEY
211 |
212 | region us-east-1
213 |
214 | fixed_partition_key foo
215 |
216 | # You should set the buffer_chunk_limit to substantially less
217 | # than the kinesis 1mb record limit, since we ship a chunk at once.
218 | buffer_chunk_limit 300k
219 |
220 |
221 | ### Improving throughput to Amazon Kinesis
222 |
223 | The achievable throughput to Amazon Kinesis is limited to single-threaded
224 | PutRecord calls, which should be at most around 300kb each.
225 | The plugin can also be configured to execute in parallel.
226 | The **detach_process** and **num_threads** configuration settings control
227 | parallelism.
228 |
229 | In case of the configuration below, you will spawn 2 processes.
230 |
231 |
232 | type kinesis
233 |
234 | stream_name YOUR_STREAM_NAME
235 | region us-east-1
236 |
237 | detach_process 2
238 | buffer_chunk_limit 300k
239 |
240 |
241 | You can also specify a number of threads to put.
242 | The number of threads is bound to each individual process.
243 | So in this case, you will spawn 1 process which has 50 threads.
244 |
245 |
246 | type kinesis
247 |
248 | stream_name YOUR_STREAM_NAME
249 | region us-east-1
250 |
251 | num_threads 50
252 | buffer_chunk_limit 300k
253 |
254 |
255 | Both options can be used together, in the configuration below,
256 | you will spawn 2 processes and 50 threads per process.
257 |
258 |
259 | type kinesis
260 |
261 | stream_name YOUR_STREAM_NAME
262 | region us-east-1
263 |
264 | detach_process 2
265 | num_threads 50
266 | buffer_chunk_limit 300k
267 |
268 |
269 | ## Related Resources
270 |
271 | * [Amazon Kinesis Developer Guide](http://docs.aws.amazon.com/kinesis/latest/dev/introduction.html)
272 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------