├── .github └── workflows │ └── main.yml ├── .gitignore ├── .rubocop.yml ├── .ruby-version ├── Gemfile ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Rakefile ├── aws ├── config └── credentials ├── bin ├── console └── setup ├── demo ├── Gemfile ├── Gemfile.lock ├── README.md ├── Rakefile ├── aws │ ├── config │ └── credentials ├── docker-compose.yml ├── lib │ ├── kcl_demo.rb │ └── kcl_demo │ │ ├── demo_record_processor.rb │ │ └── demo_record_processor_factory.rb └── terraform │ └── main.tf ├── docker-compose.yml ├── kcl-rb.gemspec ├── lib ├── kcl.rb └── kcl │ ├── checkpointer.rb │ ├── checkpoints │ └── sentinel.rb │ ├── config.rb │ ├── errors.rb │ ├── logger.rb │ ├── proxies │ ├── dynamo_db_proxy.rb │ └── kinesis_proxy.rb │ ├── record_processor.rb │ ├── record_processor_factory.rb │ ├── types │ ├── extended_sequence_number.rb │ ├── initialization_input.rb │ ├── records_input.rb │ └── shutdown_input.rb │ ├── version.rb │ ├── worker.rb │ └── workers │ ├── consumer.rb │ ├── record_checkpointer.rb │ ├── shard_info.rb │ └── shutdown_reason.rb ├── spec ├── checkpointer_spec.rb ├── spec_helper.rb ├── supports │ ├── use_dynamodb_context.rb │ ├── use_kinesis_contexts.rb │ └── use_record_processor_contexts.rb ├── worker_spec.rb └── workers │ └── consumer_spec.rb └── terraform └── main.tf /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | paths-ignore: 8 | - '**.md' 9 | - '**.txt' 10 | pull_request: 11 | paths-ignore: 12 | - '**.md' 13 | - '**.txt' 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v2 22 | 23 | - name: Up docker-compose 24 | run: docker-compose -f docker-compose.yml up -d 25 | 26 | - name: Sleep for 30 seconds 27 | uses: jakejarvis/wait-action@master 28 | with: 29 | time: '30s' 30 | 31 | - name: Set up Terraform 32 | uses: hashicorp/setup-terraform@v1 33 | with: 
34 | terraform_version: 0.12.24 35 | - id: init 36 | run: terraform init 37 | working-directory: ${{ github.workspace }}/terraform 38 | - id: apply 39 | run: terraform apply -auto-approve -no-color 40 | working-directory: ${{ github.workspace }}/terraform 41 | 42 | - name: Set up Ruby 2.7 43 | uses: actions/setup-ruby@v1 44 | with: 45 | ruby-version: 2.7 46 | 47 | - name: Bundle install 48 | run: | 49 | gem install bundler 50 | bundle install --jobs 4 --retry 3 51 | 52 | - name: Rubocop 53 | run: | 54 | bundle exec rubocop 55 | 56 | - name: RSpec 57 | run: | 58 | bundle exec rspec 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /terraform/terraform.tfstate* 3 | /terraform/.terraform 4 | /vendor 5 | 6 | /demo/.bundle/ 7 | /demo/terraform/terraform.tfstate* 8 | /demo/terraform/.terraform 9 | /demo/vendor 10 | 11 | /pkg 12 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | - "vendor/**/*" 4 | - "demo/vendor/**/*" 5 | DisplayCopNames: true 6 | 7 | Layout/ArgumentAlignment: 8 | Enabled: false 9 | 10 | Layout/ExtraSpacing: 11 | Enabled: false 12 | 13 | Layout/HashAlignment: 14 | Enabled: false 15 | 16 | Lint/NonDeterministicRequireOrder: 17 | Enabled: false 18 | 19 | Layout/SpaceAroundOperators: 20 | Enabled: false 21 | 22 | Metrics/AbcSize: 23 | Enabled: false 24 | 25 | Metrics/BlockLength: 26 | Enabled: false 27 | 28 | Metrics/ClassLength: 29 | Max: 300 30 | 31 | Metrics/MethodLength: 32 | Max: 50 33 | 34 | Naming/MemoizedInstanceVariableName: 35 | Enabled: false 36 | 37 | Metrics/CyclomaticComplexity: 38 | Max: 15 39 | 40 | Metrics/PerceivedComplexity: 41 | Max: 15 42 | 43 | # 日本語のコメントを許可する 44 | Style/AsciiComments: 45 | Enabled: false 46 | 47 | 
Style/ClassAndModuleChildren: 48 | Enabled: false 49 | 50 | Style/Documentation: 51 | Enabled: false 52 | 53 | Style/EmptyLineAfterGuardClause: 54 | Enabled: false 55 | 56 | Style/EmptyMethod: 57 | Enabled: false 58 | 59 | Style/FrozenStringLiteralComment: 60 | Enabled: false 61 | 62 | Style/GuardClause: 63 | MinBodyLength: 5 64 | 65 | Style/HashSyntax: 66 | Enabled: false 67 | 68 | Style/IfUnlessModifier: 69 | Enabled: false 70 | 71 | Style/MultilineTernaryOperator: 72 | Enabled: false 73 | 74 | Style/NumericPredicate: 75 | Enabled: false 76 | 77 | Style/RaiseArgs: 78 | Enabled: false 79 | 80 | Style/RedundantBegin: 81 | Enabled: false 82 | 83 | Style/RedundantInterpolation: 84 | Enabled: false 85 | 86 | Style/RedundantSelf: 87 | Enabled: false 88 | 89 | Style/RescueStandardError: 90 | Enabled: false 91 | 92 | Style/SymbolArray: 93 | Enabled: false 94 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.7.1 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in kcl-rb.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 
3 | specs: 4 | kcl-rb (1.0.0) 5 | activesupport (>= 5.0) 6 | aws-sdk-dynamodb (~> 1) 7 | aws-sdk-kinesis (~> 1) 8 | eventmachine (~> 1.2.7) 9 | 10 | GEM 11 | remote: https://rubygems.org/ 12 | specs: 13 | activesupport (6.0.3.1) 14 | concurrent-ruby (~> 1.0, >= 1.0.2) 15 | i18n (>= 0.7, < 2) 16 | minitest (~> 5.1) 17 | tzinfo (~> 1.1) 18 | zeitwerk (~> 2.2, >= 2.2.2) 19 | ast (2.4.1) 20 | aws-eventstream (1.1.0) 21 | aws-partitions (1.326.0) 22 | aws-sdk-core (3.98.0) 23 | aws-eventstream (~> 1, >= 1.0.2) 24 | aws-partitions (~> 1, >= 1.239.0) 25 | aws-sigv4 (~> 1.1) 26 | jmespath (~> 1.0) 27 | aws-sdk-dynamodb (1.48.0) 28 | aws-sdk-core (~> 3, >= 3.71.0) 29 | aws-sigv4 (~> 1.1) 30 | aws-sdk-kinesis (1.23.0) 31 | aws-sdk-core (~> 3, >= 3.71.0) 32 | aws-sigv4 (~> 1.1) 33 | aws-sigv4 (1.1.4) 34 | aws-eventstream (~> 1.0, >= 1.0.2) 35 | concurrent-ruby (1.1.6) 36 | diff-lcs (1.4.2) 37 | eventmachine (1.2.7) 38 | i18n (1.8.3) 39 | concurrent-ruby (~> 1.0) 40 | jmespath (1.4.0) 41 | minitest (5.14.1) 42 | parallel (1.19.2) 43 | parser (2.7.1.4) 44 | ast (~> 2.4.1) 45 | rainbow (3.0.0) 46 | rake (12.3.3) 47 | regexp_parser (1.7.1) 48 | rexml (3.2.5) 49 | rspec (3.9.0) 50 | rspec-core (~> 3.9.0) 51 | rspec-expectations (~> 3.9.0) 52 | rspec-mocks (~> 3.9.0) 53 | rspec-core (3.9.2) 54 | rspec-support (~> 3.9.3) 55 | rspec-expectations (3.9.2) 56 | diff-lcs (>= 1.2.0, < 2.0) 57 | rspec-support (~> 3.9.0) 58 | rspec-mocks (3.9.1) 59 | diff-lcs (>= 1.2.0, < 2.0) 60 | rspec-support (~> 3.9.0) 61 | rspec-support (3.9.3) 62 | rubocop (0.86.0) 63 | parallel (~> 1.10) 64 | parser (>= 2.7.0.1) 65 | rainbow (>= 2.2.2, < 4.0) 66 | regexp_parser (>= 1.7) 67 | rexml 68 | rubocop-ast (>= 0.0.3, < 1.0) 69 | ruby-progressbar (~> 1.7) 70 | unicode-display_width (>= 1.4.0, < 2.0) 71 | rubocop-ast (0.1.0) 72 | parser (>= 2.7.0.1) 73 | ruby-progressbar (1.10.1) 74 | thread_safe (0.3.6) 75 | tzinfo (1.2.7) 76 | thread_safe (~> 0.1) 77 | unicode-display_width (1.7.0) 78 | zeitwerk (2.3.0) 79 | 
80 | PLATFORMS 81 | ruby 82 | 83 | DEPENDENCIES 84 | kcl-rb! 85 | rake (~> 12.0) 86 | rspec (~> 3.0) 87 | rubocop (~> 0.86.0) 88 | 89 | BUNDLED WITH 90 | 2.1.4 91 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 yo_waka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kcl-rb 2 | 3 | ## Overview 4 | 5 | The Amazon Kinesis Client Library for Pure Ruby (Amazon KCL) enables Ruby developers to easily consume and process data from [Amazon Kinesis](http://aws.amazon.com/kinesis). 
6 | 7 | AWS already provides [KCL for Ruby](https://github.com/awslabs/amazon-kinesis-client-ruby), but it requires Java at runtime because it relies on the MultiLangDaemon. 8 | **kcl-rb** is written in pure Ruby and does not depend on Java. 9 | 10 | ## Installation 11 | 12 | Add this line to your application's Gemfile: 13 | 14 | ```ruby 15 | gem 'kcl-rb' 16 | ``` 17 | 18 | And then execute: 19 | 20 | $ bundle install 21 | 22 | Or install it yourself as: 23 | 24 | $ gem install kcl-rb 25 | 26 | ## Usage 27 | 28 | Implement your application according to [the KCL specifications](https://docs.aws.amazon.com/streams/latest/dev/kinesis-record-processor-implementation-app-java.html). 29 | 30 | ### Implement the RecordProcessor 31 | 32 | ```rb 33 | class RecordProcessor < Kcl::RecordProcessor 34 | def after_initialize(initialization_input) 35 | puts "SHARD_ID: #{initialization_input.shard_id}" 36 | end 37 | 38 | def process_records(records_input) 39 | puts "Current behind: #{records_input.millis_behind_latest}" 40 | records_input.records.each do |record| 41 | puts "Record: #{record}" 42 | end 43 | end 44 | 45 | def shutdown(shutdown_input) 46 | puts "Shutdown reason: #{shutdown_input.shutdown_reason}" 47 | 48 | if shutdown_input.shutdown_reason == Kcl::Workers::ShutdownReason::TERMINATE 49 | shutdown_input.record_checkpointer.update_checkpoint(nil) 50 | end 51 | end 52 | end 53 | ``` 54 | 55 | ### Implement a Class Factory for the RecordProcessor 56 | 57 | ```rb 58 | class RecordProcessorFactory < Kcl::RecordProcessorFactory 59 | def create_processor 60 | RecordProcessor.new 61 | end 62 | end 63 | ``` 64 | 65 | ### Initialize KCL configurations 66 | 67 | ```rb 68 | Kcl.configure do |config| 69 | config.aws_region = 'ap-northeast-1' 70 | config.aws_access_key_id = 'dummy' 71 | config.aws_secret_access_key = 'dummy' 72 | config.dynamodb_endpoint = 'https://localhost:4566' 73 | config.dynamodb_table_name = 'kcl-rb' 74 | config.kinesis_endpoint = 
'https://localhost:4566' 75 | config.kinesis_stream_name = 'kcl-rb' 76 | config.use_ssl = false 77 | end 78 | ``` 79 | 80 | For the full list of configuration options, see the [config class file](https://github.com/waka/kcl-rb/blob/master/lib/kcl/config.rb). 81 | 82 | ### Run a Worker 83 | 84 | ```rb 85 | worker_id = 'kcl-worker' 86 | factory = RecordProcessorFactory.new 87 | Kcl::Worker.run(worker_id, factory) 88 | ``` 89 | 90 | For a more concrete example, look under [the demo directory](https://github.com/waka/kcl-rb/tree/master/demo). 91 | 92 | ## Development 93 | 94 | ### Prerequisites 95 | 96 | - Install Ruby 2.7.1 97 | - Install Docker 98 | - Install Terraform 99 | 100 | ### Build & Run for RSpec 101 | 102 | Create the Kinesis resources on LocalStack using Terraform. 103 | 104 | ```sh 105 | $ docker-compose up -d 106 | $ cd terraform 107 | $ terraform init 108 | $ terraform apply 109 | ``` 110 | 111 | Install the dependencies. 112 | 113 | ``` 114 | $ bundle install --path vendor/bundle 115 | ``` 116 | 117 | Then run RSpec. 118 | 119 | ```sh 120 | $ bundle exec rspec 121 | ``` 122 | 123 | ## Contributing 124 | 125 | Bug reports and pull requests are welcome on GitHub at https://github.com/waka/kcl-rb. 126 | 127 | 128 | ## License 129 | 130 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 
131 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | task :default => :spec 3 | -------------------------------------------------------------------------------- /aws/config: -------------------------------------------------------------------------------- 1 | [default] 2 | region = ap-northeast-1 3 | output = json 4 | -------------------------------------------------------------------------------- /aws/credentials: -------------------------------------------------------------------------------- 1 | [default] 2 | aws_access_key_id = dummy 3 | aws_secret_access_key = dummy 4 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'kcl' 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | # (If you use this, don't forget to add pry to your Gemfile!) 
10 | require 'pry' 11 | Pry.start 12 | 13 | # require 'irb' 14 | # IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /demo/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'kcl-rb', path: '../' 4 | gem 'pry' 5 | gem 'rake', '~> 12.0' 6 | -------------------------------------------------------------------------------- /demo/Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: .. 3 | specs: 4 | kcl-rb (1.0.0) 5 | activesupport (>= 5.0) 6 | aws-sdk-dynamodb (~> 1) 7 | aws-sdk-kinesis (~> 1) 8 | eventmachine (~> 1.2.7) 9 | 10 | GEM 11 | remote: https://rubygems.org/ 12 | specs: 13 | activesupport (6.0.3.1) 14 | concurrent-ruby (~> 1.0, >= 1.0.2) 15 | i18n (>= 0.7, < 2) 16 | minitest (~> 5.1) 17 | tzinfo (~> 1.1) 18 | zeitwerk (~> 2.2, >= 2.2.2) 19 | aws-eventstream (1.1.0) 20 | aws-partitions (1.326.0) 21 | aws-sdk-core (3.98.0) 22 | aws-eventstream (~> 1, >= 1.0.2) 23 | aws-partitions (~> 1, >= 1.239.0) 24 | aws-sigv4 (~> 1.1) 25 | jmespath (~> 1.0) 26 | aws-sdk-dynamodb (1.48.0) 27 | aws-sdk-core (~> 3, >= 3.71.0) 28 | aws-sigv4 (~> 1.1) 29 | aws-sdk-kinesis (1.23.0) 30 | aws-sdk-core (~> 3, >= 3.71.0) 31 | aws-sigv4 (~> 1.1) 32 | aws-sigv4 (1.1.4) 33 | aws-eventstream (~> 1.0, >= 1.0.2) 34 | coderay (1.1.3) 35 | concurrent-ruby (1.1.6) 36 | eventmachine (1.2.7) 37 | i18n (1.8.3) 38 | concurrent-ruby (~> 1.0) 39 | jmespath (1.4.0) 40 | method_source (1.0.0) 41 | minitest (5.14.1) 42 | pry (0.13.1) 43 | coderay (~> 1.1) 44 | 
method_source (~> 1.0) 45 | rake (12.3.3) 46 | thread_safe (0.3.6) 47 | tzinfo (1.2.7) 48 | thread_safe (~> 0.1) 49 | zeitwerk (2.3.0) 50 | 51 | PLATFORMS 52 | ruby 53 | 54 | DEPENDENCIES 55 | kcl-rb! 56 | pry 57 | rake (~> 12.0) 58 | 59 | BUNDLED WITH 60 | 2.1.4 61 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # kcl-rb Demo App 2 | 3 | ## Build and Run 4 | 5 | Run localstack container (mock for Kinesis and DynamoDB). 6 | 7 | ``` 8 | $ docker-compose up 9 | ``` 10 | 11 | Create resources on localstack using Terraform 12 | 13 | ``` 14 | $ cd terraform 15 | $ terraform init 16 | $ terraform plan 17 | $ terraform apply 18 | ``` 19 | 20 | Build dependencies 21 | 22 | ``` 23 | $ bundle install --path vendor/bundle 24 | ``` 25 | 26 | Run Demo KCL application 27 | 28 | ``` 29 | $ bundle exec rake run 30 | ``` 31 | 32 | Put records to Kinesis stream 33 | 34 | ``` 35 | $ RECORD_COUNT=10 bundle exec rake seed 36 | ``` 37 | 38 | You can see in console that the input data is distributed and processed by each consumer. 39 | -------------------------------------------------------------------------------- /demo/Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'pry' 3 | 4 | require 'kcl' 5 | require_relative './lib/kcl_demo' 6 | 7 | task :default => :run 8 | task :run do 9 | KclDemo::App.initialize 10 | KclDemo::App.run 11 | end 12 | 13 | task :seed do 14 | KclDemo::App.initialize 15 | record_count = Integer(ENV['RECORD_COUNT'] ||0) 16 | if record_count.zero? 
17 | puts 'Set over 1 for RECORD_COUNT' 18 | return 19 | end 20 | KclDemo::App.seed(record_count) 21 | end 22 | 23 | task :debug do 24 | KclDemo::App.initialize 25 | 26 | kinesis = Kcl::Proxies::KinesisProxy.new(KclDemo::App.config) 27 | dynamodb = Kcl::Proxies::DynamoDbProxy.new(KclDemo::App.config) 28 | # rubocop:disable Lint/Debugger 29 | binding.pry 30 | # rubocop:enable Lint/Debugger 31 | end 32 | -------------------------------------------------------------------------------- /demo/aws/config: -------------------------------------------------------------------------------- 1 | [default] 2 | region = ap-northeast-1 3 | output = json 4 | -------------------------------------------------------------------------------- /demo/aws/credentials: -------------------------------------------------------------------------------- 1 | [default] 2 | aws_access_key_id = dummy 3 | aws_secret_access_key = dummy 4 | -------------------------------------------------------------------------------- /demo/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | volumes: 4 | localstack-data: 5 | driver: local 6 | 7 | services: 8 | localstack: 9 | image: localstack/localstack:0.11.0 10 | container_name: localstack-for-kcl-demo 11 | ports: 12 | - "8080:8080" 13 | - "4566:4566" 14 | environment: 15 | - DATA_DIR=/tmp/localstack/data 16 | - DEBUG=${LOCALSTACK_DEBUG:-true} 17 | - DEFAULT_REGION=ap-northeast-1 18 | - SERVICES=dynamodb,kinesis 19 | - USE_SSL=true 20 | volumes: 21 | - "${PWD}/aws:/root/.aws" 22 | - "/var/run/docker.sock:/var/run/docker.sock" 23 | - "localstack-data:/tmp/localstack" 24 | -------------------------------------------------------------------------------- /demo/lib/kcl_demo.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require 'securerandom' 3 | 4 | require_relative './kcl_demo/demo_record_processor' 5 | require_relative 
'./kcl_demo/demo_record_processor_factory' 6 | 7 | module KclDemo 8 | class App 9 | def self.initialize 10 | Kcl.configure do |config| 11 | config.aws_region = 'ap-northeast-1' 12 | config.aws_access_key_id = 'dummy' 13 | config.aws_secret_access_key = 'dummy' 14 | config.dynamodb_endpoint = 'https://localhost:4566' 15 | config.dynamodb_table_name = 'kcl-rb-demo' 16 | config.kinesis_endpoint = 'https://localhost:4566' 17 | config.kinesis_stream_name = 'kcl-rb-demo' 18 | config.use_ssl = false 19 | end 20 | end 21 | 22 | def self.config 23 | Kcl.config 24 | end 25 | 26 | def self.run 27 | factory = KclDemo::DemoRecordProcessorFactory.new 28 | Kcl::Worker.run('kcl-demo', factory) 29 | end 30 | 31 | def self.seed(record_count = 1000) 32 | proxy = Kcl::Proxies::KinesisProxy.new(config) 33 | 34 | # puts records 35 | record_count.times do |i| 36 | str = SecureRandom.alphanumeric 37 | hash = JSON.generate({ id: i, name: str }) 38 | resp = proxy.put_record( 39 | { 40 | stream_name: config.kinesis_stream_name, 41 | data: Base64.strict_encode64(hash), 42 | partition_key: str 43 | } 44 | ) 45 | puts resp 46 | end 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /demo/lib/kcl_demo/demo_record_processor.rb: -------------------------------------------------------------------------------- 1 | require 'pry' 2 | 3 | module KclDemo 4 | class DemoRecordProcessor < Kcl::RecordProcessor 5 | # @implement 6 | def after_initialize(initialization_input) 7 | Kcl.logger.info("Initialization at #{initialization_input}") 8 | end 9 | 10 | # @implement 11 | def process_records(records_input) 12 | Kcl.logger.info('Processing records...') 13 | 14 | # Skip empty batches: nothing to log or checkpoint 15 | return if records_input.records.empty? 
16 | 17 | # rubocop:disable Lint/Debugger 18 | binding.pry if ENV['DEBUG'] == '1' 19 | # rubocop:enable Lint/Debugger 20 | 21 | records_input.records.each do |record| 22 | Kcl.logger.info("Record = #{record}") 23 | end 24 | 25 | # Record a checkpoint at the last sequence number of this batch 26 | last_sequence_number = records_input.records[-1].sequence_number 27 | Kcl.logger.info( 28 | "Checkpoint progress at: #{last_sequence_number}" \ 29 | ", MillisBehindLatest = #{records_input.millis_behind_latest}" 30 | ) 31 | records_input.record_checkpointer.update_checkpoint(last_sequence_number) 32 | end 33 | 34 | # @implement 35 | def shutdown(shutdown_input) 36 | Kcl.logger.info("Shutdown reason: #{shutdown_input.shutdown_reason}") 37 | 38 | if shutdown_input.shutdown_reason == Kcl::Workers::ShutdownReason::TERMINATE 39 | shutdown_input.record_checkpointer.update_checkpoint(nil) 40 | end 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /demo/lib/kcl_demo/demo_record_processor_factory.rb: -------------------------------------------------------------------------------- 1 | module KclDemo 2 | class DemoRecordProcessorFactory < Kcl::RecordProcessorFactory 3 | def create_processor 4 | KclDemo::DemoRecordProcessor.new 5 | end 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /demo/terraform/main.tf: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Development environment (LocalStack) 3 | #------------------------------------------------------------------------------- 4 | 5 | provider "aws" { 6 | version = "~> 2.60" 7 | access_key = "dummy" 8 | secret_key = "dummy" 9 | region = "ap-northeast-1" 10 | insecure = true 11 | skip_credentials_validation = true 12 | skip_metadata_api_check = true 13 | skip_requesting_account_id = true 14 | 15 | endpoints { 16 | dynamodb = "https://localhost:4566" 17 | kinesis = 
"https://localhost:4566" 18 | } 19 | } 20 | 21 | 22 | #------------------------------------------------------------------------------- 23 | # Kinesis stream 24 | #------------------------------------------------------------------------------- 25 | 26 | resource "aws_kinesis_stream" "kcl-rb-demo_stream" { 27 | name = "kcl-rb-demo" 28 | shard_count = 5 29 | retention_period = 24 30 | 31 | tags = { 32 | Environment = "test" 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | volumes: 4 | localstack-data: 5 | driver: local 6 | 7 | services: 8 | localstack: 9 | image: localstack/localstack:0.11.0 10 | container_name: localstack-for-kcl 11 | ports: 12 | - "8080:8080" 13 | - "4566:4566" 14 | environment: 15 | - DATA_DIR=/tmp/localstack/data 16 | - DEBUG=${LOCALSTACK_DEBUG:-true} 17 | - DEFAULT_REGION=ap-northeast-1 18 | - SERVICES=dynamodb,kinesis 19 | - USE_SSL=true 20 | volumes: 21 | - "${PWD}/aws:/root/.aws" 22 | - "localstack-data:/tmp/localstack" 23 | -------------------------------------------------------------------------------- /kcl-rb.gemspec: -------------------------------------------------------------------------------- 1 | require_relative 'lib/kcl/version' 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = 'kcl-rb' 5 | spec.version = Kcl::VERSION 6 | spec.authors = ['yo_waka'] 7 | spec.email = ['y.wakahara@gmail.com'] 8 | 9 | spec.summary = 'Amazon.Kinesis Client Library for Ruby.' 10 | spec.description = 'A pure ruby interface for Amazon Kinesis Client.' 
11 | spec.homepage = 'https://github.com/waka/kcl-rb' 12 | spec.license = 'MIT' 13 | spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0') 14 | 15 | spec.metadata['homepage_uri'] = spec.homepage 16 | spec.metadata['source_code_uri'] = 'https://github.com/waka/kcl-rb' 17 | spec.metadata['changelog_uri'] = 'https://github.com/waka/kcl-rb/CHANGELOG' 18 | 19 | # Specify which files should be added to the gem when it is released. 20 | # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 21 | spec.files = Dir.chdir(File.expand_path(__dir__)) do 22 | `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } 23 | end 24 | spec.bindir = 'exe' 25 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 26 | spec.require_paths = ['lib'] 27 | 28 | spec.add_dependency 'activesupport', '>= 5.0' 29 | spec.add_dependency 'aws-sdk-dynamodb', '~> 1' 30 | spec.add_dependency 'aws-sdk-kinesis', '~> 1' 31 | spec.add_dependency 'eventmachine', '~> 1.2.7' 32 | 33 | spec.add_development_dependency 'rake', '~> 12.0' 34 | spec.add_development_dependency 'rspec', '~> 3.0' 35 | spec.add_development_dependency 'rubocop', '~> 0.86.0' 36 | end 37 | -------------------------------------------------------------------------------- /lib/kcl.rb: -------------------------------------------------------------------------------- 1 | require 'kcl/checkpointer' 2 | require 'kcl/checkpoints/sentinel' 3 | require 'kcl/config' 4 | require 'kcl/errors' 5 | require 'kcl/logger' 6 | require 'kcl/proxies/dynamo_db_proxy' 7 | require 'kcl/proxies/kinesis_proxy' 8 | require 'kcl/record_processor' 9 | require 'kcl/record_processor_factory' 10 | require 'kcl/types/extended_sequence_number' 11 | require 'kcl/types/initialization_input' 12 | require 'kcl/types/records_input' 13 | require 'kcl/types/shutdown_input' 14 | require 'kcl/worker' 15 | require 'kcl/workers/consumer' 16 | require 'kcl/workers/record_checkpointer' 17 | require 
'kcl/workers/shard_info' 18 | require 'kcl/workers/shutdown_reason' 19 | 20 | module Kcl 21 | def self.configure 22 | yield config 23 | end 24 | 25 | def self.config 26 | @_config ||= Kcl::Config.new 27 | end 28 | 29 | def self.logger 30 | @_logger ||= (config.logger || Kcl::Logger.new($stdout)) 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/kcl/checkpointer.rb: -------------------------------------------------------------------------------- 1 | require 'time' 2 | 3 | class Kcl::Checkpointer 4 | DYNAMO_DB_LEASE_PRIMARY_KEY = 'shard_id'.freeze 5 | DYNAMO_DB_LEASE_OWNER_KEY = 'assigned_to'.freeze 6 | DYNAMO_DB_LEASE_TIMEOUT_KEY = 'lease_timeout'.freeze 7 | DYNAMO_DB_CHECKPOINT_SEQUENCE_NUMBER_KEY = 'checkpoint'.freeze 8 | DYNAMO_DB_PARENT_SHARD_KEY = 'parent_shard_id'.freeze 9 | 10 | attr_reader :dynamodb 11 | 12 | # @param [Kcl::Config] config 13 | def initialize(config) 14 | @dynamodb = Kcl::Proxies::DynamoDbProxy.new(config) 15 | @table_name = config.dynamodb_table_name 16 | 17 | return if @dynamodb.exists?(@table_name) 18 | @dynamodb.create_table( 19 | @table_name, 20 | [{ 21 | attribute_name: DYNAMO_DB_LEASE_PRIMARY_KEY, 22 | attribute_type: 'S' 23 | }], 24 | [{ 25 | attribute_name: DYNAMO_DB_LEASE_PRIMARY_KEY, 26 | key_type: 'HASH' 27 | }], 28 | { 29 | read_capacity_units: config.dynamodb_read_capacity, 30 | write_capacity_units: config.dynamodb_write_capacity 31 | } 32 | ) 33 | Kcl.logger.info("Created DynamoDB table: #{@table_name}") 34 | end 35 | 36 | # Retrieves the checkpoint for the given shard 37 | # @params [Kcl::Workers::ShardInfo] shard 38 | # @return [Kcl::Workers::ShardInfo] 39 | def fetch_checkpoint(shard) 40 | checkpoint = @dynamodb.get_item( 41 | @table_name, 42 | { "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id } 43 | ) 44 | return shard if checkpoint.nil? 
45 | 46 | if checkpoint[DYNAMO_DB_CHECKPOINT_SEQUENCE_NUMBER_KEY] 47 | shard.checkpoint = checkpoint[DYNAMO_DB_CHECKPOINT_SEQUENCE_NUMBER_KEY] 48 | end 49 | if checkpoint[DYNAMO_DB_LEASE_OWNER_KEY] 50 | shard.assigned_to = checkpoint[DYNAMO_DB_LEASE_OWNER_KEY] 51 | end 52 | Kcl.logger.info("Retrieves checkpoint of shard at #{shard.to_h}") 53 | 54 | shard 55 | end 56 | 57 | # Write the checkpoint for the given shard 58 | # @params [Kcl::Workers::ShardInfo] shard 59 | # @return [Kcl::Workers::ShardInfo] 60 | def update_checkpoint(shard) 61 | item = { 62 | "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id, 63 | "#{DYNAMO_DB_CHECKPOINT_SEQUENCE_NUMBER_KEY}" => shard.checkpoint, 64 | "#{DYNAMO_DB_LEASE_OWNER_KEY}" => shard.assigned_to, 65 | "#{DYNAMO_DB_LEASE_TIMEOUT_KEY}" => shard.lease_timeout.to_s 66 | } 67 | if shard.parent_shard_id > 0 68 | item[DYNAMO_DB_PARENT_SHARD_KEY] = shard.parent_shard_id 69 | end 70 | 71 | result = @dynamodb.put_item(@table_name, item) 72 | if result 73 | Kcl.logger.info("Write checkpoint of shard at #{shard.to_h}") 74 | else 75 | Kcl.logger.info("Failed to write checkpoint for shard at #{shard.to_h}") 76 | end 77 | 78 | shard 79 | end 80 | 81 | # Attempt to gain a lock on the given shard 82 | # @params [Kcl::Workers::ShardInfo] shard 83 | # @params [String] next_assigned_to 84 | # @return [Kcl::Workers::ShardInfo] 85 | def lease(shard, next_assigned_to) 86 | now = Time.now.utc 87 | next_lease_timeout = now + Kcl.config.dynamodb_failover_seconds 88 | 89 | checkpoint = @dynamodb.get_item( 90 | @table_name, 91 | { "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id } 92 | ) 93 | assigned_to = checkpoint && checkpoint[DYNAMO_DB_LEASE_OWNER_KEY] 94 | lease_timeout = checkpoint && checkpoint[DYNAMO_DB_LEASE_TIMEOUT_KEY] 95 | 96 | if assigned_to && lease_timeout 97 | if now > Time.parse(lease_timeout) && assigned_to != next_assigned_to 98 | raise Kcl::Errors::LeaseNotAquiredError 99 | end 100 | condition_expression = 'shard_id = :shard_id AND 
assigned_to = :assigned_to AND lease_timeout = :lease_timeout' 101 | expression_attributes = { 102 | ':shard_id' => shard.shard_id, 103 | ':assigned_to' => assigned_to, 104 | ':lease_timeout' => lease_timeout 105 | } 106 | Kcl.logger.info("Attempting to get a lock for shard: #{shard.to_h}") 107 | else 108 | condition_expression = 'attribute_not_exists(assigned_to)' 109 | expression_attributes = nil 110 | end 111 | 112 | item = { 113 | "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id, 114 | "#{DYNAMO_DB_LEASE_OWNER_KEY}" => next_assigned_to, 115 | "#{DYNAMO_DB_LEASE_TIMEOUT_KEY}" => next_lease_timeout.to_s 116 | } 117 | if shard.checkpoint != '' 118 | item[DYNAMO_DB_CHECKPOINT_SEQUENCE_NUMBER_KEY] = shard.checkpoint 119 | end 120 | if shard.parent_shard_id > 0 121 | item[DYNAMO_DB_PARENT_SHARD_KEY] = shard.parent_shard_id 122 | end 123 | 124 | result = @dynamodb.conditional_update_item( 125 | @table_name, 126 | item, 127 | condition_expression, 128 | expression_attributes 129 | ) 130 | if result 131 | shard.assigned_to = next_assigned_to 132 | shard.lease_timeout = next_lease_timeout 133 | Kcl.logger.info("Get lease for shard at #{shard.to_h}") 134 | else 135 | Kcl.logger.info("Failed to get lease for shard at #{shard.to_h}") 136 | end 137 | 138 | shard 139 | end 140 | 141 | # Remove the shard entry 142 | # @params [Kcl::Workers::ShardInfo] shard 143 | # @return [Kcl::Workers::ShardInfo] 144 | def remove_lease(shard) 145 | result = @dynamodb.remove_item( 146 | @table_name, 147 | { "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id } 148 | ) 149 | if result 150 | shard.assigned_to = nil 151 | shard.checkpoint = nil 152 | shard.lease_timeout = nil 153 | Kcl.logger.info("Remove lease for shard at #{shard.to_h}") 154 | else 155 | Kcl.logger.info("Failed to remove lease for shard at #{shard.to_h}") 156 | end 157 | 158 | shard 159 | end 160 | 161 | # Remove lease owner for the shard entry 162 | # @params [Kcl::Workers::ShardInfo] shard 163 | # @return 
[Kcl::Workers::ShardInfo] 164 | def remove_lease_owner(shard) 165 | result = @dynamodb.update_item( 166 | @table_name, 167 | { "#{DYNAMO_DB_LEASE_PRIMARY_KEY}" => shard.shard_id }, 168 | "remove #{DYNAMO_DB_LEASE_OWNER_KEY}" 169 | ) 170 | if result 171 | shard.assigned_to = nil 172 | Kcl.logger.info("Remove lease owner for shard at #{shard.to_h}") 173 | else 174 | Kcl.logger.info("Failed to remove lease owner for shard at #{shard.to_h}") 175 | end 176 | 177 | shard 178 | end 179 | end 180 | -------------------------------------------------------------------------------- /lib/kcl/checkpoints/sentinel.rb: -------------------------------------------------------------------------------- 1 | # Enumeration of the sentinel values of checkpoints. 2 | # Used during initialization of ShardConsumers to determine the starting point 3 | # in the shard and to flag that a shard has been completely processed. 4 | module Kcl::Checkpoints 5 | module Sentinel 6 | # Start from the first available record in the shard. 7 | TRIM_HORIZON = 'TRIM_HORIZON'.freeze 8 | # Start from the latest record in the shard. 9 | LATEST = 'LATEST'.freeze 10 | # We've completely processed all records in this shard. 11 | SHARD_END = 'SHARD_END'.freeze 12 | # Start from the record at or after the specified server-side timestamp. 13 | AT_TIMESTAMP = 'AT_TIMESTAMP'.freeze 14 | # Continue from the sequence number in the shard. 
15 | AFTER_SEQUENCE_NUMBER = 'AFTER_SEQUENCE_NUMBER'.freeze 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/kcl/config.rb: -------------------------------------------------------------------------------- 1 | class Kcl::Config 2 | attr_accessor :aws_region, 3 | :aws_access_key_id, 4 | :aws_secret_access_key, 5 | :dynamodb_endpoint, 6 | :dynamodb_table_name, 7 | :dynamodb_read_capacity, 8 | :dynamodb_write_capacity, 9 | :dynamodb_failover_seconds, 10 | :kinesis_endpoint, 11 | :kinesis_stream_name, 12 | :logger, 13 | :log_level, 14 | :max_lease_count, 15 | :use_ssl, 16 | :worker_count 17 | 18 | # Set default values 19 | def initialize 20 | @aws_region = nil 21 | @aws_access_key_id = nil 22 | @aws_secret_access_key = nil 23 | @dynamodb_endpoint = 'https://localhost:4566' 24 | @dynamodb_table_name = nil 25 | @dynamodb_read_capacity = 10 26 | @dynamodb_write_capacity = 10 27 | @dynamodb_failover_seconds = 10 28 | @kinesis_endpoint = 'https://localhost:4566' 29 | @kinesis_stream_name = nil 30 | @logger = nil 31 | @max_lease_count = 1 32 | @use_ssl = false 33 | @worker_count = 1 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/kcl/errors.rb: -------------------------------------------------------------------------------- 1 | module Kcl::Errors 2 | class IllegalArgumentError < StandardError; end 3 | class CheckpointNotFoundError < StandardError; end 4 | class SequenceNumberNotFoundError < StandardError; end 5 | class LeaseNotAquiredError < StandardError; end 6 | end 7 | -------------------------------------------------------------------------------- /lib/kcl/logger.rb: -------------------------------------------------------------------------------- 1 | require 'logger' 2 | 3 | class Kcl::Logger < ::Logger; end 4 | -------------------------------------------------------------------------------- /lib/kcl/proxies/dynamo_db_proxy.rb: 
# --------------------------------------------------------------------------------
require 'aws-sdk-dynamodb'

module Kcl::Proxies
  # Thin wrapper around Aws::DynamoDB::Client. Write helpers return a Boolean
  # instead of raising for the failures they handle explicitly.
  class DynamoDbProxy
    attr_reader :client

    # @param config [Kcl::Config]
    def initialize(config)
      @client = Aws::DynamoDB::Client.new(
        {
          access_key_id: config.aws_access_key_id,
          secret_access_key: config.aws_secret_access_key,
          region: config.aws_region,
          endpoint: config.dynamodb_endpoint,
          ssl_verify_peer: config.use_ssl
        }
      )
    end

    # @param table_name [String]
    # @return [Boolean] whether the table can be described
    def exists?(table_name)
      @client.describe_table({ table_name: table_name })
      true
    rescue Aws::DynamoDB::Errors::NotFound,
           Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param table_name [String]
    # @param attributes [Array]
    # @param schema [Array]
    # @param throughputs [Hash]
    def create_table(table_name, attributes = [], schema = [], throughputs = {})
      @client.create_table(
        {
          table_name: table_name,
          attribute_definitions: attributes,
          key_schema: schema,
          provisioned_throughput: throughputs
        }
      )
    end

    # @param table_name [String]
    # @return [Boolean] false when the table does not exist
    def delete_table(table_name)
      @client.delete_table({ table_name: table_name })
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param table_name [String]
    # @param conditions [Hash] primary key of the item
    # @return [Hash, nil] the item, or nil when the table or the item is missing
    def get_item(table_name, conditions)
      response = @client.get_item(
        {
          table_name: table_name,
          key: conditions
        }
      )
      response.item
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      nil
    end

    # @param table_name [String]
    # @param item [Hash]
    # @return [Boolean]
    def put_item(table_name, item)
      @client.put_item(
        {
          table_name: table_name,
          item: item
        }
      )
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # @param table_name [String]
    # @param conditions [Hash] primary key of the item
    # @param update_expression [String]
    # @return [Boolean]
    def update_item(table_name, conditions, update_expression)
      @client.update_item(
        {
          table_name: table_name,
          key: conditions,
          update_expression: update_expression
        }
      )
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end

    # Put an item only when the condition expression holds.
    #
    # @param table_name [String]
    # @param item [Hash]
    # @param condition_expression [String]
    # @param expression_attributes [Hash, nil] values for the placeholders
    #   used in the condition; omitted from the request when nil
    # @return [Boolean] false when the table is missing or the condition
    #   was not met
    def conditional_update_item(table_name, item, condition_expression, expression_attributes)
      params = {
        table_name: table_name,
        item: item,
        condition_expression: condition_expression,
        expression_attribute_values: expression_attributes
      }.compact
      @client.put_item(params)
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException,
           Aws::DynamoDB::Errors::ConditionalCheckFailedException
      # A failed condition is an expected outcome of a lease race, not an error.
      false
    end

    # @param table_name [String]
    # @param conditions [Hash] primary key of the item
    # @return [Boolean]
    def remove_item(table_name, conditions)
      @client.delete_item(
        {
          table_name: table_name,
          key: conditions
        }
      )
      true
    rescue Aws::DynamoDB::Errors::ResourceNotFoundException
      false
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/proxies/kinesis_proxy.rb:
# --------------------------------------------------------------------------------
require 'aws-sdk-kinesis'

module Kcl::Proxies
  # Thin wrapper around Aws::Kinesis::Client bound to one stream.
  class KinesisProxy
    attr_reader :client

    # @param config [Kcl::Config]
    def initialize(config)
      @client = Aws::Kinesis::Client.new(
        {
          access_key_id: config.aws_access_key_id,
          secret_access_key: config.aws_secret_access_key,
          region: config.aws_region,
          endpoint: config.kinesis_endpoint,
          ssl_verify_peer: config.use_ssl
        }
      )
      @stream_name = config.kinesis_stream_name
    end

    # List every shard of the stream, following describe_stream pagination.
    #
    # @return [Array]
    def shards
      collected = []
      params = { stream_name: @stream_name }
      loop do
        description = @client.describe_stream(params).stream_description
        collected.concat(description.shards)
        # Stop on the last page; the emptiness check guards against looping
        # forever on a page that reports more shards but returns none.
        break unless description.has_more_shards && !description.shards.empty?

        params[:exclusive_start_shard_id] = collected.last.shard_id
      end
      collected
    end

    # @param shard_id [String]
    # @param shard_iterator_type [String, nil] defaults to LATEST
    # @param sequence_number [String, nil] only sent for AFTER_SEQUENCE_NUMBER
    # @return [String]
    def get_shard_iterator(shard_id, shard_iterator_type = nil, sequence_number = nil)
      params = {
        stream_name: @stream_name,
        shard_id: shard_id,
        shard_iterator_type: shard_iterator_type || Kcl::Checkpoints::Sentinel::LATEST
      }
      if shard_iterator_type == Kcl::Checkpoints::Sentinel::AFTER_SEQUENCE_NUMBER
        params[:starting_sequence_number] = sequence_number
      end
      res = @client.get_shard_iterator(params)
      res.shard_iterator
    end

    # @param shard_iterator [String]
    # @return [Hash] :records, :next_shard_iterator and :millis_behind_latest
    def get_records(shard_iterator)
      res = @client.get_records({ shard_iterator: shard_iterator })
      {
        records: res.records,
        next_shard_iterator: res.next_shard_iterator,
        # Read by Kcl::Workers::Consumer when it builds the RecordsInput.
        millis_behind_latest: res.millis_behind_latest
      }
    end

    # @param data [Hash] parameters passed straight to Client#put_record
    # @return [Hash] :shard_id and :sequence_number of the stored record
    def put_record(data)
      res = @client.put_record(data)
      { shard_id: res.shard_id, sequence_number: res.sequence_number }
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/record_processor.rb:
# --------------------------------------------------------------------------------
# Base class for application record processors; every hook must be overridden.
class Kcl::RecordProcessor
  def after_initialize(_initialization_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  def process_records(_records_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end

  def shutdown(_shutdown_input)
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end
# --------------------------------------------------------------------------------
# /lib/kcl/record_processor_factory.rb:
# --------------------------------------------------------------------------------
# Base class for factories handing a fresh record processor to each consumer.
class Kcl::RecordProcessorFactory
  def create_processor
    raise NotImplementedError, "You must implement #{self.class}##{__method__}"
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/types/extended_sequence_number.rb:
# --------------------------------------------------------------------------------
require 'bigdecimal'

module Kcl::Types
  # A sequence number (digits or a sentinel name) plus a sub-sequence number.
  class ExtendedSequenceNumber
    attr_reader :sequence_number, :sub_sequence_number

    TRIM_HORIZON_VALUE = BigDecimal(-2)
    LATEST_VALUE = BigDecimal(-1)
    AT_TIMESTAMP_VALUE = BigDecimal(-3)

    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.latest
      @_latest ||= new(Kcl::Checkpoints::Sentinel::LATEST)
    end

    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.shard_end
      @_shard_end ||= new(Kcl::Checkpoints::Sentinel::SHARD_END)
    end

    # @return [Kcl::Types::ExtendedSequenceNumber]
    def self.trim_horizon
      @_trim_horizon ||= new(Kcl::Checkpoints::Sentinel::TRIM_HORIZON)
    end

    # @param str [String]
    # @return [Boolean]
    def self.digits_or_sentinel?(str)
      digits?(str) || sentinel?(str)
    end

    # @param str [String]
    # @return [Boolean]
    def self.sentinel?(str)
      case str
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON,
           Kcl::Checkpoints::Sentinel::LATEST,
           Kcl::Checkpoints::Sentinel::SHARD_END,
           Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        true
      else
        false
      end
    end

    # @param str [String, nil]
    # @return [Boolean] true when str consists only of ASCII digits
    def self.digits?(str)
      return false if str.nil? || str.empty?

      str.match?(/\A[0-9]+\z/)
    end

    # @param sequence_number [String]
    # @param sub_sequence_number [Number]
    def initialize(sequence_number, sub_sequence_number = 0)
      @sequence_number = sequence_number
      @sub_sequence_number = sub_sequence_number
    end

    # Numeric form of the sequence number; sentinels map to negative values.
    #
    # @return [BigDecimal]
    # @raise [Kcl::Errors::IllegalArgumentError] for anything that is neither
    #   digits nor TRIM_HORIZON, LATEST or AT_TIMESTAMP
    def value
      return BigDecimal(@sequence_number) if self.class.digits?(@sequence_number)

      case @sequence_number
      when Kcl::Checkpoints::Sentinel::LATEST
        LATEST_VALUE
      when Kcl::Checkpoints::Sentinel::TRIM_HORIZON
        TRIM_HORIZON_VALUE
      when Kcl::Checkpoints::Sentinel::AT_TIMESTAMP
        AT_TIMESTAMP_VALUE
      else
        # Interpolation keeps this the intended error even when the
        # sequence number is nil (String#+ would raise TypeError instead).
        raise Kcl::Errors::IllegalArgumentError,
              "Expected a string of digits, TRIM_HORIZON, LATEST or AT_TIMESTAMP but received #{@sequence_number}"
      end
    end

    # @param extended_sequence_number [Kcl::Types::ExtendedSequenceNumber]
    # @return [Boolean]
    def equals(extended_sequence_number)
      @sequence_number == extended_sequence_number.sequence_number &&
        @sub_sequence_number == extended_sequence_number.sub_sequence_number
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/types/initialization_input.rb:
# --------------------------------------------------------------------------------
module Kcl::Types
  # Container for the parameters to the RecordProcessor's method.
  class InitializationInput
    attr_reader :shard_id, :extended_sequence_number

    # @param shard_id [String]
    # @param extended_sequence_number [Kcl::Types::ExtendedSequenceNumber]
    def initialize(shard_id, extended_sequence_number)
      @shard_id = shard_id
      @extended_sequence_number = extended_sequence_number
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/types/records_input.rb:
# --------------------------------------------------------------------------------
module Kcl::Types
  # Container for the parameters to the IRecordProcessor's method.
  class RecordsInput
    attr_reader :records, :millis_behind_latest, :record_checkpointer

    # @param records [Array]
    # @param millis_behind_latest [Number]
    # @param record_checkpointer [Kcl::Workers::RecordCheckpointer]
    def initialize(records, millis_behind_latest, record_checkpointer)
      @records = records
      @millis_behind_latest = millis_behind_latest
      @record_checkpointer = record_checkpointer
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/types/shutdown_input.rb:
# --------------------------------------------------------------------------------
module Kcl::Types
  # Container for the parameters to the IRecordProcessor's method.
  class ShutdownInput
    attr_reader :shutdown_reason, :record_checkpointer

    # @param shutdown_reason [Kcl::Worker::ShutdownReason]
    # @param record_checkpointer [Kcl::Workers::RecordCheckpointer]
    def initialize(shutdown_reason, record_checkpointer)
      @shutdown_reason = shutdown_reason
      @record_checkpointer = record_checkpointer
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/version.rb:
# --------------------------------------------------------------------------------
module Kcl
  VERSION = '1.0.0'.freeze
end

# --------------------------------------------------------------------------------
# /lib/kcl/worker.rb:
# --------------------------------------------------------------------------------
require 'eventmachine'

# Periodically syncs the stream's shards, leases them and runs one consumer
# thread per leased shard.
class Kcl::Worker
  PROCESS_INTERVAL = 1 # by sec

  def self.run(id, record_processor_factory)
    worker = new(id, record_processor_factory)
    worker.start
  end

  # @param id [String] worker id, used as the lease owner
  # @param record_processor_factory [Kcl::RecordProcessorFactory]
  def initialize(id, record_processor_factory)
    @id = id
    @record_processor_factory = record_processor_factory
    @live_shards = {} # Map: shard_id => seen in the latest sync?
    @shards = {} # Map: shard_id => Kcl::Workers::ShardInfo
    @kinesis = nil # Kcl::Proxies::KinesisProxy
    @checkpointer = nil # Kcl::Checkpointer
    @timer = nil
  end

  # Start consuming data from the stream,
  # and pass it to the application record processors.
  def start
    Kcl.logger.info("Start worker at #{object_id}")

    EM.run do
      trap_signals

      @timer = EM::PeriodicTimer.new(PROCESS_INTERVAL) do
        sync_shards!
        consume_shards! if available_lease_shard?
      end
    end

    cleanup
    Kcl.logger.info("Finish worker at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Shutdown gracefully
  def shutdown(signal = :NONE)
    unless @timer.nil?
      @timer.cancel
      @timer = nil
    end
    EM.stop

    Kcl.logger.info("Shutdown worker with signal #{signal} at #{object_id}")
  rescue => e
    Kcl.logger.error("#{e.class}: #{e.message}")
    raise
  end

  # Cleanup resources
  def cleanup
    @live_shards = {}
    @shards = {}
    @kinesis = nil
    @checkpointer = nil
  end

  # Add new shards and delete unused shards
  # @return [Hash] the shard map after synchronisation
  def sync_shards!
    @live_shards.transform_values! { |_| false }

    kinesis.shards.each do |shard|
      @live_shards[shard.shard_id] = true
      next if @shards[shard.shard_id]

      @shards[shard.shard_id] = Kcl::Workers::ShardInfo.new(
        shard.shard_id,
        shard.parent_shard_id,
        shard.sequence_number_range
      )
      Kcl.logger.info("Found new shard at shard_id: #{shard.shard_id}")
    end

    # Collect the ids first so the map is not mutated while it is iterated.
    dead_shard_ids = @live_shards.reject { |_, alive| alive }.keys
    dead_shard_ids.each do |shard_id|
      shard = @shards[shard_id]
      checkpointer.remove_lease(shard) if shard
      @shards.delete(shard_id)
      # Forget the id as well; otherwise the next sync would look up a shard
      # that no longer exists and hand nil to the checkpointer.
      @live_shards.delete(shard_id)
      Kcl.logger.info("Remove shard at shard_id: #{shard_id}")
    end

    @shards
  end

  # Count the number of leases hold by worker excluding the processed shard
  # @return [Boolean]
  def available_lease_shard?
    leased_count = @shards.values.count do |shard|
      shard.lease_owner == @id && !shard.completed?
    end
    Kcl.config.max_lease_count > leased_count
  end

  # Process records by shard
  def consume_shards!
    threads = []
    @shards.each do |shard_id, shard|
      # already owner of the shard
      next if shard.lease_owner == @id

      begin
        shard = checkpointer.fetch_checkpoint(shard)
      rescue Kcl::Errors::CheckpointNotFoundError
        Kcl.logger.info("Not found checkpoint of shard at #{shard.to_h}")
        next
      end
      # shard is closed and processed all records
      next if shard.completed?

      begin
        shard = checkpointer.lease(shard, @id)
      rescue Kcl::Errors::LeaseNotAquiredError
        # Another worker holds the shard; that must not stop this worker.
        Kcl.logger.info("Lease is held by another worker for shard at #{shard.to_h}")
        next
      end
      # The checkpointer only sets the owner when the lease write succeeded;
      # never consume a shard this worker does not own.
      next unless shard.lease_owner == @id

      threads << Thread.new do
        begin
          consumer = Kcl::Workers::Consumer.new(
            shard,
            @record_processor_factory.create_processor,
            kinesis,
            checkpointer
          )
          consumer.consume!
        ensure
          shard = checkpointer.remove_lease_owner(shard)
          Kcl.logger.info("Finish to consume shard at shard_id: #{shard_id}")
        end
      end
    end
    threads.each(&:join)
  end

  private

  def kinesis
    if @kinesis.nil?
      @kinesis = Kcl::Proxies::KinesisProxy.new(Kcl.config)
      Kcl.logger.info('Created Kinesis session in worker')
    end
    @kinesis
  end

  def checkpointer
    if @checkpointer.nil?
      @checkpointer = Kcl::Checkpointer.new(Kcl.config)
      Kcl.logger.info('Created Checkpoint in worker')
    end
    @checkpointer
  end

  def trap_signals
    [:HUP, :INT, :TERM].each do |signal|
      trap signal do
        # Defer to the reactor instead of doing the work inside the handler.
        EM.add_timer(0) { shutdown(signal) }
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/workers/consumer.rb:
# --------------------------------------------------------------------------------
module Kcl::Workers
  # Shard : Consumer = 1 : 1
  # - get records from stream
  # - send to record processor
  # - create record checkpoint
  class Consumer
    def initialize(shard, record_processor, kinesis_proxy, checkpointer)
      @shard = shard
      @record_processor = record_processor
      @kinesis = kinesis_proxy
      @checkpointer = checkpointer
    end

    # Feed the processor batch by batch until a batch comes back empty or the
    # shard has no further iterator, then call the processor's shutdown hook.
    def consume!
      initialize_input = create_initialize_input
      @record_processor.after_initialize(initialize_input)

      record_checkpointer = Kcl::Workers::RecordCheckpointer.new(@shard, @checkpointer)
      shard_iterator = start_shard_iterator

      loop do
        result = @kinesis.get_records(shard_iterator)

        records_input = create_records_input(
          result[:records],
          result[:millis_behind_latest],
          record_checkpointer
        )
        @record_processor.process_records(records_input)

        shard_iterator = result[:next_shard_iterator]
        break if result[:records].empty? || shard_iterator.nil?
      end

      # A missing next iterator means the shard is closed.
      shutdown_reason = if shard_iterator.nil?
                          Kcl::Workers::ShutdownReason::TERMINATE
                        else
                          Kcl::Workers::ShutdownReason::REQUESTED
                        end
      shutdown_input = create_shutdown_input(shutdown_reason, record_checkpointer)
      @record_processor.shutdown(shutdown_input)
    end

    # @return [String] iterator at TRIM_HORIZON when the shard has no
    #   checkpoint yet, otherwise right after the checkpointed sequence number
    def start_shard_iterator
      shard = @checkpointer.fetch_checkpoint(@shard)
      if shard.checkpoint.nil?
        return @kinesis.get_shard_iterator(
          shard.shard_id,
          Kcl::Checkpoints::Sentinel::TRIM_HORIZON
        )
      end

      @kinesis.get_shard_iterator(
        shard.shard_id,
        Kcl::Checkpoints::Sentinel::AFTER_SEQUENCE_NUMBER,
        shard.checkpoint
      )
    end

    def create_initialize_input
      Kcl::Types::InitializationInput.new(
        @shard.shard_id,
        Kcl::Types::ExtendedSequenceNumber.new(@shard.checkpoint)
      )
    end

    def create_records_input(records, millis_behind_latest, record_checkpointer)
      Kcl::Types::RecordsInput.new(
        records,
        millis_behind_latest,
        record_checkpointer
      )
    end

    def create_shutdown_input(shutdown_reason, record_checkpointer)
      Kcl::Types::ShutdownInput.new(
        shutdown_reason,
        record_checkpointer
      )
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/workers/record_checkpointer.rb:
# --------------------------------------------------------------------------------
module Kcl::Workers
  # Handed to record processors so they can persist their progress.
  class RecordCheckpointer
    def initialize(shard, checkpointer)
      @shard = shard
      @checkpointer = checkpointer
    end

    # @param sequence_number [String, nil] nil marks the shard as finished
    def update_checkpoint(sequence_number)
      # checkpoint the last sequence of a closed shard
      @shard.checkpoint = sequence_number || Kcl::Checkpoints::Sentinel::SHARD_END
      @checkpointer.update_checkpoint(@shard)
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/workers/shard_info.rb:
# --------------------------------------------------------------------------------
module Kcl::Workers
  # In-memory state of one shard: identity, sequence range and lease data.
  class ShardInfo
    attr_reader :shard_id,
                :parent_shard_id,
                :starting_sequence_number,
                :ending_sequence_number
    attr_accessor :assigned_to, :checkpoint, :lease_timeout

    # @param shard_id [String]
    # @param parent_shard_id [String, nil] stored as 0 when nil
    # @param sequence_number_range [Hash]
    def initialize(shard_id, parent_shard_id, sequence_number_range)
      @shard_id = shard_id
      @parent_shard_id = parent_shard_id || 0
      @starting_sequence_number = sequence_number_range[:starting_sequence_number]
      @ending_sequence_number = sequence_number_range[:ending_sequence_number]
      @assigned_to = nil
      @checkpoint = nil
      @lease_timeout = nil
    end

    def lease_owner
      @assigned_to
    end

    def lease_owner=(assigned_to)
      @assigned_to = assigned_to
    end

    # @return [Boolean] true once the checkpoint is the SHARD_END sentinel
    def completed?
      @checkpoint == Kcl::Checkpoints::Sentinel::SHARD_END
    end

    # For debug
    def to_h
      {
        shard_id: shard_id,
        parent_shard_id: parent_shard_id,
        starting_sequence_number: starting_sequence_number,
        ending_sequence_number: ending_sequence_number,
        assigned_to: assigned_to,
        checkpoint: checkpoint,
        lease_timeout: lease_timeout
      }
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/kcl/workers/shutdown_reason.rb:
# --------------------------------------------------------------------------------
module Kcl::Workers
  module ShutdownReason
    TERMINATE = 'TERMINATE'.freeze
    REQUESTED = 'REQUESTED'.freeze
  end
end

# --------------------------------------------------------------------------------
# /spec/checkpointer_spec.rb:
# --------------------------------------------------------------------------------
require 'spec_helper'

RSpec.describe Kcl::Checkpointer do
  include_context 'use_kinesis'

  let(:checkpointer) { Kcl::Checkpointer.new(Kcl.config) }

  describe '#initialize' do
    it 'exists dynamodb table' do
      dynamodb = checkpointer.dynamodb
      expect(dynamodb.exists?(Kcl.config.dynamodb_table_name)).to be_truthy
    end
  end

  describe '#fetch_checkpoint' do
    subject { checkpointer.fetch_checkpoint(shard) }

    it do
      expect(subject.shard_id).to eql(shard.shard_id)
      expect(subject.checkpoint).to eql(shard.checkpoint)
      expect(subject.assigned_to).to eql(shard.assigned_to)
    end
  end

  describe '#update_checkpoint' do
    before do
      shard.checkpoint = Kcl::Checkpoints::Sentinel::SHARD_END
      shard.assigned_to = 'test-worker'
      shard.lease_timeout = Time.now.utc + Kcl.config.dynamodb_failover_seconds
      checkpointer.update_checkpoint(shard)
    end

    subject { checkpointer.fetch_checkpoint(shard) }

    it do
      expect(subject.checkpoint).to eql(Kcl::Checkpoints::Sentinel::SHARD_END)
    end
  end

  describe '#lease' do
    let(:next_assigned_to) { 'test-worker' }

    before do
      checkpointer.lease(shard, next_assigned_to)
    end

    subject { checkpointer.fetch_checkpoint(shard) }

    it do
      expect(subject.assigned_to).to eql(next_assigned_to)
      expect(subject.lease_timeout).not_to eql('')
    end
  end

  describe '#remove_lease' do
    let(:next_assigned_to) { 'test-worker' }

    before do
      checkpointer.lease(shard, next_assigned_to)
      checkpointer.remove_lease(shard)
    end

    subject { checkpointer.fetch_checkpoint(shard) }

    it do
      expect(subject.checkpoint).to be_nil
      expect(subject.assigned_to).to be_nil
    end
  end

  describe '#remove_lease_owner' do
    let(:next_assigned_to) { 'test-worker' }

    before do
      checkpointer.lease(shard, next_assigned_to)
      checkpointer.remove_lease_owner(shard)
    end

    subject { checkpointer.fetch_checkpoint(shard) }

    it do
      expect(subject.assigned_to).to be_nil
    end
  end
end

# --------------------------------------------------------------------------------
# /spec/spec_helper.rb:
# --------------------------------------------------------------------------------
require 'bundler/setup'
require 'kcl'

# load shared_contexts
Dir["#{__dir__}/supports/**/*.rb"].each { |f| require f }

RSpec.configure do |config|
  # Disable RSpec exposing methods globally on `Module` and `main`
  config.disable_monkey_patching!

  true
end

# use localstack
Kcl.configure do |config|
  config.aws_region = 'ap-northeast-1'
  config.aws_access_key_id = 'dummy'
  config.aws_secret_access_key = 'dummy'
  config.dynamodb_endpoint = 'https://localhost:4566'
  config.dynamodb_table_name = 'kcl-rb-test'
  config.kinesis_endpoint = 'https://localhost:4566'
  config.kinesis_stream_name = 'kcl-rb-test'
  config.logger = Kcl::Logger.new('/dev/null')
  config.use_ssl = false
end

# --------------------------------------------------------------------------------
# /spec/supports/use_dynamodb_context.rb:
# --------------------------------------------------------------------------------
RSpec.shared_context 'use_dynamodb' do
  let(:stub_dynamodb_client) { Aws::DynamoDB::Client.new(stub_responses: true) }

  before do
    allow(Aws::DynamoDB::Client).to receive(:new).and_return(stub_dynamodb_client)
  end
end

# --------------------------------------------------------------------------------
# /spec/supports/use_kinesis_contexts.rb:
# --------------------------------------------------------------------------------
RSpec.shared_context 'use_kinesis' do
  let(:kinesis) { Kcl::Proxies::KinesisProxy.new(Kcl.config) }
  let(:kinesis_shards) { kinesis.shards }
  let(:shard_shadow) do
    Kcl::Workers::ShardInfo.new(
      kinesis_shards[0].shard_id,
      kinesis_shards[0].parent_shard_id,
      kinesis_shards[0].sequence_number_range
    )
  end
  let(:shard) do
    Kcl::Workers::ShardInfo.new(
      kinesis_shards[1].shard_id,
      kinesis_shards[1].parent_shard_id,
      kinesis_shards[1].sequence_number_range
    )
  end
end

# --------------------------------------------------------------------------------
/spec/supports/use_record_processor_contexts.rb: -------------------------------------------------------------------------------- 1 | RSpec.shared_context 'use_record_processor' do 2 | class MockRecordProcessor < Kcl::RecordProcessor 3 | def after_initialize(initialization_input) 4 | end 5 | 6 | def process_records(records_input) 7 | return if records_input.records.empty? 8 | records_input.records.each do |record| 9 | process_record(record) 10 | end 11 | last_sequence_number = records_input.records[-1].sequence_number 12 | records_input.record_checkpointer.update_checkpoint(last_sequence_number) 13 | end 14 | 15 | def process_record(record) 16 | puts record 17 | end 18 | 19 | def shutdown(shutdown_input) 20 | if shutdown_input.shutdown_reason == Kcl::Workers::ShutdownReason::TERMINATE 21 | shutdown_input.record_checkpointer.update_checkpoint(nil) 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /spec/worker_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Kcl::Worker do 4 | include_context 'use_kinesis' 5 | 6 | let(:record_processor_factory) { double('record_processor_factory') } 7 | let(:worker) { Kcl::Worker.new('test-worker', record_processor_factory) } 8 | 9 | before do 10 | allow(record_processor_factory).to receive(:create_processor) 11 | end 12 | 13 | describe '#sync_shards!' do 14 | subject { worker.sync_shards! } 15 | it { expect(subject.keys.size).to eq(5) } 16 | end 17 | 18 | describe '#available_lease_shard?' do 19 | subject { worker.available_lease_shard? } 20 | 21 | context 'before consume' do 22 | before do 23 | worker.sync_shards! 
24 | end 25 | 26 | it { expect(subject).to be_truthy } 27 | end 28 | 29 | context 'after consume' do 30 | let(:consumer) { instance_double(Kcl::Workers::Consumer) } 31 | 32 | before do 33 | allow(Kcl::Workers::Consumer).to receive(:new).and_return(consumer) 34 | allow(consumer).to receive(:consume!).and_return(true) 35 | 36 | worker.sync_shards! 37 | worker.consume_shards! 38 | end 39 | 40 | it { expect(subject).to be_truthy } 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /spec/workers/consumer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Kcl::Workers::Consumer do 4 | include_context 'use_kinesis' 5 | include_context 'use_record_processor' 6 | 7 | let(:target_shard) { nil } 8 | let(:record_processor) { MockRecordProcessor.new } 9 | let(:checkpointer) { Kcl::Checkpointer.new(Kcl.config) } 10 | let(:consumer) do 11 | Kcl::Workers::Consumer.new(target_shard, record_processor, kinesis, checkpointer) 12 | end 13 | 14 | describe '#start_shard_iterator' do 15 | let(:target_shard) { shard } 16 | subject { consumer.start_shard_iterator } 17 | it { expect(subject).not_to be_nil } 18 | end 19 | 20 | describe '#consume!' do 21 | before do 22 | # mock shard 23 | checkpointer.fetch_checkpoint(target_shard) 24 | checkpointer.lease(target_shard, 'test-worker') 25 | end 26 | 27 | after do 28 | checkpointer.remove_lease_owner(target_shard) 29 | end 30 | 31 | context 'with no record' do 32 | let(:target_shard) { shard } 33 | 34 | before do 35 | allow(record_processor).to receive(:process_record) 36 | end 37 | 38 | subject { consumer.consume! 
} 39 | 40 | it do 41 | expect(subject).to be_nil 42 | expect(record_processor).not_to have_received(:process_record) 43 | end 44 | end 45 | 46 | context 'with a record' do 47 | let(:target_shard) { shard_shadow } 48 | 49 | before do 50 | # put data for 1st shard 51 | kinesis.put_record( 52 | { 53 | stream_name: Kcl.config.kinesis_stream_name, 54 | data: Base64.strict_encode64('test'), 55 | partition_key: 'a' 56 | } 57 | ) 58 | 59 | allow(record_processor).to receive(:process_record) 60 | end 61 | 62 | subject { consumer.consume! } 63 | 64 | it do 65 | expect(subject).to be_nil 66 | expect(record_processor).to have_received(:process_record) 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # 開発環境 (LocalStack) 3 | #------------------------------------------------------------------------------- 4 | 5 | provider "aws" { 6 | version = "~> 2.60" 7 | access_key = "dummy" 8 | secret_key = "dummy" 9 | region = "ap-northeast-1" 10 | insecure = true 11 | skip_credentials_validation = true 12 | skip_metadata_api_check = true 13 | skip_requesting_account_id = true 14 | 15 | endpoints { 16 | dynamodb = "https://localhost:4566" 17 | kinesis = "https://localhost:4566" 18 | } 19 | } 20 | 21 | 22 | #------------------------------------------------------------------------------- 23 | # Kinesis stream 24 | #------------------------------------------------------------------------------- 25 | 26 | resource "aws_kinesis_stream" "kcl-rb-test_stream" { 27 | name = "kcl-rb-test" 28 | shard_count = 5 29 | retention_period = 24 30 | 31 | tags = { 32 | Environment = "test" 33 | } 34 | } 35 | 36 | --------------------------------------------------------------------------------