├── .ruby-version ├── ext ├── .gitignore ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── kickstarter │ └── jruby │ └── Telekinesis.java ├── Gemfile ├── test ├── producer │ ├── test_helper.rb │ ├── test_async_producer.rb │ ├── test_sync_producer.rb │ └── test_async_producer_worker.rb ├── test_helper.rb └── aws │ ├── test_client_adapter.rb │ └── test_java_client_adapter.rb ├── lib ├── telekinesis │ ├── version.rb │ ├── consumer.rb │ ├── producer.rb │ ├── aws.rb │ ├── producer │ │ ├── noop_failure_handler.rb │ │ ├── warn_failure_handler.rb │ │ ├── sync_producer.rb │ │ ├── async_producer_worker.rb │ │ └── async_producer.rb │ ├── consumer │ │ ├── base_processor.rb │ │ ├── block.rb │ │ └── kcl.rb │ ├── logging │ │ ├── java_logging.rb │ │ └── ruby_logger_handler.rb │ ├── java_util.rb │ └── aws │ │ ├── client_adapter.rb │ │ └── java_client_adapter.rb └── telekinesis.rb ├── .gitignore ├── telekinesis.gemspec ├── Rakefile └── README.md /.ruby-version: -------------------------------------------------------------------------------- 1 | jruby-1.7.9 2 | -------------------------------------------------------------------------------- /ext/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | target/ 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | -------------------------------------------------------------------------------- /test/producer/test_helper.rb: -------------------------------------------------------------------------------- 1 | require_relative "../test_helper" 2 | -------------------------------------------------------------------------------- /lib/telekinesis/version.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis 2 | VERSION = '3.2.1' 3 | end 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | lib/telekinesis/*.jar 3 | tmp/ 4 | Gemfile.lock 5 | telekinesis-*.gem 6 | -------------------------------------------------------------------------------- /lib/telekinesis/consumer.rb: -------------------------------------------------------------------------------- 1 | require "telekinesis/consumer/kcl" 2 | require "telekinesis/consumer/base_processor" 3 | require "telekinesis/consumer/block" 4 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "minitest/autorun" 2 | require "minitest/pride" 3 | require "bundler/setup" 4 | Bundler.require(:development) 5 | 6 | require "telekinesis" 7 | -------------------------------------------------------------------------------- /lib/telekinesis/producer.rb: -------------------------------------------------------------------------------- 1 | require "telekinesis/producer/sync_producer" 2 | require "telekinesis/producer/noop_failure_handler" 3 | require "telekinesis/producer/warn_failure_handler" 4 | require "telekinesis/producer/async_producer" 5 | -------------------------------------------------------------------------------- /lib/telekinesis/aws.rb: -------------------------------------------------------------------------------- 1 | require 
"telekinesis/aws/client_adapter.rb" 2 | require "telekinesis/aws/java_client_adapter" 3 | 4 | module Telekinesis 5 | module Aws 6 | KINESIS_MAX_PUT_RECORDS_SIZE = 500 7 | Client = JavaClientAdapter 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /lib/telekinesis/producer/noop_failure_handler.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis 2 | module Producer 3 | # A failure handler that does nothing. 4 | # 5 | # Nothing! 6 | class NoopFailureHandler 7 | def on_record_failure(item_error_tuples); end 8 | def on_kinesis_retry(error, items); end 9 | def on_kinesis_failure(error, items); end 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/telekinesis.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis; end 2 | 3 | unless RUBY_PLATFORM.match(/java/) 4 | raise "Sorry! Telekinesis is only supported on JRuby" 5 | end 6 | 7 | require "telekinesis/version" 8 | require "telekinesis/telekinesis-#{Telekinesis::VERSION}.jar" 9 | require "telekinesis/java_util" 10 | require "telekinesis/logging/java_logging" 11 | require "telekinesis/aws" 12 | 13 | require "telekinesis/producer" 14 | require "telekinesis/consumer" 15 | -------------------------------------------------------------------------------- /lib/telekinesis/consumer/base_processor.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis 2 | module Consumer 3 | # A RecordProcessor with no-op implementations of all of the required 4 | # IRecordProcessor methods. Override it to implement simple IRecordProcessors 5 | # that don't need to do anything special on init or shutdown. 
6 |     class BaseProcessor
7 |       def init(initialization_input); end
8 |       def process_records(process_records_input); end
9 |       def shutdown(shutdown_input); end
10 |     end
11 |   end
12 | end
13 | 
--------------------------------------------------------------------------------
/lib/telekinesis/logging/java_logging.rb:
--------------------------------------------------------------------------------
1 | require "logger"
2 | require "telekinesis/logging/ruby_logger_handler"
3 | 
4 | module Telekinesis
5 |   module Logging
6 |     java_import java.util.logging.Logger
7 |     java_import java.util.logging.LogManager
8 | 
9 |     def self.capture_java_logging(logger)
10 |       LogManager.log_manager.reset
11 |       Logger.get_logger("").add_handler(RubyLoggerHandler.create(logger))
12 |     end
13 | 
14 |     def self.disable_java_logging
15 |       LogManager.log_manager.reset
16 |     end
17 |   end
18 | end
19 | 
--------------------------------------------------------------------------------
/telekinesis.gemspec:
--------------------------------------------------------------------------------
1 | $:.push File.expand_path("../lib", __FILE__)
2 | require "telekinesis/version"
3 | 
4 | Gem::Specification.new do |spec|
5 |   spec.name          = "telekinesis"
6 |   spec.version       = Telekinesis::VERSION
7 |   spec.author        = "Ben Linsay"
8 |   spec.email         = "ben@kickstarter.com"
9 |   spec.summary       = "High level clients for Amazon Kinesis"
10 |   spec.homepage      = "https://github.com/kickstarter/telekinesis"
11 | 
12 |   spec.platform      = "java"
13 |   spec.files         = `git ls-files`.split($/) + Dir.glob("lib/telekinesis/*.jar")
14 |   spec.require_paths = ["lib"]
15 | 
16 |   spec.add_development_dependency "rake"
17 |   spec.add_development_dependency "nokogiri"
18 |   spec.add_development_dependency "minitest"
19 |   spec.add_development_dependency "shoulda-context"
20 | end
21 | 
--------------------------------------------------------------------------------
/lib/telekinesis/producer/warn_failure_handler.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Producer
3 |     # A simple FailureHandler that logs errors with `warn`. Available as an
4 |     # example and an easy default.
5 |     class WarnFailureHandler
6 |       def on_record_failure(item_err_pairs)
7 |         warn "Puts for #{item_err_pairs.size} records failed!"
8 |       end
9 | 
10 |       # Do nothing on retry. Let it figure itself out.
11 |       def on_kinesis_retry(err, items); end
12 | 
13 |       def on_kinesis_failure(err, items)
14 |         warn "PutRecords request with #{items.size} items failed!"
15 |         warn format_bt(err)
16 |       end
17 | 
18 |       protected
19 | 
20 |       def format_bt(e)
21 |         e.backtrace ? e.backtrace.map{|l| "! #{l}"}.join("\n") : ""
22 |       end
23 |     end
24 |   end
25 | end
26 | 
--------------------------------------------------------------------------------
/lib/telekinesis/consumer/block.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Consumer
3 |     # A RecordProcessor that uses the given block to process records. Useful
4 |     # for quickly defining a consumer.
5 |     #
6 |     # Telekinesis::Consumer::KCL.new(stream: 'my-stream', app: 'tail') do
7 |     #   Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind_latest|
8 |     #     records.each {|r| puts r}
9 |     #     $stderr.puts "#{millis_behind_latest} ms behind"
10 |     #     checkpointer.checkpoint
11 |     #   end
12 |     # end
13 |     class Block < BaseProcessor
14 |       def initialize(&block)
15 |         raise ArgumentError, "No block given" unless block_given?
16 |         @block = block
17 |       end
18 | 
19 |       def process_records(input)
20 |         @block.call(input.records, input.checkpointer, input.millis_behind_latest)
21 |       end
22 |     end
23 |   end
24 | end
25 | 
--------------------------------------------------------------------------------
/test/aws/test_client_adapter.rb:
--------------------------------------------------------------------------------
1 | require_relative '../test_helper'
2 | 
3 | class ClientAdapterTest < Minitest::Test
4 |   StubResponse = Struct.new(:error_code, :error_message)
5 | 
6 |   class EvenRecordsAreErrors < Telekinesis::Aws::ClientAdapter
7 |     def do_put_records(stream, items)
8 |       items.each_with_index.map do |_, idx|
9 |         err, message = idx.even? ? ["error-#{idx}", "message-#{idx}"] : [nil, nil]
10 |         StubResponse.new(err, message)
11 |       end
12 |     end
13 |   end
14 | 
15 |   context "ClientAdapter" do
16 |     context "put_records" do
17 |       setup do
18 |         @client = EvenRecordsAreErrors.new(nil)
19 |         @items = 10.times.map{|i| ["key-#{i}", "value-#{i}"]}
20 |         @expected = 10.times.select{|i| i.even?}
21 |                            .map{|i| ["key-#{i}", "value-#{i}", "error-#{i}", "message-#{i}"]}
22 |       end
23 | 
24 |       should "zip error responses with records" do
25 |         assert_equal(@expected, @client.put_records('stream', @items))
26 |       end
27 |     end
28 |   end
29 | end
30 | 
--------------------------------------------------------------------------------
/lib/telekinesis/java_util.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module JavaUtil
3 |     java_import java.util.concurrent.locks.ReentrantReadWriteLock
4 | 
5 |     # Sugar around java.util.concurrent.ReentrantReadWriteLock so that it's
6 |     # easy to use with blocks.
7 |     #
8 |     # e.g.
9 |     #
10 |     #   lock = ReadWriteLock.new
11 |     #   some_value = 12345
12 |     #
13 |     #   # In a reader thread
14 |     #   lock.read_lock do
15 |     #     # Read some data! This won't block any other calls to read_lock, but will
16 |     #     # block if another thread is in a section guarded by write_lock.
17 |     #   end
18 |     #
19 |     #   # In a writer thread
20 |     #   lock.write_lock do
21 |     #     # Write some data! This is exclusive with *any* other code guarded by
22 |     #     # either read_lock or write_lock.
23 |     #   end
24 |     class ReadWriteLock
25 |       def initialize(fair = false)
26 |         lock = ReentrantReadWriteLock.new(fair)
27 |         @read = lock.read_lock
28 |         @write = lock.write_lock
29 |       end
30 | 
31 |       def read_lock
32 |         @read.lock_interruptibly
33 |         yield
34 |       ensure
35 |         @read.unlock
36 |       end
37 | 
38 |       def write_lock
39 |         @write.lock_interruptibly
40 |         yield
41 |       ensure
42 |         @write.unlock
43 |       end
44 |     end
45 |   end
46 | end
47 | 
--------------------------------------------------------------------------------
/lib/telekinesis/logging/ruby_logger_handler.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Logging
3 |     java_import java.util.logging.Level
4 |     java_import java.util.logging.Handler
5 | 
6 |     # A java.util.logging Handler that delegates to a Ruby logger. The name of
7 |     # the j.u.l. logger is used as the progname argument to Logger.add.
8 |     #
9 |     # The translation between j.u.l. severity levels and Ruby Logger levels
10 |     # isn't exact.
11 |     class RubyLoggerHandler < Handler
12 |       # NOTE: Since this class overrides a Java class, we have to use the Java
13 |       # constructor and set the logger after instantiation. (Overriding
14 |       # constructors in JRuby is weird.) Use this method to create a new
15 |       # handler that delegates to the passed logger.
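      #
      # A minimal sketch of wiring this handler up by hand (normally you'd
      # call Telekinesis::Logging.capture_java_logging(logger) instead, which
      # does this after resetting the existing j.u.l. configuration):
      #
      #   logger  = ::Logger.new($stderr)  # Ruby's stdlib Logger
      #   handler = RubyLoggerHandler.create(logger)
      #   java.util.logging.Logger.get_logger("").add_handler(handler)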
16 |       def self.create(logger)
17 |         new.tap do |s|
18 |           s.set_logger(logger)
19 |         end
20 |       end
21 | 
22 |       SEVERITY = {
23 |         # NOTE: There's no Java equivalent of FATAL.
24 |         Level::SEVERE  => Logger::ERROR,
25 |         Level::WARNING => Logger::WARN,
26 |         Level::INFO    => Logger::INFO,
27 |         Level::CONFIG  => Logger::INFO,
28 |         Level::FINE    => Logger::DEBUG,
29 |         Level::FINER   => Logger::DEBUG,
30 |         Level::FINEST  => Logger::DEBUG,
31 |       }
32 | 
33 |       def set_logger(l)
34 |         @logger = l
35 |       end
36 | 
37 |       def close
38 |         @logger.close
39 |       end
40 | 
41 |       # Ruby's logger has no flush method.
42 |       def flush; end
43 | 
44 |       def publish(log_record)
45 |         message = if log_record.thrown.nil?
46 |           log_record.message
47 |         else
48 |           "#{log_record.message}: #{log_record.thrown}"
49 |         end
50 |         @logger.add(SEVERITY[log_record.level], message, log_record.logger_name)
51 |       end
52 |     end
53 |   end
54 | end
55 | 
--------------------------------------------------------------------------------
/lib/telekinesis/producer/sync_producer.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Producer
3 |     # A synchronous Kinesis producer.
4 |     #
5 |     # This class is thread safe if and only if the underlying
6 |     # Telekinesis::Aws::Client is thread safe. In practice, this means this
7 |     # client is thread safe on JRuby and not thread safe elsewhere.
8 |     class SyncProducer
9 |       attr_reader :stream, :client
10 | 
11 |       # Create a new Producer.
12 |       #
13 |       # AWS credentials may be specified by using the `:credentials` option and
14 |       # passing a hash containing your `:access_key_id` and `:secret_access_key`.
15 |       # If unspecified, credentials will be fetched from the environment, an
16 |       # ~/.aws/credentials file, or the current instance metadata.
17 |       #
18 |       # `:send_size` may also be used to configure the maximum batch size used
19 |       # in `put_all`. See `put_all` for more info.
20 |       def self.create(options = {})
21 |         stream = options[:stream]
22 |         client = Telekinesis::Aws::Client.build(options.fetch(:credentials, {}))
23 |         new(stream, client, options)
24 |       end
25 | 
26 |       def initialize(stream, client, opts = {})
27 |         @stream = stream or raise ArgumentError, "stream may not be nil"
28 |         @client = client or raise ArgumentError, "client may not be nil"
29 |         @send_size = opts.fetch(:send_size, Telekinesis::Aws::KINESIS_MAX_PUT_RECORDS_SIZE)
30 |       end
31 | 
32 |       # Put an individual k, v pair to Kinesis immediately. Both k and v must
33 |       # be strings.
34 |       #
35 |       # Returns once the call to Kinesis is complete.
36 |       def put(key, data)
37 |         @client.put_record(@stream, key, data)
38 |       end
39 | 
40 |       # Put all of the [k, v] pairs to Kinesis in as few requests as possible.
41 |       # All of the ks and vs must be strings.
42 |       #
43 |       # Each request sends at most `:send_size` records. By default this is the
44 |       # Kinesis API limit of 500 records per request.
45 |       def put_all(items)
46 |         items.each_slice(@send_size).flat_map do |batch|
47 |           @client.put_records(@stream, batch)
48 |         end
49 |       end
50 |     end
51 |   end
52 | end
53 | 
--------------------------------------------------------------------------------
/lib/telekinesis/aws/client_adapter.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Aws
3 |     # NOTE: wrapping the cause is necessary since JRuby isn't 2.1 compatible (yet)
4 |     class KinesisError < RuntimeError
5 |       attr_reader :cause
6 | 
7 |       def initialize(cause)
8 |         @cause = cause
9 |       end
10 |     end
11 | 
12 |     # Base class for other ClientAdapters.
Client adapters exist to make 13 | # switching between platforms easy and painless. 14 | # 15 | # The base adapter defines the interface and provides convience methods. 16 | class ClientAdapter 17 | # Build a new client given AWS credentials. 18 | # 19 | # Credentials must be supplied as a hash that contains symbolized 20 | # :access_key_id and :secret_access_key keys. 21 | def self.build(credentials) 22 | raise NotImplementedError 23 | end 24 | 25 | def initialize(client) 26 | @client = client 27 | end 28 | 29 | # Make a put_record call to the underlying client. Must return an object 30 | # that responds to `shard_id` and `sequence_number`. 31 | def put_record(stream, key, value) 32 | raise NotImplementedError 33 | end 34 | 35 | # Make a put_records call to the underlying client. If the request 36 | # succeeds but returns errors for some records, the original [key, value] 37 | # pair is zipped with the [error_code, error_message] pair and the 38 | # offending records are returned. 39 | def put_records(stream, items) 40 | response = do_put_records(stream, items) 41 | failures = items.zip(response).reject{|_, r| r.error_code.nil?} 42 | 43 | failures.map do |(k, v), r| 44 | [k, v, r.error_code, r.error_message] 45 | end 46 | end 47 | 48 | protected 49 | 50 | # Put an enumerable of [key, value] pairs to the given stream. Returns an 51 | # enumerable of response objects the same size as the given list of items. 52 | # 53 | # Response objects must respond to `error_code` and `error_message`. Any 54 | # response with a nil error_code is considered successful. 55 | def do_put_records(stream, items) 56 | raise NotImplementedError 57 | end 58 | end 59 | end 60 | end 61 | 62 | -------------------------------------------------------------------------------- /ext/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | com.kickstarter 6 | telekinesis 7 | 3.2.1 8 | 9 | 10 | 11 | ${project.artifactId}-${project.version} 12 | 13 | 14 | 15 | org.apache.maven.plugins 16 | maven-compiler-plugin 17 | 3.1 18 | 19 | 1.6 20 | 1.6 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 1.6 27 | 28 | true 29 | 30 | 31 | 32 | package 33 | 34 | shade 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | com.amazonaws 48 | amazon-kinesis-client 49 | ${amazon-kinesis-client-version} 50 | 51 | 52 | 53 | 54 | 55 | 1.6.9.1 56 | 1.6.1 57 | 58 | -------------------------------------------------------------------------------- /lib/telekinesis/aws/java_client_adapter.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis 2 | module Aws 3 | java_import java.nio.ByteBuffer 4 | java_import com.amazonaws.AmazonClientException 5 | java_import com.amazonaws.auth.BasicAWSCredentials 6 | java_import com.amazonaws.auth.DefaultAWSCredentialsProviderChain 7 | java_import com.amazonaws.internal.StaticCredentialsProvider 8 | java_import com.amazonaws.services.kinesis.AmazonKinesisClient 9 | java_import com.amazonaws.services.kinesis.model.PutRecordRequest 10 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequest 11 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry 12 | 13 | # A ClientAdapter that wraps the AWS Java SDK. 14 | # 15 | # Since the underlying Java client is thread safe, this adapter is thread 16 | # safe. 17 | class JavaClientAdapter < ClientAdapter 18 | # Build a new client adapter. 
`credentials` is a hash keyed with 19 | # `:access_key_id` and `:secret_access_key`. If this hash is left blank 20 | # (the default) the client uses the DefaultAWSCredentialsProviderChain to 21 | # look for credentials. 22 | def self.build(credentials = {}) 23 | client = AmazonKinesisClient.new(build_credentials_provider(credentials)) 24 | new(client) 25 | end 26 | 27 | def self.build_credentials_provider(credentials) 28 | if credentials.empty? 29 | DefaultAWSCredentialsProviderChain.new 30 | else 31 | StaticCredentialsProvider.new( 32 | BasicAWSCredentials.new( 33 | credentials[:access_key_id], 34 | credentials[:secret_access_key] 35 | ) 36 | ) 37 | end 38 | end 39 | 40 | def put_record(stream, key, value) 41 | r = PutRecordRequest.new.tap do |request| 42 | request.stream_name = stream 43 | request.partition_key = key.to_s 44 | request.data = ByteBuffer.wrap(value.to_s.to_java_bytes) 45 | end 46 | @client.put_record(r) 47 | rescue AmazonClientException => e 48 | raise KinesisError.new(e) 49 | end 50 | 51 | protected 52 | 53 | def do_put_records(stream, items) 54 | result = @client.put_records(build_put_records_request(stream, items)) 55 | result.records 56 | rescue AmazonClientException => e 57 | raise KinesisError.new(e) 58 | end 59 | 60 | def build_put_records_request(stream, items) 61 | entries = items.map do |key, value| 62 | PutRecordsRequestEntry.new.tap do |entry| 63 | entry.partition_key = key.to_s 64 | entry.data = ByteBuffer.wrap(value.to_s.to_java_bytes) 65 | end 66 | end 67 | PutRecordsRequest.new.tap do |request| 68 | request.stream_name = stream 69 | request.records = entries 70 | end 71 | end 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /test/aws/test_java_client_adapter.rb: -------------------------------------------------------------------------------- 1 | require_relative '../test_helper' 2 | 3 | class JavaClientAdapterTest < Minitest::Test 4 | java_import com.amazonaws.services.kinesis.model.PutRecordRequest 5 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequest 6 | 7 | SomeStruct = Struct.new(:field) 8 | StubResponse = Struct.new(:records) 9 | 10 | class EchoClient 11 | def put_record(*args) 12 | args 13 | end 14 | 15 | def put_records(*args) 16 | StubResponse.new(args) 17 | end 18 | end 19 | 20 | context "JavaClientAdapter" do 21 | setup do 22 | @client = Telekinesis::Aws::JavaClientAdapter.new(EchoClient.new) 23 | end 24 | 25 | context "#put_record" do 26 | setup do 27 | # No exceptions, coerced to string. [args, expected] 28 | @data = [ 29 | [['stream', 'key', 'value'], ['stream', 'key', 'value']], 30 | [['stream', 123, 123], ['stream', '123', '123']], 31 | [['stream', SomeStruct.new('key'), SomeStruct.new('value')], ['stream', '#', '#']], 32 | ] 33 | end 34 | 35 | should "generate aws.PutRecordsRequest" do 36 | @data.each do |args, expected| 37 | request, = @client.put_record(*args) 38 | expected_stream, expected_key, expected_value = expected 39 | 40 | assert_equal(expected_stream, request.stream_name) 41 | assert_equal(expected_key, request.partition_key) 42 | assert_equal(expected_value, String.from_java_bytes(request.data.array)) 43 | end 44 | end 45 | end 46 | 47 | context "#do_put_records" do 48 | setup do 49 | # No exceptions, coerced to string. 
[args, expected] 50 | @data = [ 51 | [ 52 | ['stream', [['key', 'value'], [123, 123], [SomeStruct.new('key'), SomeStruct.new('value')]]], 53 | ['stream', [['key', 'value'], ['123', '123'], ['#', '#']]] 54 | ], 55 | ] 56 | end 57 | 58 | should "generate aws.PutRecordsRequest" do 59 | @data.each do |args, expected| 60 | request, = @client.send(:do_put_records, *args) 61 | expected_stream, expected_items = expected 62 | 63 | assert_equal(expected_stream, request.stream_name) 64 | expected_items.zip(request.records) do |(expected_key, expected_value), record| 65 | assert_equal(expected_key, record.partition_key) 66 | assert_equal(expected_value, String.from_java_bytes(record.data.array)) 67 | end 68 | end 69 | end 70 | end 71 | 72 | context ".build_credentials_provider" do 73 | should "return a provider that provides the specified credentials" do 74 | credentials = { 75 | access_key_id: '0000000000', 76 | secret_access_key: '0000000000', 77 | } 78 | provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(credentials) 79 | 80 | assert_equal(credentials[:access_key_id], provider.credentials.aws_access_key_id) 81 | assert_equal(credentials[:secret_access_key], provider.credentials.aws_secret_key) 82 | end 83 | end 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | 3 | Bundler.require(:development) 4 | 5 | def log_ok(message) 6 | $stderr.write "#{message}... " 7 | begin 8 | yield 9 | $stderr.puts "ok" 10 | rescue => e 11 | $stderr.puts "failed" 12 | abort <<-EOF 13 | 14 | error: #{e} 15 | EOF 16 | end 17 | end 18 | 19 | def artifact_name(path) 20 | File.open(path) do |f| 21 | doc = Nokogiri::XML(f) 22 | id = doc.css("project>artifactId").text 23 | version = doc.css("project>version").text 24 | "#{id}-#{version}.jar" 25 | end 26 | end 27 | 28 | namespace :ext do 29 | require_relative 'lib/telekinesis/version' 30 | 31 | desc "Cleanup all built extension" 32 | task :clean do 33 | FileUtils.rm(Dir.glob("lib/telekinesis/*.jar")) 34 | Dir.chdir("ext") do 35 | `mvn clean 2>&1` 36 | end 37 | end 38 | 39 | task :have_maven? do 40 | log_ok("Checking for maven") do 41 | `which mvn` 42 | raise "Maven is required to build this gem" unless $?.success? 43 | end 44 | end 45 | 46 | task :have_jdk6_or_higher? do 47 | log_ok("Checking that at least java 6 is installed") do 48 | version_match = `java -version 2>&1`.match(/java version "1\.(\d)\.(\d+_\d+)"/) 49 | if version_match.nil? 50 | raise "Can't parse Java version!" 51 | end 52 | jdk_version, _jdk_patchlevel = version_match.captures 53 | if jdk_version.to_i < 6 54 | raise "Found #{version_match}" 55 | end 56 | end 57 | end 58 | 59 | task :update_pom_version do 60 | File.open('ext/pom.xml', 'r+') do |f| 61 | doc = Nokogiri::XML(f) 62 | pom_version = doc.css("project>version") 63 | 64 | if pom_version.text != Telekinesis::VERSION 65 | log_ok("Updating pom.xml version") do 66 | pom_version.first.content = Telekinesis::VERSION 67 | f.truncate(0) 68 | f.rewind 69 | f.write(doc.to_xml) 70 | end 71 | end 72 | end 73 | end 74 | 75 | desc "Build the Java extensions for this gem. 
Requires JDK6+ and Maven" 76 | task :build => [:have_jdk6_or_higher?, :have_maven?, :update_pom_version, :clean] do 77 | fat_jar = artifact_name('ext/pom.xml') 78 | log_ok("Building #{fat_jar}") do 79 | Dir.chdir("ext") do 80 | `mkdir -p target/` 81 | `mvn package 2>&1 > target/build_log` 82 | raise "build failed. See ext/target/build_log for details" unless $?.success? 83 | FileUtils.copy("target/#{fat_jar}", "../lib/telekinesis/#{fat_jar}") 84 | end 85 | end 86 | end 87 | end 88 | 89 | namespace :gem do 90 | desc "Build this gem" 91 | task :build => 'ext:build' do 92 | `gem build telekinesis.gemspec` 93 | end 94 | end 95 | 96 | require 'rake/testtask' 97 | 98 | # NOTE: Tests shouldn't be run without the extension being built, but converting 99 | # the build task to a file task made it hard to depend on having a JDK 100 | # and Maven installed. This is a little kludgy but better than the 101 | # alternative. 102 | task :check_for_ext do 103 | fat_jar = artifact_name('ext/pom.xml') 104 | Rake::Task["ext:build"].invoke unless File.exists?("lib/telekinesis/#{fat_jar}") 105 | end 106 | 107 | Rake::TestTask.new(:test) do |t| 108 | t.test_files = FileList["test/**/test_*.rb"].exclude(/test_helper/) 109 | t.verbose = true 110 | end 111 | task :test => :check_for_ext 112 | -------------------------------------------------------------------------------- /lib/telekinesis/producer/async_producer_worker.rb: -------------------------------------------------------------------------------- 1 | module Telekinesis 2 | module Producer 3 | java_import java.nio.ByteBuffer 4 | java_import java.util.concurrent.TimeUnit 5 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequest 6 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry 7 | 8 | class AsyncProducerWorker 9 | SHUTDOWN = :shutdown 10 | 11 | def initialize(producer, queue, send_size, send_every, retries, retry_interval) 12 | @producer = producer 13 | @queue = queue 14 | @send_size = send_size 15 | @send_every = send_every 16 | @retries = retries 17 | @retry_interval = retry_interval 18 | 19 | @stream = producer.stream # for convenience 20 | @client = producer.client # for convenience 21 | @failure_handler = producer.failure_handler # for convenience 22 | 23 | @buffer = [] 24 | @last_poll_at = current_time_millis 25 | @shutdown = false 26 | end 27 | 28 | def run 29 | loop do 30 | next_wait = [0, (@last_poll_at + @send_every) - current_time_millis].max 31 | next_item = @queue.poll(next_wait, TimeUnit::MILLISECONDS) 32 | 33 | if next_item == SHUTDOWN 34 | next_item, @shutdown = nil, true 35 | end 36 | 37 | unless next_item.nil? 38 | buffer(next_item) 39 | end 40 | 41 | if buffer_full || (next_item.nil? && buffer_has_records) 42 | put_records(get_and_reset_buffer, @retries, @retry_interval) 43 | end 44 | 45 | @last_poll_at = current_time_millis 46 | break if @shutdown 47 | end 48 | rescue => e 49 | # TODO: is there a way to encourage people to set up an uncaught exception 50 | # hanlder and/or disable this? 51 | bt = e.backtrace ? e.backtrace.map{|l| "! #{l}"}.join("\n") : "" 52 | warn "Producer background thread died!" 53 | warn "#{e.class}: #{e.message}\n#{bt}" 54 | raise e 55 | end 56 | 57 | protected 58 | 59 | def current_time_millis 60 | (Time.now.to_f * 1000).to_i 61 | end 62 | 63 | def buffer(item) 64 | @buffer << item 65 | end 66 | 67 | def buffer_full 68 | @buffer.size == @send_size 69 | end 70 | 71 | def buffer_has_records 72 | !@buffer.empty? 
73 | end 74 | 75 | def get_and_reset_buffer 76 | ret, @buffer = @buffer, [] 77 | ret 78 | end 79 | 80 | def put_records(items, retries, retry_interval) 81 | begin 82 | failed = [] 83 | while retries > 0 84 | retryable, unretryable = @client.put_records(@stream, items).partition do |_, _, code, _| 85 | code == 'InternalFailure' || code == 'ProvisionedThroughputExceededException' 86 | end 87 | failed.concat(unretryable) 88 | 89 | if retryable.empty? 90 | break 91 | else 92 | items = retryable.map{|k, v, _, _| [k, v]} 93 | retries -= 1 94 | end 95 | end 96 | failed.concat(retryable) unless retryable.empty? 97 | @failure_handler.on_record_failure(failed) unless failed.empty? 98 | rescue Telekinesis::Aws::KinesisError => e 99 | if e.cause && e.cause.is_retryable && (retries -= 1) > 0 100 | sleep retry_interval 101 | @failure_handler.on_kinesis_retry(e, items) 102 | retry 103 | else 104 | @failure_handler.on_kinesis_failure(e, items) 105 | end 106 | end 107 | end 108 | end 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /test/producer/test_async_producer.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class AsyncProducerTest < Minitest::Test 4 | java_import java.util.concurrent.TimeUnit 5 | java_import java.util.concurrent.CountDownLatch 6 | java_import java.util.concurrent.ArrayBlockingQueue 7 | 8 | StubClient = Struct.new(:welp) 9 | 10 | class LatchQueue 11 | def initialize 12 | @under = ArrayBlockingQueue.new(100) 13 | @latch = CountDownLatch.new(1) 14 | @putting = CountDownLatch.new(1) 15 | end 16 | 17 | def count_down 18 | @latch.count_down 19 | end 20 | 21 | def wait_for_put 22 | @putting.await 23 | end 24 | 25 | def put(item) 26 | @putting.count_down 27 | @latch.await 28 | @under.put(item) 29 | end 30 | end 31 | 32 | def build_producer 33 | opts = { 34 | queue: @queue, 35 | manual_start: true, 36 | worker_count: @worker_count, 37 | } 38 | Telekinesis::Producer::AsyncProducer.new( 39 | @stream, 40 | StubClient.new, 41 | Telekinesis::Producer::NoopFailureHandler.new, 42 | opts 43 | ) 44 | end 45 | 46 | context "AsyncProducer" do 47 | setup do 48 | @stream = 'test' # ignored 49 | @worker_count = 3 # arbitrary 50 | end 51 | 52 | context "put" do 53 | setup do 54 | @queue = ArrayBlockingQueue.new(100) 55 | build_producer.put("hi", "there") 56 | end 57 | 58 | should "add the k,v pair to the queue" do 59 | assert_equal([["hi", "there"]], @queue.to_a) 60 | end 61 | end 62 | 63 | context "put_all" do 64 | setup do 65 | @items = 10.times.map{|i| ["key-#{i}", "value-#{i}"]} 66 | @queue = ArrayBlockingQueue.new(100) 67 | build_producer.put_all(@items) 68 | end 69 | 70 | should "add all items to the queue" do 71 | assert_equal(@items, @queue.to_a) 72 | end 73 | end 74 | 75 | context "after shutdown" do 76 | setup do 77 | @queue = ArrayBlockingQueue.new(100) 78 | @producer = build_producer 79 | @producer.shutdown 80 | end 81 | 82 | should "shutdown all workers" do 83 | assert_equal([Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN] * @worker_count, @queue.to_a) 84 | end 85 | 86 | should "not accept events while shut down" do 87 | refute(@producer.put("key", "value")) 88 | end 89 | end 90 | 91 | context "with a put in progress" do 92 | setup do 93 | @queue = LatchQueue.new 94 | @producer = build_producer 95 | 96 | # Thread blocks waiting for the latch in LatchQueue. Don't do any other 97 | # set up until this thread is in the critical section. 
98 | Thread.new do 99 | @producer.put("k", "v") 100 | end 101 | @queue.wait_for_put 102 | 103 | # Thread blocks waiting for the write_lock in AsyncProducer. Once it's 104 | # unblocked it signals by counting down shutdown_latch. 105 | @shutdown_latch = CountDownLatch.new(1) 106 | Thread.new do 107 | @producer.shutdown 108 | @shutdown_latch.count_down 109 | end 110 | end 111 | 112 | should "block on shutdown until the put is done" do 113 | # Check that the latch hasn't been triggered yet. Return immediately 114 | # from the check - don't bother waiting. 115 | refute(@shutdown_latch.await(0, TimeUnit::MILLISECONDS)) 116 | @queue.count_down 117 | # NOTE: The assert is here to fail the test if it times out. This could 118 | # effectively just be an await with no duration. 119 | assert(@shutdown_latch.await(2, TimeUnit::SECONDS)) 120 | end 121 | end 122 | 123 | context "with a shutdown in progress" do 124 | setup do 125 | @queue = LatchQueue.new 126 | @producer = build_producer 127 | 128 | # Thread blocks waiting to insert :shutdown into the queue because of 129 | # the latch in LatchQueue. Don't do any other test set up until this 130 | # thread is in the critical section. 131 | Thread.new do 132 | @producer.shutdown 133 | end 134 | @queue.wait_for_put 135 | 136 | # This thread blocks waiting for the lock in AsyncProducer. Once it's 137 | # done the put continues and then it signals completion by counting 138 | # down finished_put_latch. 139 | @finished_put_latch = CountDownLatch.new(1) 140 | Thread.new do 141 | @put_result = @producer.put("k", "v") 142 | @finished_put_latch.count_down 143 | end 144 | end 145 | 146 | should "block on a put" do 147 | # Thread is already waiting in the critical section. Just check that 148 | # the call hasn't exited yet and return immediately. 149 | refute(@finished_put_latch.await(0, TimeUnit::MILLISECONDS)) 150 | @queue.count_down 151 | # NOTE: The assert is here to fail the test if it times out. This could 152 | # effectively just be an await with no duration. 
153 | assert(@finished_put_latch.await(2, TimeUnit::SECONDS)) 154 | refute(@put_result, "Producer should reject a put after shutdown") 155 | end 156 | end 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /test/producer/test_sync_producer.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SyncProducerTest < Minitest::Test 4 | StubPutRecordResponse = Struct.new(:shard_id, :sequence_number, :error_code, :error_message) 5 | 6 | class StubClient 7 | attr_reader :requests 8 | 9 | def initialize(*responses) 10 | @requests = [] 11 | @responses = responses 12 | end 13 | 14 | def put_record(stream, key, value) 15 | @requests << [stream, [key, value]] 16 | @responses.shift || [] 17 | end 18 | 19 | def put_records(stream, items) 20 | @requests << [stream, items] 21 | @responses.shift || [] 22 | end 23 | end 24 | 25 | class TestingProducer < Telekinesis::Producer::SyncProducer 26 | end 27 | 28 | context "SyncProducer" do 29 | 30 | context ".create" do 31 | setup do 32 | @sync_producer = Telekinesis::Producer::SyncProducer.create(stream: 'stream') 33 | end 34 | 35 | should "return a SyncProducer" do 36 | assert_equal(@sync_producer.class, ::Telekinesis::Producer::SyncProducer) 37 | end 38 | end 39 | 40 | context "#put" do 41 | setup do 42 | @expected_response = StubPutRecordResponse.new(123, 123) 43 | @client = StubClient.new(@expected_response) 44 | @producer = TestingProducer.new('stream', @client) 45 | end 46 | 47 | should "call the underlying client's put_record" do 48 | assert_equal(@expected_response, @producer.put('key', 'value')) 49 | assert_equal(['stream', ['key', 'value']], @client.requests.first) 50 | end 51 | end 52 | 53 | context "#put_all" do 54 | context "with an empty argument" do 55 | setup do 56 | @client = StubClient.new([]) 57 | @producer = TestingProducer.new('stream', @client) 58 | @actual_failures = @producer.put_all([]) 59 | end 60 | 61 | should "send no data" do 62 | assert(@client.requests.empty?) 63 | assert(@actual_failures.empty?) 64 | end 65 | end 66 | 67 | context "with an argument smaller than :send_size" do 68 | setup do 69 | @send_size = 30 70 | @items = (@send_size - 1).times.map{|i| ["key-#{i}", "value-#{i}"]} 71 | end 72 | 73 | context "when no records fail" do 74 | setup do 75 | @client = StubClient.new([]) 76 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size}) 77 | @actual_failures = @producer.put_all(@items) 78 | end 79 | 80 | should "send one batch and return nothing" do 81 | assert(@actual_failures.empty?) 
82 | assert_equal([['stream', @items]], @client.requests) 83 | end 84 | end 85 | 86 | context "when some records fail" do 87 | setup do 88 | @client = StubClient.new([["key-2", "value-2", "fake error", "message"]]) 89 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size}) 90 | @actual_failures = @producer.put_all(@items) 91 | end 92 | 93 | should "call on_record_failure" do 94 | assert_equal([['stream', @items]], @client.requests) 95 | assert_equal([["key-2", "value-2", "fake error", "message"]], @actual_failures) 96 | end 97 | end 98 | end 99 | 100 | context "with an argument larger than :send_size" do 101 | setup do 102 | @send_size = 30 103 | @items = (@send_size + 3).times.map{|i| ["key-#{i}", "value-#{i}"]} 104 | # expected_requests looks like: 105 | # [ 106 | # ['stream', [[k1, v1], [k2, v2], ...]], 107 | # ['stream', [[kn, vn], [k(n+1), v(n+1)], ...]] 108 | # ] 109 | @expected_requests = @items.each_slice(@send_size).map{|batch| ['stream', batch]} 110 | end 111 | 112 | context "when no records fail" do 113 | setup do 114 | @client = StubClient.new([]) 115 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size}) 116 | @actual_failures = @producer.put_all(@items) 117 | end 118 | 119 | should "send multiple batches and return nothing" do 120 | assert(@actual_failures.empty?) 121 | assert_equal(@expected_requests, @client.requests) 122 | end 123 | end 124 | 125 | context "when some records fail" do 126 | setup do 127 | @error_respones = [ 128 | [["k1", "v1", "err", "message"], ["k2", "v2", "err", "message"]], 129 | [["k-next", "v-next", "err", "message"]] 130 | ] 131 | @expected_failures = @error_respones.flat_map {|x| x } 132 | 133 | @client = StubClient.new(*@error_respones) 134 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size}) 135 | @actual_failures = @producer.put_all(@items) 136 | end 137 | 138 | should "return the failures" do 139 | assert_equal(@expected_requests, @client.requests) 140 | assert_equal(@expected_failures, @actual_failures) 141 | end 142 | end 143 | end 144 | end 145 | end 146 | end 147 | -------------------------------------------------------------------------------- /ext/src/main/java/com/kickstarter/jruby/Telekinesis.java: -------------------------------------------------------------------------------- 1 | package com.kickstarter.jruby; 2 | 3 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; 4 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; 5 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker; 6 | import com.amazonaws.services.kinesis.clientlibrary.types.InitializationInput; 7 | import com.amazonaws.services.kinesis.clientlibrary.types.ProcessRecordsInput; 8 | import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownInput; 9 | 10 | import java.util.concurrent.ExecutorService; 11 | 12 | /** 13 | * A shim that makes it possible to use the Kinesis Client Library from JRuby. 14 | * Without the shim, {@code initialize} method in 15 | * {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor} 16 | * conflicts with the special {@code initialize} method in Ruby. The shim 17 | * interface renames {@code initialize} to {@code init}. 18 | *
19 |  * <p>
20 |  * For convenience a {@link #newWorker(KinesisClientLibConfiguration, ExecutorService, AmazonDynamoDB, IRecordProcessorFactory)}
21 |  * method is provided, so you can use closure conversion in JRuby to specify an
22 |  * {@link IRecordProcessorFactory}. For example:
23 |  *
24 |  * <p>
 25 |  *
 26 |  * <pre>
 27 |  *     executor = config[:executor] || nil
 28 |  *
 29 |  *     com.kickstarter.jruby.Telekinesis.new_worker(my_config, executor, nil) do
 30 |  *       MyRecordProcessor.new(some_thing, some_other_thing)
 31 |  *     end
 32 |  * </pre>
33 | */ 34 | public class Telekinesis { 35 | /** 36 | * Create a new KCL {@link Worker} that processes records using the given 37 | * {@link ExecutorService}, {@link IRecordProcessorFactory}, and 38 | * {@link AmazonDynamoDB}. 39 | */ 40 | public static Worker newWorker(final KinesisClientLibConfiguration config, 41 | final ExecutorService executor, 42 | final AmazonDynamoDB dynamoClient, 43 | final IRecordProcessorFactory factory) { 44 | com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory v2Factory = new com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory() { 45 | @Override 46 | public com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor createProcessor() { 47 | return new RecordProcessorShim(factory.createProcessor()); 48 | } 49 | }; 50 | 51 | return new Worker.Builder() 52 | .recordProcessorFactory(v2Factory) 53 | .config(config) 54 | .execService(executor) // NOTE: .execService(null) is a no-op 55 | .dynamoDBClient(dynamoClient) 56 | .build(); 57 | } 58 | 59 | // ======================================================================== 60 | /** 61 | * A shim that wraps a {@link IRecordProcessor} so it can get used by the KCL. 62 | */ 63 | private static class RecordProcessorShim implements com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor { 64 | private final IRecordProcessor underlying; 65 | 66 | public RecordProcessorShim(final IRecordProcessor underlying) { this.underlying = underlying; } 67 | 68 | @Override 69 | public void initialize(final InitializationInput initializationInput) { 70 | underlying.init(initializationInput); 71 | } 72 | 73 | @Override 74 | public void processRecords(final ProcessRecordsInput processRecordsInput) { 75 | underlying.processRecords(processRecordsInput); 76 | } 77 | 78 | @Override 79 | public void shutdown(final ShutdownInput shutdownInput) { 80 | underlying.shutdown(shutdownInput); 81 | } 82 | } 83 | 84 | /** 85 | * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor} 86 | * that avoids naming conflicts with reserved words in Ruby. 87 | */ 88 | public static interface IRecordProcessor { 89 | /** 90 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#initialize(InitializationInput) 91 | */ 92 | void init(InitializationInput initializationInput); 93 | 94 | /** 95 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#processRecords(ProcessRecordsInput) 96 | */ 97 | void processRecords(ProcessRecordsInput processRecordsInput); 98 | 99 | /** 100 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#shutdown(ShutdownInput) 101 | */ 102 | void shutdown(ShutdownInput shutdownInput); 103 | } 104 | 105 | /** 106 | * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory} 107 | * for {@link IRecordProcessor}. 
108 |      */
109 |     public static interface IRecordProcessorFactory {
110 |         /**
111 |          * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory#createProcessor()
112 |          */
113 |         IRecordProcessor createProcessor();
114 |     }
115 | }
116 | 
--------------------------------------------------------------------------------
/lib/telekinesis/consumer/kcl.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 |   module Consumer
3 |     java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
4 |     java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration
5 | 
6 |     class KCL
7 |       # Create a new consumer that consumes data from a Kinesis stream using the
8 |       # AWS Kinesis Client Library.
9 |       #
10 |       # The KCL uses DynamoDB to register clients as part of an application
11 |       # and evenly distribute work between all of the clients registered for
12 |       # the same application. See the AWS Docs for more information:
13 |       #
14 |       # http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumer-apps-with-kcl.html
15 |       #
16 |       # KCLs are configured with a hash. The Kinesis `:stream` to consume from
17 |       # is required.
18 |       #
19 |       # KCL clients operate in groups. All consumers with the same `:app` id use
20 |       # DynamoDB to attempt to distribute work evenly among themselves. The
21 |       # `:worker_id` is used to distinguish individual clients (`:worker_id`
22 |       # defaults to the current hostname. If you plan to run more than one KCL
23 |       # client in the same `:app` on the same host, make sure you set this to
24 |       # something unique!).
25 |       #
26 |       # Clients interested in configuring their own AmazonDynamoDB client may
27 |       # pass an instance as the second argument. If not configured, the client
28 |       # will use a default AWS configuration.
29 |       #
30 |       # Any other valid KCL Worker `:options` may be passed as a nested hash.
31 |       #
32 |       # For example, to configure a `tail` app on `some-stream` and use the
33 |       # default `:worker_id`, you might pass the following configuration to your
34 |       # KCL.
35 |       #
36 |       #   config = {
37 |       #     app: 'tail',
38 |       #     stream: 'some-stream',
39 |       #     options: {initial_position_in_stream: 'TRIM_HORIZON'}
40 |       #   }
41 |       #
42 |       # To actually process the stream, a KCL client creates record processors.
43 |       # These are objects that correspond to the KCL's RecordProcessor
44 |       # interface - processors must implement `init`, `process_records`, and
45 |       # `shutdown` methods.
46 |       #
47 |       # http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kcl-java-interface-v2
48 |       #
49 |       # To specify which record processor to create, pass a block to your
50 |       # distributed consumer that returns a new record processor. This block
51 |       # may (nay, WILL) be called from a background thread so make sure that
52 |       # it's thread-safe.
53 |       #
54 |       # Telekinesis provides a BaseProcessor that implements no-op versions
55 |       # of all of the required methods to make writing quick processors easier
56 |       # and a Block processor that executes the given block every time
57 |       # `process_records` is called.
58 |       #
59 |       # To write a simple stream tailer, you might use Block as follows:
60 |       #
61 |       #   kcl_worker = Telekinesis::Consumer::KCL.new(config) do
62 |       #     Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind_latest|
63 |       #       records.each{|r| puts r}
64 |       #       $stderr.puts "#{millis_behind_latest} ms behind"
65 |       #       checkpointer.checkpoint
66 |       #     end
67 |       #   end
68 |       #
69 |       #   kcl_worker.run
70 |       #
71 |       def initialize(config, dynamo_client = nil, &block)
72 |         raise ArgumentError, "No block given!" unless block_given?
73 |         kcl_config = self.class.build_config(config)
74 |         @under = com.kickstarter.jruby.Telekinesis.new_worker(kcl_config, config[:executor], dynamo_client, &block)
75 |       end
76 | 
77 |       # Return the underlying KCL worker. It's a java.lang.Runnable.
78 |       def as_runnable
79 |         @under
80 |       end
81 | 
82 |       # Start the KCL worker. If background is set to `true`, the worker is
83 |       # started in its own JRuby Thread and the Thread is returned. Otherwise,
84 |       # it starts in the current thread and returns nil.
85 |       def run(background = false)
86 |         if background
87 |           Thread.new { @under.run }
88 |         else
89 |           @under.run
90 |         end
91 |       end
92 | 
93 |       protected
94 | 
95 |       def self.build_config(config)
96 |         creds_hash = config.fetch(:credentials, {})
97 |         credentials_provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(creds_hash)
98 | 
99 |         # App and Stream are mandatory.
100 |         app, stream = [:app, :stream].map do |k|
101 |           raise ArgumentError, "#{k} is required" unless config.include?(k)
102 |           config[k]
103 |         end
104 | 
105 |         # Use this host as the worker_id by default.
106 |         worker_id = config.fetch(:worker_id, `hostname`.chomp)
107 | 
108 |         KinesisClientLibConfiguration.new(app, stream, credentials_provider, worker_id).tap do |kcl_config|
109 |           config.fetch(:options, {}).each do |k, v|
110 |             # Handle initial position in stream separately. It's the only option
111 |             # that requires a value conversion.
112 |             if k.to_s == 'initial_position_in_stream'
113 |               kcl_config.with_initial_position_in_stream(InitialPositionInStream.value_of(v))
114 |             else
115 |               setter = "with_#{k}".to_sym
116 |               if kcl_config.respond_to?(setter)
117 |                 kcl_config.send(setter, v)
118 |               end
119 |             end
120 |           end
121 |         end
122 |       end
123 |     end
124 |   end
125 | end
126 | 
--------------------------------------------------------------------------------
/lib/telekinesis/producer/async_producer.rb:
--------------------------------------------------------------------------------
1 | require "telekinesis/producer/async_producer_worker"
2 | 
3 | module Telekinesis
4 |   module Producer
5 |     java_import java.util.concurrent.TimeUnit
6 |     java_import java.util.concurrent.Executors
7 |     java_import java.util.concurrent.ArrayBlockingQueue
8 |     java_import com.google.common.util.concurrent.ThreadFactoryBuilder
9 | 
10 |     # An asynchronous producer that buffers events into a queue and uses
11 |     # background threads to send them to Kinesis. Only available on JRuby.
12 |     #
13 |     # This class is thread-safe.
14 |     class AsyncProducer
15 |       # For convenience
16 |       MAX_PUT_RECORDS_SIZE = Telekinesis::Aws::KINESIS_MAX_PUT_RECORDS_SIZE
17 | 
18 |       attr_reader :stream, :client, :failure_handler
19 | 
20 |       # Create a new producer.
21 |       #
22 |       # AWS credentials may be specified by using the `:credentials` option and
23 |       # passing a hash containing your `:access_key_id` and `:secret_access_key`.
24 |       # If unspecified, credentials will be fetched from the environment, an
25 |       # ~/.aws/credentials file, or the current instance metadata.
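      #
      # A minimal sketch (the stream name and handler choice are illustrative):
      #
      #   producer = Telekinesis::Producer::AsyncProducer.create(
      #     stream: 'my-stream',
      #     failure_handler: Telekinesis::Producer::WarnFailureHandler.new
      #   )
      #   producer.put("some-key", "some-data")
      #   producer.shutdown(true)  # block until buffered data is flushed (or times out)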
26 |       #
27 |       # The producer's `:worker_count`, internal `:queue_size`, the `:send_size`
28 |       # of batches to Kinesis, and how often workers send data to Kinesis even
29 |       # if their batches aren't full (`:send_every_ms`) can be configured as
30 |       # well. They all have reasonable defaults.
31 |       #
32 |       # When requests to Kinesis fail, the configured `:failure_handler` will
33 |       # be called. If you don't specify a failure handler, a NoopFailureHandler
34 |       # is used.
35 |       def self.create(options = {})
36 |         stream = options[:stream]
37 |         client = Telekinesis::Aws::Client.build(options.fetch(:credentials, {}))
38 |         failure_handler = options.fetch(:failure_handler, NoopFailureHandler.new)
39 |         new(stream, client, failure_handler, options)
40 |       end
41 | 
42 |       # Construct a new producer. Intended for internal use only - prefer
43 |       # #create unless it's strictly necessary.
44 |       def initialize(stream, client, failure_handler, options = {})
45 |         @stream = stream or raise ArgumentError, "stream may not be nil"
46 |         @client = client or raise ArgumentError, "client may not be nil"
47 |         @failure_handler = failure_handler or raise ArgumentError, "failure_handler may not be nil"
48 |         @shutdown = false
49 | 
50 |         queue_size     = options.fetch(:queue_size, 1000)
51 |         send_every     = options.fetch(:send_every_ms, 1000)
52 |         worker_count   = options.fetch(:worker_count, 1)
53 |         raise ArgumentError, ":worker_count must be > 0" unless worker_count > 0
54 |         send_size      = options.fetch(:send_size, MAX_PUT_RECORDS_SIZE)
55 |         raise ArgumentError, ":send_size too large" if send_size > MAX_PUT_RECORDS_SIZE
56 |         retries        = options.fetch(:retries, 5)
57 |         raise ArgumentError, ":retries must be >= 0" unless retries >= 0
58 |         retry_interval = options.fetch(:retry_interval, 1.0)
59 |         raise ArgumentError, ":retry_interval must be > 0" unless retry_interval > 0
60 | 
61 |         # NOTE: For testing.
62 |         @queue = options[:queue] || ArrayBlockingQueue.new(queue_size)
63 | 
64 |         @lock = Telekinesis::JavaUtil::ReadWriteLock.new
65 |         @worker_pool = build_executor(worker_count)
66 |         @workers = worker_count.times.map do
67 |           AsyncProducerWorker.new(self, @queue, send_size, send_every, retries, retry_interval)
68 |         end
69 | 
70 |         # NOTE: Start by default. For testing.
71 |         start unless options.fetch(:manual_start, false)
72 |       end
73 | 
74 |       # Put a single key, value pair to Kinesis. Both key and value must be
75 |       # strings.
76 |       #
77 |       # This call returns immediately and returns true iff the producer is still
78 |       # accepting data. Data is put to Kinesis in the background.
79 |       def put(key, data)
80 |         put_all(key => data)
81 |       end
82 | 
83 |       # Put all of the given key, value pairs to Kinesis. Both keys and values
84 |       # must be strings.
85 |       #
86 |       # This call returns immediately and returns true iff the producer is still
87 |       # accepting data. Data is put to Kinesis in the background.
88 |       def put_all(items)
89 |         # NOTE: The lock ensures that no new data can be added to the queue after
90 |         # the shutdown flag has been set. See the note in shutdown for details.
91 |         @lock.read_lock do
92 |           if @shutdown
93 |             false
94 |           else
95 |             items.each do |key, data|
96 |               @queue.put([key, data])
97 |             end
98 |             true
99 |           end
100 |         end
101 |       end
102 | 
103 |       # Shut down this producer. After the call completes, the producer will not
104 |       # accept any more data, but will finish processing any data it has
105 |       # buffered internally.
106 |       #
107 |       # If block = true is passed, this call will block and wait for the producer
108 |       # to shut down before returning.
This wait times out after duration has 109 | # passed. 110 | def shutdown(block = false, duration = 2, unit = TimeUnit::SECONDS) 111 | # NOTE: Since a write_lock is exclusive, this prevents any data from being 112 | # added to the queue while the SHUTDOWN tokens are being inserted. Without 113 | # the lock, data can end up in the queue behind all of the shutdown tokens 114 | # and be lost. This happens if the shutdown flag is be flipped by a thread 115 | # calling shutdown after another thread has checked the "if @shutdown" 116 | # condition in put but before it's called queue.put. 117 | @lock.write_lock do 118 | @shutdown = true 119 | @workers.size.times do 120 | @queue.put(AsyncProducerWorker::SHUTDOWN) 121 | end 122 | end 123 | 124 | # Don't interrupt workers by calling shutdown_now. 125 | @worker_pool.shutdown 126 | await(duration, unit) if block 127 | end 128 | 129 | # Wait for this producer to shutdown. 130 | def await(duration, unit = TimeUnit::SECONDS) 131 | @worker_pool.await_termination(duration, unit) 132 | end 133 | 134 | # Return the number of events currently buffered by this producer. This 135 | # doesn't include any events buffered in workers that are currently on 136 | # their way to Kinesis. 137 | def queue_size 138 | @queue.size 139 | end 140 | 141 | protected 142 | 143 | def start 144 | @workers.each do |w| 145 | @worker_pool.java_send(:submit, [java.lang.Runnable.java_class], w) 146 | end 147 | end 148 | 149 | def build_executor(worker_count) 150 | Executors.new_fixed_thread_pool( 151 | worker_count, 152 | ThreadFactoryBuilder.new.set_name_format("#{stream}-producer-worker-%d").build 153 | ) 154 | end 155 | end 156 | end 157 | end 158 | -------------------------------------------------------------------------------- /test/producer/test_async_producer_worker.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class AsyncProducerWorkerTest < Minitest::Test 4 | java_import java.util.concurrent.TimeUnit 5 | java_import java.util.concurrent.ArrayBlockingQueue 6 | 7 | def string_from_bytebuffer(bb) 8 | String.from_java_bytes bb.array 9 | end 10 | 11 | class UnretryableAwsError < com.amazonaws.AmazonClientException 12 | def is_retryable 13 | false 14 | end 15 | end 16 | 17 | class CapturingFailureHandler 18 | attr_reader :retries, :final_err 19 | 20 | def initialize 21 | @retries = 0 22 | end 23 | 24 | def failed_records 25 | @failed_records ||= [] 26 | end 27 | 28 | def on_record_failure(fails) 29 | failed_records << fails 30 | end 31 | 32 | def on_kinesis_retry(error, items) 33 | @retries += 1 34 | end 35 | 36 | def on_kinesis_failure(error, items) 37 | @final_err = [error, items] 38 | end 39 | end 40 | 41 | StubProducer = Struct.new(:stream, :client, :failure_handler) 42 | 43 | # NOTE: This stub mocks the behavior of timing out on poll once all of the 44 | # items have been drained from the internal list. 45 | class StubQueue 46 | def initialize(items) 47 | @items = items 48 | end 49 | 50 | def poll(duration, unit) 51 | @items.shift 52 | end 53 | end 54 | 55 | # A wrapper over ABQ that inserts shutdown into itself after a given number 56 | # of calls to poll. Not thread-safe. 
57 |   class ShutdownAfterQueue
58 |     def initialize(shutdown_after)
59 |       @shutdown_after = shutdown_after
60 |       @called = 0
61 |       @under = ArrayBlockingQueue.new(10)
62 |     end
63 | 
64 |     def poll(duration, unit)
65 |       @called += 1
66 |       if @called > @shutdown_after
67 |         @under.put(Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN)
68 |       end
69 |       @under.poll(duration, unit)
70 |     end
71 |   end
72 | 
73 |   class CapturingClient
74 |     attr_reader :requests
75 | 
76 |     def initialize(responses)
77 |       @requests = ArrayBlockingQueue.new(1000)
78 |       @responses = responses
79 |     end
80 | 
81 |     def put_records(stream, items)
82 |       @requests.put([stream, items])
83 |       @responses.shift || []
84 |     end
85 |   end
86 | 
87 |   class ExplodingClient
88 |     def initialize(exception)
89 |       @exception = exception
90 |     end
91 | 
92 |     def put_records(stream, items)
93 |       raise @exception
94 |     end
95 |   end
96 | 
97 |   def stub_producer(stream, responses = [])
98 |     StubProducer.new(stream, CapturingClient.new(responses), CapturingFailureHandler.new)
99 |   end
100 | 
101 |   # NOTE: This always adds SHUTDOWN to the end of the list so that the worker
102 |   # can be run in the test thread and there's no need to deal with coordination
103 |   # across multiple threads. To simulate the worker timing out on a queue.poll,
104 |   # just add 'nil' to your list of items in the queue at the appropriate place.
105 |   def queue_with(*items)
106 |     to_put = items + [Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN]
107 |     StubQueue.new(to_put)
108 |   end
109 | 
110 |   def build_worker
111 |     Telekinesis::Producer::AsyncProducerWorker.new(
112 |       @producer,
113 |       @queue,
114 |       @send_size,
115 |       @send_every,
116 |       @retries,
117 |       @retry_interval
118 |     )
119 |   end
120 | 
121 |   def records_as_kv_pairs(request)
122 |     request.records.map { |r| [r.partition_key, string_from_bytebuffer(r.data)] }
123 |   end
124 | 
125 |   context "producer worker" do
126 |     setup do
127 |       @send_size = 10
128 |       @send_every = 100 # ms
129 |       @retries = 4
130 |       @retry_interval = 0.01
131 |     end
132 | 
133 |     context "with only SHUTDOWN in the queue" do
134 |       setup do
135 |         @producer = stub_producer('test')
136 |         @queue = queue_with() # SHUTDOWN is always added
137 |         @worker = build_worker
138 |       end
139 | 
140 |       should "shut down the worker" do
141 |         @worker.run
142 |         assert(@worker.instance_variable_get(:@shutdown))
143 |       end
144 |     end
145 | 
146 |     context "with [item, SHUTDOWN] in the queue" do
147 |       setup do
148 |         @producer = stub_producer('test')
149 |         @queue = queue_with(
150 |           ["key", "value"],
151 |         )
152 |         @worker = build_worker
153 |       end
154 | 
155 |       should "put data before shutting down the worker" do
156 |         @worker.run
157 |         stream, items = @producer.client.requests.first
158 |         assert_equal('test', stream, "request should have the correct stream name")
159 |         assert_equal([["key", "value"]], items, "Request payload should be kv pairs")
160 |       end
161 |     end
162 | 
163 |     context "with nothing in the queue" do
164 |       setup do
165 |         @producer = stub_producer('test')
166 |         @queue = ShutdownAfterQueue.new(5)
167 |         @worker = build_worker
168 |         @starting_poll_at = @worker.instance_variable_get(:@last_poll_at)
169 |       end
170 | 
171 |       should "update the internal last_poll_at counter and sleep on poll" do
172 |         @worker.run
173 |         refute_equal(@starting_poll_at, @worker.instance_variable_get(:@last_poll_at))
174 |       end
175 |     end
176 | 
177 |     context "with buffered data that times out" do
178 |       setup do
179 |         @items = [["key", "value"]]
180 | 
181 |         @producer = stub_producer('test')
182 |         # Explicitly add 'nil' to fake the queue being empty.
183 |         @queue = queue_with(*(@items + [nil]))
184 |         @worker = build_worker
185 |       end
186 | 
187 |       should "send whatever is in the queue" do
188 |         @worker.run
189 |         stream, items = @producer.client.requests.first
190 |         assert_equal('test', stream, "request should have the correct stream name")
191 |         assert_equal(@items, items, "Request payload should be kv pairs")
192 |       end
193 |     end
194 | 
195 |     context "with fewer than send_size items in queue" do
196 |       setup do
197 |         num_items = @send_size - 1
198 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
199 | 
200 |         @producer = stub_producer('test')
201 |         @queue = queue_with(*@items)
202 |         @worker = build_worker
203 |       end
204 | 
205 |       should "send one request" do
206 |         @worker.run
207 |         stream, items = @producer.client.requests.first
208 |         assert_equal('test', stream, "request should have the correct stream name")
209 |         assert_equal(@items, items, "Request payload should be kv pairs")
210 |       end
211 |     end
212 | 
213 |     context "with more than send_size items in queue" do
214 |       setup do
215 |         num_items = (@send_size * 2) - 1
216 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
217 | 
218 |         @producer = stub_producer('test')
219 |         @queue = queue_with(*@items)
220 |         @worker = build_worker
221 |       end
222 | 
223 |       should "send multiple requests of at most send_size" do
224 |         @worker.run
225 |         expected = @items.each_slice(@send_size).to_a
226 |         expected.zip(@producer.client.requests) do |kv_pairs, (stream, batch)|
227 |           assert_equal('test', stream, "Request should have the correct stream name")
228 |           assert_equal(kv_pairs, batch, "Request payload should be kv pairs")
229 |         end
230 |       end
231 |     end
232 | 
233 |     context "when some records return an unretryable error response" do
234 |       setup do
235 |         num_items = @send_size - 1
236 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
237 |         @failed_items = @items.each_with_index.map do |item, idx|
238 |           if idx.even?
239 |             k, v = item
240 |             [k, v, "some_code", "message"]
241 |           else
242 |             nil
243 |           end
244 |         end
245 |         @failed_items.compact!
246 | 
247 |         @producer = stub_producer('test', [@failed_items])
248 |         @queue = queue_with(*@items)
249 |         @worker = build_worker
250 |       end
251 | 
252 |       should "call the failure handler with all failed records" do
253 |         @worker.run
254 |         assert_equal([@failed_items], @producer.failure_handler.failed_records)
255 |       end
256 |     end
257 | 
258 |     context "when some records return a retryable error response" do
259 |       setup do
260 |         num_items = @send_size - 1
261 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
262 |         @failed_items = @items.each_with_index.map do |item, idx|
263 |           if idx.even?
264 |             k, v = item
265 |             [k, v, "InternalFailure", "message"]
266 |           else
267 |             nil
268 |           end
269 |         end
270 |         @failed_items.compact!
271 | 
272 |         @producer = stub_producer('test', [@failed_items, []])
273 |         @queue = queue_with(*@items)
274 |         @worker = build_worker
275 |       end
276 | 
277 |       should "not call the failure handler with any failed records" do
278 |         @worker.run
279 |         assert_equal([], @producer.failure_handler.failed_records)
280 |       end
281 | 
282 |       should "retry the request" do
283 |         @worker.run
284 |         assert_equal(2, @producer.client.requests.size)
285 |       end
286 |     end
287 | 
288 |     context "when retryable responses fail too many times" do
289 |       setup do
290 |         num_items = @send_size - 1
291 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
292 |         @failed_items = @items.each_with_index.map do |item, idx|
293 |           if idx.even?
294 |             k, v = item
295 |             [k, v, "InternalFailure", "message"]
296 |           else
297 |             nil
298 |           end
299 |         end
300 |         @failed_items.compact!
301 | 
302 |         @producer = stub_producer('test', [@failed_items] * (@retries + 1))
303 |         @queue = queue_with(*@items)
304 |         @worker = build_worker
305 |       end
306 | 
307 |       should "call the failure handler with all failed records" do
308 |         @worker.run
309 |         assert_equal([@failed_items], @producer.failure_handler.failed_records)
310 |       end
311 | 
312 |       should "retry the request" do
313 |         @worker.run
314 |         assert_equal(@retries, @producer.client.requests.size)
315 |       end
316 |     end
317 | 
318 |     context "with a mix of retryable error responses" do
319 |       setup do
320 |         num_items = @send_size - 1
321 |         @items = num_items.times.map { |i| ["key-#{i}", "value-#{i}"] }
322 |         @first_response = @items.each_with_index.map do |item, idx|
323 |           k, v = item
324 |           [k, v, idx.even? ? "InternalFailure" : "WHATEVER", "message"]
325 |         end
326 |         @did_retry = @first_response.select { |_, _, m, _| m == "InternalFailure" }
327 |         @no_retry = @first_response.select { |_, _, m, _| m == "WHATEVER" }
328 | 
329 |         @producer = stub_producer('test', [@first_response, []])
330 |         @queue = queue_with(*@items)
331 |         @worker = build_worker
332 |       end
333 | 
334 |       should "retry the request" do
335 |         @worker.run
336 |         assert_equal(2, @producer.client.requests.size)
337 |         _, items = @producer.client.requests.to_a.last
338 |         assert_equal(@did_retry.map { |k, v, _, _| [k, v] }, items)
339 |       end
340 | 
341 |       should "call the failure handler with only the records that failed" do
342 |         @worker.run
343 |         assert_equal([@no_retry], @producer.failure_handler.failed_records)
344 |       end
345 |     end
346 | 
347 |     context "when the client throws a retryable exception" do
348 |       setup do
349 |         @boom = Telekinesis::Aws::KinesisError.new(com.amazonaws.AmazonClientException.new("boom"))
350 |         @producer = StubProducer.new(
351 |           'stream',
352 |           ExplodingClient.new(@boom),
353 |           CapturingFailureHandler.new
354 |         )
355 |         @queue = queue_with(['foo', 'bar'])
356 |         @worker = build_worker
357 |       end
358 | 
359 |       should "call the failure handler on retries and errors" do
360 |         @worker.run
361 |         assert_equal((@retries - 1), @producer.failure_handler.retries)
362 |         err, items = @producer.failure_handler.final_err
363 |         assert_equal(@boom, err)
364 |         assert_equal([['foo', 'bar']], items)
365 |       end
366 |     end
367 | 
368 |     context "when the client throws an unretryable exception" do
369 |       setup do
370 |         @boom = Telekinesis::Aws::KinesisError.new(UnretryableAwsError.new("boom"))
371 |         @producer = StubProducer.new(
372 |           'stream',
373 |           ExplodingClient.new(@boom),
374 |           CapturingFailureHandler.new
375 |         )
376 |         @queue = queue_with(['foo', 'bar'])
377 |         @worker = build_worker
378 |       end
379 | 
380 |       should "call the failure handler on error but not on retry" do
381 |         @worker.run
382 |         assert_equal(0, @producer.failure_handler.retries)
383 |         err, items = @producer.failure_handler.final_err
384 |         assert_equal(@boom, err)
385 |         assert_equal([['foo', 'bar']], items)
386 |       end
387 |     end
388 | 
389 |   end
390 | end
391 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Looking for Maintainers
2 | 
3 | We're not actively maintaining this project. If you're interested in maintaining it, please post a comment on [this issue](https://github.com/kickstarter/telekinesis/issues/22).
4 | 
5 | ## Table of Contents
6 | 
7 | - [Telekinesis](#telekinesis)
8 | - [Requirements](#requirements)
9 | - [Installing](#installing)
10 | - [Producers](#producers)
11 | - [SyncProducer](#syncproducer)
12 | - [AsyncProducer](#asyncproducer)
13 | - [Consumers](#consumers)
14 | - [KCL](#kcl)
15 | - [Client State](#client-state)
16 | - [Errors while processing records](#errors-while-processing-records)
17 | - [Checkpoints and `INITIAL_POSITION_IN_STREAM`](#checkpoints-and-initial_position_in_stream)
18 | - [Java client logging](#java-client-logging)
20 | - [Building](#building)
21 | - [Prerequisites](#prerequisites)
22 | - [Build](#build)
23 | - [Testing](#testing)
24 | - [License](#license)
25 | 
26 | # Telekinesis
27 | 
28 | Telekinesis is a high-level client for Amazon Kinesis.
29 | 
30 | The library provides a high-throughput asynchronous producer and wraps the
31 | [Kinesis Client Library](https://github.com/awslabs/amazon-kinesis-client) to
32 | provide an easy interface for writing consumers.
33 | 
34 | ## Requirements
35 | 
36 | Telekinesis runs on JRuby 1.7.x or later, with at least Java 6.
37 | 
38 | If you want to build from source, you need to have Apache Maven installed.
39 | 
40 | ## Installing
41 | 
42 | ```
43 | gem install telekinesis
44 | ```
45 | 
46 | ## Producers
47 | 
48 | Telekinesis includes two high-level
49 | [Producers](http://docs.aws.amazon.com/kinesis/latest/dev/amazon-kinesis-producers.html).
50 | 
51 | Telekinesis assumes that records are `[key, value]` pairs of strings. The key
52 | *must* be a string, as enforced by Kinesis itself. Keys are used by the service
53 | to partition data into shards. Values can be any old blob of data, but for
54 | simplicity, Telekinesis expects strings.
55 | 
56 | Both keys and values should respect Kinesis
57 | [limits](http://docs.aws.amazon.com/kinesis/latest/dev/service-sizes-and-limits.html)
58 | and all of the [restrictions](http://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html)
59 | in the PutRecord API documentation.
60 | 
61 | ### SyncProducer
62 | 
63 | The `SyncProducer` sends data to Kinesis every time `put` or `put_all`
64 | is called. These calls block until the call to Kinesis returns.
65 | 
66 | 
67 | ```ruby
68 | require 'telekinesis'
69 | 
70 | producer = Telekinesis::Producer::SyncProducer.create(
71 |   stream: 'my-stream',
72 |   credentials: {
73 |     access_key_id: 'foo',
74 |     secret_access_key: 'bar'
75 |   }
76 | )
77 | ```
78 | 
79 | Calls to `put` send a single record at a time to Kinesis, whereas calls to
80 | `put_all` can send up to 500 records at a time, which is the Kinesis service
81 | limit. If more than 500 records are passed to `put_all`, they're grouped into
82 | batches and sent.
83 | 
84 | > NOTE: To send fewer records to Kinesis at a time when using `put_all`, you
85 | > can adjust the `:send_size` parameter in the `create` method.
86 | 
87 | Using `put_all` over `put` is recommended if you have any way to batch your
88 | data. Since Kinesis is accessed over HTTP and each request carries relatively
89 | high latency, batching data is the easiest way to increase throughput.
90 | 
91 | ```ruby
92 | # file is an instance of File containing CSV data that looks like:
93 | #
94 | #   "some,very,important,data,with,a,partition_key"
95 | #
96 | lines = file.lines.map do |line|
97 |   key = line.split(/,/).last
98 |   data = line
99 |   [key, data]
100 | end
101 | 
102 | # One record at a time
103 | lines.each do |key, data|
104 |   producer.put(key, data)
105 | end
106 | 
107 | # Manually control your batches
108 | lines.each_slice(200) do |batch|
109 |   producer.put_all(batch)
110 | end
111 | 
112 | # Go hog wild
113 | producer.put_all(lines.to_a)
114 | ```
115 | 
116 | When something goes wrong and the Kinesis client throws an exception, it bubbles
117 | up as a `Telekinesis::Aws::KinesisError` with the underlying exception accessible
118 | as the `cause` field.
119 | 
120 | When some (but not all) of the records passed to `put_all` cause
121 | problems, they're returned as an array of
122 | `[key, value, error_code, error_message]` tuples.
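For example, a minimal sketch of checking the return value of `put_all` (the
immediate retry shown here is illustrative; the `SyncProducer` itself doesn't
retry failed records for you):

```ruby
failures = producer.put_all(lines.to_a)

unless failures.empty?
  failures.each do |key, value, code, message|
    warn "record failed: code=#{code} message=#{message}"
  end
  # Retry only the failed records. A real application might back off or
  # persist these somewhere instead of retrying immediately.
  producer.put_all(failures.map { |key, value, _, _| [key, value] })
end
```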
123 | 
124 | ### AsyncProducer
125 | 
126 | The `AsyncProducer` queues events internally and uses background threads to send
127 | data to Kinesis. Data is sent when a batch reaches the Kinesis limit of 500
128 | records, when the producer's timeout is reached, or when the producer is shut down.
129 | 
130 | > NOTE: You can configure the size at which a batch is sent by passing the
131 | > `:send_size` parameter to create. The producer's internal timeout can be
132 | > set by using the `:send_every_ms` parameter.
133 | 
134 | The API for the `AsyncProducer` looks similar to the `SyncProducer`'s. However,
135 | all `put` and `put_all` calls return immediately. Both `put` and `put_all`
136 | return `true` if the producer enqueued the data for sending later, and `false`
137 | if the producer is not accepting data for any reason. If the producer's internal
138 | queue fills up, calls to `put` and `put_all` will block.
139 | 
140 | Since sending (and therefore failures) happens in a different thread, you can
141 | provide an `AsyncProducer` with a failure handler that's called whenever
142 | something bad happens.
143 | 
144 | ```ruby
145 | require 'telekinesis'
146 | 
147 | class MyFailureHandler
148 |   def on_record_failure(kv_pairs_and_errors)
149 |     items = kv_pairs_and_errors.map do |k, v, code, message|
150 |       maybe_log_error(code, message)
151 |       [k, v]
152 |     end
153 |     save_for_later(items)
154 |   end
155 | 
156 |   def on_kinesis_retry(err, items); end
157 | 
158 |   def on_kinesis_failure(err, items)
159 |     log_exception(err.cause)
160 |     save_for_later(items)
161 |   end
162 | end
163 | 
164 | producer = Telekinesis::Producer::AsyncProducer.create(
165 |   stream: 'my-stream',
166 |   failure_handler: MyFailureHandler.new,
167 |   send_every_ms: 1500,
168 |   credentials: {
169 |     access_key_id: 'foo',
170 |     secret_access_key: 'bar'
171 |   }
172 | )
173 | ```
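Because sends happen in the background, shut the producer down before your
process exits so that buffered records are flushed. A minimal sketch (`at_exit`
is just one way to arrange this; `shutdown(true)` stops the producer accepting
new data and blocks, up to a two-second default timeout, while the internal
queue drains):

```ruby
at_exit do
  # Stop accepting new records and wait for the background workers to
  # finish sending whatever is still buffered.
  producer.shutdown(true)
end
```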
174 | 
175 | ## Consumers
176 | 
177 | ### KCL
178 | 
179 | `Telekinesis::Consumer::KCL` is a wrapper around Amazon's [Kinesis Client
180 | Library (also called the KCL)](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html#kinesis-record-processor-overview-kcl).
181 | 
182 | Each KCL instance is part of a group of consumers that make up an
183 | _application_. An application can be running on any number of hosts in any
184 | number of processes. Consumers identify themselves uniquely within an
185 | application by specifying a `worker_id`.
186 | 
187 | All of the consumers within an application attempt to distribute work evenly
188 | between themselves by coordinating through a DynamoDB table. This coordination
189 | ensures that a single consumer processes each shard, and that if one consumer
190 | fails for any reason, another consumer can pick up from the point at which it
191 | last checkpointed.
192 | 
193 | This is all part of the official AWS library! Telekinesis just makes it easier
194 | to use from JRuby.
195 | 
196 | Each client has to know how to process all the data it's
197 | retrieving from Kinesis. That's done by creating a [record
198 | processor](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java)
199 | and telling a `KCL` how to create a processor when it becomes
200 | responsible for a shard.
201 | 
202 | We highly recommend reading the [official
203 | docs](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java)
204 | on implementing the `IRecordProcessor` interface before you continue.
205 | 
206 | > NOTE: Since `initialize` is a reserved method in Ruby, Telekinesis takes care
207 | > of calling your `init` method whenever the KCL calls `IRecordProcessor`'s
208 | > `initialize` method.
209 | 
210 | > NOTE: Make sure you read the Kinesis Record Processor documentation carefully.
211 | > Failures, checkpoints, and shutdown require some attention. More on that later.
212 | 
213 | After it is created, a record processor is initialized with the ID of the shard
214 | it's processing, and handed an enumerable of
215 | [Records](http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/index.html?com/amazonaws/services/kinesis/AmazonKinesisClient.html) and a checkpointer (see below) every time the consumer detects new data to
216 | process.
217 | 
218 | Defining and creating a simple processor might look like:
219 | 
220 | ```ruby
221 | require 'telekinesis'
222 | 
223 | class MyProcessor
224 |   def init(init_input)
225 |     @shard_id = init_input.shard_id
226 |     $stderr.puts "Started processing #{@shard_id}"
227 |   end
228 | 
229 |   def process_records(process_records_input)
230 |     process_records_input.records.each do |r|
231 |       puts "key=#{r.partition_key} value=#{String.from_java_bytes(r.data.array)}"
232 |     end
233 |   end
234 | 
235 |   def shutdown
236 |     $stderr.puts "Shutting down #{@shard_id}"
237 |   end
238 | end
239 | 
240 | worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
241 |   MyProcessor.new
242 | end
243 | 
244 | worker.run
245 | ```
246 | 
247 | To make defining record processors easier, Telekinesis comes with a `Block`
248 | processor that lets you use a block to specify your `process_records` method.
249 | Use this if you don't need to do any explicit startup or shutdown in a record
250 | processor.
251 | 
252 | ```ruby
253 | require 'telekinesis'
254 | 
255 | worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
256 |   Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind|
257 |     records.each { |r| puts "key=#{r.partition_key} value=#{String.from_java_bytes(r.data.array)}" }
258 |   end
259 | end
260 | 
261 | worker.run
262 | ```
263 | 
264 | Once you get into building a client application, you'll probably want
265 | to know about some of the following advanced tips and tricks.
266 | 
267 | #### Client State
268 | 
269 | Each KCL application gets its own DynamoDB table that stores this coordination state.
270 | The application name (`app:` in the examples above) is used as the DynamoDB
271 | table name, so beware of namespace collisions if you use DynamoDB on its own.
272 | Altering or resetting any of this state involves manually altering the application's Dynamo table.
273 | 
274 | #### Errors while processing records
275 | 
276 | When a call to `process_records` fails, the KCL expects you to handle the
277 | failure and try to reprocess. If you let an exception escape, it happily moves
278 | on to the next batch of records from Kinesis and will let you checkpoint further
279 | on down the road.
280 | 
281 | From the [official docs](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html):
282 | 
283 | > The KCL relies on processRecords to handle any exceptions that arise from
284 | > processing the data records. If an exception is thrown from processRecords,
285 | > the KCL skips over the data records that were passed prior to the exception;
286 | > that is, these records are not re-sent to the record processor that threw the
287 | > exception or to any other record processor in the application.
288 | 
289 | The moral of the story is that you should be absolutely sure you catch any
290 | exceptions that get thrown in your `process_records` implementation. If you
291 | don't, you can (silently) drop data on the floor.
292 | 
293 | If something terrible happens and you can't attempt to re-read the list of
294 | records and re-do whatever work you needed to do in process records, we've been
295 | advised by the Kinesis team that killing the entire JVM that's running the
296 | worker is the safest thing to do. On restart, the consumer (or another consumer
297 | in the application group) will pick up the orphaned shards and attempt to
298 | restart from the last available checkpoint.
299 | 
300 | #### Checkpoints and `INITIAL_POSITION_IN_STREAM`
301 | 
302 | The second object passed to `process_records` is a checkpointer. This can be
303 | used to checkpoint all records that have been passed to the processor so far
304 | (by just calling `checkpointer.checkpoint`) or up to a particular sequence
305 | number (by calling `checkpointer.checkpoint(record.sequence_number)`).
306 | 
307 | While a `KCL` consumer can be initialized with an `:initial_position_in_stream`
308 | option, any existing checkpoint for a shard will take precedence over that
309 | value. Furthermore, any existing state in DynamoDB will take precedence, so if
310 | you start a consumer with `initial_position_in_stream: 'LATEST'` and then
311 | restart with `initial_position_in_stream: 'TRIM_HORIZON'` you still end up
312 | starting from `LATEST`.
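For example, here's a sketch of a `Block` processor that only checkpoints once
it has finished handling a batch (`handle_record` is a stand-in for your own
processing logic, not part of the library):

```ruby
worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
  Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind|
    records.each { |r| handle_record(r) }
    # Mark everything handed to this processor so far as processed. On
    # restart, the shard's next consumer resumes from this checkpoint.
    checkpointer.checkpoint
  end
end

worker.run
```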
313 | 
314 | ## Java client logging
315 | 
316 | The AWS Java SDK can be extremely noisy and hard to control, since it logs
317 | through `java.util.logging`.
318 | 
319 | Telekinesis comes with a shim that can silence all of that logging or redirect
320 | it to a Ruby Logger of your choice. This isn't fine-grained control - you're
321 | capturing or disabling ALL logging from any Java dependency that uses
322 | `java.util.logging` - so use it with care.
323 | 
324 | To entirely disable logging:
325 | 
326 | ```ruby
327 | Telekinesis::Logging.disable_java_logging
328 | ```
329 | 
330 | To capture all logging and send it through a Ruby logger:
331 | 
332 | ```ruby
333 | Telekinesis::Logging.capture_java_logging(Logger.new($stderr))
334 | ```
335 | 
336 | ----
337 | 
338 | # Building
339 | 
340 | ## Prerequisites
341 | 
342 | * JRuby 1.7.9 or later
343 | * Apache Maven
344 | 
345 | ## Build
346 | 
347 | Install JRuby 1.7.9 or later. For example, with `rbenv`:
348 | 
349 | ```
350 | $ rbenv install jruby-1.7.9
351 | ```
352 | 
353 | Install Bundler and the required gems.
354 | 
355 | ```
356 | $ gem install bundler
357 | $ bundle install
358 | ```
359 | 
360 | Install Apache Maven.
361 | 
362 | On Ubuntu or related distributions, use:
363 | 
364 | ```
365 | $ sudo apt-get install maven
366 | ```
367 | 
368 | The easiest method on OS X is via Homebrew (note that `brew` should not be
369 | run with `sudo`):
370 | 
371 | ```
372 | $ brew install maven
373 | ```
374 | 
375 | Ensure your `JAVA_HOME` environment variable is set. On OS X with Bash, for
376 | example, add the following to `~/.bash_profile`:
377 | 
378 | ```
379 | export JAVA_HOME=$(/usr/libexec/java_home)
380 | ```
381 | 
382 | Then run:
383 | 
384 | ```
385 | $ source ~/.bash_profile
386 | ```
387 | 
388 | Build the Java shim and jar.
389 | 
390 | ```
391 | $ rake ext:build
392 | ```
393 | 
394 | The `rake ext:build` task builds the Java shim and packages all of the required
395 | Java classes into a single jar. Since bytecode is portable, the jar is shipped
396 | with the built gem.
397 | 
398 | Build the gem.
399 | 
400 | Use the `rake gem:build` task to build the complete gem, uberjar and all.
401 | 
402 | ```
403 | $ rake gem:build
404 | ```
405 | 
406 | # Testing
407 | 
408 | Telekinesis comes with a small set of unit tests. Run those with plain ol'
409 | `rake test`.
410 | 
411 | > NOTE: The Java extension *must* be built and installed before you can run
412 | > the unit tests.
413 | 
414 | Integration tests coming soon.
415 | 
416 | 
417 | # License
418 | 
419 | Copyright Kickstarter, PBC.
420 | 
421 | Released under an [MIT License](http://opensource.org/licenses/MIT).
422 | 
--------------------------------------------------------------------------------