']]]
54 | ],
55 | ]
56 | end
57 |
58 | should "generate aws.PutRecordsRequest" do
59 | @data.each do |args, expected|
60 | request, = @client.send(:do_put_records, *args)
61 | expected_stream, expected_items = expected
62 |
63 | assert_equal(expected_stream, request.stream_name)
64 | expected_items.zip(request.records) do |(expected_key, expected_value), record|
65 | assert_equal(expected_key, record.partition_key)
66 | assert_equal(expected_value, String.from_java_bytes(record.data.array))
67 | end
68 | end
69 | end
70 | end
71 |
72 | context ".build_credentials_provider" do
73 | should "return a provider that provides the specified credentials" do
74 | credentials = {
75 | access_key_id: '0000000000',
76 | secret_access_key: '0000000000',
77 | }
78 | provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(credentials)
79 |
80 | assert_equal(credentials[:access_key_id], provider.credentials.aws_access_key_id)
81 | assert_equal(credentials[:secret_access_key], provider.credentials.aws_secret_key)
82 | end
83 | end
84 | end
85 | end
86 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'bundler/setup'
2 |
3 | Bundler.require(:development)
4 |
5 | def log_ok(message)
6 | $stderr.write "#{message}... "
7 | begin
8 | yield
9 | $stderr.puts "ok"
10 | rescue => e
11 | $stderr.puts "failed"
12 | abort <<-EOF
13 |
14 | error: #{e}
15 | EOF
16 | end
17 | end
18 |
19 | def artifact_name(path)
20 | File.open(path) do |f|
21 | doc = Nokogiri::XML(f)
22 | id = doc.css("project>artifactId").text
23 | version = doc.css("project>version").text
24 | "#{id}-#{version}.jar"
25 | end
26 | end
27 |
28 | namespace :ext do
29 | require_relative 'lib/telekinesis/version'
30 |
31 | desc "Cleanup all built extension"
32 | task :clean do
33 | FileUtils.rm(Dir.glob("lib/telekinesis/*.jar"))
34 | Dir.chdir("ext") do
35 | `mvn clean 2>&1`
36 | end
37 | end
38 |
39 | task :have_maven? do
40 | log_ok("Checking for maven") do
41 | `which mvn`
42 | raise "Maven is required to build this gem" unless $?.success?
43 | end
44 | end
45 |
46 | task :have_jdk6_or_higher? do
47 | log_ok("Checking that at least java 6 is installed") do
48 | version_match = `java -version 2>&1`.match(/java version "1\.(\d)\.(\d+_\d+)"/)
49 | if version_match.nil?
50 | raise "Can't parse Java version!"
51 | end
52 | jdk_version, _jdk_patchlevel = version_match.captures
53 | if jdk_version.to_i < 6
54 | raise "Found #{version_match}"
55 | end
56 | end
57 | end
58 |
59 | task :update_pom_version do
60 | File.open('ext/pom.xml', 'r+') do |f|
61 | doc = Nokogiri::XML(f)
62 | pom_version = doc.css("project>version")
63 |
64 | if pom_version.text != Telekinesis::VERSION
65 | log_ok("Updating pom.xml version") do
66 | pom_version.first.content = Telekinesis::VERSION
67 | f.truncate(0)
68 | f.rewind
69 | f.write(doc.to_xml)
70 | end
71 | end
72 | end
73 | end
74 |
75 | desc "Build the Java extensions for this gem. Requires JDK6+ and Maven"
76 | task :build => [:have_jdk6_or_higher?, :have_maven?, :update_pom_version, :clean] do
77 | fat_jar = artifact_name('ext/pom.xml')
78 | log_ok("Building #{fat_jar}") do
79 | Dir.chdir("ext") do
80 | `mkdir -p target/`
81 |       `mvn package > target/build_log 2>&1`
82 | raise "build failed. See ext/target/build_log for details" unless $?.success?
83 | FileUtils.copy("target/#{fat_jar}", "../lib/telekinesis/#{fat_jar}")
84 | end
85 | end
86 | end
87 | end
88 |
89 | namespace :gem do
90 | desc "Build this gem"
91 | task :build => 'ext:build' do
92 | `gem build telekinesis.gemspec`
93 | end
94 | end
95 |
96 | require 'rake/testtask'
97 |
98 | # NOTE: Tests shouldn't be run without the extension being built, but converting
99 | # the build task to a file task made it hard to depend on having a JDK
100 | # and Maven installed. This is a little kludgy but better than the
101 | # alternative.
102 | task :check_for_ext do
103 | fat_jar = artifact_name('ext/pom.xml')
104 |   Rake::Task["ext:build"].invoke unless File.exist?("lib/telekinesis/#{fat_jar}")
105 | end
106 |
107 | Rake::TestTask.new(:test) do |t|
108 | t.test_files = FileList["test/**/test_*.rb"].exclude(/test_helper/)
109 | t.verbose = true
110 | end
111 | task :test => :check_for_ext
112 |
--------------------------------------------------------------------------------
/lib/telekinesis/producer/async_producer_worker.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 | module Producer
3 | java_import java.nio.ByteBuffer
4 | java_import java.util.concurrent.TimeUnit
5 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequest
6 | java_import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry
7 |
8 | class AsyncProducerWorker
9 | SHUTDOWN = :shutdown
10 |
11 | def initialize(producer, queue, send_size, send_every, retries, retry_interval)
12 | @producer = producer
13 | @queue = queue
14 | @send_size = send_size
15 | @send_every = send_every
16 | @retries = retries
17 | @retry_interval = retry_interval
18 |
19 | @stream = producer.stream # for convenience
20 | @client = producer.client # for convenience
21 | @failure_handler = producer.failure_handler # for convenience
22 |
23 | @buffer = []
24 | @last_poll_at = current_time_millis
25 | @shutdown = false
26 | end
27 |
28 | def run
29 | loop do
30 | next_wait = [0, (@last_poll_at + @send_every) - current_time_millis].max
31 | next_item = @queue.poll(next_wait, TimeUnit::MILLISECONDS)
32 |
33 | if next_item == SHUTDOWN
34 | next_item, @shutdown = nil, true
35 | end
36 |
37 | unless next_item.nil?
38 | buffer(next_item)
39 | end
40 |
41 | if buffer_full || (next_item.nil? && buffer_has_records)
42 | put_records(get_and_reset_buffer, @retries, @retry_interval)
43 | end
44 |
45 | @last_poll_at = current_time_millis
46 | break if @shutdown
47 | end
48 | rescue => e
49 | # TODO: is there a way to encourage people to set up an uncaught exception
50 |       # handler and/or disable this?
51 | bt = e.backtrace ? e.backtrace.map{|l| "! #{l}"}.join("\n") : ""
52 | warn "Producer background thread died!"
53 | warn "#{e.class}: #{e.message}\n#{bt}"
54 | raise e
55 | end
56 |
57 | protected
58 |
59 | def current_time_millis
60 | (Time.now.to_f * 1000).to_i
61 | end
62 |
63 | def buffer(item)
64 | @buffer << item
65 | end
66 |
67 | def buffer_full
68 | @buffer.size == @send_size
69 | end
70 |
71 | def buffer_has_records
72 | !@buffer.empty?
73 | end
74 |
75 | def get_and_reset_buffer
76 | ret, @buffer = @buffer, []
77 | ret
78 | end
79 |
80 | def put_records(items, retries, retry_interval)
81 | begin
82 |           failed, retryable = [], []
83 | while retries > 0
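            # @client.put_records returns a [key, value, error_code, error_message]
            # tuple for each record that failed; split those by whether the error
            # code is worth retrying.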
84 | retryable, unretryable = @client.put_records(@stream, items).partition do |_, _, code, _|
85 | code == 'InternalFailure' || code == 'ProvisionedThroughputExceededException'
86 | end
87 | failed.concat(unretryable)
88 |
89 | if retryable.empty?
90 | break
91 | else
92 | items = retryable.map{|k, v, _, _| [k, v]}
93 | retries -= 1
94 | end
95 | end
96 | failed.concat(retryable) unless retryable.empty?
97 | @failure_handler.on_record_failure(failed) unless failed.empty?
98 | rescue Telekinesis::Aws::KinesisError => e
99 | if e.cause && e.cause.is_retryable && (retries -= 1) > 0
100 | sleep retry_interval
101 | @failure_handler.on_kinesis_retry(e, items)
102 | retry
103 | else
104 | @failure_handler.on_kinesis_failure(e, items)
105 | end
106 | end
107 | end
108 | end
109 | end
110 | end
111 |
--------------------------------------------------------------------------------
/test/producer/test_async_producer.rb:
--------------------------------------------------------------------------------
1 | require_relative "test_helper"
2 |
3 | class AsyncProducerTest < Minitest::Test
4 | java_import java.util.concurrent.TimeUnit
5 | java_import java.util.concurrent.CountDownLatch
6 | java_import java.util.concurrent.ArrayBlockingQueue
7 |
8 | StubClient = Struct.new(:welp)
9 |
10 | class LatchQueue
11 | def initialize
12 | @under = ArrayBlockingQueue.new(100)
13 | @latch = CountDownLatch.new(1)
14 | @putting = CountDownLatch.new(1)
15 | end
16 |
17 | def count_down
18 | @latch.count_down
19 | end
20 |
21 | def wait_for_put
22 | @putting.await
23 | end
24 |
25 | def put(item)
26 | @putting.count_down
27 | @latch.await
28 | @under.put(item)
29 | end
30 | end
31 |
32 | def build_producer
33 | opts = {
34 | queue: @queue,
35 | manual_start: true,
36 | worker_count: @worker_count,
37 | }
38 | Telekinesis::Producer::AsyncProducer.new(
39 | @stream,
40 | StubClient.new,
41 | Telekinesis::Producer::NoopFailureHandler.new,
42 | opts
43 | )
44 | end
45 |
46 | context "AsyncProducer" do
47 | setup do
48 | @stream = 'test' # ignored
49 | @worker_count = 3 # arbitrary
50 | end
51 |
52 | context "put" do
53 | setup do
54 | @queue = ArrayBlockingQueue.new(100)
55 | build_producer.put("hi", "there")
56 | end
57 |
58 | should "add the k,v pair to the queue" do
59 | assert_equal([["hi", "there"]], @queue.to_a)
60 | end
61 | end
62 |
63 | context "put_all" do
64 | setup do
65 | @items = 10.times.map{|i| ["key-#{i}", "value-#{i}"]}
66 | @queue = ArrayBlockingQueue.new(100)
67 | build_producer.put_all(@items)
68 | end
69 |
70 | should "add all items to the queue" do
71 | assert_equal(@items, @queue.to_a)
72 | end
73 | end
74 |
75 | context "after shutdown" do
76 | setup do
77 | @queue = ArrayBlockingQueue.new(100)
78 | @producer = build_producer
79 | @producer.shutdown
80 | end
81 |
82 | should "shutdown all workers" do
83 | assert_equal([Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN] * @worker_count, @queue.to_a)
84 | end
85 |
86 | should "not accept events while shut down" do
87 | refute(@producer.put("key", "value"))
88 | end
89 | end
90 |
91 | context "with a put in progress" do
92 | setup do
93 | @queue = LatchQueue.new
94 | @producer = build_producer
95 |
96 | # Thread blocks waiting for the latch in LatchQueue. Don't do any other
97 | # set up until this thread is in the critical section.
98 | Thread.new do
99 | @producer.put("k", "v")
100 | end
101 | @queue.wait_for_put
102 |
103 | # Thread blocks waiting for the write_lock in AsyncProducer. Once it's
104 | # unblocked it signals by counting down shutdown_latch.
105 | @shutdown_latch = CountDownLatch.new(1)
106 | Thread.new do
107 | @producer.shutdown
108 | @shutdown_latch.count_down
109 | end
110 | end
111 |
112 | should "block on shutdown until the put is done" do
113 | # Check that the latch hasn't been triggered yet. Return immediately
114 | # from the check - don't bother waiting.
115 | refute(@shutdown_latch.await(0, TimeUnit::MILLISECONDS))
116 | @queue.count_down
117 | # NOTE: The assert is here to fail the test if it times out. This could
118 | # effectively just be an await with no duration.
119 | assert(@shutdown_latch.await(2, TimeUnit::SECONDS))
120 | end
121 | end
122 |
123 | context "with a shutdown in progress" do
124 | setup do
125 | @queue = LatchQueue.new
126 | @producer = build_producer
127 |
128 | # Thread blocks waiting to insert :shutdown into the queue because of
129 | # the latch in LatchQueue. Don't do any other test set up until this
130 | # thread is in the critical section.
131 | Thread.new do
132 | @producer.shutdown
133 | end
134 | @queue.wait_for_put
135 |
136 | # This thread blocks waiting for the lock in AsyncProducer. Once it's
137 | # done the put continues and then it signals completion by counting
138 | # down finished_put_latch.
139 | @finished_put_latch = CountDownLatch.new(1)
140 | Thread.new do
141 | @put_result = @producer.put("k", "v")
142 | @finished_put_latch.count_down
143 | end
144 | end
145 |
146 | should "block on a put" do
147 | # Thread is already waiting in the critical section. Just check that
148 | # the call hasn't exited yet and return immediately.
149 | refute(@finished_put_latch.await(0, TimeUnit::MILLISECONDS))
150 | @queue.count_down
151 | # NOTE: The assert is here to fail the test if it times out. This could
152 | # effectively just be an await with no duration.
153 | assert(@finished_put_latch.await(2, TimeUnit::SECONDS))
154 | refute(@put_result, "Producer should reject a put after shutdown")
155 | end
156 | end
157 | end
158 | end
159 |
--------------------------------------------------------------------------------
/test/producer/test_sync_producer.rb:
--------------------------------------------------------------------------------
1 | require_relative "test_helper"
2 |
3 | class SyncProducerTest < Minitest::Test
4 | StubPutRecordResponse = Struct.new(:shard_id, :sequence_number, :error_code, :error_message)
5 |
6 | class StubClient
7 | attr_reader :requests
8 |
9 | def initialize(*responses)
10 | @requests = []
11 | @responses = responses
12 | end
13 |
14 | def put_record(stream, key, value)
15 | @requests << [stream, [key, value]]
16 | @responses.shift || []
17 | end
18 |
19 | def put_records(stream, items)
20 | @requests << [stream, items]
21 | @responses.shift || []
22 | end
23 | end
24 |
25 | class TestingProducer < Telekinesis::Producer::SyncProducer
26 | end
27 |
28 | context "SyncProducer" do
29 |
30 | context ".create" do
31 | setup do
32 | @sync_producer = Telekinesis::Producer::SyncProducer.create(stream: 'stream')
33 | end
34 |
35 | should "return a SyncProducer" do
36 | assert_equal(@sync_producer.class, ::Telekinesis::Producer::SyncProducer)
37 | end
38 | end
39 |
40 | context "#put" do
41 | setup do
42 | @expected_response = StubPutRecordResponse.new(123, 123)
43 | @client = StubClient.new(@expected_response)
44 | @producer = TestingProducer.new('stream', @client)
45 | end
46 |
47 | should "call the underlying client's put_record" do
48 | assert_equal(@expected_response, @producer.put('key', 'value'))
49 | assert_equal(['stream', ['key', 'value']], @client.requests.first)
50 | end
51 | end
52 |
53 | context "#put_all" do
54 | context "with an empty argument" do
55 | setup do
56 | @client = StubClient.new([])
57 | @producer = TestingProducer.new('stream', @client)
58 | @actual_failures = @producer.put_all([])
59 | end
60 |
61 | should "send no data" do
62 | assert(@client.requests.empty?)
63 | assert(@actual_failures.empty?)
64 | end
65 | end
66 |
67 | context "with an argument smaller than :send_size" do
68 | setup do
69 | @send_size = 30
70 | @items = (@send_size - 1).times.map{|i| ["key-#{i}", "value-#{i}"]}
71 | end
72 |
73 | context "when no records fail" do
74 | setup do
75 | @client = StubClient.new([])
76 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size})
77 | @actual_failures = @producer.put_all(@items)
78 | end
79 |
80 | should "send one batch and return nothing" do
81 | assert(@actual_failures.empty?)
82 | assert_equal([['stream', @items]], @client.requests)
83 | end
84 | end
85 |
86 | context "when some records fail" do
87 | setup do
88 | @client = StubClient.new([["key-2", "value-2", "fake error", "message"]])
89 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size})
90 | @actual_failures = @producer.put_all(@items)
91 | end
92 |
93 | should "call on_record_failure" do
94 | assert_equal([['stream', @items]], @client.requests)
95 | assert_equal([["key-2", "value-2", "fake error", "message"]], @actual_failures)
96 | end
97 | end
98 | end
99 |
100 | context "with an argument larger than :send_size" do
101 | setup do
102 | @send_size = 30
103 | @items = (@send_size + 3).times.map{|i| ["key-#{i}", "value-#{i}"]}
104 | # expected_requests looks like:
105 | # [
106 | # ['stream', [[k1, v1], [k2, v2], ...]],
107 | # ['stream', [[kn, vn], [k(n+1), v(n+1)], ...]]
108 | # ]
109 | @expected_requests = @items.each_slice(@send_size).map{|batch| ['stream', batch]}
110 | end
111 |
112 | context "when no records fail" do
113 | setup do
114 | @client = StubClient.new([])
115 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size})
116 | @actual_failures = @producer.put_all(@items)
117 | end
118 |
119 | should "send multiple batches and return nothing" do
120 | assert(@actual_failures.empty?)
121 | assert_equal(@expected_requests, @client.requests)
122 | end
123 | end
124 |
125 | context "when some records fail" do
126 | setup do
127 |             @error_responses = [
128 |               [["k1", "v1", "err", "message"], ["k2", "v2", "err", "message"]],
129 |               [["k-next", "v-next", "err", "message"]]
130 |             ]
131 |             @expected_failures = @error_responses.flat_map {|x| x }
132 |
133 |             @client = StubClient.new(*@error_responses)
134 | @producer = TestingProducer.new('stream', @client, {send_size: @send_size})
135 | @actual_failures = @producer.put_all(@items)
136 | end
137 |
138 | should "return the failures" do
139 | assert_equal(@expected_requests, @client.requests)
140 | assert_equal(@expected_failures, @actual_failures)
141 | end
142 | end
143 | end
144 | end
145 | end
146 | end
147 |
--------------------------------------------------------------------------------
/ext/src/main/java/com/kickstarter/jruby/Telekinesis.java:
--------------------------------------------------------------------------------
1 | package com.kickstarter.jruby;
2 |
3 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
4 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
5 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;
6 | import com.amazonaws.services.kinesis.clientlibrary.types.InitializationInput;
7 | import com.amazonaws.services.kinesis.clientlibrary.types.ProcessRecordsInput;
8 | import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownInput;
9 |
10 | import java.util.concurrent.ExecutorService;
11 |
12 | /**
13 | * A shim that makes it possible to use the Kinesis Client Library from JRuby.
14 |  * Without the shim, the {@code initialize} method in
15 | * {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor}
16 | * conflicts with the special {@code initialize} method in Ruby. The shim
17 | * interface renames {@code initialize} to {@code init}.
18 |  *
19 |  * For convenience a
20 |  * {@link #newWorker(KinesisClientLibConfiguration, ExecutorService, AmazonDynamoDB, IRecordProcessorFactory)}
21 |  * method is provided, so you can use closure conversion in JRuby to specify
22 |  * an {@link IRecordProcessorFactory}. For example:
23 |  *
24 |  * <pre>{@code
25 |  * executor = config[:executor] || nil
26 |  *
27 |  * com.kickstarter.jruby.Telekinesis.new_worker(my_config, executor, dynamo_client) do
28 |  *   MyRecordProcessor.new(some_thing, some_other_thing)
29 |  * end
30 |  * }</pre>
31 |  */
34 | public class Telekinesis {
35 | /**
36 | * Create a new KCL {@link Worker} that processes records using the given
37 | * {@link ExecutorService}, {@link IRecordProcessorFactory}, and
38 | * {@link AmazonDynamoDB}.
39 | */
40 | public static Worker newWorker(final KinesisClientLibConfiguration config,
41 | final ExecutorService executor,
42 | final AmazonDynamoDB dynamoClient,
43 | final IRecordProcessorFactory factory) {
44 | com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory v2Factory = new com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory() {
45 | @Override
46 | public com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor createProcessor() {
47 | return new RecordProcessorShim(factory.createProcessor());
48 | }
49 | };
50 |
51 | return new Worker.Builder()
52 | .recordProcessorFactory(v2Factory)
53 | .config(config)
54 | .execService(executor) // NOTE: .execService(null) is a no-op
55 | .dynamoDBClient(dynamoClient)
56 | .build();
57 | }
58 |
59 | // ========================================================================
60 | /**
61 | * A shim that wraps a {@link IRecordProcessor} so it can get used by the KCL.
62 | */
63 | private static class RecordProcessorShim implements com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor {
64 | private final IRecordProcessor underlying;
65 |
66 | public RecordProcessorShim(final IRecordProcessor underlying) { this.underlying = underlying; }
67 |
68 | @Override
69 | public void initialize(final InitializationInput initializationInput) {
70 | underlying.init(initializationInput);
71 | }
72 |
73 | @Override
74 | public void processRecords(final ProcessRecordsInput processRecordsInput) {
75 | underlying.processRecords(processRecordsInput);
76 | }
77 |
78 | @Override
79 | public void shutdown(final ShutdownInput shutdownInput) {
80 | underlying.shutdown(shutdownInput);
81 | }
82 | }
83 |
84 | /**
85 | * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor}
86 | * that avoids naming conflicts with reserved words in Ruby.
87 | */
88 | public static interface IRecordProcessor {
89 | /**
90 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#initialize(InitializationInput)
91 | */
92 | void init(InitializationInput initializationInput);
93 |
94 | /**
95 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#processRecords(ProcessRecordsInput)
96 | */
97 | void processRecords(ProcessRecordsInput processRecordsInput);
98 |
99 | /**
100 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor#shutdown(ShutdownInput)
101 | */
102 | void shutdown(ShutdownInput shutdownInput);
103 | }
104 |
105 | /**
106 | * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory}
107 | * for {@link IRecordProcessor}.
108 | */
109 | public static interface IRecordProcessorFactory {
110 | /**
111 | * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory#createProcessor()
112 | */
113 | IRecordProcessor createProcessor();
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/lib/telekinesis/consumer/kcl.rb:
--------------------------------------------------------------------------------
1 | module Telekinesis
2 | module Consumer
3 | java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
4 | java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration
5 |
6 | class KCL
7 | # Create a new consumer that consumes data from a Kinesis stream using the
8 | # AWS Kinesis Client Library.
9 | #
10 |       # The KCL uses DynamoDB to register clients as part of an application
11 | # and evenly distribute work between all of the clients registered for
12 | # the same application. See the AWS Docs for more information:
13 | #
14 | # http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumer-apps-with-kcl.html
15 | #
16 | # KCLs are configured with a hash. The Kinesis `:stream` to consume from
17 | # is required.
18 | #
19 |       # KCL clients operate in groups. All consumers with the same `:app` id use
20 |       # DynamoDB to attempt to distribute work evenly among themselves. The
21 |       # `:worker_id` is used to distinguish individual clients, and defaults to
22 |       # the current hostname. If you plan to run more than one KCL client in the
23 |       # same `:app` on the same host, make sure you set `:worker_id` to something
24 |       # unique!
25 | #
26 | # Clients interested in configuring their own AmazonDynamoDB client may
27 | # pass an instance as the second argument. If not configured, the client
28 | # will use a default AWS configuration.
29 | #
30 | # Any other valid KCL Worker `:options` may be passed as a nested hash.
31 | #
32 | # For example, to configure a `tail` app on `some-stream` and use the
33 | # default `:worker_id`, you might pass the following configuration to your
34 | # KCL.
35 | #
36 | # config = {
37 | # app: 'tail',
38 | # stream: 'some-stream',
39 | # options: {initial_position_in_stream: 'TRIM_HORIZON'}
40 | # }
41 | #
42 | # To actually process the stream, a KCL client creates record processors.
43 | # These are objects that correspond to the KCL's RecordProcessor
44 | # interface - processors must implement `init`, `process_records`, and
45 | # `shutdown` methods.
46 | #
47 | # http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kcl-java-interface-v2
48 | #
49 | # To specify which record processor to create, pass a block to your
50 |       # distributed consumer that returns a new record processor. This block
51 | # may (nay, WILL) be called from a background thread so make sure that
52 | # it's thread-safe.
53 | #
54 | # Telekinesis provides a BaseProcessor that implements no-op versions
55 | # of all of the required methods to make writing quick processors easier
56 | # and a Block processor that executes the given block every time
57 | # `process_records` is called.
58 | #
59 | # To write a simple stream tailer, you might use Block as follows:
60 | #
61 | # kcl_worker = Telekinesis::Consumer::KCL.new(config) do
62 |       #     Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind_latest|
63 | # records.each{|r| puts r}
64 | # $stderr.puts "#{millis_behind_latest} ms behind"
65 | # checkpointer.checkpoint
66 | # end
67 | # end
68 | #
69 | # kcl_worker.run
70 | #
71 | def initialize(config, dynamo_client = nil, &block)
72 | raise ArgumentError, "No block given!" unless block_given?
73 | kcl_config = self.class.build_config(config)
74 | @under = com.kickstarter.jruby.Telekinesis.new_worker(kcl_config, config[:executor], dynamo_client, &block)
75 | end
76 |
77 | # Return the underlying KCL worker. It's a java.lang.Runnable.
78 | def as_runnable
79 | @under
80 | end
81 |
82 | # Start the KCL worker. If background is set to `true`, the worker is
83 | # started in its own JRuby Thread and the Thread is returned. Otherwise,
84 | # starts in the current thread and returns nil.
85 | def run(background = false)
86 | if background
87 | Thread.new { @under.run }
88 | else
89 | @under.run
90 | end
91 | end
92 |
93 | protected
94 |
95 | def self.build_config(config)
96 | creds_hash = config.fetch(:credentials, {})
97 | credentials_provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(creds_hash)
98 |
99 | # App and Stream are mandatory.
100 | app, stream = [:app, :stream].map do |k|
101 | raise ArgumentError, "#{k} is required" unless config.include?(k)
102 | config[k]
103 | end
104 |
105 | # Use this host as the worker_id by default.
106 | worker_id = config.fetch(:worker_id, `hostname`.chomp)
107 |
108 | KinesisClientLibConfiguration.new(app, stream, credentials_provider, worker_id).tap do |kcl_config|
109 | config.fetch(:options, {}).each do |k, v|
110 | # Handle initial position in stream separately. It's the only option
111 | # that requires a value conversion.
112 | if k.to_s == 'initial_position_in_stream'
113 | kcl_config.with_initial_position_in_stream(InitialPositionInStream.value_of(v))
114 | else
115 | setter = "with_#{k}".to_sym
116 | if kcl_config.respond_to?(setter)
117 | kcl_config.send(setter, v)
118 | end
119 | end
120 | end
121 | end
122 | end
123 | end
124 | end
125 | end
126 |
--------------------------------------------------------------------------------
/lib/telekinesis/producer/async_producer.rb:
--------------------------------------------------------------------------------
1 | require "telekinesis/producer/async_producer_worker"
2 |
3 | module Telekinesis
4 | module Producer
5 | java_import java.util.concurrent.TimeUnit
6 | java_import java.util.concurrent.Executors
7 | java_import java.util.concurrent.ArrayBlockingQueue
8 | java_import com.google.common.util.concurrent.ThreadFactoryBuilder
9 |
10 |     # An asynchronous producer that buffers events into a queue and uses one
11 |     # or more background threads to send them to Kinesis. Only available on JRuby.
12 | #
13 | # This class is thread-safe.
14 | class AsyncProducer
15 | # For convenience
16 | MAX_PUT_RECORDS_SIZE = Telekinesis::Aws::KINESIS_MAX_PUT_RECORDS_SIZE
17 |
18 | attr_reader :stream, :client, :failure_handler
19 |
20 | # Create a new producer.
21 | #
22 | # AWS credentials may be specified by using the `:credentials` option and
23 | # passing a hash containing your `:access_key_id` and `:secret_access_key`.
24 | # If unspecified, credentials will be fetched from the environment, an
25 | # ~/.aws/credentials file, or the current instance metadata.
26 | #
27 | # The producer's `:worker_count`, internal `:queue_size`, the `:send_size`
28 | # of batches to Kinesis and how often workers send data to Kinesis, even
29 | # if their batches aren't full (`:send_every_ms`) can be configured as
30 | # well. They all have reasonable defaults.
31 | #
32 | # When requests to Kinesis fail, the configured `:failure_handler` will
33 | # be called. If you don't specify a failure handler, a NoopFailureHandler
34 | # is used.
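      #
      # A sketch of typical usage (the option values here are illustrative):
      #
      #   producer = AsyncProducer.create(
      #     stream: 'events',
      #     worker_count: 2,
      #     send_every_ms: 500,
      #     failure_handler: MyFailureHandler.new
      #   )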
35 | def self.create(options = {})
36 | stream = options[:stream]
37 | client = Telekinesis::Aws::Client.build(options.fetch(:credentials, {}))
38 | failure_handler = options.fetch(:failure_handler, NoopFailureHandler.new)
39 | new(stream, client, failure_handler, options)
40 | end
41 |
42 | # Construct a new producer. Intended for internal use only - prefer
43 | # #create unless it's strictly necessary.
44 | def initialize(stream, client, failure_handler, options = {})
45 | @stream = stream or raise ArgumentError, "stream may not be nil"
46 | @client = client or raise ArgumentError, "client may not be nil"
47 | @failure_handler = failure_handler or raise ArgumentError, "failure_handler may not be nil"
48 | @shutdown = false
49 |
50 | queue_size = options.fetch(:queue_size, 1000)
51 | send_every = options.fetch(:send_every_ms, 1000)
52 | worker_count = options.fetch(:worker_count, 1)
53 |         raise ArgumentError, ":worker_count must be > 0" unless worker_count > 0
54 |         send_size = options.fetch(:send_size, MAX_PUT_RECORDS_SIZE)
55 |         raise ArgumentError, ":send_size too large" if send_size > MAX_PUT_RECORDS_SIZE
56 |         retries = options.fetch(:retries, 5)
57 |         raise ArgumentError, ":retries must be >= 0" unless retries >= 0
58 |         retry_interval = options.fetch(:retry_interval, 1.0)
59 |         raise ArgumentError, ":retry_interval must be > 0" unless retry_interval > 0
60 |
61 | # NOTE: For testing.
62 | @queue = options[:queue] || ArrayBlockingQueue.new(queue_size)
63 |
64 | @lock = Telekinesis::JavaUtil::ReadWriteLock.new
65 | @worker_pool = build_executor(worker_count)
66 | @workers = worker_count.times.map do
67 | AsyncProducerWorker.new(self, @queue, send_size, send_every, retries, retry_interval)
68 | end
69 |
70 | # NOTE: Start by default. For testing.
71 | start unless options.fetch(:manual_start, false)
72 | end
73 |
74 | # Put a single key, value pair to Kinesis. Both key and value must be
75 | # strings.
76 | #
77 | # This call returns immediately and returns true iff the producer is still
78 | # accepting data. Data is put to Kinesis in the background.
79 | def put(key, data)
80 | put_all(key => data)
81 | end
82 |
83 | # Put all of the given key, value pairs to Kinesis. Both key and value
84 | # must be Strings.
85 | #
86 | # This call returns immediately and returns true iff the producer is still
87 | # accepting data. Data is put to Kinesis in the background.
88 | def put_all(items)
89 | # NOTE: The lock ensures that no new data can be added to the queue after
90 | # the shutdown flag has been set. See the note in shutdown for details.
91 | @lock.read_lock do
92 | if @shutdown
93 | false
94 | else
95 | items.each do |key, data|
96 | @queue.put([key, data])
97 | end
98 | true
99 | end
100 | end
101 | end
102 |
103 | # Shut down this producer. After the call completes, the producer will not
104 | # accept any more data, but will finish processing any data it has
105 | # buffered internally.
106 | #
107 | # If block = true is passed, this call will block and wait for the producer
108 | # to shut down before returning. This wait times out after duration has
109 | # passed.
110 | def shutdown(block = false, duration = 2, unit = TimeUnit::SECONDS)
111 | # NOTE: Since a write_lock is exclusive, this prevents any data from being
112 | # added to the queue while the SHUTDOWN tokens are being inserted. Without
113 | # the lock, data can end up in the queue behind all of the shutdown tokens
114 |       # and be lost. This happens if the shutdown flag is flipped by a thread
115 |       # calling shutdown after another thread has checked the "if @shutdown"
116 |       # condition in put but before it has called queue.put.
117 | @lock.write_lock do
118 | @shutdown = true
119 | @workers.size.times do
120 | @queue.put(AsyncProducerWorker::SHUTDOWN)
121 | end
122 | end
123 |
124 | # Don't interrupt workers by calling shutdown_now.
125 | @worker_pool.shutdown
126 | await(duration, unit) if block
127 | end
128 |
129 | # Wait for this producer to shutdown.
130 | def await(duration, unit = TimeUnit::SECONDS)
131 | @worker_pool.await_termination(duration, unit)
132 | end
133 |
134 | # Return the number of events currently buffered by this producer. This
135 | # doesn't include any events buffered in workers that are currently on
136 | # their way to Kinesis.
137 | def queue_size
138 | @queue.size
139 | end
140 |
141 | protected
142 |
143 | def start
144 | @workers.each do |w|
145 | @worker_pool.java_send(:submit, [java.lang.Runnable.java_class], w)
146 | end
147 | end
148 |
149 | def build_executor(worker_count)
150 | Executors.new_fixed_thread_pool(
151 | worker_count,
152 | ThreadFactoryBuilder.new.set_name_format("#{stream}-producer-worker-%d").build
153 | )
154 | end
155 | end
156 | end
157 | end
158 |
--------------------------------------------------------------------------------
/test/producer/test_async_producer_worker.rb:
--------------------------------------------------------------------------------
1 | require_relative "test_helper"
2 |
3 | class AsyncProducerWorkerTest < Minitest::Test
4 | java_import java.util.concurrent.TimeUnit
5 | java_import java.util.concurrent.ArrayBlockingQueue
6 |
7 | def string_from_bytebuffer(bb)
8 | String.from_java_bytes bb.array
9 | end
10 |
11 | class UnretryableAwsError < com.amazonaws.AmazonClientException
12 | def is_retryable
13 | false
14 | end
15 | end
16 |
17 | class CapturingFailureHandler
18 | attr_reader :retries, :final_err
19 |
20 | def initialize
21 | @retries = 0
22 | end
23 |
24 | def failed_records
25 | @failed_records ||= []
26 | end
27 |
28 | def on_record_failure(fails)
29 | failed_records << fails
30 | end
31 |
32 | def on_kinesis_retry(error, items)
33 | @retries += 1
34 | end
35 |
36 | def on_kinesis_failure(error, items)
37 | @final_err = [error, items]
38 | end
39 | end
40 |
41 | StubProducer = Struct.new(:stream, :client, :failure_handler)
42 |
43 | # NOTE: This stub mocks the behavior of timing out on poll once all of the
44 | # items have been drained from the internal list.
45 | class StubQueue
46 | def initialize(items)
47 | @items = items
48 | end
49 |
50 | def poll(duration, unit)
51 | @items.shift
52 | end
53 | end
54 |
55 | # A wrapper over ABQ that inserts shutdown into itself after a given number
56 | # of calls to poll. Not thread-safe.
57 | class ShutdownAfterQueue
58 | def initialize(shutdown_after)
59 | @shutdown_after = shutdown_after
60 | @called = 0
61 | @under = ArrayBlockingQueue.new(10)
62 | end
63 |
64 | def poll(duration, unit)
65 | @called += 1
66 | if @called > @shutdown_after
67 | @under.put(Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN)
68 | end
69 | @under.poll(duration, unit)
70 | end
71 | end
72 |
73 | class CapturingClient
74 | attr_reader :requests
75 |
76 | def initialize(responses)
77 | @requests = ArrayBlockingQueue.new(1000)
78 | @responses = responses
79 | end
80 |
81 | def put_records(stream, items)
82 | @requests.put([stream, items])
83 | @responses.shift || []
84 | end
85 | end
86 |
87 | class ExplodingClient
88 | def initialize(exception)
89 | @exception = exception
90 | end
91 |
92 | def put_records(stream, items)
93 | raise @exception
94 | end
95 | end
96 |
97 | def stub_producer(stream, responses = [])
98 | StubProducer.new(stream, CapturingClient.new(responses), CapturingFailureHandler.new)
99 | end
100 |
101 | # NOTE: This always adds SHUTDOWN to the end of the list so that the worker
102 | # can be run in the test thread and there's no need to deal with coordination
103 | # across multiple threads. To simulate the worker timing out on a queue.poll
104 | # just add 'nil' to your list of items in the queue at the appropriate place.
105 | def queue_with(*items)
106 | to_put = items + [Telekinesis::Producer::AsyncProducerWorker::SHUTDOWN]
107 | StubQueue.new(to_put)
108 | end
109 |
110 | def build_worker
111 | Telekinesis::Producer::AsyncProducerWorker.new(
112 | @producer,
113 | @queue,
114 | @send_size,
115 | @send_every,
116 | @retries,
117 | @retry_interval
118 | )
119 | end
120 |
121 | def records_as_kv_pairs(request)
122 | request.records.map{|r| [r.partition_key, string_from_bytebuffer(r.data)]}
123 | end
124 |
125 | context "producer worker" do
126 | setup do
127 | @send_size = 10
128 | @send_every = 100 # ms
129 | @retries = 4
130 | @retry_interval = 0.01
131 | end
132 |
133 | context "with only SHUTDOWN in the queue" do
134 | setup do
135 | @producer = stub_producer('test')
136 | @queue = queue_with() # shutdown is always added
137 | @worker = build_worker
138 | end
139 |
140 | should "shut down the worker" do
141 | @worker.run
142 | assert(@worker.instance_variable_get(:@shutdown))
143 | end
144 | end
145 |
146 | context "with [item, SHUTDOWN] in the queue" do
147 | setup do
148 | @producer = stub_producer('test')
149 | @queue = queue_with(
150 | ["key", "value"],
151 | )
152 | @worker = build_worker
153 | end
154 |
155 | should "put data before shutting down the worker" do
156 | @worker.run
157 | stream, items = @producer.client.requests.first
158 | assert_equal(stream, 'test', "request should have the correct stream name")
159 | assert_equal([["key", "value"]], items, "Request payload should be kv pairs")
160 | end
161 | end
162 |
163 | context "with nothing in the queue" do
164 | setup do
165 | @producer = stub_producer('test')
166 | @queue = ShutdownAfterQueue.new(5)
167 | @worker = build_worker
168 | @starting_put_at = @worker.instance_variable_get(:@last_poll_at)
169 | end
170 |
171 | should "update the internal last_poll_at counter and sleep on poll" do
172 | @worker.run
173 | refute_equal(@starting_put_at, @worker.instance_variable_get(:@last_poll_at))
174 | end
175 | end
176 |
177 | context "with buffered data that times out" do
178 | setup do
179 | @items = [["key", "value"]]
180 |
181 | @producer = stub_producer('test')
182 | # Explicitly add 'nil' to fake the queue being empty
183 | @queue = queue_with(*(@items + [nil]))
184 | @worker = build_worker
185 | end
186 |
187 | should "send whatever is in the queue" do
188 | @worker.run
189 | stream, items = @producer.client.requests.first
190 | assert_equal('test', stream, "request should have the correct stream name")
191 | assert_equal(items, @items, "Request payload should be kv pairs")
192 | end
193 | end
194 |
195 | context "with fewer than send_size items in queue" do
196 | setup do
197 | num_items = @send_size - 1
198 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
199 |
200 | @producer = stub_producer('test')
201 | @queue = queue_with(*@items)
202 | @worker = build_worker
203 | end
204 |
205 | should "send one request" do
206 | @worker.run
207 | stream, items = @producer.client.requests.first
208 | assert_equal('test', stream, "request should have the correct stream name")
209 | assert_equal(@items, items, "Request payload should be kv pairs")
210 | end
211 | end
212 |
213 | context "with more than send_size items in queue" do
214 | setup do
215 | num_items = (@send_size * 2) - 1
216 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
217 |
218 | @producer = stub_producer('test')
219 | @queue = queue_with(*@items)
220 | @worker = build_worker
221 | end
222 |
223 | should "send multiple requests of at most send_size" do
224 | @worker.run
225 | expected = @items.each_slice(@send_size).to_a
226 | expected.zip(@producer.client.requests) do |kv_pairs, (stream, batch)|
227 | assert_equal('test', stream, "Request should have the correct stream name")
228 | assert_equal(batch, kv_pairs, "Request payload should be kv pairs")
229 | end
230 | end
231 | end
232 |
233 | context "when some records return an unretryable error response" do
234 | setup do
235 | num_items = @send_size - 1
236 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
237 | @failed_items = @items.each_with_index.map do |item, idx|
238 | if idx.even?
239 | k, v = item
240 | [k, v, "some_code", "message"]
241 | else
242 | nil
243 | end
244 | end
245 | @failed_items.compact!
246 |
247 | @producer = stub_producer('test', [@failed_items])
248 | @queue = queue_with(*@items)
249 | @worker = build_worker
250 | end
251 |
252 | should "call the failure handler with all failed records" do
253 | @worker.run
254 | assert_equal([@failed_items], @producer.failure_handler.failed_records)
255 | end
256 | end
257 |
258 | context "when some records return a retryable error response" do
259 | setup do
260 | num_items = @send_size - 1
261 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
262 | @failed_items = @items.each_with_index.map do |item, idx|
263 | if idx.even?
264 | k, v = item
265 | [k, v, "InternalFailure", "message"]
266 | else
267 | nil
268 | end
269 | end
270 | @failed_items.compact!
271 |
272 | @producer = stub_producer('test', [@failed_items, []])
273 | @queue = queue_with(*@items)
274 | @worker = build_worker
275 | end
276 |
277 | should "not call the failure handler with any failed records" do
278 | @worker.run
279 | assert_equal([], @producer.failure_handler.failed_records)
280 | end
281 |
282 | should "retry the request" do
283 | @worker.run
284 | assert_equal(2, @producer.client.requests.size)
285 | end
286 | end
287 |
288 | context "when retryable responses fail too many times" do
289 | setup do
290 | num_items = @send_size - 1
291 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
292 | @failed_items = @items.each_with_index.map do |item, idx|
293 | if idx.even?
294 | k, v = item
295 | [k, v, "InternalFailure", "message"]
296 | else
297 | nil
298 | end
299 | end
300 | @failed_items.compact!
301 |
302 | @producer = stub_producer('test', [@failed_items] * (@retries + 1))
303 | @queue = queue_with(*@items)
304 | @worker = build_worker
305 | end
306 |
307 | should "call the failure handler with all failed records" do
308 | @worker.run
309 | assert_equal([@failed_items], @producer.failure_handler.failed_records)
310 | end
311 |
312 | should "retry the request" do
313 | @worker.run
314 | assert_equal(@retries, @producer.client.requests.size)
315 | end
316 | end
317 |
318 | context "with a mix of retryable error responses" do
319 | setup do
320 | num_items = @send_size - 1
321 | @items = num_items.times.map{|i| ["key-#{i}", "value-#{i}"]}
322 | @first_response = @items.each_with_index.map do |item, idx|
323 | k, v = item
324 | [k, v, idx.even? ? "InternalFailure" : "WHATEVER", "message"]
325 | end
326 | @did_retry = @first_response.select{|_, _, m, _| m == "InternalFailure"}
327 | @no_retry = @first_response.select{|_, _, m, _| m == "WHATEVER"}
328 |
329 | @producer = stub_producer('test', [@first_response, []])
330 | @queue = queue_with(*@items)
331 | @worker = build_worker
332 | end
333 |
334 | should "retry the request" do
335 | @worker.run
336 | assert_equal(2, @producer.client.requests.size)
337 | _, items = @producer.client.requests.to_a.last
338 | assert_equal(@did_retry.map{|k, v, _, _| [k, v]}, items)
339 | end
340 |
341 | should "call the failure handler with only the records that failed" do
342 | @worker.run
343 | assert_equal([@no_retry], @producer.failure_handler.failed_records)
344 | end
345 | end
346 |
347 | context "when the client throws a retryable exception" do
348 | setup do
349 | @boom = Telekinesis::Aws::KinesisError.new(com.amazonaws.AmazonClientException.new("boom"))
350 | @producer = StubProducer.new(
351 | 'stream',
352 | ExplodingClient.new(@boom),
353 | CapturingFailureHandler.new
354 | )
355 | @queue = queue_with(['foo', 'bar'])
356 | @worker = build_worker
357 | end
358 |
359 | should "call the failure handler on retries and errors" do
360 | @worker.run
361 | assert_equal((@retries - 1), @producer.failure_handler.retries)
362 | err, items = @producer.failure_handler.final_err
363 | assert_equal(@boom, err)
364 | assert_equal([['foo', 'bar']], items)
365 | end
366 | end
367 |
368 | context "when the client throws an unretryable exception" do
369 | setup do
370 | @boom = Telekinesis::Aws::KinesisError.new(UnretryableAwsError.new("boom"))
371 | @producer = StubProducer.new(
372 | 'stream',
373 | ExplodingClient.new(@boom),
374 | CapturingFailureHandler.new
375 | )
376 | @queue = queue_with(['foo', 'bar'])
377 | @worker = build_worker
378 | end
379 |
380 | should "call the failure handler on error but not on retry" do
381 | @worker.run
382 | assert_equal(0, @producer.failure_handler.retries)
383 | err, items = @producer.failure_handler.final_err
384 | assert_equal(@boom, err)
385 | assert_equal([['foo', 'bar']], items)
386 | end
387 | end
388 |
389 | end
390 | end
391 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Looking for Maintainers
2 |
3 | We're not actively maintaining this project. If you're interested in maintaining it, please post a comment on [this issue](https://github.com/kickstarter/telekinesis/issues/22).
4 |
5 | ## Table of Contents
6 |
7 | - [Telekinesis](#telekinesis)
8 | - [Requirements](#requirements)
9 | - [Installing](#installing)
10 | - [Producers](#producers)
11 | - [SyncProducer](#syncproducer)
12 | - [AsyncProducer](#asyncproducer)
13 | - [Consumers](#consumers)
14 | - [KCL](#kcl)
15 | - [Client State](#client-state)
16 | - [Errors while processing records](#errors-while-processing-records)
17 | - [Checkpoints and `INITIAL_POSITION_IN_STREAM`](#checkpoints-and-initial_position_in_stream)
18 | - [Java client logging](#java-client-logging)
20 | - [Building](#building)
21 | - [Prerequisites](#prerequisites)
22 | - [Build](#build)
23 | - [Testing](#testing)
24 | - [License](#license)
25 |
26 | # Telekinesis
27 |
28 | Telekinesis is a high-level client for Amazon Kinesis.
29 |
30 | The library provides a high-throughput asynchronous producer and wraps the
31 | [Kinesis Client Library](https://github.com/awslabs/amazon-kinesis-client) to
32 | provide an easy interface for writing consumers.
33 |
34 | ## Requirements
35 |
36 | Telekinesis runs on JRuby 1.7.x or later, with at least Java 6.
37 |
38 | If you want to build from source, you need to have Apache Maven installed.
39 |
40 | ## Installing
41 |
42 | ```
43 | gem install telekinesis
44 | ```
45 |
46 | ## Producers
47 |
48 | Telekinesis includes two high-level
49 | [Producers](http://docs.aws.amazon.com/kinesis/latest/dev/amazon-kinesis-producers.html).
50 |
51 | Telekinesis assumes that records are `[key, value]` pairs of strings. The key
52 | *must* be a string as enforced by Kinesis itself. Keys are used by the service
53 | to partition data into shards. Values can be any old blob of data, but for
54 | simplicity, Telekinesis expects strings.
55 |
56 | Both keys and values should respect any Kinesis
57 | [limits](http://docs.aws.amazon.com/kinesis/latest/dev/service-sizes-and-limits.html)
58 | and all of the [restrictions](http://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html)
59 | in the PutRecords API documentation.
60 |
61 | ### SyncProducer
62 |
63 | The `SyncProducer` sends data to Kinesis every time `put` or `put_all`
64 | is called. These calls will block until the call to Kinesis returns.
65 |
66 |
67 | ```ruby
68 | require 'telekinesis'
69 |
70 | producer = Telekinesis::Producer::SyncProducer.create(
71 | stream: 'my stream',
72 | credentials: {
73 | access_key_id: 'foo',
74 | secret_access_key: 'bar'
75 | }
76 | )
77 | ```
78 |
79 | Calls to `put` send a single record at a time to Kinesis, while calls to
80 | `put_all` can send up to 500 records at a time, which is the Kinesis service
81 | limit. If more than 500 records are passed to `put_all` they're grouped into
82 | batches and sent.
83 |
84 | > NOTE: To send fewer records to Kinesis at a time when using `put_all`, you
85 | > can adjust the `:send_size` parameter in the `create` method.
86 |
87 | Using `put_all` over `put` is recommended if you have any way to batch your
88 | data. Since each Kinesis request is an HTTP round trip with relatively high
89 | latency, batching is the easiest way to increase throughput.
90 |
91 | ```ruby
92 | # file is an instance of File containing CSV data that looks like:
93 | #
94 | # "some,very,important,data,with,a,partition_key"
95 | #
96 | lines = file.lines.map do |line|
97 | key = line.split(/,/).last
98 | data = line
99 | [key, data]
100 | end
101 |
102 | # One record at a time
103 | lines.each do |key, data|
104 | producer.put(key, data)
105 | end
106 |
107 | # Manually control your batches
108 | lines.each_slice(200) do |batch|
109 | producer.put_all(batch)
110 | end
111 |
112 | # Go hog wild
113 | producer.put_all(lines.to_a)
114 | ```
115 |
116 | When something goes wrong and the Kinesis client throws an exception, it bubbles
117 | up as a `Telekinesis::Aws::KinesisError` with the underlying exception accessible
118 | as the `cause` field.
119 |
120 | When some of (but maybe not all of) the records passed to `put_all` cause
121 | problems, they're returned as an array of
122 | `[key, value, error_code, error_message]` tuples.
123 |
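For example, a minimal sketch of handling both failure modes (assuming the
`producer` and `lines` from above; `save_for_later` is a hypothetical helper):

```ruby
begin
  # put_all returns a (possibly empty) array of
  # [key, value, error_code, error_message] tuples for rejected records.
  producer.put_all(lines.to_a).each do |key, value, code, message|
    warn "record failed: #{code} #{message}"
    save_for_later([key, value]) # hypothetical
  end
rescue Telekinesis::Aws::KinesisError => e
  # The whole request failed; the underlying Java exception is e.cause.
  warn "request failed: #{e.cause}"
end
```
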
124 | ### AsyncProducer
125 |
126 | The `AsyncProducer` queues events internally and uses background threads to send
127 | data to Kinesis. Data is sent when a batch reaches the Kinesis limit of 500,
128 | when the producer's timeout is reached, or when the producer is shut down.
129 |
130 | > NOTE: You can configure the size at which a batch is sent by passing the
131 | > `:send_size` parameter to create. The producer's internal timeout can be
132 | > set by using the `:send_every_ms` parameter.
133 |
134 | The API for the `AsyncProducer` looks similar to the `SyncProducer`. However,
135 | all `put` and `put_all` calls return immediately. Both `put` and `put_all`
136 | return `true` if the producer enqueued the data for sending later, and `false`
137 | if the producer is not accepting data for any reason. If the producer's internal
138 | queue fills up, calls to `put` and `put_all` will block.
139 |
140 | Since sending (and therefore failures) happen in a different thread, you can
141 | provide an `AsyncProducer` with a failure handler that's called whenever
142 | something bad happens.
143 |
144 | ```ruby
145 | require 'telekinesis'
146 |
147 | class MyFailureHandler
148 | def on_record_failure(kv_pairs_and_errors)
149 | items = kv_pairs_and_errors.map do |k, v, code, message|
150 | maybe_log_error(code, message)
151 | [k, v]
152 | end
153 | save_for_later(items)
154 | end
155 |
156 |   def on_kinesis_retry(err, items)
157 |     log_exception(err.cause)
158 |   end
159 |
160 |   def on_kinesis_failure(err, items)
161 |     log_exception(err.cause)
162 |     save_for_later(items)
163 |   end
164 | end
161 |
162 | producer = Telekinesis::Producer::AsyncProducer.create(
163 | stream: 'my stream',
164 | failure_handler: MyFailureHandler.new,
165 | send_every_ms: 1500,
166 | credentials: {
167 | access_key_id: 'foo',
168 | secret_access_key: 'bar'
169 | }
170 | )
171 | ```
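
Once you're done with an `AsyncProducer`, shut it down so it flushes any
buffered data. A short sketch of the lifecycle:

```ruby
producer.put("some-key", "some-data") # => true while the producer accepts data

# Stop accepting new data and finish sending whatever is buffered. Passing
# `true` blocks until the background workers shut down (2 second default
# timeout).
producer.shutdown(true)

producer.put("some-key", "more-data") # => false after shutdown
```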
172 |
173 | ## Consumers
174 |
175 | ### KCL
176 |
177 | `Telekinesis::Consumer::KCL` is a wrapper around Amazon's [Kinesis Client
178 | Library (also called the KCL)](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html#kinesis-record-processor-overview-kcl).
179 |
180 | Each KCL instance is part of a group of consumers that make up an
181 | _application_. An application can be running on any number of hosts in any
182 | number of processes. Consumers identify themselves uniquely within an
183 | application by specifying a `worker_id`.
184 |
185 | All of the consumers within an application attempt to distribute work evenly
186 | between themselves by coordinating through a DynamoDB table. This coordination
187 | ensures that a single consumer processes each shard, and that if one consumer
188 | fails for any reason, another consumer can pick up from the point at which it
189 | last checkpointed.
190 |
191 | This is all part of the official AWS library! Telekinesis just makes it easier
192 | to use from JRuby.
193 |
194 | Each client has to know how to process all the data it's
195 | retrieving from Kinesis. That's done by creating a [record
196 | processor](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java)
197 | and telling a `KCL` how to create a processor when it becomes
198 | responsible for a shard.
199 |
200 | We highly recommend reading the [official
201 | docs](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java)
202 | on implementing the `IRecordProcessor` interface before you continue.
203 |
204 | > NOTE: Since `initialize` is a reserved method, Telekinesis takes care of
205 | > calling your `init` method whenever the KCL calls `IRecordProcessor`'s
206 | > `initialize` method.
207 |
208 | > NOTE: Make sure you read the Kinesis Record Processor documentation carefully.
209 | > Failures, checkpoints, and shutting down require some attention. More on that later.
210 |
211 | After it is created, a record processor is initialized with the ID of the shard
212 | it's processing, and handed an enumerable of
213 | [Records](http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/index.html?com/amazonaws/services/kinesis/AmazonKinesisClient.html) and a checkpointer (see below) every time the consumer detects new data to
214 | process.
215 |
216 | Defining and creating a simple processor might look like:
217 |
218 | ```ruby
219 | require 'telekinesis'
220 |
221 | class MyProcessor
222 | def init(init_input)
223 | @shard_id = init_input.shard_id
224 | $stderr.puts "Started processing #{@shard_id}"
225 | end
226 |
227 | def process_records(process_records_input)
228 |     process_records_input.records.each do |r|
229 |       puts "key=#{r.partition_key} value=#{String.from_java_bytes(r.data.array)}"
230 |     end
231 | end
232 |
233 |   def shutdown(shutdown_input)
234 | $stderr.puts "Shutting down #{@shard_id}"
235 | end
236 | end
237 |
238 | worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
239 | MyProcessor.new
240 | end
241 |
242 | worker.run
243 | ```
244 |
245 | To make defining record processors easier, Telekinesis comes with a `Block`
246 | processor that lets you use a block to specify your `process_records` method.
247 | Use this if you don't need to do any explicit startup or shutdown in a record
248 | processor.
249 |
250 | ```ruby
251 | require 'telekinesis'
252 |
253 | worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
254 | Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind|
255 | records.each {|r| puts "key=#{r.partition_key} value=#{String.from_java_bytes(r.data.array)}" }
256 | end
257 | end
258 |
259 | worker.run
260 | ```
261 |
262 | Once you get into building a client application, you'll probably want
263 | to know about some of the following advanced tips and tricks.
264 |
265 | #### Client State
266 |
267 | Each KCL application gets its own DynamoDB table that stores all of its state.
268 | The `:app` name is used as the DynamoDB table name, so beware of namespace
269 | collisions if you use DynamoDB on its own. Altering or resetting any of this
270 | state involves manually altering the application's Dynamo table.
271 |
272 | #### Errors while processing records
273 |
274 | When a call to `process_records` fails, the KCL expects you to handle the
275 | failure and try to reprocess. If you let an exception escape, it happily moves
276 | on to the next batch of records from Kinesis and will let you checkpoint further
277 | on down the road.
278 |
279 | From the [official docs](http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html):
280 |
281 | > The KCL relies on processRecords to handle any exceptions that arise from
282 | > processing the data records. If an exception is thrown from processRecords,
283 | > the KCL skips over the data records that were passed prior to the exception;
284 | > that is, these records are not re-sent to the record processor that threw the
285 | > exception or to any other record processor in the application.
286 |
287 | The moral of the story is that you should be absolutely sure you catch any
288 | exceptions that get thrown in your `process_records` implementation. If you
289 | don't, you can (silently) drop data on the floor.
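
A minimal defensive sketch (`handle` and `save_to_dead_letter_file` are
hypothetical stand-ins for your own logic):

```ruby
def process_records(process_records_input)
  attempts = 0
  begin
    process_records_input.records.each { |r| handle(r) }
  rescue => e
    # Never let an exception escape: the KCL would skip this entire batch.
    warn "process_records failed: #{e}"
    attempts += 1
    retry if attempts < 3
    save_to_dead_letter_file(process_records_input.records)
  end
end
```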
290 |
291 | If something terrible happens and you can't attempt to re-read the list of
292 | records and re-do whatever work you needed to do in process records, we've been
293 | advised by the Kinesis team that killing the entire JVM that's running the
294 | worker is the safest thing to do. On restart, the consumer (or another consumer
295 | in the application group) will pick up the orphaned shards and attempt to
296 | restart from the last available checkpoint.
297 |
298 | #### Checkpoints and `INITIAL_POSITION_IN_STREAM`
299 |
300 | The second object passed to `process_records` is a checkpointer. This can be
301 | used to checkpoint all records that have been passed to the processor so far
302 | (by just calling `checkpointer.checkpoint`) or up to a particular sequence
303 | number (by calling `checkpointer.checkpoint(record.sequence_number)`).
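
For example, a `Block` processor that checkpoints after each batch
(checkpointing less often means fewer DynamoDB writes, but more reprocessing
after a failure; `process` is a hypothetical stand-in):

```ruby
worker = Telekinesis::Consumer::KCL.new(stream: 'some-events', app: 'example') do
  Telekinesis::Consumer::Block.new do |records, checkpointer, millis_behind|
    records.each { |r| process(r) }
    # Mark everything handed to this processor so far as processed.
    checkpointer.checkpoint
  end
end
```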
304 |
305 | While a `KCL` consumer can be initialized with an `:initial_position_in_stream`
306 | option, any existing checkpoint or other state for a shard in DynamoDB takes
307 | precedence over that value, so if you start a consumer with
308 | `initial_position_in_stream: 'LATEST'` and then restart with
309 | `initial_position_in_stream: 'TRIM_HORIZON'` you still end up starting from
310 | `LATEST`.
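
For reference, a sketch of setting the initial position through the `:options`
hash (which the wrapper forwards to the KCL configuration):

```ruby
worker = Telekinesis::Consumer::KCL.new(
  stream: 'some-events',
  app: 'example',
  options: {initial_position_in_stream: 'TRIM_HORIZON'}
) { MyProcessor.new }
```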
311 |
312 | ## Java client logging
313 |
314 | The AWS Java SDK can be extremely noisy and hard to control, since it logs
315 | through `java.util.logging`.
316 |
317 | Telekinesis comes with a shim that can silence all of that logging or redirect
318 | it to a Ruby Logger of your choice. This isn't fine-grained control - you're
319 | capturing or disabling ALL logging from any Java dependency that uses
320 | `java.util.logging` - so use it with care.
321 |
322 | To entirely disable logging:
323 |
324 | ```ruby
325 | Telekinesis::Logging.disable_java_logging
326 | ```
327 |
328 | To capture all logging and send it through a Ruby logger:
329 |
330 | ```ruby
331 | Telekinesis::Logging.capture_java_logging(Logger.new($stderr))
332 | ```
333 |
334 | ----
335 |
336 | # Building
337 |
338 | ## Prerequisites
339 |
340 | * JRuby 1.7.9 or later.
341 | * Apache Maven
342 |
343 | ## Build
344 |
345 | Install JRuby 1.7.9 or later. For example, with `rbenv`:
346 |
347 | ```
348 | $ rbenv install jruby-1.7.9
349 | ```
350 |
351 | Install Bundler and required Gems.
352 |
353 | ```
354 | $ gem install bundler
355 | $ bundle install
356 | ```
357 |
358 | Install Apache Maven.
359 |
360 | On Ubuntu or related use:
361 |
362 | ```
363 | $ sudo apt-get install maven
364 | ```
365 |
366 | The easiest method on OSX is via `brew`.
367 |
368 | ```
369 | $ brew install maven
370 | ```
371 |
372 | On OSX, ensure your `JAVA_HOME` environment variable is set. In Bash, for
373 | example, add the following to `~/.bash_profile`:
376 |
377 | ```
378 | export JAVA_HOME=$(/usr/libexec/java_home)
379 | ```
380 |
381 | Then run:
382 |
383 | ```
384 | $ source ~/.bash_profile
385 | ```
386 |
387 | Build the Java shim and jar.
388 |
389 | ```
390 | $ rake ext:build
391 | ```
392 |
393 | The `rake ext:build` task builds the Java shim and packages all of the required Java
394 | classes into a single jar. Since bytecode is portable, the JAR is shipped with
395 | the built gem.
396 |
397 | Build the Gem.
398 |
399 | Use the `rake gem:build` task to build the complete gem, uberjar and all.
400 |
401 | ```
402 | $ rake gem:build
403 | ```
404 |
405 | # Testing
406 |
407 | Telekinesis comes with a small set of unit tests. Run those with plain ol'
408 | `rake test`.
409 |
410 | > NOTE: The Java extension *must* be built and installed before you can run
411 | > unit tests.
412 |
413 | Integration tests coming soon.
414 |
415 |
416 | # License
417 |
418 | Copyright Kickstarter, PBC.
419 |
420 | Released under an [MIT License](http://opensource.org/licenses/MIT).
421 |
422 |
--------------------------------------------------------------------------------