├── .gitignore ├── .rspec ├── .travis.yml ├── .yardopts ├── CHANGES.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── TODO.md ├── examples ├── consumer.rb └── producer.rb ├── lib ├── poseidon.rb └── poseidon │ ├── broker_pool.rb │ ├── cluster_metadata.rb │ ├── compressed_value.rb │ ├── compression.rb │ ├── compression │ ├── gzip_codec.rb │ └── snappy_codec.rb │ ├── connection.rb │ ├── fetched_message.rb │ ├── message.rb │ ├── message_conductor.rb │ ├── message_set.rb │ ├── message_to_send.rb │ ├── messages_for_broker.rb │ ├── messages_to_send.rb │ ├── messages_to_send_batch.rb │ ├── partition_consumer.rb │ ├── producer.rb │ ├── producer_compression_config.rb │ ├── protocol.rb │ ├── protocol │ ├── protocol_struct.rb │ ├── request_buffer.rb │ └── response_buffer.rb │ ├── sync_producer.rb │ ├── topic_metadata.rb │ └── version.rb ├── log └── .gitkeep ├── poseidon.gemspec └── spec ├── integration ├── multiple_brokers │ ├── consumer_spec.rb │ ├── metadata_failures_spec.rb │ ├── rebalance_spec.rb │ ├── round_robin_spec.rb │ └── spec_helper.rb └── simple │ ├── compression_spec.rb │ ├── connection_spec.rb │ ├── multiple_brokers_spec.rb │ ├── simple_producer_and_consumer_spec.rb │ ├── spec_helper.rb │ ├── truncated_messages_spec.rb │ └── unavailable_broker_spec.rb ├── spec_helper.rb ├── test_cluster.rb └── unit ├── broker_pool_spec.rb ├── cluster_metadata_spec.rb ├── compression ├── gzip_codec_spec.rb └── snappy_codec_spec.rb ├── compression_spec.rb ├── connection_spec.rb ├── fetched_message_spec.rb ├── message_conductor_spec.rb ├── message_set_spec.rb ├── message_spec.rb ├── message_to_send_spec.rb ├── messages_for_broker_spec.rb ├── messages_to_send_batch_spec.rb ├── messages_to_send_spec.rb ├── partition_consumer_spec.rb ├── producer_compression_config_spec.rb ├── producer_spec.rb ├── protocol └── request_buffer_spec.rb ├── protocol_spec.rb ├── sync_producer_spec.rb └── topic_metadata_spec.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | *.log 19 | *.log.* 20 | tags 21 | .rvmrc 22 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format progress 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 1.9.3 4 | - 2.0.0 5 | - 2.1 6 | - ruby-head 7 | - jruby-19mode 8 | - jruby-head 9 | - rbx-2 10 | matrix: 11 | allow_failures: 12 | - rvm: ruby-head 13 | - rvm: jruby-head 14 | - rvm: rbx-2 15 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --markup-provider=redcarpet 2 | --markup=markdown 3 | --no-private 4 | --files LICENSE.txt 5 | --exclude '~$' 6 | --title 'Poseidon (Kafka library for Ruby)' 7 | --api public 8 | --readme README.md 9 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # 0.0.5 2 | 3 | * Add support for negative offsets. 
[GH-24] 4 | * Fix serious bug where we would send messages to the wrong partition. [GH-36] (Thanks @sclasen and @jorgeortiz85 for tracking this down.) 5 | * Better error message when we can't connect to a broker. [GH-42] 6 | * Handle broker rebalances. [GH-43] 7 | * PartitionConsumer: Block for messages by default. [GH-48] 8 | * Add a logger to help debug issues. [GH-51] 9 | * Add snappy support. [GH-57] 10 | * Allow `:none` value for `:compression_codec` option. [GH-72] 11 | * Allow request buffer to accept mixed encodings. [GH-74] 12 | 13 | # 0.0.4 14 | 15 | * Don't truncate UTF8 Messages [GH-18] 16 | * Gracefully handle truncated fetch responses [GH-19] 17 | 18 | # 0.0.3 19 | 20 | * Better distribute messages across partitions. 21 | * Handle broken connections better. 22 | * Gracefully handle attempts to send an empty set of messages. 23 | 24 | # 0.0.2 25 | 26 | * Added ability to create a partition consumer for a topic+partition using topic metadata. 27 | * Added PartitionConsumer#offset to return the offset of the last fetch 28 | 29 | # 0.0.1 30 | 31 | * Initial release 32 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'rake' 4 | 5 | # Specify your gem's dependencies in poseidon.gemspec 6 | gemspec 7 | 8 | gem 'coveralls', require: false 9 | 10 | group :development do 11 | gem 'github-markup', :platform => :ruby 12 | gem 'redcarpet', :platform => :ruby 13 | end 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Bob Potter 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unmaintained 2 | 3 | This project is currently unmaintained. There are a handful of other options for interacting with Kafka from Ruby: 4 | 5 | * A pure ruby client, [ruby-kafka](https://github.com/zendesk/ruby-kafka), which is 0.9 compatible and supports consumer groups. 6 | * A REST proxy, [Kafka Rest](https://github.com/confluentinc/kafka-rest). 7 | * For JRuby there is [jruby-kafka](https://github.com/joekiller/jruby-kafka) which wraps the Java consumer. 
8 | 9 | # Poseidon [![Build Status](https://travis-ci.org/bpot/poseidon.png?branch=master)](https://travis-ci.org/bpot/poseidon) [![Code Climate](https://codeclimate.com/github/bpot/poseidon.png)](https://codeclimate.com/github/bpot/poseidon) 10 | 11 | Poseidon is a Kafka client. Poseidon only supports the 0.8 API and above. 12 | 13 | **Until 1.0.0 this should be considered ALPHA software and not necessarily production ready.** 14 | 15 | ## Usage 16 | 17 | ### API Documentation 18 | 19 | * [Latest release](http://rubydoc.info/gems/poseidon) 20 | * [Github master](http://rubydoc.info/github/bpot/poseidon) 21 | 22 | ### Installing a Kafka broker locally 23 | 24 | Follow the [instructions](http://kafka.apache.org/documentation.html#quickstart) on the Kafka wiki to build Kafka 0.8 and get a test broker up and running. 25 | 26 | ### Sending messages to Kafka 27 | 28 | ```ruby 29 | require 'poseidon' 30 | 31 | producer = Poseidon::Producer.new(["localhost:9092"], "my_test_producer") 32 | 33 | messages = [] 34 | messages << Poseidon::MessageToSend.new("topic1", "value1") 35 | messages << Poseidon::MessageToSend.new("topic2", "value2") 36 | producer.send_messages(messages) 37 | ``` 38 | 39 | More detailed [Poseidon::Producer](http://rubydoc.info/github/bpot/poseidon/Poseidon/Producer) documentation. 40 | 41 | ### Fetching messages from Kafka 42 | 43 | ```ruby 44 | require 'poseidon' 45 | 46 | consumer = Poseidon::PartitionConsumer.new("my_test_consumer", "localhost", 9092, 47 | "topic1", 0, :earliest_offset) 48 | 49 | loop do 50 | messages = consumer.fetch 51 | messages.each do |m| 52 | puts m.value 53 | end 54 | end 55 | ``` 56 | 57 | More detailed [Poseidon::PartitionConsumer](http://rubydoc.info/github/bpot/poseidon/Poseidon/PartitionConsumer) documentation. 58 | 59 | ### Using snappy compression 60 | 61 | To use snappy compression in your producers or consumers, install the [snappy](http://rubygems.org/gems/snappy) gem or simply add `gem 'snappy'` to your project's Gemfile (see the compression example at the end of this README). 62 | 63 | ## Semantic Versioning 64 | 65 | This gem follows [SemVer](http://semver.org). In particular, the public API should not be considered stable and anything may change without warning until Version 1.0.0. Additionally, for the purposes of versioning, the public API is everything documented in the [public API docs](http://rubydoc.info/github/bpot/poseidon). 66 | 67 | ## Requirements 68 | 69 | * Ruby 1.9.3 or higher (1.9.2 and below not supported!!!) 70 | * Kafka 0.8 or higher 71 | 72 | ## Integration Tests 73 | 74 | In order to run integration tests you must specify a `KAFKA_PATH` environment variable which points to a built Kafka installation. To build Kafka locally follow the [instructions](http://kafka.apache.org/documentation.html#quickstart) provided by the project. 75 | 76 | # cd ~/src/poseidon/ 77 | # bundle 78 | # KAFKA_PATH=~/src/kafka bundle exec rake spec:all # run all unit and integration specs 79 | 80 | The poseidon test suite will take care of spinning up and down the broker(s) needed for the integration tests. 
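## Compression example

Compression is configured on the producer side only; consumers detect compressed message sets and decompress them transparently when fetching. Below is a minimal sketch using the `:compression_codec` and `:compressed_topics` options documented on `Poseidon::Producer`; the broker address and topic names are illustrative.

```ruby
require 'poseidon'

# Compress every topic this producer writes to with snappy.
producer = Poseidon::Producer.new(["localhost:9092"], "compressing_producer",
                                  :compression_codec => :snappy)
producer.send_messages([Poseidon::MessageToSend.new("events", "payload")])

# Or compress only selected topics, leaving all others uncompressed.
selective = Poseidon::Producer.new(["localhost:9092"], "selective_producer",
                                   :compression_codec => :gzip,
                                   :compressed_topics => ["events"])
```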
81 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | require 'rspec/core/rake_task' 3 | 4 | RSpec::Core::RakeTask.new("spec:unit") do |t| 5 | t.pattern = 'spec/unit/**/*_spec.rb' 6 | end 7 | 8 | RSpec::Core::RakeTask.new('spec:integration:simple') do |t| 9 | t.pattern = 'spec/integration/simple/*_spec.rb' 10 | t.rspec_opts = ["--fail-fast", "-f d"] 11 | end 12 | 13 | RSpec::Core::RakeTask.new('spec:integration:multiple_brokers') do |t| 14 | t.pattern = 'spec/integration/multiple_brokers/*_spec.rb' 15 | t.rspec_opts = ["--fail-fast", "-f d"] 16 | end 17 | 18 | task :spec => 'spec:unit' 19 | task 'spec:all' => ['spec:unit', 'spec:integration:simple', 'spec:integration:multiple_brokers'] 20 | task :default => 'spec:unit' 21 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | ### 0.0.1 2 | * Ensure that protocol errors are being handled correctly and not bubbling up 3 | * More integration tests, replication, leader changes, etc. Investigate interesting cases in kafka's tests 4 | * End-to-end integration specs 5 | - In specs that test broker failure, verify that messages were actually sent/not sent with a consumer. 6 | 7 | * AsyncProducer 8 | - Implement a bounded queue, sending thread, etc 9 | * Cleanup: extract protocol struct delegation to a module. 10 | * When failing to send messages in sync producer, return messages that failed to send? 11 | 12 | ### 0.0.2 13 | 14 | * New Consumer/Consumer Enhancements 15 | - Automatically partition work among consumers (zookeeper, redis, pluggable?) 16 | - Handle case where the offset we're trying to read from no longer exists 17 | 18 | * Snappy Compression 19 | - snappy: c-ext, would like to avoid 20 | - snappy_ffi: ffi interface, but needs to be updated (pre c-api) 21 | and has no specs, docs. Also linked to a c-ext version, two gems, etc. 22 | - new snappy ffi library with specs, docs, etc. Shave that Yak! 23 | 24 | * Benchmark/Profiling. KGIO? 
25 | 26 | ### 0.0.3 -- Targets Kafka 0.8.1 27 | - Offset API 28 | -------------------------------------------------------------------------------- /examples/consumer.rb: -------------------------------------------------------------------------------- 1 | $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') 2 | require 'poseidon' 3 | 4 | consumer = Poseidon::PartitionConsumer.new("example_consumer", "localhost", 9092, 5 | "example", 0, :earliest_offset) 6 | 7 | loop do 8 | begin 9 | messages = consumer.fetch 10 | messages.each do |m| 11 | puts "Received message: #{m.value}" 12 | end 13 | rescue Poseidon::Errors::UnknownTopicOrPartition 14 | puts "Topic does not exist yet" 15 | end 16 | 17 | sleep 1 18 | end 19 | -------------------------------------------------------------------------------- /examples/producer.rb: -------------------------------------------------------------------------------- 1 | $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib') 2 | require 'poseidon' 3 | 4 | producer = Poseidon::Producer.new(["localhost:9092"], "example_producer") 5 | 6 | loop do 7 | producer.send_messages([Poseidon::MessageToSend.new("example", Time.now.to_s)]) 8 | sleep 1 9 | end 10 | -------------------------------------------------------------------------------- /lib/poseidon.rb: -------------------------------------------------------------------------------- 1 | # Stdlib requires 2 | require 'socket' 3 | require 'zlib' 4 | require 'thread' 5 | require 'set' 6 | require 'logger' 7 | require 'stringio' 8 | 9 | # Top level Poseidon namespace 10 | # 11 | # @api public 12 | module Poseidon 13 | # Poseidon exception namespace 14 | module Errors 15 | # @api private 16 | class ProtocolError < StandardError; end 17 | 18 | # Protocol Exceptions 19 | # 20 | # These are defined by the Kafka wire format; 21 | # they should be caught before being raised to users. 22 | # 23 | # @api private 24 | class UnknownError < ProtocolError; end 25 | # @api private 26 | class OffsetOutOfRange < ProtocolError; end 27 | # @api private 28 | class InvalidMessage < ProtocolError; end 29 | # @api private 30 | class UnknownTopicOrPartition < ProtocolError; end 31 | # @api private 32 | class InvalidMessageSize < ProtocolError; end 33 | # @api private 34 | class LeaderNotAvailable < ProtocolError; end 35 | # @api private 36 | class NotLeaderForPartition < ProtocolError; end 37 | # @api private 38 | class RequestTimedOut < ProtocolError; end 39 | # @api private 40 | class BrokerNotAvailable < ProtocolError; end 41 | # @api private 42 | class ReplicaNotAvailable < ProtocolError; end 43 | # @api private 44 | class MessageSizeTooLarge < ProtocolError; end 45 | # @api private 46 | class UnrecognizedProtocolError < ProtocolError; end 47 | 48 | # @api private 49 | NO_ERROR_CODE = 0 50 | # @api private 51 | ERROR_CODES = { 52 | -1 => UnknownError, 53 | 1 => OffsetOutOfRange, 54 | 2 => InvalidMessage, 55 | 3 => UnknownTopicOrPartition, 56 | 4 => InvalidMessageSize, 57 | 5 => LeaderNotAvailable, 58 | 6 => NotLeaderForPartition, 59 | 7 => RequestTimedOut, 60 | 8 => BrokerNotAvailable, 61 | 9 => ReplicaNotAvailable, 62 | 10 => MessageSizeTooLarge 63 | } 64 | 65 | # Raised when a custom partitioner tries to send 66 | # a message to a partition that doesn't exist. 67 | class InvalidPartitionError < StandardError; end 68 | 69 | # Raised when we are unable to fetch metadata from 70 | # any of the brokers. 
71 | class UnableToFetchMetadata < StandardError; end 72 | 73 | # Raised when a message's checksum doesn't match 74 | class ChecksumError < StandardError; end 75 | 76 | # Raised when you try to send messages to a producer 77 | # object that has been #shutdown 78 | class ProducerShutdownError < StandardError; end 79 | end 80 | 81 | def self.logger 82 | @logger ||= null_logger 83 | end 84 | 85 | def self.logger=(logger) 86 | @logger = logger 87 | end 88 | 89 | private 90 | def self.null_logger 91 | devnull = RUBY_PLATFORM =~ /w32/ ? 'nul' : '/dev/null' 92 | l = Logger.new(devnull) 93 | l.level = Logger::INFO 94 | l 95 | end 96 | end 97 | 98 | # Public API 99 | require "poseidon/message_to_send" 100 | require "poseidon/producer" 101 | require "poseidon/fetched_message" 102 | require "poseidon/partition_consumer" 103 | 104 | # Poseidon! 105 | require "poseidon/message" 106 | require "poseidon/message_set" 107 | require "poseidon/topic_metadata" 108 | require "poseidon/protocol" 109 | 110 | require "poseidon/broker_pool" 111 | require "poseidon/cluster_metadata" 112 | require "poseidon/compression" 113 | require "poseidon/connection" 114 | require "poseidon/message_conductor" 115 | require "poseidon/messages_for_broker" 116 | require "poseidon/messages_to_send" 117 | require "poseidon/messages_to_send_batch" 118 | require "poseidon/producer_compression_config" 119 | require "poseidon/sync_producer" 120 | require "poseidon/version" 121 | -------------------------------------------------------------------------------- /lib/poseidon/broker_pool.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # BrokerPool allows you to send API calls to a broker's Connection. 3 | # 4 | # @api private 5 | class BrokerPool 6 | class UnknownBroker < StandardError; end 7 | 8 | # @yieldparam [BrokerPool] 9 | def self.open(client_id, seed_brokers, socket_timeout_ms, &block) 10 | broker_pool = new(client_id, seed_brokers, socket_timeout_ms) 11 | 12 | yield broker_pool 13 | ensure 14 | broker_pool.close 15 | end 16 | 17 | # @param [String] client_id 18 | def initialize(client_id, seed_brokers, socket_timeout_ms) 19 | @connections = {} 20 | @brokers = {} 21 | @client_id = client_id 22 | @seed_brokers = seed_brokers 23 | @socket_timeout_ms = socket_timeout_ms 24 | end 25 | 26 | def fetch_metadata(topics) 27 | @seed_brokers.each do |broker| 28 | if metadata = fetch_metadata_from_broker(broker, topics) 29 | Poseidon.logger.debug { "Fetched metadata\n" + metadata.to_s } 30 | return metadata 31 | end 32 | end 33 | raise Errors::UnableToFetchMetadata 34 | end 35 | 36 | # Update the brokers we know about 37 | # 38 | # TODO break connection when a broker's info changes? 
39 | # 40 | # @param [Hash] brokers 41 | # Hash of broker_id => { :host => host, :port => port } 42 | def update_known_brokers(brokers) 43 | @brokers.update(brokers) 44 | nil 45 | end 46 | 47 | # Executes an api call on the connection 48 | # 49 | # @param [Integer] broker_id id of the broker we want to execute it on 50 | # @param [Symbol] api_call 51 | # the api call we want to execute (:produce,:fetch,etc) 52 | def execute_api_call(broker_id, api_call, *args) 53 | connection(broker_id).send(api_call, *args) 54 | end 55 | 56 | # Closes all open connections to brokers 57 | def close 58 | @connections.each_value(&:close) 59 | @connections = {} 60 | end 61 | 62 | alias_method :shutdown, :close 63 | 64 | private 65 | def fetch_metadata_from_broker(broker, topics) 66 | host, port = broker.split(":") 67 | Connection.open(host, port, @client_id, @socket_timeout_ms) do |connection| 68 | connection.topic_metadata(topics) 69 | end 70 | rescue Connection::ConnectionFailedError 71 | return nil 72 | end 73 | 74 | def connection(broker_id) 75 | @connections[broker_id] ||= new_connection(broker_id) 76 | end 77 | 78 | def new_connection(broker_id) 79 | info = @brokers[broker_id] 80 | if info.nil? 81 | raise UnknownBroker 82 | end 83 | Connection.new(info[:host], info[:port], @client_id, @socket_timeout_ms) 84 | end 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /lib/poseidon/cluster_metadata.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # Encapsulates what we know about brokers, topics and partitions 3 | # from Metadata API calls. 4 | # 5 | # @api private 6 | class ClusterMetadata 7 | attr_reader :brokers, :last_refreshed_at, :topic_metadata 8 | def initialize 9 | @brokers = {} 10 | @topic_metadata = {} 11 | @last_refreshed_at = nil 12 | end 13 | 14 | # Update what we know about the cluster based on MetadataResponse 15 | # 16 | # @param [MetadataResponse] topic_metadata_response 17 | # @return nil 18 | def update(topic_metadata_response) 19 | update_brokers(topic_metadata_response.brokers) 20 | update_topics(topic_metadata_response.topics) 21 | 22 | @last_refreshed_at = Time.now 23 | nil 24 | end 25 | 26 | # Do we have metadata for these topics already? 27 | # 28 | # @param [Enumerable] topic_names A set of topics. 29 | # @return [Boolean] true if we have metadata for all +topic_names+, otherwise false. 30 | def have_metadata_for_topics?(topic_names) 31 | topic_names.all? { |topic| @topic_metadata[topic] } 32 | end 33 | 34 | # Provides metadata for each topic 35 | # 36 | # @param [Enumerable] topic_names Topics we should return metadata for 37 | # @return [Hash] 38 | def metadata_for_topics(topic_names) 39 | Hash[topic_names.map { |name| [name, @topic_metadata[name]] }] 40 | end 41 | 42 | # Provides a Broker object for +broker_id+. This corresponds to the 43 | # broker ids in the TopicMetadata objects. 
44 | # 45 | # @param [Integer] broker_id Broker id 46 | def broker(broker_id) 47 | @brokers[broker_id] 48 | end 49 | 50 | # Return lead broker for topic and partition 51 | def lead_broker_for_partition(topic_name, partition) 52 | broker_id = @topic_metadata[topic_name].partition_leader(partition) 53 | if broker_id 54 | @brokers[broker_id] 55 | else 56 | nil 57 | end 58 | end 59 | 60 | def topics 61 | @topic_metadata.keys 62 | end 63 | 64 | def to_s 65 | out = "" 66 | @topic_metadata.each do |topic, metadata| 67 | out << "Topic: #{topic}\n" 68 | out << "-------------------------\n" 69 | out << metadata.to_s 70 | end 71 | out 72 | end 73 | 74 | private 75 | def update_topics(topics) 76 | topics.each do |topic| 77 | if topic.exists? 78 | @topic_metadata[topic.name] = topic 79 | end 80 | end 81 | end 82 | 83 | def update_brokers(brokers) 84 | brokers.each do |broker| 85 | @brokers[broker.id] = broker 86 | end 87 | end 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /lib/poseidon/compressed_value.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | class CompressedValue 4 | def initialize(value, codec_id) 5 | @value = value 6 | @codec_id = codec_id 7 | end 8 | 9 | # Decompressed value 10 | # 11 | # Raises UnrecognizedCompressionCodec if the compression codec is unknown 12 | # 13 | # @return [String] decompressed value 14 | def decompressed 15 | @decompressed ||= decompress 16 | end 17 | 18 | def compression_codec 19 | Compression.find_codec(@codec_id) 20 | end 21 | 22 | private 23 | 24 | # Decompress the raw value using the codec indicated by @codec_id 25 | def decompress 26 | compression_codec.decompress(@value) 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/poseidon/compression.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | module Compression 4 | class UnrecognizedCompressionCodec < StandardError; end 5 | 6 | require "poseidon/compression/gzip_codec" 7 | require "poseidon/compression/snappy_codec" 8 | 9 | CODECS = { 10 | #0 => no codec 11 | 1 => GzipCodec, 12 | 2 => SnappyCodec 13 | } 14 | 15 | # Fetches codec module for +codec_id+ 16 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-Compression 17 | # 18 | # @param [Integer] codec_id codec id as defined by the Kafka Protocol 19 | # @return [Module] codec module for codec_id 20 | # 21 | # @private 22 | def self.find_codec(codec_id) 23 | codec = CODECS[codec_id] 24 | if codec.nil? 
25 | raise UnrecognizedCompressionCodec, codec_id 26 | end 27 | codec 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/poseidon/compression/gzip_codec.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | module Compression 3 | module GzipCodec 4 | def self.codec_id 5 | 1 6 | end 7 | 8 | def self.compress(s) 9 | io = StringIO.new 10 | io.set_encoding(Encoding::BINARY) 11 | gz = Zlib::GzipWriter.new io, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY 12 | gz.write s 13 | gz.close 14 | io.string 15 | end 16 | 17 | def self.decompress(s) 18 | io = StringIO.new(s) 19 | Zlib::GzipReader.new(io).read 20 | end 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/poseidon/compression/snappy_codec.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | module Compression 3 | module SnappyCodec 4 | def self.codec_id 5 | 2 6 | end 7 | 8 | def self.compress(s) 9 | check! 10 | Snappy.deflate(s) 11 | end 12 | 13 | def self.decompress(s) 14 | check! 15 | Snappy::Reader.new(StringIO.new(s)).read 16 | end 17 | 18 | def self.check! 19 | @checked ||= begin 20 | require 'snappy' 21 | true 22 | rescue LoadError 23 | raise "Snappy compression is not available, please install the 'snappy' gem" 24 | end 25 | end 26 | 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/poseidon/connection.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # High level internal interface to a remote broker. Provides access to 3 | # the broker API. 4 | # @api private 5 | class Connection 6 | include Protocol 7 | 8 | class ConnectionFailedError < StandardError; end 9 | class TimeoutException < Exception; end 10 | 11 | API_VERSION = 0 12 | REPLICA_ID = -1 # Replica id is always -1 for non-brokers 13 | 14 | # @yieldparam [Connection] 15 | def self.open(host, port, client_id, socket_timeout_ms, &block) 16 | connection = new(host, port, client_id, socket_timeout_ms) 17 | 18 | yield connection 19 | ensure 20 | connection.close 21 | end 22 | 23 | attr_reader :host, :port 24 | 25 | # Create a new connection 26 | # 27 | # @param [String] host Host to connect to 28 | # @param [Integer] port Port broker listens on 29 | # @param [String] client_id Unique across processes? 
30 | def initialize(host, port, client_id, socket_timeout_ms) 31 | @host = host 32 | @port = port 33 | 34 | @client_id = client_id 35 | @socket_timeout_ms = socket_timeout_ms 36 | end 37 | 38 | # Close broker connection 39 | def close 40 | @socket && @socket.close 41 | end 42 | 43 | # Execute a produce call 44 | # 45 | # @param [Integer] required_acks 46 | # @param [Integer] timeout 47 | # @param [Array] messages_for_topics Messages to send 48 | # @return [ProduceResponse] 49 | def produce(required_acks, timeout, messages_for_topics) 50 | ensure_connected 51 | req = ProduceRequest.new( request_common(:produce), 52 | required_acks, 53 | timeout, 54 | messages_for_topics) 55 | send_request(req) 56 | if required_acks != 0 57 | read_response(ProduceResponse) 58 | else 59 | true 60 | end 61 | end 62 | 63 | # Execute a fetch call 64 | # 65 | # @param [Integer] max_wait_time 66 | # @param [Integer] min_bytes 67 | # @param [Array] topic_fetches 68 | def fetch(max_wait_time, min_bytes, topic_fetches) 69 | ensure_connected 70 | req = FetchRequest.new( request_common(:fetch), 71 | REPLICA_ID, 72 | max_wait_time, 73 | min_bytes, 74 | topic_fetches) 75 | send_request(req) 76 | read_response(FetchResponse) 77 | end 78 | 79 | def offset(offset_topic_requests) 80 | ensure_connected 81 | req = OffsetRequest.new(request_common(:offset), 82 | REPLICA_ID, 83 | offset_topic_requests) 84 | send_request(req) 85 | read_response(OffsetResponse).topic_offset_responses 86 | end 87 | 88 | # Fetch metadata for +topic_names+ 89 | # 90 | # @param [Enumerable] topic_names 91 | # A list of topics to retrieve metadata for 92 | # @return [TopicMetadataResponse] metadata for the topics 93 | def topic_metadata(topic_names) 94 | ensure_connected 95 | req = MetadataRequest.new( request_common(:metadata), 96 | topic_names) 97 | send_request(req) 98 | read_response(MetadataResponse) 99 | end 100 | 101 | private 102 | def ensure_connected 103 | if @socket.nil? || @socket.closed? 104 | begin 105 | @socket = TCPSocket.new(@host, @port) 106 | rescue SystemCallError 107 | raise_connection_failed_error 108 | end 109 | end 110 | end 111 | 112 | def read_response(response_class) 113 | r = ensure_read_or_timeout(4) 114 | if r.nil? 
115 | raise_connection_failed_error 116 | end 117 | n = r.unpack("N").first 118 | s = ensure_read_or_timeout(n) 119 | buffer = Protocol::ResponseBuffer.new(s) 120 | response_class.read(buffer) 121 | rescue Errno::ECONNRESET, SocketError, TimeoutException 122 | @socket = nil 123 | raise_connection_failed_error 124 | end 125 | 126 | def ensure_read_or_timeout(maxlen) 127 | if IO.select([@socket], nil, nil, @socket_timeout_ms / 1000.0) 128 | @socket.read(maxlen) 129 | else 130 | raise TimeoutException.new 131 | end 132 | end 133 | 134 | def send_request(request) 135 | buffer = Protocol::RequestBuffer.new 136 | request.write(buffer) 137 | ensure_write_or_timeout([buffer.to_s.bytesize].pack("N") + buffer.to_s) 138 | rescue Errno::EPIPE, Errno::ECONNRESET, TimeoutException 139 | @socket = nil 140 | raise_connection_failed_error 141 | end 142 | 143 | def ensure_write_or_timeout(data) 144 | if IO.select(nil, [@socket], nil, @socket_timeout_ms / 1000.0) 145 | @socket.write(data) 146 | else 147 | raise TimeoutException.new 148 | end 149 | end 150 | 151 | def request_common(request_type) 152 | RequestCommon.new( 153 | API_KEYS[request_type], 154 | API_VERSION, 155 | next_correlation_id, 156 | @client_id 157 | ) 158 | end 159 | 160 | def next_correlation_id 161 | @correlation_id ||= 0 162 | @correlation_id += 1 163 | end 164 | 165 | def raise_connection_failed_error 166 | raise ConnectionFailedError, "Failed to connect to #{@host}:#{@port}" 167 | end 168 | end 169 | end 170 | -------------------------------------------------------------------------------- /lib/poseidon/fetched_message.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | 3 | # A message fetched from a Kafka broker. 4 | # 5 | # ``` 6 | # fetched_messages = consumer.fetch 7 | # fetched_messages.each do |fm| 8 | # puts "Topic: #{fm.topic}" 9 | # puts "Value #{fm.value}" 10 | # puts "Key: #{fm.key}" 11 | # puts "Offset: #{fm.offset}" 12 | # end 13 | # ``` 14 | # 15 | # @param [String] topic 16 | # Topic this message was fetched from. 17 | # 18 | # @param [String] value 19 | # Value of the fetched message. 20 | # 21 | # @param [String] key 22 | # Optional. Message's key, as set by the producer, 23 | # typically used to route the message to a 24 | # specific partition. 25 | # 26 | # @api public 27 | class FetchedMessage 28 | attr_reader :value, :key, :topic, :offset 29 | 30 | def initialize(topic, value, key, offset) 31 | @topic = topic 32 | @value = value 33 | @key = key 34 | @offset = offset 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/poseidon/message.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # The Message class is used by both Producer and Consumer classes. 3 | # 4 | # = Basic usage 5 | # 6 | # message = Poseidon::Message.new(:value => "hello", 7 | # :key => "user:123", 8 | # :topic => "salutations") 9 | # 10 | # = Sending a message 11 | # 12 | # When sending a message you must set the topic for the message; this 13 | # can be done during creation or afterwards. 14 | # 15 | # = Compression 16 | # 17 | # In normal usage you should never have to worry about compressed 18 | # Message objects. When producing, the producer takes care of 19 | # compressing the messages; when fetching, the fetcher will 20 | # return them decompressed. 
21 | # 22 | # @api private 23 | class Message 24 | # Last 3 bits are used to indicate compression 25 | COMPRESSION_MASK = 0x7 26 | MAGIC_TYPE = 0 27 | 28 | # Build a new Message object from its binary representation 29 | # 30 | # @param [ResponseBuffer] buffer 31 | # a response buffer containing binary data representing a message. 32 | # 33 | # @return [Message] 34 | def self.read(buffer) 35 | m = Message.new 36 | m.struct = Protocol::MessageWithOffsetStruct.read(buffer) 37 | 38 | # Return nil if the message is truncated. 39 | if m.struct.message.truncated? 40 | return nil 41 | end 42 | 43 | if m.struct.message.checksum_failed? 44 | raise Errors::ChecksumError 45 | end 46 | m 47 | end 48 | 49 | attr_accessor :struct, :topic 50 | 51 | # Create a new message object 52 | # 53 | # @param [Hash] options 54 | # 55 | # @option options [String] :value (nil) 56 | # The message's value. Optional. 57 | # 58 | # @option options [String] :key (nil) 59 | # The message's key. Optional. 60 | # 61 | # @option options [String] :topic (nil) 62 | # The topic we should send this message to. Optional. 63 | # 64 | # @option options [String] :attributes (nil) 65 | # Attributes field for the message; currently only indicates 66 | # whether or not the message is compressed. 67 | def initialize(options = {}) 68 | build_struct(options) 69 | 70 | @topic = options.delete(:topic) 71 | 72 | if options.any? 73 | raise ArgumentError, "Unknown options: #{options.keys.inspect}" 74 | end 75 | end 76 | 77 | def ==(other) 78 | eql?(other) 79 | end 80 | 81 | def eql?(other) 82 | struct.eql?(other.struct) 83 | end 84 | 85 | def objects_with_errors 86 | struct.objects_with_errors 87 | end 88 | 89 | # Write a binary representation of the message to buffer 90 | # 91 | # @param [RequestBuffer] buffer 92 | # @return [nil] 93 | def write(buffer) 94 | @struct.write(buffer) 95 | nil 96 | end 97 | 98 | # @return [String] the Message's key 99 | def key 100 | @struct.message.key 101 | end 102 | 103 | # @return [String] the Message's value 104 | def value 105 | @struct.message.value 106 | end 107 | 108 | # @return [Integer] the Message's offset 109 | def offset 110 | @struct.offset 111 | end 112 | 113 | # Is the value compressed? 114 | # 115 | # @return [Boolean] 116 | def compressed? 117 | compression_codec_id > 0 118 | end 119 | 120 | # Decompressed value 121 | # 122 | # @return [String] decompressed value 123 | def decompressed_value 124 | compression_codec.decompress(value) 125 | end 126 | 127 | private 128 | def attributes 129 | @struct.message.attributes 130 | end 131 | 132 | def compression_codec 133 | Compression.find_codec(compression_codec_id) 134 | end 135 | 136 | def compression_codec_id 137 | attributes & COMPRESSION_MASK 138 | end 139 | 140 | def build_struct(options) 141 | message_struct = Protocol::MessageStruct.new( 142 | MAGIC_TYPE, 143 | options.delete(:attributes) || 0, 144 | options.delete(:key), 145 | options.delete(:value) 146 | ) 147 | struct = Protocol::MessageWithOffsetStruct.new(options.delete(:offset) || 0, message_struct) 148 | self.struct = struct 149 | end 150 | end 151 | end 152 | -------------------------------------------------------------------------------- /lib/poseidon/message_conductor.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | class MessageConductor 4 | NO_PARTITION = -1 5 | NO_BROKER = -1 6 | # Create a new message conductor 7 | # 8 | # @param [ClusterMetadata] cluster_metadata 9 | # Metadata for all topics this conductor may send to. 
10 | # @param [Object] partitioner 11 | # Custom partitioner 12 | def initialize(cluster_metadata, partitioner) 13 | @cluster_metadata = cluster_metadata 14 | @partitioner = partitioner 15 | 16 | # Don't always start from partition 0 17 | @partition_counter = rand(65536) 18 | end 19 | 20 | # Determines which partition a message should be sent to. 21 | # 22 | # @param [String] topic 23 | # Topic we are sending this message to 24 | # 25 | # @param [Object] key 26 | # Key for this message, may be nil 27 | # 28 | # @return [Integer,Integer] 29 | # partition_id and broker_id to which this message should be sent 30 | def destination(topic, key = nil) 31 | topic_metadata = topic_metadatas[topic] 32 | if topic_metadata && topic_metadata.leader_available? 33 | partition_id = determine_partition(topic_metadata, key) 34 | broker_id = topic_metadata.partition_leader(partition_id) || NO_BROKER 35 | else 36 | partition_id = NO_PARTITION 37 | broker_id = NO_BROKER 38 | end 39 | 40 | return partition_id, broker_id 41 | end 42 | 43 | private 44 | 45 | def topic_metadatas 46 | @cluster_metadata.topic_metadata 47 | end 48 | 49 | def determine_partition(topic_metadata, key) 50 | if key 51 | partition_for_keyed_message(topic_metadata, key) 52 | else 53 | partition_for_keyless_message(topic_metadata) 54 | end 55 | end 56 | 57 | def partition_for_keyed_message(topic_metadata, key) 58 | partition_count = topic_metadata.partition_count 59 | if @partitioner 60 | partition_id = @partitioner.call(key, partition_count) 61 | 62 | if partition_id >= partition_count 63 | raise Errors::InvalidPartitionError, "partitioner (#{@partitioner.inspect}) requested #{partition_id} while only #{partition_count} partitions exist" 64 | end 65 | else 66 | partition_id = Zlib::crc32(key) % partition_count 67 | end 68 | 69 | partition_id 70 | end 71 | 72 | def partition_for_keyless_message(topic_metadata) 73 | partition_count = topic_metadata.available_partition_count 74 | 75 | if partition_count > 0 76 | topic_metadata.available_partitions[next_partition_counter % partition_count].id 77 | else 78 | NO_PARTITION 79 | end 80 | end 81 | 82 | def next_partition_counter 83 | @partition_counter += 1 84 | end 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /lib/poseidon/message_set.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | class MessageSet 4 | # Build a message set object from a binary encoded message set 5 | # 6 | # @param [ResponseBuffer] buffer binary encoded message set 7 | # @return [MessageSet] 8 | 9 | 10 | def self.read(buffer) 11 | ms = MessageSet.new 12 | ms.struct = Protocol::MessageSetStructWithSize.read(buffer) 13 | ms 14 | end 15 | 16 | def self.read_without_size(buffer) 17 | ms = MessageSet.new 18 | ms.struct = Protocol::MessageSetStruct.read(buffer) 19 | ms 20 | end 21 | 22 | attr_accessor :struct 23 | def initialize(messages = []) 24 | self.struct = Protocol::MessageSetStructWithSize.new(messages) 25 | end 26 | 27 | def ==(other) 28 | eql?(other) 29 | end 30 | 31 | def eql?(other) 32 | struct.eql?(other.struct) 33 | end 34 | 35 | def objects_with_errors 36 | struct.objects_with_errors 37 | end 38 | 39 | def write(buffer) 40 | struct.write(buffer) 41 | end 42 | 43 | def <<(message) 44 | struct.messages << message 45 | end 46 | 47 | def messages 48 | struct.messages 49 | end 50 | 51 | def compress(codec) 52 | MessageSet.new([to_compressed_message(codec)]) 53 | end 54 | 55 | # Builds an array of 
Message objects from the MessageStruct objects, 56 | # decompressing messages if necessary. 57 | # 58 | # @return [Array] 59 | def flatten 60 | struct.messages.map do |message| 61 | if message.compressed? 62 | s = message.decompressed_value 63 | MessageSet.read_without_size(Protocol::ResponseBuffer.new(s)).flatten 64 | else 65 | message 66 | end 67 | end.flatten 68 | end 69 | 70 | private 71 | def to_compressed_message(codec) 72 | buffer = Protocol::RequestBuffer.new 73 | struct.write(buffer) 74 | 75 | value = codec.compress(buffer.to_s[4..-1]) 76 | Message.new(:value => value, :attributes => codec.codec_id) 77 | end 78 | 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /lib/poseidon/message_to_send.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # A message we want to send to Kafka. Comprised of the 3 | # topic we want to send it to, the body of the message 4 | # and an optional key. 5 | # 6 | # mts = Poseidon::MessageToSend.new("topic", "value", "opt_key") 7 | # 8 | # @api public 9 | class MessageToSend 10 | attr_reader :value, :key, :topic 11 | 12 | # Create a new message for sending to a Kafka broker. 13 | # 14 | # @param [String] topic 15 | # Topic this message should be sent to. 16 | # 17 | # @param [String] value 18 | # Value of the message we want to send. 19 | # 20 | # @param [String] key 21 | # Optional. Message's key, used to route a message 22 | # to a specific partition. Otherwise, messages will be 23 | # sent to partitions in a round-robin manner. 24 | # 25 | # @api public 26 | def initialize(topic, value, key = nil) 27 | raise ArgumentError, "Must provide a non-nil topic" if topic.nil? 28 | @topic = topic 29 | @value = value 30 | @key = key 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/poseidon/messages_for_broker.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # Messages that should be sent to a particular broker. 3 | # @api private 4 | class MessagesForBroker 5 | attr_reader :broker_id, :messages 6 | 7 | def initialize(broker_id) 8 | @broker_id = broker_id 9 | @topics = {} 10 | @messages = [] 11 | end 12 | 13 | # Add a message for this broker 14 | def add(message, partition_id) 15 | @messages << message 16 | 17 | @topics[message.topic] ||= {} 18 | @topics[message.topic][partition_id] ||= [] 19 | @topics[message.topic][partition_id] << message 20 | end 21 | 22 | # Build protocol objects for this broker! 
23 | def build_protocol_objects(compression_config) 24 | @topics.map do |topic, messages_by_partition| 25 | codec = compression_config.compression_codec_for_topic(topic) 26 | 27 | messages_for_partitions = messages_by_partition.map do |partition, messages| 28 | message_set = MessageSet.new(messages) 29 | if codec 30 | Protocol::MessagesForPartition.new(partition, message_set.compress(codec)) 31 | else 32 | Protocol::MessagesForPartition.new(partition, message_set) 33 | end 34 | end 35 | Protocol::MessagesForTopic.new(topic, messages_for_partitions) 36 | end 37 | end 38 | 39 | # We can always retry these errors because they mean none of the kafka brokers persisted the message 40 | ALWAYS_RETRYABLE = [Poseidon::Errors::LeaderNotAvailable, Poseidon::Errors::NotLeaderForPartition] 41 | 42 | def successfully_sent(producer_response) 43 | failed = [] 44 | producer_response.topic_response.each do |topic_response| 45 | topic_response.partitions.each do |partition| 46 | if ALWAYS_RETRYABLE.include?(partition.error_class) 47 | Poseidon.logger.debug { "Received #{partition.error_class} when attempting to send messages to #{topic_response.topic} on #{partition.partition}" } 48 | failed.push(*@topics[topic_response.topic][partition.partition]) 49 | end 50 | end 51 | end 52 | 53 | return @messages - failed 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /lib/poseidon/messages_to_send.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # A set of messages that we need to send to the cluster. May be used 3 | # across multiple send attempts. 4 | # 5 | # If a custom partitioner is not used, then messages are distributed 6 | # in round-robin fashion to each partition with an available leader. 7 | # 8 | # @api private 9 | class MessagesToSend 10 | class InvalidPartitionError < StandardError; end 11 | attr_reader :topic_set, :messages 12 | 13 | # Create a new messages to send object. 14 | # 15 | # @param [Array] messages List of messages we want to send. 16 | # @param [ClusterMetadata] cluster_metadata 17 | def initialize(messages, cluster_metadata) 18 | @messages = messages 19 | @cluster_metadata = cluster_metadata 20 | 21 | build_topic_set 22 | end 23 | 24 | def needs_metadata? 25 | !@cluster_metadata.have_metadata_for_topics?(topic_set) 26 | end 27 | 28 | def messages_for_brokers(message_conductor) 29 | topic_metadatas = @cluster_metadata.metadata_for_topics(topic_set) 30 | MessagesToSendBatch.new(@messages, message_conductor).messages_for_brokers 31 | end 32 | 33 | def successfully_sent(messages_sent) 34 | @messages -= messages_sent 35 | end 36 | 37 | def pending_messages? 38 | @messages.any? 39 | end 40 | 41 | private 42 | def build_topic_set 43 | @topic_set = Set.new 44 | @messages.each { |m| @topic_set.add(m.topic) } 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/poseidon/messages_to_send_batch.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # A batch of messages for an individual send attempt to the cluster. 3 | # @api private 4 | class MessagesToSendBatch 5 | def initialize(messages, message_conductor) 6 | @messages = messages 7 | @message_conductor = message_conductor 8 | end 9 | 10 | # Groups messages by broker and preps them for transmission. 
11 | # 12 | # @return [Array] 13 | def messages_for_brokers 14 | messages_for_broker_ids = {} 15 | @messages.each do |message| 16 | partition_id, broker_id = @message_conductor.destination(message.topic, 17 | message.key) 18 | 19 | # Create a nested hash to group messages by broker_id, topic, partition. 20 | messages_for_broker_ids[broker_id] ||= MessagesForBroker.new(broker_id) 21 | messages_for_broker_ids[broker_id].add(message, partition_id) 22 | end 23 | 24 | messages_for_broker_ids.values 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/poseidon/partition_consumer.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # A primitive Kafka Consumer which operates on a specific broker, topic and partition. 3 | # 4 | # Example in the README. 5 | # 6 | # @api public 7 | class PartitionConsumer 8 | # The offset of the latest message the broker received for this partition. 9 | # Useful for knowing how far behind the consumer is. This value is only 10 | # as recent as the last fetch call. 11 | attr_reader :highwater_mark 12 | 13 | attr_reader :host, :port 14 | 15 | attr_reader :offset 16 | 17 | attr_reader :topic 18 | 19 | # Returns a consumer pointing at the lead broker for the partition. 20 | # 21 | # Eventually this will be replaced by higher level consumer functionality; 22 | # this is a stop-gap. 23 | # 24 | def self.consumer_for_partition(client_id, seed_brokers, topic, partition, offset, options = {}) 25 | 26 | broker = BrokerPool.open(client_id, seed_brokers, options[:socket_timeout_ms] || 10_000) do |broker_pool| 27 | cluster_metadata = ClusterMetadata.new 28 | cluster_metadata.update(broker_pool.fetch_metadata([topic])) 29 | 30 | cluster_metadata.lead_broker_for_partition(topic, partition) 31 | end 32 | 33 | new(client_id, broker.host, broker.port, topic, partition, offset, options) 34 | end 35 | 36 | # Create a new consumer which reads the specified topic and partition from 37 | # the host. 38 | # 39 | # @param [String] client_id Used to identify this client; should be unique. 40 | # @param [String] host 41 | # @param [Integer] port 42 | # @param [String] topic Topic to read from 43 | # @param [Integer] partition Partitions are zero indexed. 44 | # @param [Integer,Symbol] offset 45 | # Offset to start reading from. A negative offset can also be passed. 46 | # There are a couple special offsets which can be passed as symbols: 47 | # :earliest_offset Start reading from the first offset the server has. 48 | # :latest_offset Start reading from the latest offset the server has. 49 | # 50 | # @param [Hash] options 51 | # These options can all be overridden in each individual fetch call. 52 | # 53 | # @option options [Integer] :max_bytes 54 | # Maximum number of bytes to fetch 55 | # Default: 1048576 (1MB) 56 | # 57 | # @option options [Integer] :max_wait_ms 58 | # How long to block until the server sends us data. 59 | # NOTE: This is only enforced if min_bytes is > 0. 60 | # Default: 100 (100ms) 61 | # 62 | # @option options [Integer] :min_bytes 63 | # Smallest amount of data the server should send us. 64 | # Default: 1 (Send us data as soon as it is ready) 65 | # 66 | # @option options [Integer] :socket_timeout_ms 67 | # How long to wait for reply from server. Should be higher than max_wait_ms. 
68 | # Default: 10000 (10s) 69 | # 70 | # @api public 71 | def initialize(client_id, host, port, topic, partition, offset, options = {}) 72 | @host = host 73 | @port = port 74 | 75 | handle_options(options) 76 | 77 | @connection = Connection.new(host, port, client_id, @socket_timeout_ms) 78 | @topic = topic 79 | @partition = partition 80 | if Symbol === offset 81 | raise ArgumentError, "Unknown special offset type: #{offset}" unless [:earliest_offset, :latest_offset].include?(offset) 82 | end 83 | @offset = offset 84 | end 85 | 86 | # Fetch messages from the broker. 87 | # 88 | # @param [Hash] options 89 | # 90 | # @option options [Integer] :max_bytes 91 | # Maximum number of bytes to fetch 92 | # 93 | # @option options [Integer] :max_wait_ms 94 | # How long to block until the server sends us data. 95 | # 96 | # @option options [Integer] :min_bytes 97 | # Smallest amount of data the server should send us. 98 | # 99 | # @api public 100 | def fetch(options = {}) 101 | fetch_max_wait = options.delete(:max_wait_ms) || max_wait_ms 102 | fetch_max_bytes = options.delete(:max_bytes) || max_bytes 103 | fetch_min_bytes = options.delete(:min_bytes) || min_bytes 104 | 105 | if options.keys.any? 106 | raise ArgumentError, "Unknown options: #{options.keys.inspect}" 107 | end 108 | 109 | topic_fetches = build_topic_fetch_request(fetch_max_bytes) 110 | fetch_response = @connection.fetch(fetch_max_wait, fetch_min_bytes, topic_fetches) 111 | topic_response = fetch_response.topic_fetch_responses.first 112 | partition_response = topic_response.partition_fetch_responses.first 113 | 114 | unless partition_response.error == Errors::NO_ERROR_CODE 115 | if @offset < 0 && 116 | Errors::ERROR_CODES[partition_response.error] == Errors::OffsetOutOfRange 117 | @offset = :earliest_offset 118 | return fetch(options) 119 | end 120 | 121 | raise Errors::ERROR_CODES[partition_response.error] 122 | else 123 | @highwater_mark = partition_response.highwater_mark_offset 124 | messages = partition_response.message_set.flatten.map do |m| 125 | FetchedMessage.new(topic_response.topic, m.value, m.key, m.offset) 126 | end 127 | if messages.any? 128 | @offset = messages.last.offset + 1 129 | end 130 | messages 131 | end 132 | end 133 | 134 | # @return [Integer] next offset we will fetch 135 | # 136 | # @api public 137 | def next_offset 138 | resolve_offset_if_necessary 139 | @offset 140 | end 141 | 142 | # Close the connection to the kafka broker 143 | # 144 | # @return [Nil] 145 | # 146 | # @api public 147 | def close 148 | @connection.close 149 | nil 150 | end 151 | 152 | private 153 | def handle_options(options) 154 | @max_bytes = options.delete(:max_bytes) || 1024*1024 155 | @min_bytes = options.delete(:min_bytes) || 1 156 | @max_wait_ms = options.delete(:max_wait_ms) || 10_000 157 | @socket_timeout_ms = options.delete(:socket_timeout_ms) || @max_wait_ms + 10_000 158 | 159 | if @socket_timeout_ms < @max_wait_ms 160 | raise ArgumentError, "Setting socket_timeout_ms should be higher than max_wait_ms" 161 | end 162 | 163 | if options.keys.any? 
164 | raise ArgumentError, "Unknown options: #{options.keys.inspect}" 165 | end 166 | end 167 | 168 | def max_wait_ms 169 | @max_wait_ms 170 | end 171 | 172 | def max_bytes 173 | @max_bytes 174 | end 175 | 176 | def min_bytes 177 | @min_bytes 178 | end 179 | 180 | def resolve_offset_if_necessary 181 | return unless Symbol === @offset || @offset < 0 182 | 183 | protocol_offset = case @offset 184 | when :earliest_offset 185 | -2 186 | when :latest_offset 187 | -1 188 | else 189 | -1 190 | end 191 | 192 | topic_offset_responses = @connection.offset(build_topic_offset_request(protocol_offset)) 193 | partition_offsets = topic_offset_responses.first.partition_offsets 194 | if partition_offsets.first.error != Errors::NO_ERROR_CODE 195 | raise Errors::ERROR_CODES[partition_offsets.first.error] 196 | end 197 | 198 | offset_struct = partition_offsets.first.offsets.first 199 | 200 | @offset = if offset_struct.nil? 201 | 0 202 | elsif @offset.kind_of?(Fixnum) && @offset < 0 203 | offset_struct.offset + @offset 204 | else 205 | offset_struct.offset 206 | end 207 | end 208 | 209 | def build_topic_offset_request(protocol_offset) 210 | partition_offset_request = Protocol::PartitionOffsetRequest.new( 211 | @partition, 212 | protocol_offset, 213 | max_number_of_offsets = 1) 214 | 215 | [Protocol::TopicOffsetRequest.new(topic, [partition_offset_request])] 216 | end 217 | 218 | def build_topic_fetch_request(max_bytes) 219 | partition_fetches = [Protocol::PartitionFetch.new(@partition, 220 | next_offset, 221 | max_bytes)] 222 | topic_fetches = [Protocol::TopicFetch.new(topic, partition_fetches)] 223 | end 224 | end 225 | end 226 | -------------------------------------------------------------------------------- /lib/poseidon/producer.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # Provides a high level interface for sending messages to a cluster 3 | # of Kafka brokers. 4 | # 5 | # ## Producer Creation 6 | # 7 | # Producer requires a broker list and a client_id: 8 | # 9 | # producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id", 10 | # :type => :sync) 11 | # 12 | # The broker list is only used to bootstrap our knowledge of the cluster -- 13 | # it does not need to contain every broker. The client id should be unique 14 | # across all clients in the cluster. 15 | # 16 | # ## Sending Messages 17 | # 18 | # Messages must have a topic before being sent: 19 | # 20 | # messages = [] 21 | # messages << Poseidon::MessageToSend.new("topic1", "Hello World") 22 | # messages << Poseidon::MessageToSend.new("user_updates_topic", user.update, user.id) 23 | # producer.send_messages(messages) 24 | # 25 | # ## Producer Types 26 | # 27 | # There are two types of producers: sync and async. They can be specified 28 | # via the :type option when creating a producer. 29 | # 30 | # ## Sync Producer 31 | # 32 | # The :sync producer blocks while it sends messages to the cluster. The more 33 | # messages you can send per #send_messages call, the more efficient it will 34 | # be. 35 | # 36 | # ## Compression 37 | # 38 | # When creating the producer you can specify a compression method: 39 | # 40 | # producer = Producer.new(["broker1:port1"], "my_client_id", 41 | # :type => :sync, :compression_codec => :gzip) 42 | # 43 | # If you don't specify which topics to compress, it will compress all topics. 
44 | # You can specify a set of topics to compress when creating the producer: 45 | # 46 | # producer = Producer.new(["broker1:port1"], "my_client_id", 47 | # :type => :sync, :compression_codec => :gzip, 48 | # :compressed_topics => ["compressed_topic_1"]) 49 | # 50 | # ## Partitioning 51 | # 52 | # For keyless messages the producer will round-robin messages to all 53 | # _available_ partitions for a topic. This means that if we are unable to 54 | # send messages to a specific broker we'll retry sending those to a different 55 | # broker. 56 | # 57 | # However, if you specify a key when creating the message, the producer 58 | # will choose a partition based on the key and only send to that partition. 59 | # 60 | # ## Custom Partitioning 61 | # 62 | # You may also specify a custom partitioning scheme for messages by passing 63 | # a Proc (or any object that responds to #call) to the Producer. The proc 64 | # must return a Fixnum >= 0 and less than partition_count. 65 | # 66 | # my_partitioner = Proc.new { |key, partition_count| Zlib::crc32(key) % partition_count } 67 | # 68 | # producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id", 69 | # :type => :sync, :partitioner => my_partitioner) 70 | # 71 | # @api public 72 | class Producer 73 | # @api private 74 | VALID_OPTIONS = [ 75 | :ack_timeout_ms, 76 | :compressed_topics, 77 | :compression_codec, 78 | :max_send_retries, 79 | :metadata_refresh_interval_ms, 80 | :partitioner, 81 | :retry_backoff_ms, 82 | :required_acks, 83 | :socket_timeout_ms, 84 | :type, 85 | ] 86 | 87 | # @api private 88 | OPTION_DEFAULTS = { 89 | :type => :sync 90 | } 91 | 92 | # Returns a new Producer. 93 | # 94 | # @param [Array] brokers An array of brokers in the form "host1:port1" 95 | # 96 | # @param [String] client_id A client_id used to identify the producer. 97 | # 98 | # @param [Hash] options 99 | # 100 | # @option options [:sync / :async] :type (:sync) 101 | # Whether we should send messages right away or queue them and send 102 | # them in the background. 103 | # 104 | # @option options [:gzip / :snappy / :none] :compression_codec (:none) 105 | # Type of compression to use. 106 | # 107 | # @option options [Enumerable] :compressed_topics (nil) 108 | # Topics to compress. If this is not specified we will compress all 109 | # topics provided that +:compression_codec+ is set. 110 | # 111 | # @option options [Integer: Milliseconds] :metadata_refresh_interval_ms (600_000) 112 | # How frequently we should update the topic metadata in milliseconds. 113 | # 114 | # @option options [#call, nil] :partitioner 115 | # Object which partitions messages based on key. 116 | # Responds to #call(key, partition_count). 117 | # 118 | # @option options [Integer] :max_send_retries (3) 119 | # Number of times to retry sending of messages to a leader. 120 | # 121 | # @option options [Integer] :retry_backoff_ms (100) 122 | # The amount of time (in milliseconds) to wait before refreshing the metadata 123 | # after we are unable to send messages. 124 | # 125 | # 126 | # @option options [Integer] :required_acks (0) 127 | # The number of acks required per request. 128 | # 129 | # @option options [Integer] :ack_timeout_ms (1500) 130 | # How long the producer waits for acks. 131 | # 132 | # @option options [Integer] :socket_timeout_ms (10000) 133 | # How long the producer socket waits for any reply from server. 
134 | # 135 | # @api public 136 | def initialize(brokers, client_id, options = {}) 137 | options = options.dup 138 | validate_options(options) 139 | 140 | if !brokers.respond_to?(:each) 141 | raise ArgumentError, "brokers must respond to #each" 142 | end 143 | @brokers = brokers 144 | @client_id = client_id 145 | @producer = build_producer(options) 146 | @shutdown = false 147 | end 148 | 149 | # Send messages to the cluster. Raises an exception if the producer fails to send the messages. 150 | # 151 | # @param [Enumerable] messages 152 | # Messages must have a +topic+ set and may have a +key+ set. 153 | # 154 | # @return [Boolean] 155 | # 156 | # @api public 157 | def send_messages(messages) 158 | raise Errors::ProducerShutdownError if @shutdown 159 | if !messages.respond_to?(:each) 160 | raise ArgumentError, "messages must respond to #each" 161 | end 162 | 163 | @producer.send_messages(convert_to_messages_objects(messages)) 164 | end 165 | 166 | # Closes all open connections to brokers 167 | def close 168 | @shutdown = true 169 | @producer.close 170 | end 171 | 172 | alias_method :shutdown, :close 173 | 174 | private 175 | def validate_options(options) 176 | unknown_keys = options.keys - VALID_OPTIONS 177 | if unknown_keys.any? 178 | raise ArgumentError, "Unknown options: #{unknown_keys.inspect}" 179 | end 180 | 181 | @type = options.delete(:type) || :sync 182 | end 183 | 184 | def convert_to_messages_objects(messages) 185 | messages.map do |m| 186 | Message.new(:value => m.value, :topic => m.topic, :key => m.key) 187 | end 188 | end 189 | 190 | def build_producer(options) 191 | case @type 192 | when :sync 193 | SyncProducer.new(@client_id, @brokers, options) 194 | when :async 195 | raise "Not implemented yet" 196 | end 197 | end 198 | end 199 | end 200 | -------------------------------------------------------------------------------- /lib/poseidon/producer_compression_config.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | class ProducerCompressionConfig 4 | COMPRESSION_CODEC_MAP = { 5 | :gzip => Compression::GzipCodec, 6 | :snappy => Compression::SnappyCodec, 7 | :none => nil 8 | } 9 | 10 | def initialize(compression_codec, compressed_topics) 11 | if compression_codec 12 | unless COMPRESSION_CODEC_MAP.has_key?(compression_codec) 13 | raise ArgumentError, "Unknown compression codec: '#{compression_codec}' (accepted: #{COMPRESSION_CODEC_MAP.keys.inspect})" 14 | end 15 | @compression_codec = COMPRESSION_CODEC_MAP[compression_codec] 16 | else 17 | @compression_codec = nil 18 | end 19 | 20 | if compressed_topics 21 | @compressed_topics = Set.new(compressed_topics) 22 | else 23 | @compressed_topics = nil 24 | end 25 | end 26 | 27 | def compression_codec_for_topic(topic) 28 | return false if @compression_codec.nil? 29 | 30 | if @compressed_topics.nil? 
|| (@compressed_topics && @compressed_topics.include?(topic)) 31 | @compression_codec 32 | else 33 | false 34 | end 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/poseidon/protocol.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | module Protocol 4 | require "poseidon/protocol/protocol_struct" 5 | require "poseidon/protocol/request_buffer" 6 | require "poseidon/protocol/response_buffer" 7 | 8 | API_KEYS = { 9 | :produce => 0, 10 | :fetch => 1, 11 | :offset => 2, 12 | :metadata => 3 13 | } 14 | 15 | # Request/Response Common Structures 16 | RequestCommon = ProtocolStruct.new(:api_key => :int16, 17 | :api_version => :int16, 18 | :correlation_id => :int32, 19 | :client_id => :string) 20 | ResponseCommon = ProtocolStruct.new(:correlation_id => :int32) 21 | 22 | # MessageSet Common Structure 23 | MessageStruct = ProtocolStruct.new(:magic_type => :int8, 24 | :attributes => :int8, 25 | :key => :bytes, 26 | :value => :bytes).prepend_size.prepend_crc32.truncatable 27 | MessageWithOffsetStruct = ProtocolStruct.new(:offset => :int64, 28 | :message => MessageStruct) 29 | 30 | # When part of produce requests of fetch responses a MessageSet 31 | # has a prepended size. When a MessageSet is compressed and 32 | # nested in a Message size is not prepended. 33 | MessageSetStruct = ProtocolStruct.new(:messages => [Message]). 34 | size_bound_array(:messages) 35 | MessageSetStructWithSize = MessageSetStruct.dup.prepend_size 36 | 37 | # Produce Request 38 | MessagesForPartition = ProtocolStruct.new(:partition => :int32, 39 | :message_set => MessageSet) 40 | MessagesForTopic = ProtocolStruct.new(:topic => :string, 41 | :messages_for_partitions => 42 | [MessagesForPartition]) 43 | ProduceRequest = ProtocolStruct.new(:common => RequestCommon, 44 | :required_acks => :int16, 45 | :timeout => :int32, 46 | :messages_for_topics => [MessagesForTopic]) 47 | 48 | # Produce Response 49 | ProducePartitionResponse = ProtocolStruct.new(:partition => :int32, 50 | :error => :int16, 51 | :offset => :int64) 52 | ProduceTopicResponse = ProtocolStruct.new(:topic => :string, 53 | :partitions => [ProducePartitionResponse]) 54 | ProduceResponse = ProtocolStruct.new(:common => ResponseCommon, 55 | :topic_response => [ProduceTopicResponse]) 56 | 57 | # Fetch Request 58 | PartitionFetch = ProtocolStruct.new(:partition => :int32, 59 | :fetch_offset => :int64, 60 | :max_bytes => :int32) 61 | TopicFetch = ProtocolStruct.new(:topic => :string, 62 | :partition_fetches => [PartitionFetch]) 63 | FetchRequest = ProtocolStruct.new(:common => RequestCommon, 64 | :replica_id => :int32, 65 | :max_wait_time => :int32, 66 | :min_bytes => :int32, 67 | :topic_fetches => [TopicFetch]) 68 | 69 | # Fetch Response 70 | PartitionFetchResponse = ProtocolStruct.new(:partition => :int32, 71 | :error => :int16, 72 | :highwater_mark_offset => :int64, 73 | :message_set => MessageSet) 74 | TopicFetchResponse = ProtocolStruct.new(:topic => :string, 75 | :partition_fetch_responses => [PartitionFetchResponse]) 76 | FetchResponse = ProtocolStruct.new( 77 | :common => ResponseCommon, 78 | :topic_fetch_responses => [TopicFetchResponse]) 79 | 80 | # Offset Request 81 | PartitionOffsetRequest = ProtocolStruct.new(:partition => :int32, 82 | :time => :int64, 83 | :max_number_of_offsets => :int32) 84 | TopicOffsetRequest = ProtocolStruct.new( 85 | :topic => :string, 86 | :partition_offset_requests => [PartitionOffsetRequest]) 87 
| OffsetRequest = ProtocolStruct.new(:common => RequestCommon, 88 | :replica_id => :int32, 89 | :topic_offset_requests => [TopicOffsetRequest]) 90 | 91 | # Offset Response 92 | Offset = ProtocolStruct.new(:offset => :int64) 93 | PartitionOffset = ProtocolStruct.new(:partition => :int32, 94 | :error => :int16, 95 | :offsets => [Offset]) 96 | TopicOffsetResponse = ProtocolStruct.new(:topic => :string, 97 | :partition_offsets => [PartitionOffset]) 98 | OffsetResponse = ProtocolStruct.new( 99 | :common => ResponseCommon, 100 | :topic_offset_responses => [TopicOffsetResponse]) 101 | 102 | # Metadata Request 103 | MetadataRequest = ProtocolStruct.new( :common => RequestCommon, 104 | :topic_names => [:string]) 105 | 106 | # Metadata Response 107 | Broker = ProtocolStruct.new(:id => :int32, 108 | :host => :string, 109 | :port => :int32) 110 | PartitionMetadata = ProtocolStruct.new(:error => :int16, 111 | :id => :int32, 112 | :leader => :int32, 113 | :replicas => [:int32], 114 | :isr => [:int32]) 115 | TopicMetadataStruct = ProtocolStruct.new(:error => :int16, 116 | :name => :string, 117 | :partitions => [PartitionMetadata]) 118 | MetadataResponse = ProtocolStruct.new(:common => ResponseCommon, 119 | :brokers => [Broker], 120 | :topics => [TopicMetadata]) 121 | end 122 | end 123 | -------------------------------------------------------------------------------- /lib/poseidon/protocol/protocol_struct.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | module Protocol 3 | class ProtocolStruct < Struct 4 | class EncodingError < StandardError;end 5 | class DecodingError < StandardError;end 6 | 7 | def self.new(hash) 8 | klass = super(*hash.keys) 9 | klass.type_map = hash 10 | klass 11 | end 12 | 13 | def self.type_map=(type_map) 14 | @type_map = type_map 15 | end 16 | 17 | def self.type_map 18 | @type_map 19 | end 20 | 21 | def self.prepend_size 22 | @prepend_size = true 23 | self 24 | end 25 | 26 | def self.prepend_crc32 27 | @prepend_crc32 = true 28 | self 29 | end 30 | 31 | def self.truncatable 32 | @truncatable = true 33 | self 34 | end 35 | 36 | def self.prepend_size? 37 | @prepend_size 38 | end 39 | 40 | def self.prepend_crc32? 41 | @prepend_crc32 42 | end 43 | 44 | def self.truncatable? 
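# A struct marked truncatable may arrive cut off mid-struct: the broker
# enforces the fetch's max_bytes limit and can return a partial final
# message, which #read detects and records via @truncated instead of
# raising (see spec/integration/simple/truncated_messages_spec.rb).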
45 | @truncatable 46 | end 47 | 48 | def self.size_bound_array(member) 49 | @size_bound_members ||= [] 50 | @size_bound_members << member 51 | self 52 | end 53 | 54 | def self.size_bound_array?(member) 55 | @size_bound_members ||= [] 56 | @size_bound_members.include?(member) 57 | end 58 | 59 | # Recursively find all objects with errors 60 | def objects_with_errors 61 | children = [] 62 | each_pair do |member, value| 63 | case value 64 | when Array 65 | value.each do |v| 66 | if v.respond_to?(:objects_with_errors) 67 | children << v 68 | end 69 | end 70 | else 71 | if value.respond_to?(:objects_with_errors) 72 | children << value 73 | end 74 | end 75 | end 76 | 77 | children_with_errors = children.map(&:objects_with_errors).flatten 78 | if members.include?(:error) && self[:error] != Errors::NO_ERROR_CODE 79 | children_with_errors + [self] 80 | else 81 | children_with_errors 82 | end 83 | end 84 | 85 | def raise_error 86 | raise error_class if error_class 87 | end 88 | 89 | def error_class 90 | Errors::ERROR_CODES[self[:error]] 91 | end 92 | 93 | def raise_error_if_one_exists 94 | objects_with_errors.each do |object| 95 | object.raise_error 96 | end 97 | end 98 | 99 | def write(buffer) 100 | maybe_prepend_size(buffer) do 101 | maybe_prepend_crc32(buffer) do 102 | each_pair do |member, value| 103 | begin 104 | write_member(buffer, member, value) 105 | rescue 106 | raise EncodingError, "Error writting #{member} in #{self.class} (#{$!.class}: #{$!.message})" 107 | end 108 | end 109 | end 110 | end 111 | end 112 | 113 | def maybe_prepend_size(buffer) 114 | if self.class.prepend_size? 115 | buffer.prepend_size do 116 | yield 117 | end 118 | else 119 | yield 120 | end 121 | end 122 | 123 | def maybe_prepend_crc32(buffer) 124 | if self.class.prepend_crc32? 125 | buffer.prepend_crc32 do 126 | yield 127 | end 128 | else 129 | yield 130 | end 131 | end 132 | 133 | def write_member(buffer, member, value) 134 | case type = type_map[member] 135 | when Array 136 | buffer.int32(value.size) unless self.class.size_bound_array?(member) 137 | value.each { |v| write_type(buffer, type.first, v) } 138 | else 139 | write_type(buffer, type, value) 140 | end 141 | end 142 | 143 | def write_type(buffer, type, value) 144 | case type 145 | when Symbol 146 | buffer.send(type, value) 147 | else 148 | value.write(buffer) 149 | end 150 | end 151 | 152 | # Populate struct from buffer based on members and their type definition. 153 | def self.read(buffer) 154 | s = new 155 | s.read(buffer) 156 | s 157 | end 158 | 159 | def read(buffer) 160 | if self.class.prepend_size? 161 | if !have_header?(buffer) 162 | @truncated = true 163 | return 164 | end 165 | 166 | @size = buffer.int32 167 | 168 | if self.class.prepend_crc32? 169 | @crc32 = buffer.int32 170 | @computed_crc32 = [Zlib::crc32(buffer.peek(@size-4))].pack("l>").unpack("l>").first 171 | if @crc32 != @computed_crc32 172 | @checksum_failed = true 173 | end 174 | expected_bytes_remaining = @size - 4 175 | else 176 | expected_bytes_remaining = @size 177 | end 178 | 179 | if self.class.truncatable? 
&& expected_bytes_remaining > buffer.bytes_remaining 180 | @truncated = true 181 | return 182 | end 183 | end 184 | 185 | members.each do |member| 186 | begin 187 | self[member] = read_member(buffer, member) 188 | rescue DecodingError 189 | # Just reraise instead of producing a crazy nested exception 190 | raise 191 | rescue 192 | raise DecodingError, "Error while reading #{member} in #{self.class} (#{$!.class}: #{$!.message}))" 193 | end 194 | end 195 | end 196 | 197 | def have_header?(buffer) 198 | if self.class.truncatable? 199 | if self.class.prepend_crc32? 200 | header_bytes = 8 201 | else 202 | header_bytes = 4 203 | end 204 | 205 | return buffer.bytes_remaining >= header_bytes 206 | else 207 | return true 208 | end 209 | end 210 | 211 | def read_member(buffer, member) 212 | case type = type_map[member] 213 | when Array 214 | if self.class.size_bound_array?(member) 215 | if @size 216 | array_buffer = ResponseBuffer.new(buffer.read(@size)) 217 | else 218 | array_buffer = buffer 219 | end 220 | 221 | array = [] 222 | while !array_buffer.eof? && (v = read_type(array_buffer, type.first)) 223 | array << v 224 | end 225 | array 226 | else 227 | buffer.int32.times.map { read_type(buffer, type.first) } 228 | end 229 | else 230 | read_type(buffer, type) 231 | end 232 | end 233 | 234 | def read_type(buffer, type) 235 | case type 236 | when Symbol 237 | buffer.send(type) 238 | else 239 | type.read(buffer) 240 | end 241 | end 242 | 243 | def type_map 244 | self.class.type_map 245 | end 246 | 247 | def checksum_failed? 248 | @checksum_failed 249 | end 250 | 251 | def truncated? 252 | @truncated 253 | end 254 | end 255 | end 256 | end 257 | -------------------------------------------------------------------------------- /lib/poseidon/protocol/request_buffer.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | module Protocol 3 | # RequestBuffer allows you to build a Binary string for API requests 4 | # 5 | # API parallels the primitive types described on the wiki, with some 6 | # sugar for prepending message sizes and checksums. 7 | # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes) 8 | class RequestBuffer 9 | def initialize 10 | @s = ''.encode(Encoding::BINARY) 11 | end 12 | 13 | def append(string) 14 | string = string.dup.force_encoding(Encoding::BINARY) 15 | @s << string 16 | nil 17 | end 18 | 19 | def int8(int8) 20 | append([int8].pack("C")) 21 | end 22 | 23 | def int16(int16) 24 | append([int16].pack("s>")) 25 | end 26 | 27 | def int32(int32) 28 | append([int32].pack("l>")) 29 | end 30 | 31 | def int64(int64) 32 | append([int64].pack("q>")) 33 | end 34 | 35 | # Add a string 36 | # 37 | # @param [String] string 38 | def string(string) 39 | if string.nil? 40 | int16(-1) 41 | else 42 | int16(string.bytesize) 43 | append(string) 44 | end 45 | end 46 | 47 | def bytes(string) 48 | if string.nil? 
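        # Per the Kafka wire protocol, a nil byte-sequence is encoded as an
        # int32 length of -1 (mirroring the int16 -1 used for nil in #string).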
49 | int32(-1) 50 | else 51 | int32(string.bytesize) 52 | append(string) 53 | end 54 | end 55 | 56 | def prepend_crc32 57 | checksum_pos = @s.bytesize 58 | @s += " " 59 | yield 60 | @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N") 61 | nil 62 | end 63 | 64 | def prepend_size 65 | size_pos = @s.bytesize 66 | @s += " " 67 | yield 68 | @s[size_pos] = [(@s.bytesize-1) - size_pos].pack("N") 69 | nil 70 | end 71 | 72 | def to_s 73 | @s 74 | end 75 | end 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /lib/poseidon/protocol/response_buffer.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | module Protocol 3 | class ResponseBuffer 4 | def initialize(response) 5 | @s = response 6 | @pos = 0 7 | end 8 | 9 | def int8 10 | byte = @s.byteslice(@pos, 1).unpack("C").first 11 | @pos += 1 12 | byte 13 | end 14 | 15 | def int16 16 | short = @s.byteslice(@pos, 2).unpack("s>").first 17 | @pos += 2 18 | short 19 | end 20 | 21 | def int32 22 | int = @s.byteslice(@pos, 4).unpack("l>").first 23 | @pos += 4 24 | int 25 | end 26 | 27 | def int64 28 | long = @s.byteslice(@pos, 8).unpack("q>").first 29 | @pos += 8 30 | long 31 | end 32 | 33 | def string 34 | len = int16 35 | string = @s.byteslice(@pos, len) 36 | @pos += len 37 | string 38 | end 39 | 40 | def read(bytes) 41 | data = @s.byteslice(@pos, bytes) 42 | @pos += bytes 43 | data 44 | end 45 | 46 | def peek(bytes) 47 | @s.byteslice(@pos, bytes) 48 | end 49 | 50 | def bytes 51 | n = int32 52 | if n == -1 53 | return nil 54 | else 55 | read(n) 56 | end 57 | end 58 | 59 | def bytes_remaining 60 | @s.bytesize - @pos 61 | end 62 | 63 | def eof? 64 | @pos == @s.bytesize 65 | end 66 | 67 | def to_s 68 | @s 69 | end 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/poseidon/sync_producer.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # Used by +Producer+ for sending messages to the kafka cluster. 3 | # 4 | # You should not use this interface directly 5 | # 6 | # Fetches metadata at appropriate times. 7 | # Builds MessagesToSend 8 | # Handle MessageBatchToSend lifecyle 9 | # 10 | # Who is responsible for fetching metadata from broker seed list? 11 | # Do we want to be fetching from real live brokers eventually? 12 | # 13 | # @api private 14 | class SyncProducer 15 | OPTION_DEFAULTS = { 16 | :compression_codec => nil, 17 | :compressed_topics => nil, 18 | :metadata_refresh_interval_ms => 600_000, 19 | :partitioner => nil, 20 | :max_send_retries => 3, 21 | :retry_backoff_ms => 100, 22 | :required_acks => 0, 23 | :ack_timeout_ms => 1500, 24 | :socket_timeout_ms => 10_000 25 | } 26 | 27 | attr_reader :client_id, :retry_backoff_ms, :max_send_retries, 28 | :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms, :socket_timeout_ms 29 | def initialize(client_id, seed_brokers, options = {}) 30 | @client_id = client_id 31 | 32 | handle_options(options.dup) 33 | 34 | @cluster_metadata = ClusterMetadata.new 35 | @message_conductor = MessageConductor.new(@cluster_metadata, @partitioner) 36 | @broker_pool = BrokerPool.new(client_id, seed_brokers, socket_timeout_ms) 37 | end 38 | 39 | def send_messages(messages) 40 | return if messages.empty? 41 | 42 | messages_to_send = MessagesToSend.new(messages, @cluster_metadata) 43 | 44 | if refresh_interval_elapsed? 
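        # Metadata is considered stale once metadata_refresh_interval_ms
        # (600,000 ms by default, per OPTION_DEFAULTS) has elapsed since the
        # last refresh.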
45 | refresh_metadata(messages_to_send.topic_set) 46 | end 47 | 48 | ensure_metadata_available_for_topics(messages_to_send) 49 | 50 | (@max_send_retries+1).times do 51 | messages_to_send.messages_for_brokers(@message_conductor).each do |messages_for_broker| 52 | if sent = send_to_broker(messages_for_broker) 53 | messages_to_send.successfully_sent(sent) 54 | end 55 | end 56 | 57 | if !messages_to_send.pending_messages? || @max_send_retries == 0 58 | break 59 | else 60 | Kernel.sleep retry_backoff_ms / 1000.0 61 | refresh_metadata(messages_to_send.topic_set) 62 | end 63 | end 64 | 65 | if messages_to_send.pending_messages? 66 | raise "Failed to send all messages: #{messages_to_send.messages} remaining" 67 | else 68 | true 69 | end 70 | end 71 | 72 | def close 73 | @broker_pool.close 74 | end 75 | 76 | alias_method :shutdown, :close 77 | 78 | private 79 | 80 | def ensure_metadata_available_for_topics(messages_to_send) 81 | return if !messages_to_send.needs_metadata? 82 | 83 | Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt 1)" } 84 | refresh_metadata(messages_to_send.topic_set) 85 | return if !messages_to_send.needs_metadata? 86 | 87 | 2.times do |n| 88 | sleep 5 89 | 90 | Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt #{n+2})" } 91 | refresh_metadata(messages_to_send.topic_set) 92 | return if !messages_to_send.needs_metadata? 93 | end 94 | raise Errors::UnableToFetchMetadata 95 | end 96 | 97 | def handle_options(options) 98 | @ack_timeout_ms = handle_option(options, :ack_timeout_ms) 99 | @socket_timeout_ms = handle_option(options, :socket_timeout_ms) 100 | @retry_backoff_ms = handle_option(options, :retry_backoff_ms) 101 | 102 | @metadata_refresh_interval_ms = 103 | handle_option(options, :metadata_refresh_interval_ms) 104 | 105 | @required_acks = handle_option(options, :required_acks) 106 | @max_send_retries = handle_option(options, :max_send_retries) 107 | 108 | @compression_config = ProducerCompressionConfig.new( 109 | handle_option(options, :compression_codec), 110 | handle_option(options, :compressed_topics)) 111 | 112 | @partitioner = handle_option(options, :partitioner) 113 | 114 | raise ArgumentError, "Unknown options: #{options.keys.inspect}" if options.keys.any? 115 | end 116 | 117 | def handle_option(options, sym) 118 | options.delete(sym) || OPTION_DEFAULTS[sym] 119 | end 120 | 121 | def refresh_interval_elapsed? 122 | @cluster_metadata.last_refreshed_at.nil? 
|| 123 | (Time.now - @cluster_metadata.last_refreshed_at) * 1000 > metadata_refresh_interval_ms 124 | end 125 | 126 | def refresh_metadata(topics) 127 | topics_to_refresh = topics.dup 128 | 129 | @cluster_metadata.topics.each do |topic| 130 | topics_to_refresh.add(topic) 131 | end 132 | 133 | @cluster_metadata.update(@broker_pool.fetch_metadata(topics_to_refresh)) 134 | @broker_pool.update_known_brokers(@cluster_metadata.brokers) 135 | end 136 | 137 | def send_to_broker(messages_for_broker) 138 | return false if messages_for_broker.broker_id == -1 139 | to_send = messages_for_broker.build_protocol_objects(@compression_config) 140 | 141 | Poseidon.logger.debug { "Sending messages to broker #{messages_for_broker.broker_id}" } 142 | response = @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce, 143 | required_acks, ack_timeout_ms, 144 | to_send) 145 | if required_acks == 0 146 | messages_for_broker.messages 147 | else 148 | messages_for_broker.successfully_sent(response) 149 | end 150 | rescue Connection::ConnectionFailedError 151 | false 152 | end 153 | end 154 | end 155 | -------------------------------------------------------------------------------- /lib/poseidon/topic_metadata.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # @api private 3 | class TopicMetadata 4 | # Build a new TopicMetadata object from its binary representation 5 | # 6 | # @param [ResponseBuffer] buffer 7 | # @return [TopicMetadata] 8 | # 9 | def self.read(buffer) 10 | tm = TopicMetadata.new 11 | tm.struct = Protocol::TopicMetadataStruct.read(buffer) 12 | tm 13 | end 14 | 15 | attr_accessor :struct 16 | def initialize(struct=nil) 17 | self.struct = struct 18 | end 19 | 20 | # Write a binary representation of the TopicMetadata to buffer 21 | # 22 | # @param [RequestBuffer] buffer 23 | # @return [nil] 24 | def write(buffer) 25 | struct.write(buffer) 26 | nil 27 | end 28 | 29 | def name 30 | struct.name 31 | end 32 | 33 | def ==(o) 34 | eql?(o) 35 | end 36 | 37 | def exists? 38 | struct.error == 0 39 | end 40 | 41 | def eql?(o) 42 | struct.eql?(o.struct) 43 | end 44 | 45 | def objects_with_errors 46 | struct.objects_with_errors 47 | end 48 | 49 | def leader_available? 50 | struct.error_class != Errors::LeaderNotAvailable 51 | end 52 | 53 | def partition_count 54 | @partition_count ||= struct.partitions.count 55 | end 56 | 57 | def available_partitions 58 | @available_partitions ||= struct.partitions.select do |partition| 59 | partition.error == 0 && partition.leader != -1 60 | end 61 | end 62 | 63 | def available_partition_count 64 | available_partitions.count 65 | end 66 | 67 | def partition_leader(partition_id) 68 | partition = partitions_by_id[partition_id] 69 | if partition 70 | partition.leader 71 | else 72 | nil 73 | end 74 | end 75 | 76 | def to_s 77 | struct.partitions.map { |p| p.inspect }.join("\n") 78 | end 79 | 80 | private 81 | def partitions_by_id 82 | @partitions_by_id ||= Hash[partitions.map { |p| [p.id, p] }] 83 | end 84 | 85 | def partitions 86 | struct.partitions 87 | end 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /lib/poseidon/version.rb: -------------------------------------------------------------------------------- 1 | module Poseidon 2 | # Unstable! API May Change! 
3 | VERSION = "0.0.5"
4 | end
5 | 
-------------------------------------------------------------------------------- /log/.gitkeep: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpot/poseidon/bfbf084ea21af2a31350ad5f58d8ef5dc30b948e/log/.gitkeep
-------------------------------------------------------------------------------- /poseidon.gemspec: --------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | lib = File.expand_path('../lib', __FILE__)
3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4 | require 'poseidon/version'
5 | 
6 | Gem::Specification.new do |gem|
7 | gem.name = "poseidon"
8 | gem.version = Poseidon::VERSION
9 | gem.authors = ["Bob Potter"]
10 | gem.email = ["bobby.potter@gmail.com"]
11 | gem.description = %q{A Kafka (http://kafka.apache.org/) producer and consumer}
12 | gem.summary = %q{Poseidon is a producer and consumer implementation for Kafka >= 0.8}
13 | gem.homepage = "https://github.com/bpot/poseidon"
14 | gem.licenses = ["MIT"]
15 | gem.required_ruby_version = '>= 1.9.3'
16 | 
17 | gem.files = `git ls-files`.split($/)
18 | gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
19 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
20 | gem.require_paths = ["lib"]
21 | 
22 | gem.add_development_dependency(%q<rspec>, '>= 3')
23 | gem.add_development_dependency(%q)
24 | gem.add_development_dependency(%q)
25 | gem.add_development_dependency(%q)
26 | end
27 | 
-------------------------------------------------------------------------------- /spec/integration/multiple_brokers/consumer_spec.rb: --------------------------------------------------------------------------------
1 | require 'integration/multiple_brokers/spec_helper'
2 | 
3 | RSpec.describe "consuming with multiple brokers", :type => :request do
4 | before(:each) do
5 | # autocreate the topic by asking for information about it
6 | c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
7 | md = c.topic_metadata(["test"])
8 | sleep 1
9 | end
10 | 
11 | it "finds the lead broker for each partition" do
12 | brokers = Set.new
13 | 0.upto(2) do |partition|
14 | pc = PartitionConsumer.consumer_for_partition("test_client",
15 | ["localhost:9092"],
16 | "test", partition,
17 | :earliest_offset)
18 | 
19 | brokers.add("#{pc.host}:#{pc.port}")
20 | end
21 | expect(brokers.size).to eq(3)
22 | end
23 | 
24 | it "consumes from all partitions" do
25 | @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
26 | :required_acks => 1)
27 | 
28 | msgs = 24.times.map { |n| "hello_#{n}" }
29 | msgs.each do |msg|
30 | @p.send_messages([MessageToSend.new("test", msg)])
31 | end
32 | 
33 | fetched_messages = []
34 | 0.upto(2) do |partition|
35 | pc = PartitionConsumer.consumer_for_partition("test_client",
36 | ["localhost:9092"],
37 | "test", partition,
38 | :earliest_offset)
39 | fetched_messages.push(*pc.fetch)
40 | end
41 | expect(fetched_messages.map(&:value).sort).to eq(msgs.sort)
42 | end
43 | end
44 | 
-------------------------------------------------------------------------------- /spec/integration/multiple_brokers/metadata_failures_spec.rb: --------------------------------------------------------------------------------
1 | require 'integration/multiple_brokers/spec_helper'
2 | 
3 | RSpec.describe "handling failures", :type => :request do
4 | describe "metadata failures" do
5 | before(:each) do
6 | @messages_to_send = [
7 | MessageToSend.new("topic1",
"hello"), 8 | MessageToSend.new("topic2", "hello") 9 | ] 10 | end 11 | 12 | describe "unable to connect to brokers" do 13 | before(:each) do 14 | @p = Producer.new(["localhost:1092","localhost:1093","localhost:1094"], "producer") 15 | end 16 | 17 | it "triggers callback failures for both topics" do 18 | expect { 19 | @p.send_messages(@messages_to_send) 20 | }.to raise_error(Poseidon::Errors::UnableToFetchMetadata) 21 | end 22 | end 23 | end 24 | 25 | describe "unknown topic" do 26 | it "receives error callback" do 27 | pending "need a way to turn off auto-topic creation just for this test" 28 | @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "producer") 29 | 30 | expect { 31 | @p.send_messages([MessageToSend.new("imnothere", "hello")]) 32 | }.to raise_error(Poseidon::Errors::UnableToFetchMetadata) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /spec/integration/multiple_brokers/rebalance_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/multiple_brokers/spec_helper' 2 | 3 | RSpec.describe "producer handles rebalancing", :type => :request do 4 | before(:each) do 5 | # autocreate the topic by asking for information about it 6 | @c = Connection.new("localhost", 9093, "metadata_fetcher", 10_000) 7 | @c.topic_metadata(["failure_spec"]) 8 | sleep 1 9 | end 10 | 11 | def current_leadership_mapping(c) 12 | metadata = c.topic_metadata(["failure_spec"]) 13 | topic_metadata = metadata.topics.find { |t| t.name == "failure_spec" } 14 | (0..2).map { |p| topic_metadata.partition_leader(p) } 15 | end 16 | 17 | it "produces a bunch of messages and consumes all without error" do 18 | @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test", 19 | :required_acks => -1) 20 | 21 | 1.upto(25) do |n| 22 | @p.send_messages([MessageToSend.new("failure_spec", n.to_s)]) 23 | end 24 | 25 | # The goal here is to have the producer attempt to send messages 26 | # to a broker which is no longer the leader for the partition. 27 | # 28 | # We accomplish this by turning off a broker which causes leadership 29 | # to failover. Then we turn that broker back on and begin sending 30 | # messages. While sending messages, the kafka cluster should rebalance 31 | # the partitions causing leadership to switch back to the original 32 | # broker in the midst of messages being sent. 33 | # 34 | # We compare leadership before and after the message sending period 35 | # to make sure we were successful. 36 | $tc.stop_first_broker 37 | sleep 30 38 | SPEC_LOGGER.info "Pre start #{current_leadership_mapping(@c).inspect}" 39 | $tc.start_first_broker 40 | 41 | pre_send_leadership = current_leadership_mapping(@c) 42 | SPEC_LOGGER.info "Pre send #{pre_send_leadership.inspect}" 43 | 26.upto(50) do |n| 44 | sleep 0.5 45 | @p.send_messages([MessageToSend.new("failure_spec", n.to_s)]) 46 | end 47 | post_send_leadership = current_leadership_mapping(@c) 48 | SPEC_LOGGER.info "Post send #{post_send_leadership.inspect}" 49 | 50 | expect(pre_send_leadership).to_not eq(post_send_leadership) 51 | 52 | messages = [] 53 | 0.upto(2) do |partition| 54 | consumer = PartitionConsumer.consumer_for_partition("consumer_failure_spect", 55 | ["localhost:9092","localhost:9093","localhost:9094"], 56 | "failure_spec", 57 | partition, 58 | :earliest_offset) 59 | while (fetched = consumer.fetch).any? 
60 | messages.push(*fetched) 61 | end 62 | end 63 | 64 | expect(messages.size).to eq(50) 65 | expect(messages.map { |m| m.value.to_i }.sort).to eq((1..50).to_a) 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /spec/integration/multiple_brokers/round_robin_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/multiple_brokers/spec_helper' 2 | 3 | RSpec.describe "round robin sending", :type => :request do 4 | describe "with small message batches" do 5 | it "evenly distributes messages across brokers" do 6 | c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000) 7 | md = c.topic_metadata(["test"]) 8 | sleep 1 9 | md = c.topic_metadata(["test"]) 10 | 11 | test_topic = md.topics.first 12 | 13 | consumers = test_topic.send(:partitions).map do |partition| 14 | leader_id = partition.leader 15 | broker = md.brokers.find { |b| b.id == leader_id } 16 | PartitionConsumer.new("test_consumer_#{partition.id}", broker.host, 17 | broker.port, "test", partition.id, -1) 18 | end 19 | 20 | # Update offsets to current position before adding test messages 21 | consumers.each do |c| 22 | c.fetch 23 | end 24 | 25 | @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test", 26 | :required_acks => 1) 27 | 24.times do 28 | @p.send_messages([MessageToSend.new("test", "hello")]) 29 | end 30 | 31 | sleep 5 32 | 33 | consumers.each do |c| 34 | messages = c.fetch 35 | expect(messages.size).to eq(8) 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/integration/multiple_brokers/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'test_cluster' 4 | 5 | class ThreeBrokerCluster 6 | def initialize(properties = {}) 7 | @zookeeper = ZookeeperRunner.new 8 | @brokers = (9092..9094).map { |port| BrokerRunner.new(port - 9092, port, 9 | 3, 10 | 2, 11 | properties) } 12 | end 13 | 14 | def start 15 | @zookeeper.start 16 | @brokers.each(&:start) 17 | sleep 5 18 | end 19 | 20 | def stop 21 | SPEC_LOGGER.info "Stopping three broker cluster" 22 | SPEC_LOGGER.info "Stopping brokers" 23 | @brokers.each(&:stop) 24 | sleep 5 25 | 26 | SPEC_LOGGER.info "Stopping ZK" 27 | @zookeeper.stop 28 | sleep 5 29 | end 30 | 31 | def stop_first_broker 32 | SPEC_LOGGER.info "Stopping first broker" 33 | @brokers.first.stop 34 | sleep 5 35 | end 36 | 37 | def start_first_broker 38 | SPEC_LOGGER.info "Starting first broker" 39 | @brokers.first.start 40 | end 41 | end 42 | 43 | RSpec.configure do |config| 44 | config.before(:each) do 45 | JavaRunner.remove_tmp 46 | JavaRunner.set_kafka_path! 
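    # ThreeBrokerCluster (above) boots ZooKeeper plus three brokers on
    # ports 9092-9094; KAFKA_PATH must point at a Kafka source checkout.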
47 | $tc = ThreeBrokerCluster.new 48 | $tc.start 49 | SPEC_LOGGER.info "Waiting on cluster" 50 | sleep 10 # wait for cluster to come up 51 | end 52 | 53 | config.after(:each) do 54 | $tc.stop if $tc 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /spec/integration/simple/compression_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/simple/spec_helper' 2 | 3 | RSpec.describe "compression", :type => :request do 4 | it "roundtrips" do 5 | i = rand(1000) 6 | 7 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 8 | "test12", 0, -2) 9 | 10 | @producer = Producer.new(["localhost:9092"], 11 | "test_client", 12 | :type => :sync, 13 | :compression_codec => :gzip) 14 | messages = [MessageToSend.new("test12", "Hello World: #{i}")] 15 | 16 | expect(@producer.send_messages(messages)).to eq(true) 17 | sleep 1 18 | messages = @consumer.fetch 19 | expect(messages.last.value).to eq("Hello World: #{i}") 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/integration/simple/connection_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/simple/spec_helper' 2 | 3 | include Protocol 4 | RSpec.describe Connection, :type => :request do 5 | before(:each) do 6 | @connection = Connection.new("localhost", 9092, "test", 10_000) 7 | end 8 | 9 | it 'sends and parses topic metadata requests' do 10 | @connection.topic_metadata(["test2"]) 11 | end 12 | 13 | it 'sends and parsers produce requests' do 14 | message = MessageStruct.new(0, 0, nil, "hello") 15 | message_with_offset = MessageWithOffsetStruct.new(0, message) 16 | message_set = MessageSetStruct.new([message_with_offset]) 17 | messages_for_partitions = [MessagesForPartition.new(0,message_set)] 18 | messages_for_topics = [MessagesForTopic.new("test2",messages_for_partitions)] 19 | @connection.produce(1, 10_000, messages_for_topics) 20 | end 21 | 22 | it 'sends and parsers fetch requests' do 23 | partition_fetches = [PartitionFetch.new(0,0,1024*1024)] 24 | topic_fetches = [TopicFetch.new("test2", partition_fetches)] 25 | @connection.fetch(1000, 0, topic_fetches) 26 | end 27 | 28 | it 'sends and parsers offset requests' do 29 | partition_offset_requests = [PartitionOffsetRequest.new(0,-1,1000)] 30 | offset_topic_requests = [TopicOffsetRequest.new("test2", partition_offset_requests)] 31 | @connection.offset(offset_topic_requests) 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /spec/integration/simple/multiple_brokers_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/simple/spec_helper' 2 | 3 | RSpec.describe "three brokers in cluster", :type => :request do 4 | describe "sending batches of 1 message" do 5 | it "sends messages to all brokers" do 6 | end 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /spec/integration/simple/simple_producer_and_consumer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'integration/simple/spec_helper' 2 | 3 | RSpec.describe "simple producer and consumer", :type => :request do 4 | 5 | describe "writing and consuming one topic" do 6 | it "fetches produced messages" do 7 | @producer = Producer.new(["localhost:9092"], 8 | "test_client", 9 | :type => :sync) 10 | 
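      # Note: the consumer below is created with offset -2. Negative numeric
      # offsets are resolved relative to the partition's latest offset, while
      # the symbols :earliest_offset/:latest_offset map to protocol offsets
      # -2/-1 (see PartitionConsumer#resolve_offset_if_necessary).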
11 | 12 | messages = [MessageToSend.new("topic_simple_producer_and_consumer", "Hello World")] 13 | expect(@producer.send_messages(messages)).to eq(true) 14 | 15 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 16 | "topic_simple_producer_and_consumer", 0, -2) 17 | messages = @consumer.fetch 18 | expect(messages.last.value).to eq("Hello World") 19 | 20 | @producer.close 21 | end 22 | 23 | it "fetches only messages since the last offset" do 24 | @producer = Producer.new(["localhost:9092"], 25 | "test_client", 26 | :type => :sync, 27 | :required_acks => 1) 28 | 29 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 30 | "topic_simple_producer_and_consumer", 0, -1) 31 | 32 | # Read up to the end of the current messages (if there are any) 33 | begin 34 | @consumer.fetch 35 | rescue Errors::UnknownTopicOrPartition 36 | end 37 | 38 | # First Batch 39 | messages = [MessageToSend.new("topic_simple_producer_and_consumer", "Hello World")] 40 | expect(@producer.send_messages(messages)).to eq(true) 41 | 42 | messages = @consumer.fetch 43 | expect(messages.last.value).to eq("Hello World") 44 | 45 | # Second Batch 46 | messages = [MessageToSend.new("topic_simple_producer_and_consumer", "Hello World Again")] 47 | expect(@producer.send_messages(messages)).to eq(true) 48 | 49 | messages = @consumer.fetch 50 | expect(messages.map(&:value)).to eq(["Hello World Again"]) 51 | 52 | # Empty Batch 53 | messages = @consumer.fetch 54 | expect(messages.empty?).to eq(true) 55 | end 56 | 57 | it "waits for messages" do 58 | # Create topic 59 | @c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000) 60 | @c.topic_metadata(["simple_wait_test"]) 61 | 62 | sleep 5 63 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 64 | "simple_wait_test", 0, :earliest_offset, 65 | :max_wait_ms => 2500) 66 | 67 | require 'benchmark' 68 | n = Benchmark.realtime do 69 | @consumer.fetch 70 | end 71 | expect(n).to be_within(0.25).of(2.5) 72 | end 73 | 74 | # Not sure what's going on here, will revisit. 
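    # (Ruby treats everything between the =begin/=end markers below as a
    # comment, so the oversized-message spec is disabled until the flakiness
    # noted above is understood.)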
75 | =begin 76 | it "fetches larger messages with a larger max bytes size" do 77 | @producer = Producer.new(["localhost:9092"], 78 | "test_client", 79 | :type => :sync, 80 | :required_acks => 1) 81 | 82 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 83 | "topic_simple_producer_and_consumer", 0, -2) 84 | 85 | messages = [] 86 | 2000.times do 87 | messages << MessageToSend.new("topic_simple_producer_and_consumer",'KcjNyFBtqfSbpwjjcGKckMKLUCWz83IVcp21C8FQzs8JJKKTTrc4OLxSjLpYc5z7fsncX59te2cBn0sWDRaYmRuZyttRMLMHvXrM5o3QReKPIYUKzVCFahC4cb3Ivcbb5ZuS98Ohnb7Io42Bz9FucXwwGkQyFhJwyn3nD3BYs5r8TZM8Q76CGR2kTH1rjnFeB7J3hrRKukztxCrDY3smrQE1bbVR80IF3yWlhzkdfv3cpfwnD0TKadtt21sFJANFmORAJ0HKs6Z2262hcBQyF7WcWypC2RoLWVgKVQxbouVUP7yV6YYOAQEevYrl9sOB0Yi6h1mS8fTBUmRTmWLqyl8KzwbnbQvmCvgnX26F5JEzIoXsVaoDT2ks5eep9RyE1zm5yPtbYVmd2Sz7t5ru0wj6YiAmbF7Xgiw2l4VpNOxG0Ec6rFxXRXs0bahyBd2YtxpGyZBeruIK1RAN4n0t97xVXgZG5CGoVhL1oRDxw2pTbwEO1cvwHiiYXpXSqaxF7G9kiiPsQt24Vu7chXrJT7Xqv4RIg1aOT5Os5JVlISaJCmx8ZLtbC3OjAdGtF1ZkDuUeQHHohqeKh0qBJjw7Rv1oSDwcM0MRazjF36jijpYg26Qml9lSEnGYIFLQWHVDWKqqhl2GIntjxDXn1IyI') 88 | end 89 | expect(@producer.send_messages(messages)).to eq(true) 90 | 91 | messages = @consumer.fetch 92 | expect(messages.length).to be > 2 93 | 94 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 95 | "topic_simple_producer_and_consumer", 0, -2) 96 | messages = @consumer.fetch(:max_bytes => 1400000) 97 | expect(messages.length).to be > 2 98 | end 99 | =end 100 | end 101 | 102 | describe "broker that becomes unavailable" do 103 | it "fails the fetch" do 104 | @producer = Producer.new(["localhost:9092"], 105 | "test_client", 106 | :type => :sync) 107 | 108 | 109 | messages = [MessageToSend.new("topic_simple_producer_and_consumer", "Hello World")] 110 | expect(@producer.send_messages(messages)).to eq(true) 111 | 112 | @consumer = PartitionConsumer.new("test_consumer", "localhost", 9092, 113 | "topic_simple_producer_and_consumer", 0, -2) 114 | 115 | $tc.broker.without_process do 116 | expect { @consumer.fetch }.to raise_error(Connection::ConnectionFailedError) 117 | end 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /spec/integration/simple/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'test_cluster' 4 | 5 | RSpec.configure do |config| 6 | config.before(:each) do 7 | JavaRunner.remove_tmp 8 | JavaRunner.set_kafka_path! 
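    # TestCluster (spec/test_cluster.rb) starts one ZooKeeper node and a
    # single broker on localhost:9092, the address these specs assume.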
9 | $tc = TestCluster.new
10 | $tc.start
11 | sleep 5
12 | end
13 | 
14 | config.after(:each) do
15 | $tc.stop
16 | end
17 | end
18 | 
-------------------------------------------------------------------------------- /spec/integration/simple/truncated_messages_spec.rb: --------------------------------------------------------------------------------
1 | require 'integration/simple/spec_helper'
2 | 
3 | RSpec.describe "truncated messages", :type => :request do
4 | before(:each) do
5 | @s1 = "a" * 335
6 | @s2 = "b" * 338
7 | 
8 | @producer = Producer.new(["localhost:9092"],
9 | "test_client",
10 | :type => :sync)
11 | 
12 | @producer.send_messages([Message.new(:topic => 'test_max_bytes', :value => @s1), Message.new(:topic => 'test_max_bytes', :value => @s2)])
13 | end
14 | 
15 | it "correctly handles max_bytes lengths smaller than a message" do
16 | 0.upto(360) do |n|
17 | consumer = PartitionConsumer.new("test_consumer", "localhost", 9092,
18 | "test_max_bytes", 0, :earliest_offset)
19 | expect(consumer.fetch(:max_bytes => n)).to eq([])
20 | end
21 | end
22 | 
23 | it "correctly handles max_bytes lengths that should return a single message" do
24 | 361.upto(724) do |n|
25 | consumer = PartitionConsumer.new("test_consumer", "localhost", 9092,
26 | "test_max_bytes", 0, :earliest_offset)
27 | 
28 | messages = consumer.fetch(:max_bytes => n)
29 | expect(messages.size).to eq(1)
30 | expect(messages.first.value).to eq(@s1)
31 | end
32 | end
33 | 
34 | it "correctly handles max_bytes lengths that should return two messages" do
35 | 725.upto(1000) do |n|
36 | consumer = PartitionConsumer.new("test_consumer", "localhost", 9092,
37 | "test_max_bytes", 0, :earliest_offset)
38 | 
39 | messages = consumer.fetch(:max_bytes => n)
40 | expect(messages.size).to eq(2)
41 | expect(messages.map(&:value)).to eq([@s1, @s2])
42 | end
43 | end
44 | end
45 | 
-------------------------------------------------------------------------------- /spec/integration/simple/unavailable_broker_spec.rb: --------------------------------------------------------------------------------
1 | require 'integration/simple/spec_helper'
2 | 
3 | RSpec.describe "unavailable broker scenarios:", :type => :request do
4 | context "producer with a dead broker in bootstrap list" do
5 | before(:each) do
6 | @p = Producer.new(["localhost:9091","localhost:9092"], "test")
7 | end
8 | 
9 | it "successfully sends a message" do
10 | expect(@p.send_messages([MessageToSend.new("test", "hello")])).to eq(true)
11 | 
12 | pc = PartitionConsumer.new("test_consumer", "localhost",
13 | 9092, "test", 0, -2)
14 | 
15 | messages = pc.fetch
16 | expect(messages.last.value).to eq("hello")
17 | end
18 | end
19 | 
20 | context "producer with required_acks set to 1" do
21 | before(:each) do
22 | @p = Producer.new(["localhost:9092"], "test", :required_acks => 1)
23 | end
24 | 
25 | context "broker stops running" do
26 | it "fails to send" do
27 | expect(@p.send_messages([MessageToSend.new("test", "hello")])).to eq(true)
28 | 
29 | $tc.broker.without_process do
30 | expect {
31 | @p.send_messages([MessageToSend.new("test", "hello")])
32 | }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
33 | end
34 | end
35 | end
36 | 
37 | context "broker stops running but starts again" do
38 | it "sends successfully once broker returns" do
39 | expect(@p.send_messages([MessageToSend.new("test", "hello")])).to eq(true)
40 | 
41 | $tc.broker.without_process do
42 | expect {
43 | @p.send_messages([MessageToSend.new("test", "hello")])
44 | }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
45 |
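          # without_process restarts the broker when it yields control back
          # (see JavaRunner#without_process in spec/test_cluster.rb).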
end 46 | 47 | expect(@p.send_messages([MessageToSend.new("test", "hello")])).to eq(true) 48 | end 49 | end 50 | end 51 | 52 | context "producer with required_acks set to 0" do 53 | before(:each) do 54 | @p = Producer.new(["localhost:9092"], "test", :required_acks => 0) 55 | end 56 | 57 | context "broker stops running" do 58 | it "fails to send" do 59 | expect(@p.send_messages([MessageToSend.new("test", "hello_a")])).to eq(true) 60 | 61 | $tc.broker.without_process do 62 | @p.send_messages([MessageToSend.new("test", "hello_b")]) 63 | expect { 64 | @p.send_messages([MessageToSend.new("test", "hello_b")]) 65 | }.to raise_error(Poseidon::Errors::UnableToFetchMetadata) 66 | end 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # This file was generated by the `rspec --init` command. Conventionally, all 2 | # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. 3 | # Require this file using `require "spec_helper"` to ensure that it is only 4 | # loaded once. 5 | # 6 | # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration 7 | RSpec.configure do |config| 8 | # Run specs in random order to surface order dependencies. If you find an 9 | # order dependency and want to debug it, you can fix the order by providing 10 | # the seed, which is printed after each run. 11 | # --seed 1234 12 | config.order = 'random' 13 | 14 | config.disable_monkey_patching! 15 | end 16 | 17 | POSEIDON_PATH = File.absolute_path(File.dirname(__FILE__) + "/../") 18 | 19 | require 'logger' 20 | SPEC_LOGGER = Logger.new(File.join(POSEIDON_PATH, "log", "spec.log")) 21 | 22 | require 'simplecov' 23 | SimpleCov.start 24 | 25 | require 'poseidon' 26 | include Poseidon 27 | 28 | require 'coveralls' 29 | Coveralls.wear! 30 | -------------------------------------------------------------------------------- /spec/test_cluster.rb: -------------------------------------------------------------------------------- 1 | class TestCluster 2 | attr_reader :broker, :zookeeper 3 | def initialize 4 | @zookeeper = ZookeeperRunner.new 5 | @broker = BrokerRunner.new(0, 9092) 6 | end 7 | 8 | def start 9 | @zookeeper.start 10 | @broker.start 11 | end 12 | 13 | def stop 14 | # The broker will end up in a state where it ignores SIGTERM 15 | # if zookeeper is stopped before the broker. 16 | @broker.stop 17 | sleep 5 18 | 19 | @zookeeper.stop 20 | end 21 | end 22 | 23 | class JavaRunner 24 | def self.remove_tmp 25 | FileUtils.rm_rf("#{POSEIDON_PATH}/tmp") 26 | end 27 | 28 | def self.set_kafka_path! 29 | if ENV['KAFKA_PATH'] 30 | JavaRunner.kafka_path = ENV['KAFKA_PATH'] 31 | else 32 | puts "******To run integration specs you must set KAFKA_PATH to kafka src directory. See README*****" 33 | exit 34 | end 35 | end 36 | 37 | def self.kafka_path=(kafka_path) 38 | @kafka_path = kafka_path 39 | end 40 | 41 | def self.kafka_path 42 | @kafka_path 43 | end 44 | 45 | def initialize(id, start_cmd, pid_cmd, kill_signal, properties = {}) 46 | @id = id 47 | @properties = properties 48 | @start_cmd = start_cmd 49 | @pid_cmd = pid_cmd 50 | @kill_signal = kill_signal 51 | @stopped = false 52 | end 53 | 54 | def start 55 | write_properties 56 | run 57 | end 58 | 59 | def stop 60 | if !@stopped 61 | killed_at = Time.now 62 | loop do 63 | if (pid = `#{@pid_cmd}`.to_i) == 0 64 | SPEC_LOGGER.info "Killed." 
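          # A pid of 0 means the lookup found no process: it is dead, so we
          # can stop polling.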
65 | break 66 | end 67 | 68 | if Time.now - killed_at > 30 69 | raise "Failed to kill process!" 70 | end 71 | 72 | SPEC_LOGGER.info "Sending #{@kill_signal} To #{pid}" 73 | SPEC_LOGGER.info "(#{@start_cmd})" 74 | `kill -#{@kill_signal} #{pid}` 75 | 76 | sleep 5 77 | end 78 | @stopped = true 79 | end 80 | end 81 | 82 | def without_process 83 | stop 84 | sleep 5 85 | begin 86 | yield 87 | ensure 88 | start 89 | sleep 5 90 | end 91 | end 92 | 93 | private 94 | 95 | def run 96 | FileUtils.mkdir_p(log_dir) 97 | `LOG_DIR=#{log_dir} #{@start_cmd} #{config_path}` 98 | @stopped = false 99 | end 100 | 101 | def write_properties 102 | FileUtils.mkdir_p(config_dir) 103 | File.open(config_path, "w+") do |f| 104 | @properties.each do |k,v| 105 | f.puts "#{k}=#{v}" 106 | end 107 | end 108 | end 109 | 110 | def log_dir 111 | "#{file_path}/log" 112 | end 113 | 114 | def config_path 115 | "#{config_dir}/#{@id}.properties" 116 | end 117 | 118 | def config_dir 119 | "#{file_path}/config" 120 | end 121 | 122 | def file_path 123 | POSEIDON_PATH + "/tmp/" 124 | end 125 | end 126 | 127 | class BrokerRunner 128 | DEFAULT_PROPERTIES = { 129 | "broker.id" => 0, 130 | "port" => 9092, 131 | "num.network.threads" => 2, 132 | "num.io.threads" => 2, 133 | "socket.send.buffer.bytes" => 1048576, 134 | "socket.receive.buffer.bytes" => 1048576, 135 | "socket.request.max.bytes" => 104857600, 136 | "log.dir" => "#{POSEIDON_PATH}/tmp/kafka-logs", 137 | "num.partitions" => 1, 138 | "log.flush.interval.messages" => 10000, 139 | "log.flush.interval.ms" => 1000, 140 | "log.retention.hours" => 168, 141 | "log.segment.bytes" => 536870912, 142 | #"log.cleanup.interval.mins" => 1, 143 | "zookeeper.connect" => "localhost:2181", 144 | "zookeeper.connection.timeout.ms" => 1000000, 145 | #"kafka.metrics.polling.interval.secs" => 5, 146 | #"kafka.metrics.reporters" => "kafka.metrics.KafkaCSVMetricsReporter", 147 | #"kafka.csv.metrics.dir" => "#{POSEIDON_PATH}/tmp/kafka_metrics", 148 | #"kafka.csv.metrics.reporter.enabled" => "false", 149 | "auto.create.topics.enable" => "true", 150 | 151 | # Trigger rebalances often to catch edge cases. 
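    # auto.leader.rebalance.enable makes the cluster re-elect preferred
    # leaders; the 5-second check interval keeps leadership moving while
    # the specs run.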
152 | "auto.leader.rebalance.enable" => "true", 153 | "leader.imbalance.check.interval.seconds" => 5 154 | } 155 | 156 | def initialize(id, port, partition_count = 1, replication_factor = 1, properties = {}) 157 | @id = id 158 | @port = port 159 | @jr = JavaRunner.new("broker_#{id}", 160 | "#{ENV['KAFKA_PATH']}/bin/kafka-run-class.sh -daemon -name broker_#{id} kafka.Kafka", 161 | "ps ax | grep -i 'kafka\.Kafka' | grep java | grep broker_#{id} | grep -v grep | awk '{print $1}'", 162 | "SIGTERM", 163 | DEFAULT_PROPERTIES.merge( 164 | "broker.id" => id, 165 | "port" => port, 166 | "log.dir" => "#{POSEIDON_PATH}/tmp/kafka-logs_#{id}", 167 | "default.replication.factor" => replication_factor, 168 | "num.partitions" => partition_count 169 | ).merge(properties)) 170 | end 171 | 172 | def start 173 | @jr.start 174 | end 175 | 176 | def stop 177 | @jr.stop 178 | end 179 | 180 | def without_process 181 | @jr.without_process { yield } 182 | end 183 | end 184 | 185 | 186 | class ZookeeperRunner 187 | def initialize 188 | @jr = JavaRunner.new("zookeeper", 189 | "#{ENV['KAFKA_PATH']}/bin/zookeeper-server-start.sh -daemon", 190 | "ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}'", 191 | "SIGKILL", 192 | :dataDir => "#{POSEIDON_PATH}/tmp/zookeeper", 193 | :clientPort => 2181, 194 | :maxClientCnxns => 0) 195 | end 196 | 197 | def pid 198 | @jr.pid 199 | end 200 | 201 | def start 202 | @jr.start 203 | end 204 | 205 | def stop 206 | @jr.stop 207 | end 208 | end 209 | -------------------------------------------------------------------------------- /spec/unit/broker_pool_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe BrokerPool do 4 | context "empty broker list" do 5 | it "raises UnknownBroker error when trying to produce data" do 6 | expect { BrokerPool.new("test_client", [], 10_000).execute_api_call(0, :produce) }.to raise_error(BrokerPool::UnknownBroker) 7 | end 8 | end 9 | 10 | describe "fetching metadata" do 11 | context "single broker" do 12 | it "initializes connection properly" do 13 | @broker_pool = BrokerPool.new("test_client", ["localhost:9092"], 2_000) 14 | @broker = double('Poseidon::Connection', :topic_metadata => nil) 15 | 16 | expected_args = ["localhost", "9092", "test_client", 2_000] 17 | connection = double('conn').as_null_object 18 | 19 | expect(Connection).to receive(:new).with(*expected_args).and_return(connection) 20 | 21 | @broker_pool.fetch_metadata(Set.new) 22 | end 23 | end 24 | 25 | context "no seed brokers" do 26 | it "raises Error" do 27 | @broker_pool = BrokerPool.new("test_client", [], 10_000) 28 | expect { @broker_pool.fetch_metadata(Set.new) }.to raise_error(Errors::UnableToFetchMetadata) 29 | end 30 | end 31 | 32 | context "2 seed brokers" do 33 | before(:each) do 34 | @broker_pool = BrokerPool.new("test_client", ["first:9092","second:9092"], 10_000) 35 | @broker_1 = double('Poseidon::Connection_1', :topic_metadata => nil, :close => nil) 36 | @broker_2 = double('Poseidon::Connection_2', :topic_metadata => double('topic_metadata').as_null_object, :close => nil) 37 | allow(Connection).to receive(:new).and_return(@broker_1, @broker_2) 38 | end 39 | 40 | context ", first doesn't have metadata" do 41 | it "asks the second" do 42 | expect(@broker_2).to receive(:topic_metadata) 43 | 44 | @broker_pool.fetch_metadata(Set.new) 45 | end 46 | end 47 | 48 | it "cleans up its connections" do 49 | expect(@broker_1).to receive(:close) 50 | expect(@broker_2).to receive(:close) 51 | 52 | 
@broker_pool.fetch_metadata(Set.new) 53 | end 54 | end 55 | end 56 | 57 | context "which knowns about two brokers" do 58 | before(:each) do 59 | @broker_pool = BrokerPool.new("test_client", [], 10_000) 60 | @broker_pool.update_known_brokers({0 => { :host => "localhost", :port => 9092 }, 1 => {:host => "localhost", :port => 9093 }}) 61 | end 62 | 63 | describe "when executing a call" do 64 | 65 | it "creates a connection for the correct broker" do 66 | c = double('conn').as_null_object 67 | expected_args = ["localhost", 9092, "test_client", 10_000] 68 | 69 | expect(Connection).to receive(:new).with(*expected_args).and_return(c) 70 | @broker_pool.execute_api_call(0, :produce) 71 | end 72 | 73 | it "it does so on the correct broker" do 74 | c = double('conn').as_null_object 75 | allow(Connection).to receive(:new).and_return(c) 76 | 77 | expect(c).to receive(:produce) 78 | @broker_pool.execute_api_call(0, :produce) 79 | end 80 | end 81 | 82 | describe "when executing two calls" do 83 | it "reuses the connection" do 84 | c = double('conn').as_null_object 85 | 86 | expect(Connection).to receive(:new).once.and_return(c) 87 | @broker_pool.execute_api_call(0, :produce) 88 | @broker_pool.execute_api_call(0, :produce) 89 | end 90 | end 91 | 92 | describe "executing a call for an unknown broker" do 93 | it "raises UnknownBroker" do 94 | expect { @broker_pool.execute_api_call(2, :produce) }.to raise_error(BrokerPool::UnknownBroker) 95 | end 96 | end 97 | end 98 | end 99 | -------------------------------------------------------------------------------- /spec/unit/cluster_metadata_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | include Protocol 4 | RSpec.describe ClusterMetadata do 5 | describe "populated" do 6 | before(:each) do 7 | partitions = [ 8 | PartitionMetadata.new(0, 1, 1, [1,2], [1,2]), 9 | PartitionMetadata.new(0, 2, 2, [2,1], [2,1]) 10 | ] 11 | topics = [TopicMetadata.new(TopicMetadataStruct.new(0, "test", partitions))] 12 | 13 | brokers = [Broker.new(1, "host1", 1), Broker.new(2, "host2", 2)] 14 | 15 | @mr = MetadataResponse.new(nil, brokers, topics) 16 | 17 | @cm = ClusterMetadata.new 18 | @cm.update(@mr) 19 | end 20 | 21 | it "knows when it has metadata for a set of topics" do 22 | have_metadata = @cm.have_metadata_for_topics?(Set.new(["test"])) 23 | expect(have_metadata).to eq(true) 24 | end 25 | 26 | it "knows when it doesn't have metadata for a topic" do 27 | have_metadata = @cm.have_metadata_for_topics?(Set.new(["test", "no_data"])) 28 | expect(have_metadata).to eq(false) 29 | end 30 | 31 | it "provides topic metadata for a set of topics" do 32 | topic_metadata = @cm.metadata_for_topics(Set.new(["test"])) 33 | expect(topic_metadata).to eq({ "test" => @mr.topics.first }) 34 | end 35 | 36 | it "provides broker information" do 37 | broker = @cm.broker(1) 38 | expect(broker).to eq(@mr.brokers.first) 39 | end 40 | 41 | it "provides the lead broker for a partition" do 42 | expect(@cm.lead_broker_for_partition("test",1).id).to eq(1) 43 | expect(@cm.lead_broker_for_partition("test",2).id).to eq(2) 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /spec/unit/compression/gzip_codec_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'spec_helper' 3 | 4 | RSpec.describe Poseidon::Compression::GzipCodec do 5 | 6 | let :data do 7 | %({"a":"val1"}\n{"a":"val2"}\n{"a":"val3"}) 8 | end 9 | 10 
| it "should have an ID" do 11 | expect(described_class.codec_id).to eq(1) 12 | end 13 | 14 | it "should compress" do 15 | compressed = described_class.compress(data) 16 | expect(compressed.size).to eq(41) 17 | expect(compressed.encoding).to eq(Encoding::BINARY) 18 | end 19 | 20 | it "should decompress" do 21 | original = described_class.decompress(described_class.compress(data)) 22 | expect(original).to eq(data) 23 | expect(original.encoding).to eq(Encoding::UTF_8) 24 | end 25 | 26 | it "should decompress unicode messages" do 27 | str = "\x1F\x8B\b\x00\x00\x00\x00\x00\x00\x00c`\x80\x03\xE3I\x91\xD3|\x19\x18\xFE\x03\x01\x90\xA7Z\xAD\x94\xA8d\xA5\x14Z\x92XP\xEC\xE9\xE3\xE1\xEB\x12Y\xEE\xE8\x98\x16\xA4\xA4\xA3\x94\x04\x14~6}\xE9\xB39k\x94j\xA1Z\x19A\xDAm\f\xD9\xEF\x10\xD0\x1E\x8C\xA6\x1D\x00\x96\x98\x1E\xB9~\x00\x00\x00".force_encoding(Encoding::BINARY) 28 | buf = Protocol::ResponseBuffer.new(described_class.decompress(str)) 29 | msg = MessageSet.read_without_size(buf).flatten 30 | expect(msg.size).to eq(2) 31 | expect(msg[0].value).to eq(%({"a":"UtapsILHMDYwAAfR","b":"日本"})) 32 | end 33 | 34 | end 35 | -------------------------------------------------------------------------------- /spec/unit/compression/snappy_codec_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'spec_helper' 3 | 4 | RSpec.describe Poseidon::Compression::SnappyCodec do 5 | 6 | let :data do 7 | %({"a":"val1"}\n{"a":"val2"}\n{"a":"val3"}) 8 | end 9 | 10 | it "should have an ID" do 11 | expect(described_class.codec_id).to eq(2) 12 | end 13 | 14 | it "should compress" do 15 | compressed = described_class.compress(data) 16 | expect(compressed.size).to eq(34) 17 | expect(compressed.encoding).to eq(Encoding::BINARY) 18 | end 19 | 20 | it "should decompress" do 21 | original = described_class.decompress(described_class.compress(data)) 22 | expect(original).to eq(data) 23 | end 24 | 25 | it "should decompress streams" do 26 | str = "\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x1E#\x00\x00\x19\x01\\\x17\x8B\xA7x\xB9\x00\x00\xFF\xFF\xFF\xFF\x00\x00\x00\tPLAINDATA".force_encoding(Encoding::BINARY) 27 | buf = Protocol::ResponseBuffer.new(described_class.decompress(str)) 28 | msg = MessageSet.read_without_size(buf).flatten 29 | expect(msg.size).to eq(1) 30 | expect(msg[0].value).to eq("PLAINDATA") 31 | end 32 | 33 | it "should decompress bulk streams" do 34 | str = "\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\xCA\xE8\x04\x00\x00\x19\x01\xA0L`\x9E\xD4(\x00\x00\xFF\xFF\xFF\xFF\x00\x00\x00>{\"a\":\"UtaaKYLHMCwiAA-l\",\"bF\x17\x00Dm\",\"c\":1389795881}\rW \x01\x00\x00\x00Ln\x14\x98\xA8zX\x00\x00nVX\x00\x00o^X\x00\x00\x02\x01X\b3\xF1\e~\xB0\x00\x00pVX\x00\x00q^X\x00\x00\x03\x01X\b.\xE5\x82~X\x00\x00tVX\x00\x00u^X\x00\x00\x04\x01X\b o\xCE~\b\x01\x00vVX\x00\x00w^X\x00\x00\x05\x01X\f\t\xD8)(z`\x01\x00xVX\x00\x00y^X\x00\x00\x06\x01X\f@\bf\xA6zX\x00\x00zVX\x00\x000BX\x00".force_encoding(Encoding::BINARY) 35 | buf = Protocol::ResponseBuffer.new(described_class.decompress(str)) 36 | msg = MessageSet.read_without_size(buf).flatten 37 | expect(msg.size).to eq(7) 38 | expect(msg[0].value).to eq(%({"a":"UtaaKYLHMCwiAA-l","b":"UtaaKYLHMCwiAA-m","c":1389795881})) 39 | end 40 | 41 | it "should decompress unicode messages" do 42 | str = 
"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00:?\x00\x00\x19\x01\xCC3\xBA?\x91\xFA\x00\x00\xFF\xFF\xFF\xFF\x00\x00\x00%{\"a\":\"UtaitILHMDAAAAfU\",\"b\":\"\xE6\x97\xA5\xE6\x9C\xAC\"}".force_encoding(Encoding::BINARY) 43 | buf = Protocol::ResponseBuffer.new(described_class.decompress(str)) 44 | msg = MessageSet.read_without_size(buf).flatten 45 | expect(msg.size).to eq(1) 46 | expect(msg[0].value).to eq(%({"a":"UtaitILHMDAAAAfU","b":"日本"})) 47 | end 48 | 49 | end 50 | -------------------------------------------------------------------------------- /spec/unit/compression_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Compression do 4 | it 'returns GzipCompessor for codec_id of 1' do 5 | codec = Compression.find_codec(1) 6 | expect(codec).to eq(Compression::GzipCodec) 7 | end 8 | 9 | it 'returns SnappyCompessor for codec_id of 2' do 10 | codec = Compression.find_codec(2) 11 | expect(codec).to eq(Compression::SnappyCodec) 12 | end 13 | 14 | it 'raises UnrecognizedCompressionCodec for codec_id of 3' do 15 | expect { Compression.find_codec(3) }.to raise_error(Compression::UnrecognizedCompressionCodec) 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /spec/unit/connection_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Connection do 4 | end 5 | -------------------------------------------------------------------------------- /spec/unit/fetched_message_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe FetchedMessage do 4 | it "provides access to topic,value,key,offset" do 5 | mts = FetchedMessage.new("hello_topic", "Hello World", "key", 0) 6 | expect(mts.topic).to eq("hello_topic") 7 | expect(mts.value).to eq("Hello World") 8 | expect(mts.key).to eq("key") 9 | expect(mts.offset).to eq(0) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /spec/unit/message_conductor_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | include Protocol 4 | RSpec.describe MessageConductor do 5 | context "two available partitions" do 6 | before(:each) do 7 | partitions = [ 8 | # These are intentionally not ordered by partition_id. 
9 | # [:error, :id, :leader, :replicas, :isr] 10 | PartitionMetadata.new(0, 1, 2, [2,1], [2,1]), 11 | PartitionMetadata.new(0, 0, 1, [1,2], [1,2]) 12 | ] 13 | topics = [TopicMetadata.new(TopicMetadataStruct.new(0, "test", partitions))] 14 | brokers = [Broker.new(1, "host1", 1), Broker.new(2, "host2", 2)] 15 | 16 | @mr = MetadataResponse.new(0, brokers, topics) 17 | 18 | @cm = ClusterMetadata.new 19 | @cm.update(@mr) 20 | end 21 | 22 | context "no custom partitioner" do 23 | before(:each) do 24 | @mc = MessageConductor.new(@cm, nil) 25 | end 26 | 27 | context "for unkeyed messages" do 28 | it "round robins which partition the message should go to" do 29 | destinations = 4.times.map do 30 | @mc.destination("test").first 31 | end 32 | 33 | first = [destinations[0], destinations[2]] 34 | second = [destinations[1], destinations[3]] 35 | expect([first.uniq, second.uniq].sort).to eq([[0],[1]]) 36 | end 37 | 38 | context "unknown topic" do 39 | it "returns -1 for broker and partition" do 40 | expect(@mc.destination("no_exist")).to eq([-1,-1]) 41 | end 42 | end 43 | end 44 | 45 | context "keyed message" do 46 | it "sends the same keys to the same destinations" do 47 | keys = 1000.times.map { rand(500).to_s } 48 | key_destinations = {} 49 | 50 | keys.sort_by { rand }.each do |k| 51 | partition,broker = @mc.destination("test", k) 52 | 53 | key_destinations[k] ||= [] 54 | key_destinations[k].push([partition,broker]) 55 | end 56 | 57 | expect(key_destinations.values.all? { |destinations| destinations.uniq.size == 1 }).to eq(true) 58 | end 59 | end 60 | end 61 | 62 | context "custom partitioner" do 63 | before(:each) do 64 | partitioner = Proc.new { |key, count| key.split("_").first.to_i % count } 65 | @mc = MessageConductor.new(@cm, partitioner) 66 | end 67 | 68 | it "obeys custom partitioner" do 69 | expect(@mc.destination("test", "2_hello").first).to eq(0) 70 | expect(@mc.destination("test", "3_hello").first).to eq(1) 71 | end 72 | end 73 | 74 | context "partitioner always sends to partition 1" do 75 | before(:each) do 76 | partitioner = Proc.new { 1 } 77 | @mc = MessageConductor.new(@cm, partitioner) 78 | end 79 | 80 | it "sends to partition 1 on broker 2" do 81 | expect(@mc.destination("test", "2_hello")).to eq([1,2]) 82 | end 83 | end 84 | 85 | context "broken partitioner" do 86 | before(:each) do 87 | partitioner = Proc.new { |key, count| count + 1 } # always returns an out-of-range partition 88 | @mc = MessageConductor.new(@cm, partitioner) 89 | end 90 | 91 | it "raises InvalidPartitionError" do 92 | expect{@mc.destination("test", "2_hello").first}.to raise_error(Errors::InvalidPartitionError) 93 | end 94 | end 95 | end 96 | 97 | context "two partitions, one is unavailable" do 98 | before(:each) do 99 | partitions = [ 100 | Protocol::PartitionMetadata.new(0, 0, 1, [1,2], [1,2]), 101 | Protocol::PartitionMetadata.new(0, 1, -1, [2,1], [2,1]) # leader of -1 marks the partition unavailable 102 | ] 103 | topics = [TopicMetadata.new(TopicMetadataStruct.new(0, "test", partitions))] 104 | brokers = [Broker.new(1, "host1", 1), Broker.new(2, "host2", 2)] 105 | 106 | @mr = MetadataResponse.new(0, brokers, topics) 107 | 108 | @cm = ClusterMetadata.new 109 | @cm.update(@mr) 110 | 111 | @mc = MessageConductor.new(@cm, nil) 112 | end 113 | 114 | context "keyless message" do 115 | it "is never sent to an unavailable partition" do 116 | 10.times do 117 | expect(@mc.destination("test").first).to eq(0) 118 | end 119 | end 120 | end 121 | 122 | context "keyed message" do 123 | it "can be sent to an unavailable partition" do 124 | destinations = Set.new 125 | 100.times do |key| 126 | destinations 
<< @mc.destination("test",key.to_s).first 127 | end 128 | expect(destinations).to eq(Set.new([0,1])) 129 | end 130 | end 131 | end 132 | 133 | context "no available partitions" do 134 | before(:each) do 135 | partitions = [ 136 | Protocol::PartitionMetadata.new(0, 0, -1, [1,2], [1,2]), 137 | Protocol::PartitionMetadata.new(0, 1, -1, [2,1], [2,1]) 138 | ] 139 | topics = [TopicMetadata.new(TopicMetadataStruct.new(0, "test", partitions))] 140 | brokers = [Broker.new(1, "host1", 1), Broker.new(2, "host2", 2)] 141 | 142 | @mr = MetadataResponse.new(0, brokers, topics) 143 | 144 | @cm = ClusterMetadata.new 145 | @cm.update(@mr) 146 | 147 | @mc = MessageConductor.new(@cm, nil) 148 | end 149 | 150 | context "keyless message" do 151 | it "return -1 for broker and partition" do 152 | expect(@mc.destination("test")).to eq([-1,-1]) 153 | end 154 | end 155 | 156 | context "keyed message" do 157 | it "returns a valid partition and -1 for broker" do 158 | partition_id, broker_id = @mc.destination("test", "key") 159 | expect(partition_id).to_not eq(-1) 160 | expect(broker_id).to eq(-1) 161 | end 162 | end 163 | end 164 | end 165 | -------------------------------------------------------------------------------- /spec/unit/message_set_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe MessageSet do 4 | describe "converting to a compressed message" do 5 | before(:each) do 6 | ms = MessageSet.new([Message.new(:value => "I will be compressed", :topic => "test")]) 7 | 8 | @compressed_message_set = ms.compress(Compression::GzipCodec) 9 | end 10 | 11 | it "contains a compressed message" do 12 | expect(@compressed_message_set.messages.first.compressed?).to eq(true) 13 | end 14 | 15 | it "can be decompressed and reconstituted" do 16 | expect(@compressed_message_set.flatten.first.value).to eq("I will be compressed") 17 | end 18 | end 19 | 20 | describe "adding messages" do 21 | it "adds the message to the struct" do 22 | m = Message.new(:value => "sup", :topic => "topic") 23 | ms = MessageSet.new 24 | ms << m 25 | expect(ms.struct.messages.first).to eq(m) 26 | end 27 | end 28 | 29 | describe "encoding" do 30 | it "round trips" do 31 | m = Message.new(:value => "sup", :key => "keyz", :topic => "hello") 32 | ms = MessageSet.new 33 | ms << m 34 | 35 | request_buffer = Protocol::RequestBuffer.new 36 | ms.write(request_buffer) 37 | 38 | response_buffer = Protocol::ResponseBuffer.new(request_buffer.to_s) 39 | expect(MessageSet.read(response_buffer)).to eq(ms) 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/unit/message_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'spec_helper' 3 | 4 | RSpec.describe Message do 5 | describe "when constructing a new message" do 6 | it 'raises an ArgumentError on unknown options' do 7 | expect { Message.new(:cow => "dog") }.to raise_error(ArgumentError) 8 | end 9 | 10 | it 'handles options correctly' do 11 | m = Message.new(:value => "value", 12 | :key => "key", 13 | :attributes => 1, 14 | :topic => "topic") 15 | 16 | expect(m.value).to eq("value") 17 | expect(m.key).to eq("key") 18 | expect(m.compressed?).to eq(true) 19 | expect(m.topic).to eq("topic") 20 | end 21 | end 22 | 23 | describe "checksum" do 24 | context "is incorrect" do 25 | before(:each) do 26 | m = Message.new(:value => "value", 27 | :key => "key", 28 | :topic => "topic") 29 | 30 | req_buf = 
Protocol::RequestBuffer.new 31 | m.write(req_buf) 32 | 33 | @s = req_buf.to_s 34 | @s[-1] = "q" # break checksum 35 | end 36 | 37 | it "knows it" do 38 | expect { Message.read(Protocol::ResponseBuffer.new(@s)) }.to raise_error(Errors::ChecksumError) 39 | end 40 | end 41 | 42 | context 'is correct' do 43 | before(:each) do 44 | m = Message.new(:value => "value", 45 | :key => "key", 46 | :topic => "topic") 47 | 48 | req_buf = Protocol::RequestBuffer.new 49 | m.write(req_buf) 50 | 51 | @s = req_buf.to_s 52 | end 53 | 54 | it "raises no error" do 55 | expect { Message.read(Protocol::ResponseBuffer.new(@s)) }.to_not raise_error 56 | end 57 | end 58 | end 59 | 60 | describe "truncated message" do 61 | before(:each) do 62 | m = Message.new(:value => "value", 63 | :key => "key", 64 | :topic => "topic") 65 | 66 | req_buf = Protocol::RequestBuffer.new 67 | m.write(req_buf) 68 | 69 | @s = req_buf.to_s 70 | end 71 | 72 | it "reading returns nil" do 73 | expect(Message.read(Protocol::ResponseBuffer.new(@s[0..-4]))).to eq(nil) 74 | end 75 | end 76 | 77 | context "invalid utf8 string for value" do 78 | it "builds the payload without error" do 79 | s = "asdf\xffasdf" 80 | m = Message.new(:value => s, 81 | :key => "key", 82 | :topic => "topic") 83 | 84 | req_buf = Protocol::RequestBuffer.new 85 | expect { 86 | m.write(req_buf) 87 | }.to_not raise_error 88 | end 89 | end 90 | 91 | context "utf8 string with multibyte characters" do 92 | it "roundtrips correctly" do 93 | s = "the µ is two bytes" 94 | m = Message.new(:value => s, 95 | :key => "key", 96 | :topic => "topic") 97 | 98 | req_buf = Protocol::RequestBuffer.new 99 | m.write(req_buf) 100 | 101 | resp_buf = Protocol::ResponseBuffer.new(req_buf.to_s) 102 | 103 | expect(Message.read(resp_buf).value).to eq(s.force_encoding(Encoding::BINARY)) 104 | end 105 | end 106 | 107 | context "frozen string for value" do 108 | it "builds the payload without error" do 109 | s = "asdffasdf".freeze 110 | m = Message.new(:value => s, 111 | :key => "key", 112 | :topic => "topic") 113 | 114 | req_buf = Protocol::RequestBuffer.new 115 | expect { 116 | m.write(req_buf) 117 | }.to_not raise_error 118 | end 119 | end 120 | 121 | it "decompresses a compressed value" 122 | 123 | it "raises an error if you try to decompress an uncompressed value" 124 | 125 | describe "#write" do 126 | it 'writes a MessageWithOffsetStruct to the request buffer' do 127 | end 128 | end 129 | end 130 | -------------------------------------------------------------------------------- /spec/unit/message_to_send_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe MessageToSend do 4 | it "provides access to topic, value, key" do 5 | mts = MessageToSend.new("hello_topic", "Hello World", "key") 6 | expect(mts.topic).to eq("hello_topic") 7 | expect(mts.value).to eq("Hello World") 8 | expect(mts.key).to eq("key") 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /spec/unit/messages_for_broker_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe MessagesForBroker do 4 | context "two messages: one to broker 0 partition 0, another to partition 1" do 5 | before(:each) do 6 | @messages = [ Message.new(:topic => "topic1",:value => "hi0"), 7 | Message.new(:topic => "topic1",:value => "hi1")] 8 | 9 | @compression_config = double('compression_config', 10 | :compression_codec_for_topic => nil) 11 | 
12 | @mfb = MessagesForBroker.new(0) 13 | @mfb.add(@messages[0], 0) 14 | @mfb.add(@messages[1], 1) 15 | end 16 | 17 | it "provides the messages" do 18 | expect(@mfb.messages.to_set).to eq(@messages.to_set) 19 | end 20 | 21 | it "has a broker_id of 0" do 22 | expect(@mfb.broker_id).to eq(0) 23 | end 24 | 25 | it "builds the protocol object correctly" do 26 | protocol_object = @mfb.build_protocol_objects(@compression_config) 27 | 28 | messages_for_topics = [ 29 | MessagesForTopic.new("topic1", 30 | [ 31 | MessagesForPartition.new(0, MessageSet.new([@messages[0]])), 32 | MessagesForPartition.new(1, MessageSet.new([@messages[1]])), 33 | ]) 34 | ] 35 | expect(protocol_object).to eq(messages_for_topics) 36 | end 37 | 38 | context "and topic is compressed" do 39 | it "builds the protocol object correctly" do 40 | allow(@compression_config).to receive_messages(:compression_codec_for_topic => Compression::GzipCodec) 41 | protocol_object = @mfb.build_protocol_objects(@compression_config) 42 | 43 | messages_for_topics = [ 44 | MessagesForTopic.new("topic1", 45 | [ 46 | MessagesForPartition.new(0, MessageSet.new([@messages[0]]).compress(Compression::GzipCodec)), 47 | MessagesForPartition.new(1, MessageSet.new([@messages[1]]).compress(Compression::GzipCodec)), 48 | ]) 49 | ] 50 | expect(protocol_object).to eq(messages_for_topics) 51 | end 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /spec/unit/messages_to_send_batch_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe MessagesToSendBatch do 4 | context "messages sent to two different brokers" do 5 | before(:each) do 6 | message_conductor = double('message_conductor') 7 | allow(message_conductor).to receive(:destination).and_return([0,0],[1,1]) 8 | 9 | @messages = [ 10 | Message.new(:topic => "topic1", :value => "hi"), 11 | Message.new(:topic => "topic1", :value => "hi") 12 | ] 13 | @batch = MessagesToSendBatch.new(@messages, message_conductor) 14 | end 15 | 16 | it "returns messages for two brokers" do 17 | expect(@batch.messages_for_brokers.size).to eq(2) 18 | end 19 | 20 | it "includes all messages across the returned brokers" do 21 | messages = @batch.messages_for_brokers.map(&:messages).flatten 22 | expect(messages.to_set).to eq(@messages.to_set) 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /spec/unit/messages_to_send_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe MessagesToSend do 4 | before(:each) do 5 | @messages = [] 6 | @messages << Message.new(:topic => "test1", :value => "hi") 7 | @messages << Message.new(:topic => "test2", :value => "hi") 8 | @messages << Message.new(:topic => "test2", :value => "hi") 9 | 10 | 11 | @cluster_metadata = double('cluster_metadata').as_null_object 12 | @mts = MessagesToSend.new(@messages, @cluster_metadata) 13 | end 14 | 15 | describe "needing metadata" do 16 | it "returns set of topics" do 17 | expect(@mts.topic_set).to eq(Set.new(["test1","test2"])) 18 | end 19 | 20 | it "asks ClusterMetadata about having metadata" do 21 | allow(@cluster_metadata).to receive(:have_metadata_for_topics?).and_return(true) 22 | 23 | expect(@mts.needs_metadata?).to eq(false) 24 | end 25 | end 26 | 27 | describe "sending" do 28 | before(:each) do 29 | @mfb = double('mfb', :messages => @messages) 30 | @messages_for_brokers 
= [@mfb] 31 | 32 | @mtsb = double('messages_to_send_batch').as_null_object 33 | allow(@mtsb).to receive(:messages_for_brokers).and_return(@messages_for_brokers) 34 | 35 | allow(MessagesToSendBatch).to receive(:new).and_return(@mtsb) 36 | end 37 | 38 | context "is successful" do 39 | before(:each) do 40 | @mts.messages_for_brokers(nil).each do |mfb| 41 | @mts.successfully_sent(mfb.messages) 42 | end 43 | end 44 | 45 | it "successfully sends all" do 46 | expect(@mts.pending_messages?).to eq(false) 47 | end 48 | end 49 | 50 | context "is not successful" do 51 | before(:each) do 52 | @mts.messages_for_brokers(nil).each do |mfb| # intentionally never marks anything as sent 53 | end 54 | end 55 | 56 | it "does not send all" do 57 | @mts.messages_for_brokers(nil).each do |mfb| 58 | end 59 | expect(@mts.pending_messages?).to eq(true) 60 | end 61 | end 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /spec/unit/partition_consumer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe PartitionConsumer do 4 | before(:each) do 5 | @connection = double('connection') 6 | allow(Connection).to receive(:new).and_return(@connection) 7 | 8 | offset = Protocol::Offset.new(100) 9 | partition_offsets = [Protocol::PartitionOffset.new(0, 0, [offset])] 10 | @offset_response = [Protocol::TopicOffsetResponse.new("test_topic", partition_offsets)] 11 | allow(@connection).to receive(:offset).and_return(@offset_response) 12 | end 13 | 14 | describe "creation" do 15 | context "when passed unknown options" do 16 | it "raises an ArgumentError" do 17 | expect { PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 0, :earliest_offset, :unknown => true) }.to raise_error(ArgumentError) 18 | end 19 | end 20 | 21 | context "when passed an unknown offset" do 22 | it "raises an ArgumentError" do 23 | expect { PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 0, :coolest_offset) }.to raise_error(ArgumentError) 24 | end 25 | end 26 | end 27 | 28 | describe "next offset" do 29 | context "when offset is not set" do 30 | it "resolves offset if it's not set" do 31 | expect(@connection).to receive(:offset).and_return(@offset_response) 32 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 33 | 0, :earliest_offset) 34 | 35 | pc.next_offset 36 | end 37 | 38 | it "returns resolved offset" do 39 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 40 | 0, :earliest_offset) 41 | expect(pc.next_offset).to eq(100) 42 | end 43 | end 44 | 45 | context "when offset is set" do 46 | it "does not resolve it" do 47 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 48 | 0, 200) 49 | pc.next_offset # offset was given explicitly, so no lookup is needed 50 | end 51 | end 52 | 53 | context "when call returns an error" do 54 | it "is raised" do 55 | allow(@offset_response.first.partition_offsets.first).to receive(:error).and_return(2) 56 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 57 | 0, :earliest_offset) 58 | 59 | expect { pc.next_offset }.to raise_error(Errors::InvalidMessage) 60 | end 61 | end 62 | 63 | context "when no offset exists" do 64 | it "sets offset to 0" do 65 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 66 | 0, :earliest_offset) 67 | 68 | allow(@offset_response.first.partition_offsets.first).to receive(:offsets).and_return([]) 69 | expect(pc.next_offset).to eq(0) 70 | end 71 | end 72 | 73 | context "when offset negative" do 74 
| it "resolves offset to one " do 75 | pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 76 | 0, -10) 77 | expect(pc.next_offset).to eq(90) 78 | end 79 | end 80 | end 81 | 82 | describe "fetching messages" do 83 | before(:each) do 84 | message_set = MessageSet.new 85 | message_set << Message.new(:value => "value", :key => "key", :offset => 90) 86 | partition_fetch_response = Protocol::PartitionFetchResponse.new(0, 0, 100, message_set) 87 | topic_fetch_response = Protocol::TopicFetchResponse.new('test_topic', 88 | [partition_fetch_response]) 89 | @response = Protocol::FetchResponse.new(double('common'), [topic_fetch_response]) 90 | 91 | allow(@connection).to receive(:fetch).and_return(@response) 92 | @pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 0, :earliest_offset) 93 | end 94 | 95 | it "returns FetchedMessage objects" do 96 | expect(@pc.fetch.first.class).to eq(FetchedMessage) 97 | end 98 | 99 | it "uses object defaults" do 100 | expect(@connection).to receive(:fetch).with(10_000, 1, anything) 101 | @pc.fetch 102 | end 103 | 104 | context "when options are passed" do 105 | it "overrides object defaults" do 106 | expect(@connection).to receive(:fetch).with(20_000, 1, anything) 107 | @pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 0, :earliest_offset, :max_wait_ms => 20_000) 108 | 109 | @pc.fetch 110 | end 111 | end 112 | 113 | context "when negative offset beyond beginning of partition is passed" do 114 | it "starts from the earliest offset" do 115 | @pc = PartitionConsumer.new("test_client", "localhost", 9092, "test_topic", 0, -10000) 116 | pfr = @response.topic_fetch_responses.first.partition_fetch_responses.first 117 | allow(pfr).to receive(:error).and_return(1, 1, 0) 118 | 119 | @pc.fetch 120 | end 121 | end 122 | 123 | context "when call returns an error" do 124 | it "is raised" do 125 | pfr = @response.topic_fetch_responses.first.partition_fetch_responses.first 126 | allow(pfr).to receive(:error).and_return(2) 127 | 128 | expect { @pc.fetch }.to raise_error(Errors::InvalidMessage) 129 | end 130 | end 131 | 132 | it "sets the highwater mark" do 133 | @pc.fetch 134 | expect(@pc.highwater_mark).to eq(100) 135 | end 136 | 137 | it "sets the latest offset" do 138 | @pc.fetch 139 | expect(@pc.next_offset).to eq(91) 140 | end 141 | end 142 | end 143 | -------------------------------------------------------------------------------- /spec/unit/producer_compression_config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe ProducerCompressionConfig do 4 | describe "creation" do 5 | it "raises ArgumentError when codec is unknown" do 6 | expect { ProducerCompressionConfig.new(:ripple, nil) }.to raise_error(ArgumentError) 7 | end 8 | end 9 | 10 | context "no codec set" do 11 | it "compresses no topics" do 12 | pcc = ProducerCompressionConfig.new(nil,nil) 13 | expect(pcc.compression_codec_for_topic("test")).to eq(false) 14 | end 15 | end 16 | 17 | describe "none compression codec" do 18 | it "compresses no topics" do 19 | pcc = ProducerCompressionConfig.new(:none,nil) 20 | expect(pcc.compression_codec_for_topic("test")).to eq(false) 21 | end 22 | end 23 | 24 | describe "compression codec no topics specified" do 25 | it "compresses any topic" do 26 | pcc = ProducerCompressionConfig.new(:gzip,nil) 27 | expect(pcc.compression_codec_for_topic("test")).to eq(Compression::GzipCodec) 28 | end 29 | end 30 | 31 | describe "compression 
codec set, but only compress 'compressed' topic" do 32 | it "compresses 'compressed' topic" do 33 | pcc = ProducerCompressionConfig.new(:gzip, ["compressed"]) 34 | expect(pcc.compression_codec_for_topic("compressed")).to eq(Compression::GzipCodec) 35 | end 36 | 37 | it "does not compress 'test' topic" do 38 | pcc = ProducerCompressionConfig.new(:gzip, ["compressed"]) 39 | expect(pcc.compression_codec_for_topic("test")).to eq(false) 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/unit/producer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Producer do 4 | it "requires brokers and client_id" do 5 | expect { Producer.new }.to raise_error(ArgumentError) 6 | end 7 | 8 | it "raises ArgumentError on unknown arguments" do 9 | expect { Producer.new([],"client_id", :unknown => true) }.to raise_error(ArgumentError) 10 | end 11 | 12 | it "raises ArgumentError unless brokers is an enumerable" do 13 | expect { Producer.new("host:port","client_id") }.to raise_error(ArgumentError) 14 | end 15 | 16 | it "raises ProducerShutdownError if we try to send to a shutdown producer" do 17 | p = Producer.new(["host:port"],"client_id") 18 | p.close 19 | expect { p.send_messages([]) }.to raise_error(Errors::ProducerShutdownError) 20 | end 21 | 22 | it "accepts all options" do 23 | expect { Producer.new([],"client_id", Producer::OPTION_DEFAULTS.dup) }.not_to raise_error 24 | end 25 | 26 | it "accepts socket_timeout_ms option" do 27 | expect { Producer.new([],"client_id", socket_timeout_ms: 10_000) }.not_to raise_error 28 | end 29 | 30 | describe "sending messages" do 31 | before(:each) do 32 | @sync_producer = double('sync_producer').as_null_object 33 | allow(SyncProducer).to receive(:new).and_return(@sync_producer) 34 | 35 | @producer = Producer.new([], "client_id", :type => :sync) 36 | end 37 | 38 | it "turns MessagesToSend into Message objects" do 39 | expect(@sync_producer).to receive(:send_messages).with(an_instance_of(Array)) do |array| 40 | array.each { |obj| expect(obj).to be_an_instance_of(Message) } 41 | end 42 | 43 | m = MessageToSend.new("topic", "value") 44 | @producer.send_messages([m]) 45 | end 46 | 47 | it "raises an ArgumentError if you try to send a single message" do 48 | expect { @producer.send_messages(MessageToSend.new("topic", "value")) }.to raise_error(ArgumentError) 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /spec/unit/protocol/request_buffer_spec.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | require 'spec_helper' 4 | include Protocol 5 | 6 | RSpec.describe RequestBuffer do 7 | subject(:buffer) { Poseidon::Protocol::RequestBuffer.new } 8 | 9 | it 'appends UTF-8 strings' do 10 | expect do 11 | str = 'hello ümlaut' 12 | buffer.append(str) 13 | buffer.append(str.force_encoding(Encoding::BINARY)) 14 | end.to_not raise_error 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /spec/unit/protocol_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | include Protocol 3 | RSpec.describe RequestCommon do 4 | it "roundtrips" do 5 | rc = RequestCommon.new(0,1,2,"client_id") 6 | 7 | req_buffer = RequestBuffer.new 8 | rc.write(req_buffer) 9 | 10 | resp_buffer = ResponseBuffer.new(req_buffer.to_s) 11 | 
rc_roundtrip = RequestCommon.read(resp_buffer) 12 | 13 | expect(rc).to eq(rc_roundtrip) 14 | end 15 | end 16 | 17 | RSpec.describe MetadataRequest do 18 | it "roundtrips" do 19 | rc = RequestCommon.new(0,1,2,"client_id") 20 | mr = MetadataRequest.new(rc, ["topic1","topic2"]) 21 | 22 | req_buffer = RequestBuffer.new 23 | mr.write(req_buffer) 24 | 25 | resp_buffer = ResponseBuffer.new(req_buffer.to_s) 26 | mr_roundtrip = MetadataRequest.read(resp_buffer) 27 | 28 | expect(mr).to eq(mr_roundtrip) 29 | end 30 | end 31 | 32 | RSpec.describe "objects with errors" do 33 | it "returns objects that have errors" do 34 | message_set = MessageSet.new 35 | message_set << Message.new(:value => "value", :key => "key") 36 | partition_fetch_response = PartitionFetchResponse.new(0, 5, 100, message_set) 37 | topic_fetch_response = TopicFetchResponse.new('test_topic', 38 | [partition_fetch_response]) 39 | response = FetchResponse.new(double('common'), [topic_fetch_response]) 40 | 41 | expect(response.objects_with_errors).to eq([partition_fetch_response]) 42 | end 43 | 44 | it "raises error when asked" do 45 | message_set = MessageSet.new 46 | message_set << Message.new(:value => "value", :key => "key") 47 | partition_fetch_response = PartitionFetchResponse.new(0, 5, 100, message_set) 48 | topic_fetch_response = TopicFetchResponse.new('test_topic', 49 | [partition_fetch_response]) 50 | response = FetchResponse.new(double('common'), [topic_fetch_response]) 51 | 52 | expect { response.raise_error_if_one_exists }.to raise_error 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /spec/unit/sync_producer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe SyncProducer do 4 | describe "creation" do 5 | 6 | it "sets correct defaults" do 7 | sp = SyncProducer.new(nil,nil) 8 | expect(sp.ack_timeout_ms).to eq(1500) 9 | expect(sp.retry_backoff_ms).to eq(100) 10 | expect(sp.metadata_refresh_interval_ms).to eq(600_000) 11 | expect(sp.required_acks).to eq(0) 12 | expect(sp.max_send_retries).to eq(3) 13 | expect(sp.socket_timeout_ms).to eq(10_000) 14 | end 15 | 16 | it "raises ArgumentError on unknown options" do 17 | expect { SyncProducer.new(nil,nil,:unknown => true) }.to raise_error(ArgumentError) 18 | end 19 | end 20 | 21 | # Fetches metadata 22 | 23 | describe "sending" do 24 | before(:each) do 25 | allow(Kernel).to receive(:sleep) 26 | 27 | @broker_pool = double('broker_pool').as_null_object 28 | allow(BrokerPool).to receive(:new).and_return(@broker_pool) 29 | 30 | @cluster_metadata = double('cluster_metadata', :last_refreshed_at => Time.now).as_null_object 31 | allow(ClusterMetadata).to receive(:new).and_return(@cluster_metadata) 32 | 33 | @mbts = double('messages_to_send', :needs_metadata? 
=> false).as_null_object 34 | allow(MessagesToSend).to receive(:new).and_return(@mbts) 35 | end 36 | 37 | context "needs metadata" do 38 | before(:each) do 39 | allow(@mbts).to receive(:needs_metadata?).and_return(true) 40 | end 41 | 42 | it "fetches metadata" do 43 | @broker_pool.should_recieve(:fetch_metadata) 44 | 45 | @sp = SyncProducer.new("test_client", []) 46 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue Errors::UnableToFetchMetadata 47 | end 48 | end 49 | 50 | context "there are messages to send" do 51 | before(:each) do 52 | allow(@mbts).to receive(:messages_for_brokers).and_return([double('mfb').as_null_object]) 53 | end 54 | 55 | it "sends messages" do 56 | @broker_pool.should_recieve(:execute_api_call, :producer, anything, anything, anything) 57 | 58 | @sp = SyncProducer.new("test_client", []) 59 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue StandardError 60 | end 61 | end 62 | 63 | context "always fails" do 64 | before(:each) do 65 | allow(@mbts).to receive(:pending_messages?).and_return(true) 66 | @sp = SyncProducer.new("test_client", []) 67 | end 68 | 69 | it "retries the correct number of times" do 70 | expect(@mbts).to receive(:messages_for_brokers).exactly(4).times 71 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue StandardError 72 | end 73 | 74 | it "sleeps the correct amount between retries" do 75 | expect(Kernel).to receive(:sleep).with(0.1).exactly(4).times 76 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue StandardError 77 | end 78 | 79 | it "refreshes metadata between retries" do 80 | expect(@cluster_metadata).to receive(:update).exactly(4).times 81 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue StandardError 82 | end 83 | 84 | it "raises an exception" do 85 | expect { 86 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) 87 | }.to raise_error 88 | end 89 | end 90 | 91 | context "no retries" do 92 | before(:each) do 93 | allow(@mbts).to receive(:pending_messages?).and_return(true) 94 | @sp = SyncProducer.new("test_client", [], max_send_retries: 0) 95 | end 96 | 97 | it "does not call sleep" do 98 | expect(Kernel).to receive(:sleep).exactly(0).times 99 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) rescue Errors::UnableToFetchMetadata 100 | end 101 | end 102 | 103 | context "succeeds on first attempt" do 104 | before(:each) do 105 | allow(@mbts).to receive(:pending_messages?).and_return(false) 106 | @sp = SyncProducer.new("test_client", []) 107 | end 108 | 109 | it "returns true" do 110 | expect(@sp.send_messages([Message.new(:topic => "topic", :value => "value")])).to eq(true) 111 | end 112 | 113 | it "does not sleep" do 114 | expect(Kernel).not_to receive(:sleep) 115 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) 116 | end 117 | 118 | it "only attempts to send once" do 119 | expect(@mbts).to receive(:messages_for_brokers).once 120 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) 121 | end 122 | end 123 | 124 | context "succeeds on second attempt" do 125 | before(:each) do 126 | allow(@mbts).to receive(:pending_messages?).and_return(true, false) 127 | @sp = SyncProducer.new("test_client", []) 128 | end 129 | 130 | it "returns true" do 131 | expect(@sp.send_messages([Message.new(:topic => "topic", :value => "value")])).to eq(true) 132 | end 133 | 134 | it "sleeps once" do 135 | expect(Kernel).to 
receive(:sleep).once 136 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) 137 | end 138 | 139 | it "attempts to send twice" do 140 | expect(@mbts).to receive(:messages_for_brokers).twice 141 | @sp.send_messages([Message.new(:topic => "topic", :value => "value")]) 142 | end 143 | end 144 | end 145 | end 146 | -------------------------------------------------------------------------------- /spec/unit/topic_metadata_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe TopicMetadata do 4 | context "encoding" do 5 | it "roundtrips" do 6 | partition_metadata = Protocol::PartitionMetadata.new(0, 0, 0, [0], [0]) 7 | partitions = [partition_metadata] 8 | tm = TopicMetadata.new(Protocol::TopicMetadataStruct.new(0, "topic", partitions)) 9 | 10 | request_buffer = Protocol::RequestBuffer.new 11 | tm.write(request_buffer) 12 | 13 | response_buffer = Protocol::ResponseBuffer.new(request_buffer.to_s) 14 | expect(TopicMetadata.read(response_buffer)).to eq(tm) 15 | end 16 | end 17 | 18 | it 'determines leader for a partition' do 19 | partition_metadata = Protocol::PartitionMetadata.new(0, 0, 0, [0], [0]) 20 | partitions = [partition_metadata] 21 | tm = TopicMetadata.new(Protocol::TopicMetadataStruct.new(0, "topic", partitions)) 22 | 23 | expect(tm.partition_leader(0)).to eq(0) 24 | end 25 | end 26 | --------------------------------------------------------------------------------
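
For quick orientation, here is a minimal usage sketch consistent with the producer and consumer APIs exercised by the specs above; the broker address, client id, and topic name are placeholder values, and examples/producer.rb and examples/consumer.rb in the tree above hold the project's own versions:

require 'poseidon'

# Producing: brokers are passed as "host:port" strings (see producer_spec.rb).
producer = Poseidon::Producer.new(["localhost:9092"], "example_client")
producer.send_messages([Poseidon::MessageToSend.new("example_topic", "hello")])

# Consuming: client id, host, port, topic, partition, starting offset
# (see partition_consumer_spec.rb); fetch returns FetchedMessage objects.
consumer = Poseidon::PartitionConsumer.new("example_client", "localhost", 9092,
                                           "example_topic", 0, :earliest_offset)
consumer.fetch.each { |m| puts m.value }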