├── logstash-kafka.gemspec
├── .gitignore
├── CHANGELOG
├── extract_services.rb
├── lib
│   └── logstash
│       ├── outputs
│       │   └── kafka.rb
│       └── inputs
│           └── kafka.rb
├── gembag.rb
├── README.md
└── Makefile

/logstash-kafka.gemspec:
--------------------------------------------------------------------------------
# -*- encoding: utf-8 -*-

Gem::Specification.new do |gem|
  gem.name = "logstash-kafka"
  if RUBY_PLATFORM == 'java'
    gem.add_runtime_dependency "jruby-kafka", [">=0.0.12"] # (Apache 2.0 license)
  end
end
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.*.swp
*.gem
pkg/*.deb
pkg/*.rpm
*.class
.rbx
Gemfile.lock
*.tar.gz
*.jar
.bundle
build
local
test/setup/elasticsearch/elasticsearch-*
vendor
.sass-cache
data
.buildpath
.project
.DS_Store
*.pyc
etc/jira-output.conf
coverage/*
sample-kafka.conf
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
0.4.0 (March 13, 2014)
# general
- updated to support Kafka 0.8.1
- made JSON the default codec on inputs and outputs.
# outputs
- added support for request_required_acks
0.3.0 (January 23, 2014)
# general
- updated jruby-kafka gemspec dependency
# outputs
- added support for compression_codec and compressed_topics
0.2.0 (January 23, 2014)
# inputs
- fixed an issue where shutdown caused an extra message to be logged.
0.1.0 (January 13, 2014)
# general
- Added change log.
- First tagged version of logstash-kafka.
--------------------------------------------------------------------------------
/extract_services.rb:
--------------------------------------------------------------------------------
# Extract META-INF/services/* files from jars
#
require "optparse"

output = nil

flags = OptionParser.new do |opts|
  opts.on("-o", "--output DIR",
          "Where to write the merged META-INF/services/* files") do |dir|
    output = dir
  end
end

flags.parse!(ARGV)

ARGV.each do |jar|
  # Find any files matching /META-INF/services/* in each jar given on the
  # command line, and append each file's content to a file of the same name
  # in the output directory.
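  # NOTE: this relies on JRuby's ability to glob inside jar files via
  # "file:...!" URLs (the Makefile runs this script under JRuby); under MRI
  # the Dir.glob below would simply match nothing.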
  glob = "file:///#{File.expand_path(jar)}!/META-INF/services/*"
  Dir.glob(glob).each do |service|
    name = File.basename(service)
    File.open(File.join(output, name), "a") do |fd|
      puts "Adding #{name} from #{File.basename(jar)}"
      fd.write(File.read(service))
    end
  end
end
--------------------------------------------------------------------------------
/lib/logstash/outputs/kafka.rb:
--------------------------------------------------------------------------------
require 'logstash/namespace'
require 'logstash/outputs/base'
require 'jruby-kafka'

class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  config_name 'kafka'
  milestone 1

  default :codec, 'json'

  config :broker_list, :validate => :string, :default => 'localhost:9092'
  config :topic_id, :validate => :string, :default => 'test'
  config :compression_codec, :validate => %w(none gzip snappy), :default => 'none'
  config :compressed_topics, :validate => :string, :default => ''
  config :request_required_acks, :validate => [-1, 0, 1], :default => 0

  public
  def register
    jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*.jar")
    Dir[jarpath].each do |jar|
      require jar
    end
    options = {
      :topic_id => @topic_id,
      :broker_list => @broker_list,
      :compression_codec => @compression_codec,
      :compressed_topics => @compressed_topics,
      :request_required_acks => @request_required_acks
    }
    @producer = Kafka::Producer.new(options)
    @producer.connect()

    @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list)

    @codec.on_event do |event|
      begin
        @producer.sendMsg(@topic_id, nil, event)
      rescue LogStash::ShutdownSignal
        @logger.info('Kafka producer got shutdown signal')
      rescue => e
        @logger.warn('kafka producer threw exception, restarting',
                     :exception => e)
      end
    end
  end # def register

  def receive(event)
    return unless output?(event)
    if event == LogStash::SHUTDOWN
      finished
      return
    end
    @codec.encode(event)
  end

end # class LogStash::Outputs::Kafka
--------------------------------------------------------------------------------
/gembag.rb:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby

# target for now
rubymajor = RUBY_VERSION.split(".")[0..1].join(".")
target = "#{Dir.pwd}/vendor/bundle/#{RUBY_ENGINE}/#{rubymajor}/"
ENV["GEM_HOME"] = target
ENV["GEM_PATH"] = ""

require "rubygems/specification"
require "rubygems/commands/install_command"

def install_gem(name, requirement, target)
  puts "Fetching and installing gem: #{name} (#{requirement})"

  installer = Gem::Commands::InstallCommand.new
  installer.options[:generate_rdoc] = false
  installer.options[:generate_ri] = false
  installer.options[:version] = requirement
  installer.options[:args] = [name]
  installer.options[:install_dir] = target

  # ruby 2.0.0 / rubygems 2.x: disable documentation generation
  installer.options[:document] = []
  begin
    installer.execute
  rescue Gem::SystemExitException => e
    if e.exit_code != 0
      puts "Installation of #{name} failed"
      raise
    end
  end
end # def install_gem

gemspec = ARGV.shift || "logstash-kafka.gemspec"

spec = Gem::Specification.load(gemspec)
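# Note: Gem::Specification.load evaluated the gemspec above, so the
# RUBY_PLATFORM guard in logstash-kafka.gemspec has already decided whether
# jruby-kafka appears in the runtime dependency list gathered below.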
deps = [spec.runtime_dependencies].flatten

deps.each do |dep|
  # TODO(sissel): Hack for now
  next if "#{dep}" == "addressable (~> 2.2.6)"

  begin
    # Check if the gem is available.
    # 'gem' returns true if it loaded it, false if it was already loaded,
    # and raises a Gem::LoadError exception on failure.
    # Skip downloading/installing it if it's already here.
    gem(dep.name, dep.requirement)

    # If we get here, we have the gem.
    puts "Gem found matching: #{dep}"
  rescue Gem::LoadError => e
    # Not installed, continue.
    message = e.to_s

    # Sometimes we fail to load because gembag installs too many things;
    # for example, 'shoulda' fails to load when two conflicting versions of
    # 'mocha' are installed. Fundamentally, gembag should build a dependency
    # graph and resolve all version requirements to single nodes to prevent
    # this madness.
    #
    # Possibly we can steal bundler's implementation of this, or just use
    # bundler to do it, but only if bundler doesn't bite me in the ass
    # again :)
    case message
    when /Unable to activate/
      puts "Gem found, but funky: #{dep} (#{e})"
    when /Could not find/
      puts "Gem not found: #{dep}"
      install_gem(dep.name, dep.requirement, target)
    else
      puts "Unexpected error: #{e}"
      exit 1
    end # case message
  end # begin / rescue Gem::LoadError
end # deps.each
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# logstash-kafka

This project implements Kafka 0.8.1 inputs and outputs for logstash.

For more info about logstash, see <http://logstash.net/>.

## Dependencies

* [Apache Kafka] version 0.8.1
* [jruby-kafka] library

[Apache Kafka]: http://kafka.apache.org/
[jruby-kafka]: https://github.com/joekiller/jruby-kafka

## Building

Because this is a Logstash plugin, it must be built. Luckily for you, there is a make file that handles all of this.
Most of the logic originated from logstash's own make file, so thank you to everyone who has contributed to it and
enabled me to make this easy for you.

The make file is currently configured to use JRuby version 1.7.9 and logstash version 1.3.3.

To build the logstash jar as-is with Kafka enabled, run:

    # make flatjar

To build the logstash jar with a different version of logstash, run:

    # make flatjar LOGSTASH_VERSION=1.3.3
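Once built, the combined jar should run like a stock logstash flatjar. For example, with the default versions above
and a config file of your own (the .gitignore anticipates a sample-kafka.conf):

    # java -jar build/logstash-1.3.3-flatjar-kafka-0.8.1.jar agent -f sample-kafka.conf
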
## Configuration for runtime

jruby-kafka supports nearly all the configuration options of a Kafka high-level consumer, but some have been left out
of this plugin simply because either they weren't a priority or I hadn't tested them yet. If an option isn't currently
supported, it should be trivial to add it to jruby-kafka and then to the logstash input or output.

### Input

    input {
      kafka {
        zk_connect => ...                 # string (optional), default: "localhost:2181"
        group_id => ...                   # string (optional), default: "logstash"
        topic_id => ...                   # string (optional), default: "test"
        reset_beginning => ...            # boolean (optional), default: false
        consumer_threads => ...           # number (optional), default: 1
        queue_size => ...                 # number (optional), default: 20
        rebalance_max_retries => ...      # number (optional), default: 4
        rebalance_backoff_ms => ...       # number (optional), default: 2000
        consumer_timeout_ms => ...        # number (optional), default: -1
        consumer_restart_on_error => ...  # boolean (optional), default: true
        consumer_restart_sleep_ms => ...  # number (optional), default: 0
      }
    }

### Output

    output {
      kafka {
        broker_list => ...            # string (optional), default: "localhost:9092"
        topic_id => ...               # string (optional), default: "test"
        compression_codec => ...      # string (optional), one of ["none", "gzip", "snappy"], default: "none"
        compressed_topics => ...      # string (optional), default: ""
        request_required_acks => ...  # number (optional), one of [-1, 0, 1], default: 0
      }
    }

The default codec is json for both inputs and outputs. If you select the plain codec, logstash will encode not only
your message but also a timestamp and hostname along with it. If you want nothing but your message passing through,
make the output configuration something like:

    output {
      kafka {
        codec => plain {
          format => "%{message}"
        }
      }
    }

## Testing

There are no tests at the current time. Please feel free to submit a pull request.

## Notes

The make file currently flattens the Kafka jar files and merges them into the uberjar. I think this is overkill, but
it is the way it works now. Feel free to test other ways to make this all simpler; I need it to "just work" for now,
so that is where we are.
--------------------------------------------------------------------------------
/lib/logstash/inputs/kafka.rb:
--------------------------------------------------------------------------------
require 'logstash/namespace'
require 'logstash/inputs/base'
require 'jruby-kafka'

class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  config_name 'kafka'
  milestone 1

  default :codec, 'json'

  config :zk_connect, :validate => :string, :default => 'localhost:2181'
  config :group_id, :validate => :string, :default => 'logstash'
  config :topic_id, :validate => :string, :default => 'test'
  config :reset_beginning, :validate => :boolean, :default => false
  config :consumer_threads, :validate => :number, :default => 1
  config :queue_size, :validate => :number, :default => 20
  config :rebalance_max_retries, :validate => :number, :default => 4
  config :rebalance_backoff_ms, :validate => :number, :default => 2000
  config :consumer_timeout_ms, :validate => :number, :default => -1
  config :consumer_restart_on_error, :validate => :boolean, :default => true
  config :consumer_restart_sleep_ms, :validate => :number, :default => 0

  public
  def register
    jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/kafka*.jar")
    Dir[jarpath].each do |jar|
      require jar
    end
    options = {
      :zk_connect => @zk_connect,
      :group_id => @group_id,
      :topic_id => @topic_id,
      :rebalance_max_retries => @rebalance_max_retries,
      :rebalance_backoff_ms => @rebalance_backoff_ms,
      :consumer_timeout_ms => @consumer_timeout_ms,
      :consumer_restart_on_error => @consumer_restart_on_error,
      :consumer_restart_sleep_ms => @consumer_restart_sleep_ms
    }
    if @reset_beginning == true
      options[:reset_beginning] = 'from-beginning'
    end # if :reset_beginning
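    # SizedQueue blocks the pushing thread once queue_size events are waiting,
    # so a slow logstash pipeline exerts back-pressure on the consumer threads
    # instead of buffering without bound.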
    @kafka_client_queue = SizedQueue.new(@queue_size)
    @consumer_group = Kafka::Group.new(options)
    @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
  end # def register

  public
  def run(logstash_queue)
    java_import 'kafka.common.ConsumerRebalanceFailedException'
    @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
    begin
      @consumer_group.run(@consumer_threads, @kafka_client_queue)
      begin
        while true
          event = @kafka_client_queue.pop
          queue_event("#{event}", logstash_queue)
        end
      rescue LogStash::ShutdownSignal
        @logger.info('Kafka got shutdown signal')
        @consumer_group.shutdown()
      end
      until @kafka_client_queue.empty?
        queue_event("#{@kafka_client_queue.pop}", logstash_queue)
      end
      @logger.info('Done running kafka input')
    rescue => e
      @logger.warn('kafka client threw exception, restarting',
                   :exception => e)
      if @consumer_group.running?
        @consumer_group.shutdown()
      end
      sleep(Float(@consumer_restart_sleep_ms) / 1000)
      retry
    end
    finished
  end # def run

  private
  def queue_event(msg, output_queue)
    begin
      @codec.decode(msg) do |event|
        decorate(event)
        event['kafka'] = {'msg_size' => msg.bytesize, 'topic' => @topic_id, 'consumer_group' => @group_id}
        output_queue << event
      end # @codec.decode
    rescue => e # parse or event creation error
      @logger.error("Failed to create event", :message => msg, :exception => e,
                    :backtrace => e.backtrace)
    end # begin
  end # def queue_event

end # class LogStash::Inputs::Kafka
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Requirements to build:
#   rsync
#   wget or curl
#
JRUBY_VERSION?=1.7.9

WITH_JRUBY=java -jar $(shell pwd)/$(JRUBY) -S
JRUBY=vendor/jar/jruby-complete-$(JRUBY_VERSION).jar
JRUBY_URL=http://jruby.org.s3.amazonaws.com/downloads/$(JRUBY_VERSION)/jruby-complete-$(JRUBY_VERSION).jar
JRUBY_CMD=java -jar $(JRUBY)
JRUBYC=$(WITH_JRUBY) jrubyc

KAFKA_VERSION=0.8.1
LOGSTASH_VERSION?=1.3.3
VENDOR_DIR=vendor/bundle/jruby/1.9

KAFKA_URL=https://archive.apache.org/dist/kafka/0.8.1/kafka_2.8.0-0.8.1.tgz

LOGSTASH_URL=https://download.elasticsearch.org/logstash/logstash/logstash-$(LOGSTASH_VERSION)-flatjar.jar

WGET=$(shell which wget 2>/dev/null)
CURL=$(shell which curl 2>/dev/null)

QUIET=@

# OS-specific options
TARCHECK=$(shell tar --help|grep wildcard|wc -l|tr -d ' ')
ifeq (0, $(TARCHECK))
TAR_OPTS=
else
TAR_OPTS=--wildcards
endif
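# (GNU tar needs --wildcards before the quoted glob patterns used in get-kafka
# below; bsdtar lacks the flag and treats patterns as globs by default, hence
# the probe above.)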
# Figure out if we're using wget or curl
.PHONY: wget-or-curl
wget-or-curl:
ifeq ($(CURL),)
ifeq ($(WGET),)
	@echo "wget or curl is required."
	exit 1
else
DOWNLOAD_COMMAND=wget -q --no-check-certificate -O
endif
else
DOWNLOAD_COMMAND=curl -s -L -k -o
endif

clean: clean-vendor clean-build

clean-vendor:
	$(QUIET)rm -rf vendor

clean-build:
	$(QUIET)rm -rf build

build:
	-$(QUIET)mkdir -p $@

build/ruby: | build
	-$(QUIET)mkdir -p $@

vendor:
	$(QUIET)mkdir -p $@

vendor/jar: vendor
	$(QUIET)mkdir -p $@

get-kafka: | vendor/jar
	@echo "=> Fetching kafka"
	$(QUIET)$(DOWNLOAD_COMMAND) vendor/kafka_2.8.0-$(KAFKA_VERSION).tar.gz $(KAFKA_URL)
	@echo "=> Pulling the jars out of Kafka"
	$(QUIET)tar -C vendor/jar -xf vendor/kafka_2.8.0-$(KAFKA_VERSION).tar.gz $(TAR_OPTS) \
		--strip-components 2 'kafka_2.8.0-$(KAFKA_VERSION)/libs/*.jar'
	$(QUIET)tar -C vendor/jar -xf vendor/kafka_2.8.0-$(KAFKA_VERSION).tar.gz $(TAR_OPTS) \
		--strip-components 1 'kafka_2.8.0-$(KAFKA_VERSION)/*.jar'
	$(QUIET)rm -rf vendor/jar/libs/

get-logstash: | vendor/jar
	@echo "=> Fetching logstash jar"
	$(QUIET)$(DOWNLOAD_COMMAND) vendor/jar/logstash-$(LOGSTASH_VERSION)-flatjar.jar $(LOGSTASH_URL)

build/monolith: get-logstash get-kafka $(JRUBY) vendor-gems copy-ruby-files | build
	$(QUIET)mkdir -p $@
	@# Unpack all the 3rdparty jars and any jars in gems
	$(QUIET)find $$PWD/vendor/bundle $$PWD/vendor/jar -name '*.jar' \
		| (cd $@; xargs -n1 jar xf)
	@# Merge all service files in all 3rdparty jars
	$(QUIET)mkdir -p $@/META-INF/services/
	$(QUIET)find $$PWD/vendor/bundle $$PWD/vendor/jar -name '*.jar' \
		| xargs $(JRUBY_CMD) extract_services.rb -o $@/META-INF/services
	-$(QUIET)rm -f $@/META-INF/*.LIST
	-$(QUIET)rm -f $@/META-INF/*.MF
	-$(QUIET)rm -f $@/META-INF/*.RSA
	-$(QUIET)rm -f $@/META-INF/*.SF
	-$(QUIET)rm -f $@/META-INF/NOTICE $@/META-INF/NOTICE.txt
	-$(QUIET)rm -f $@/META-INF/LICENSE $@/META-INF/LICENSE.txt

build-jruby: $(JRUBY)

$(JRUBY): | vendor/jar
	$(QUIET)echo "=> Downloading jruby $(JRUBY_VERSION)"
	$(QUIET)$(DOWNLOAD_COMMAND) $@ $(JRUBY_URL)

vendor-gems: | vendor/bundle

vendor/bundle: | vendor $(JRUBY)
	@echo "=> Installing gems to $@..."
	$(QUIET)GEM_HOME=./vendor/bundle/jruby/1.9/ GEM_PATH= $(JRUBY_CMD) --1.9 ./gembag.rb logstash-kafka.gemspec
	$(QUIET)-rm -rf $@/jruby/1.9/gems/riak-client-1.0.3/pkg
	@# Purge any rspec or test directories
	$(QUIET)-rm -rf $@/jruby/1.9/gems/*/spec $@/jruby/1.9/gems/*/test
	@# Purge any comments in ruby code.
	@#-find $@/jruby/1.9/gems/ -name '*.rb' | xargs -n1 sed -i -re '/^[ \t]*#/d; /^[ \t]*$$/d'
	$(QUIET)touch $@

.PHONY: copy-ruby-files
copy-ruby-files: | build/ruby
	@# Copy lib/ and test/ files to the root
	$(QUIET)rsync -a --include "*/" --include "*.rb" --exclude "*" ./lib/ ./build/ruby
	@# Delete any empty directories copied by rsync.
	$(QUIET)find ./build/ruby -type d -empty -delete

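# Flatten each vendored gem's lib/ directory so its ruby files can be merged
# into the jar next to logstash's own.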
build/flatgems: | build vendor/bundle
	@echo "=> Copy external gems"
	mkdir $@
	for i in $(VENDOR_DIR)/gems/*/lib; do \
		rsync -a $$i/ $@/$$(basename $$i) ; \
	done

build/jar: | build build/flatgems build/monolith
	$(QUIET)mkdir build/jar
	$(QUIET)rsync -a build/monolith/ build/flatgems/lib/ build/ruby/ build/jar/

flatjar: build/logstash-$(LOGSTASH_VERSION)-flatjar-kafka-$(KAFKA_VERSION).jar

build/logstash-$(LOGSTASH_VERSION)-flatjar-kafka-$(KAFKA_VERSION).jar: | build/jar
	$(QUIET)rm -f $@
	$(QUIET)jar cfe $@ logstash.runner -C build/jar .
	@echo "Created $@"
--------------------------------------------------------------------------------