├── config.yaml
├── README.md
├── LICENSE
├── .gitignore
├── run-kafka.rb
└── server.properties.erb

/config.yaml:
--------------------------------------------------------------------------------
---
cluster: your-cluster
zk_hosts:
  - 10.0.0.1:2181
  - 10.0.0.2:2181
  - 10.0.0.3:2181
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
kafka-on-marathon
=================

Scripts for running Apache Kafka on Mesosphere's Marathon.

Alternatively, you may want to use the [Kafka framework](https://github.com/mesos/kafka).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
                    Version 2, December 2004

 Copyright (C) 2004 Sam Hocevar

 Everyone is permitted to copy and distribute verbatim or modified
 copies of this license document, and changing it is allowed as long
 as the name is changed.

            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. You just DO WHAT THE FUCK YOU WANT TO.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.gem
*.rbc
/.config
/coverage/
/InstalledFiles
/pkg/
/spec/reports/
/test/tmp/
/test/version_tmp/
/tmp/

## Specific to RubyMotion:
.dat*
.repl_history
build/

## Documentation cache and generated files:
/.yardoc/
/_yardoc/
/doc/
/rdoc/

## Environment normalisation:
/.bundle/
/lib/bundler/man/

# for a library or gem, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# Gemfile.lock
# .ruby-version
# .ruby-gemset

# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
.rvmrc
--------------------------------------------------------------------------------
/run-kafka.rb:
--------------------------------------------------------------------------------
require 'zk'
require 'yaml'
require 'erb'
require 'set'

module Kafka
  class Broker
    def initialize(heap_size, broker_count, ports, config)
      @heap_size = heap_size.to_i
      @prng = Random.new((Time.now.to_f * 100000).to_i)

      @yaml = YAML.load_file(config)

      @zk_servers = @yaml['zk_hosts'].shuffle.join(',')
      @cluster = @yaml['cluster']

      zk_connect

      @ports = ports.split(/,/)

      @broker_count = broker_count.to_i

      @broker_set = get_missing_brokers
      if @broker_set.empty?
        $stderr.puts "No missing brokers found!"
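        # Every ID in 0..@broker_count-1 already has an owner in Zookeeper, so
        # there is nothing left for this instance to claim.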
        exit 1
      end

      @attempts = 0
      @max_attempts = 3 * @broker_count

      @hostname = `hostname`.chomp
    end

    def zk_connect
      @zk = ZK.new(@zk_servers, {
        :chroot => "/kafka-#{@cluster}",
        :thread => :single,
        :timeout => 5,
      })

      @zk.wait_until_connected
    end

    def zk_reconnect
      close
      sleep 10
      zk_connect
      become_broker
    end

    # Initially, populate the broker set with the list of broker IDs not yet
    # registered in Zookeeper, if possible.
    def get_missing_brokers
      broker_set = Set.new(0..@broker_count - 1)
      if @zk.exists?('/brokers/ids') && @zk.stat('/brokers/ids').num_children > 0
        ids = @zk.children('/brokers/ids').map { |x| x.to_i }.sort
        puts "Found these broker IDs in Zookeeper: #{ids}"
        broker_set = broker_set.subtract(ids)
      end
      puts "Missing broker IDs: #{broker_set.to_a.sort}"
      broker_set
    end

    def close
      @candidate.close unless @candidate.nil?
      @zk.close! unless @zk.nil?
    end

    # Run a Zookeeper leader election for the chosen broker ID so that only one
    # process ever claims it.
    def become_broker
      elected = false
      got_result = false

      @candidate = @zk.election_candidate(
        "kafka-#{@broker_id}", @hostname, :follow => :leader)

      @candidate.on_winning_election {
        puts "Won election for kafka-#{@broker_id}"
        elected = true
        got_result = true

        @zk.on_expired_session do
          puts "ZK session expired"
          zk_reconnect
        end
      }

      @candidate.on_losing_election {
        puts "Lost election for kafka-#{@broker_id}"

        elected = false
        got_result = true
      }

      while !got_result
        puts "Trying to get elected for kafka-#{@broker_id}..."
        @candidate.vote!
        # Random sleep to help avoid a thundering herd
        sleep @prng.rand(@broker_count)
        @attempts += 1
        if @attempts > @max_attempts
          break
        end
      end

      if !elected
        close
        $stderr.puts "Couldn't become a broker. Suiciding."
        exit 1
      end
    end

    def run
      @broker_id = @broker_set.to_a.sample(:random => @prng)
      become_broker

      # Render server.properties from the ERB template using this instance's
      # broker ID, ports and hostname.
      erb = ERB.new(File.read('server.properties.erb'))
      File.open('server.properties', 'w') do |f|
        f.puts erb.result(binding)
      end

      at_exit {
        close
      }

      env = {
        "KAFKA_HEAP_OPTS" => "-XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -Xmx#{@heap_size}m -Xms#{@heap_size / 2}m -XX:NewSize=#{@heap_size / 3}m -XX:MaxNewSize=#{@heap_size / 3}m -Xss256k -XX:+UseTLAB -XX:+AlwaysPreTouch",
        "SCALA_VERSION" => "2.10.3",
        "KAFKA_LOG4J_OPTS" => "-Dlog4j.configuration=file:log4j.properties",
        "KAFKA_JVM_PERFORMANCE_OPTS" => "-server -XX:+UseCompressedOops -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC",
        "JMX_PORT" => @ports[1],
      }

      %x(tar xf kafka-exec.tar.xz)
      cmd = "./kafka-exec/bin/kafka-run-class.sh -name kafkaServer -loggc kafka.Kafka server.properties".freeze
      last_finished = 0

      loop do
        puts "About to run:"
        puts env, cmd
        GC.start # clean up memory
        system env, cmd
        finished = Time.now.to_f
        if finished - last_finished < 120
          # If the process was running for less than 2 minutes, abort. We're
          # probably 'bouncing'. Occasional restarts are okay, but not
          # continuous restarting.
          $stderr.puts "Kafka exited too soon!"
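          # Bail out and let Marathon decide whether and where to restart the
          # task, rather than retrying in a tight loop here.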
          exit 1
        end
        last_finished = finished
      end
    end
  end
end

begin
  broker = Kafka::Broker.new ARGV[0], ARGV[1], ARGV[2], ARGV[3]
  broker.run
rescue => e
  $stdout.puts e.inspect, e.backtrace
  $stderr.puts e.inspect, e.backtrace
ensure
  broker.close if broker
end
--------------------------------------------------------------------------------
/server.properties.erb:
--------------------------------------------------------------------------------
############################# Server Basics #############################

# The id of the broker. This must be set to a unique integer for each broker.
broker.id=<%= @broker_id %>

############################# Socket Server Settings #############################

# The port the socket server listens on
port=<%= @ports[0] %>

# Hostname the broker will bind to and advertise to producers and consumers.
# If not set, the server will bind to all interfaces and advertise the value
# returned from java.net.InetAddress.getCanonicalHostName().
host.name=<%= @hostname %>

# The number of threads handling network requests
num.network.threads=16

# The number of threads doing disk I/O
num.io.threads=4

# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=8388608

# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=8388608

# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600

############################# Log Basics #############################

# A comma separated list of directories under which to store log files
log.dirs=./kafka-logs

# The number of logical partitions per topic per server. More partitions allow greater parallelism
# for consumption, but also mean more files.
num.partitions=48

############################# Log Flush Policy #############################

# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
# 1. Durability: Unflushed data may be lost if you are not using replication.
# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.

# The number of messages to accept before forcing a flush of data to disk
log.flush.interval.messages=100000

# The maximum amount of time a message can sit in a log before we force a flush
log.flush.interval.ms=10000
log.flush.scheduler.interval.ms=10000

############################# Log Retention Policy #############################

# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria is met. Deletion always happens
# from the end of the log.

# The minimum age of a log file to be eligible for deletion
log.retention.hours=36

# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes.
#log.retention.bytes=1073741824

# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=268435456

# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.cleanup.interval.mins=1

############################# Zookeeper #############################

# Zookeeper connection string (see zookeeper docs for details).
# This is a comma-separated list of host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
zookeeper.connect=<%= @zk_servers %>/kafka-<%= @yaml['cluster'] %>

# Timeout in ms for connecting to zookeeper
zookeeper.session.timeout.ms=6000
zookeeper.connection.timeout.ms=6000
zookeeper.sync.time.ms=2000

log.cleanup.policy=delete

kafka.graphite.metrics.host=graphite-di.musta.ch
kafka.graphite.metrics.prefix=kafka.<%= @yaml['cluster'] %>.<%= @broker_id %>
kafka.graphite.metrics.reporter.enabled=true
kafka.metrics.reporters=kafka.metrics.KafkaGraphiteMetricsReporter
kafka.metrics.polling.interval.secs=60

controlled.shutdown.enable=true

replica.fetch.wait.max.ms=5000
replica.fetch.max.bytes=8388608
replica.socket.receive.buffer.bytes=8388608
replica.lag.max.messages=10000
replica.lag.time.max.ms=30000
replica.high.watermark.checkpoint.interval.ms=5000
replica.socket.timeout.ms=20000
num.replica.fetchers=3

controller.socket.timeout.ms=20000
controller.message.queue.size=10

auto.leader.rebalance.enable=true
message.max.bytes=8000000

fetch.purgatory.purge.interval.requests=1000
producer.purgatory.purge.interval.requests=1000
--------------------------------------------------------------------------------