├── .dockerignore
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── README.md
├── config
│   ├── consumer.properties
│   ├── log4j.properties
│   ├── producer.properties
│   ├── server.properties.template
│   ├── tools-log4j.properties
│   └── zookeeper.properties
└── start.sh
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | .gitignore
3 | CHANGELOG.md
4 | README.md
5 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | logs
2 | data
3 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | Release Notes
2 | =============
3 | 
4 | ## latest - running releases of the `latest` tag
5 | 
6 | No changes yet.
7 | 
8 | ## 0.10.2.1 - 12 June, 2017
9 | 
10 | ### Added
11 | 
12 | - Allow configuration of `num.partitions` ([sjking], #40)
13 | - Allow configuration of `log.retention.hours` (#42)
14 | 
15 | ### Changed
16 | 
17 | - Update to Kafka 0.10.2.1
18 | 
19 | ### Fixed
20 | 
21 | - Fixed incorrect variable name substitution for `KAFKA_AUTO_CREATE_TOPICS_ENABLE` (#43)
22 | 
23 | ## 0.10.2.0 - 31 March, 2017
24 | 
25 | - Update to Kafka 0.10.2.0 ([bgaechter], #39)
26 | - Switch to Scala 2.12 builds as now recommended by Kafka.
27 | - Change config var `GROUP_MAX_SESSION_TIMEOUT_MS` to
28 |   `KAFKA_GROUP_MAX_SESSION_TIMEOUT_MS` for consistency.
29 | - Allow configuration of `auto.create.topics.enable`,
30 |   `inter.broker.protocol.version`, and `log.message.format.version`
31 |   ([bgaechter], #39)
32 | 
33 | ## 0.10.1.1 - 31 March, 2017
34 | 
35 | - Update to Kafka 0.10.1.1
36 | - Allow configuration of `default.replication.factor` ([sjking], #32)
37 | 
38 | ## 0.10.1.0 - 27 October, 2016
39 | 
40 | - Update to Kafka 0.10.1.0 ([xrl], #25)
41 | 
42 | ## 0.10.0.1 - 3 September, 2016
43 | 
44 | - Update to Kafka 0.10.0.1
45 | - Make IP detection from `/etc/hosts` in the start script resilient to multiple
46 |   or partial matches. ([Jamstah], #18)
47 | - Add configurability for several timeout values. ([closedLoop], #20)
48 | 
49 | ## 0.10.0.0 - 16 June, 2016
50 | 
51 | - Updated to Kafka 0.10.0.0
52 | - Updated to Java 8
53 | 
54 | ## 0.9.0.1 - 17 April, 2016
55 | 
56 | - Updated to Kafka 0.9.0.1
57 | 
58 | ## 0.9.0.0 - 17 April, 2016
59 | 
60 | - Updated to Kafka 0.9.0.0. Switched to Scala 2.11 builds as now recommended by
61 |   the project.
62 | 
63 | ## 0.8.2.2 - 17 April, 2016
64 | 
65 | - Updated to Kafka 0.8.2.2
66 | - Allow more flexible configuration of the ZooKeeper connection string so that
67 |   a ZK cluster can be used. ([androa], #4)
68 | - Fix `advertised.host.name` resolution for `/etc/hosts` changes in Docker
69 |   1.10.0+. ([davidgiesberg], #14)
70 | 
71 | ## 0.8.2.1 - 24 August, 2015
72 | 
73 | - Updated to Kafka 0.8.2.1
74 | - Switch base image to `netflixoss/java:7`. `relateiq/oracle-java7` does not
75 |   tag its images, which is rather annoying for build consistency, and further,
76 |   they changed it to be based on `ubuntu:14.10`, which is not a Long Term
77 |   Support release. In my opinion non-LTS versions are not suitable for
78 |   production server usage.
79 | - Fix JMX connectivity by pegging the RMI port.
80 | - Cleaned up the `start.sh` script to remove RelateIQ dev particularities.
81 | - Changed EXPOSE env var names to ADVERTISED to better match Kafka config
82 |   properties.
83 | 
84 | ## 0.8.1.1-1 - 4 September, 2014
85 | 
86 | - Adds /kafka/bin to PATH for more convenient use of tools like `kafka-topics.sh`
87 | - Creates a `kafka` user to own the service process and data
88 | - Fixes slf4j-log4j not loading (a typo when adding the jar to the classpath)
89 | 
90 | ## 0.8.1.1
91 | 
92 | Initial build with Kafka 0.8.1.1 from the official binary distribution.
93 | 
94 | 
95 | [androa]: https://github.com/androa
96 | [bgaechter]: https://github.com/bgaechter
97 | [closedLoop]: https://github.com/closedLoop
98 | [davidgiesberg]: https://github.com/davidgiesberg
99 | [Jamstah]: https://github.com/Jamstah
100 | [sjking]: https://github.com/sjking
101 | [xrl]: https://github.com/xrl
102 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Builds an image for Apache Kafka from the official binary distribution.
2 | #
3 | # The netflixoss/java base image runs Oracle Java 8 installed atop the
4 | # ubuntu:trusty (14.04) official image. Docker's official java images are
5 | # currently OpenJDK-only, while the Kafka project, Confluent, and most other
6 | # major Java projects test and recommend Oracle Java in production for optimal
7 | # performance.
8 | 
9 | FROM netflixoss/java:8
10 | MAINTAINER Ches Martin
11 | 
12 | # The Scala 2.12 build is currently recommended by the project.
13 | ENV KAFKA_VERSION=0.10.2.1 KAFKA_SCALA_VERSION=2.12 JMX_PORT=7203
14 | ENV KAFKA_RELEASE_ARCHIVE kafka_${KAFKA_SCALA_VERSION}-${KAFKA_VERSION}.tgz
15 | 
16 | RUN mkdir /kafka /data /logs
17 | 
18 | RUN apt-get update && \
19 |     DEBIAN_FRONTEND=noninteractive apt-get install -y \
20 |     ca-certificates
21 | 
22 | # Download the Kafka binary distribution
23 | ADD http://www.us.apache.org/dist/kafka/${KAFKA_VERSION}/${KAFKA_RELEASE_ARCHIVE} /tmp/
24 | ADD https://dist.apache.org/repos/dist/release/kafka/${KAFKA_VERSION}/${KAFKA_RELEASE_ARCHIVE}.md5 /tmp/
25 | 
26 | WORKDIR /tmp
27 | 
28 | # Print the artifact digest beside the published checksum for a manual integrity check (the two digests should match)
29 | RUN echo VERIFY CHECKSUM: && \
30 |     gpg --print-md MD5 ${KAFKA_RELEASE_ARCHIVE} 2>/dev/null && \
31 |     cat ${KAFKA_RELEASE_ARCHIVE}.md5
32 | 
33 | # Install Kafka to /kafka
34 | RUN tar -zx -C /kafka --strip-components=1 -f ${KAFKA_RELEASE_ARCHIVE} && \
35 |     rm -rf kafka_*
36 | 
37 | ADD config /kafka/config
38 | ADD start.sh /start.sh
39 | 
40 | # Set up a user to run Kafka
41 | RUN groupadd kafka && \
42 |     useradd -d /kafka -g kafka -s /bin/false kafka && \
43 |     chown -R kafka:kafka /kafka /data /logs
44 | USER kafka
45 | ENV PATH /kafka/bin:$PATH
46 | WORKDIR /kafka
47 | 
48 | # broker, jmx
49 | EXPOSE 9092 ${JMX_PORT}
50 | VOLUME [ "/data", "/logs" ]
51 | 
52 | CMD ["/start.sh"]
53 | 
54 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Apache Kafka on Docker
2 | ======================
3 | 
4 | This repository holds a build definition and supporting files for building a
5 | [Docker] image to run [Kafka] in containers. It is published as an Automated
6 | Build [on Docker Hub], as `ches/kafka`.
7 | 
8 | This build intends to provide an operator-friendly Kafka deployment suitable for
9 | usage in a production Docker environment:
10 | 
11 | - It runs one service, no bundled ZooKeeper (for more convenient development,
12 |   use [Docker Compose]!).
13 | - Configuration is parameterized, enabling a Kafka cluster to be run from
14 |   multiple container instances.
15 | - Kafka data and logs can be handled outside the container(s) using volumes.
16 | - JMX is exposed, for Kafka and JVM metrics visibility.
17 | 
18 | If you find any shortcomings with the build regarding operability, pull requests
19 | or feedback via GitHub issues are welcome.
20 | 
21 | [Docker Compose]: https://docs.docker.com/compose/
22 | 
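Before trying the examples below, get the image either by pulling it from Docker
Hub or by building it locally from a clone of this repository (the `-t` tag here
simply mirrors the published name; use whatever local tag you like):

```bash
# Pull the published image from Docker Hub...
docker pull ches/kafka

# ...or build it yourself from the root of this repository
docker build -t ches/kafka .
```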
23 | Usage Quick Start
24 | -----------------
25 | 
26 | Here is a minimal-configuration example running the Kafka broker service, then
27 | using the container as a client to run the basic producer and consumer example
28 | from [the Kafka Quick Start]:
29 | 
30 | ```
31 | # A non-default bridge network enables convenient name-to-hostname discovery
32 | $ docker network create kafka-net
33 | 
34 | $ docker run -d --name zookeeper --network kafka-net zookeeper:3.4
35 | $ docker run -d --name kafka --network kafka-net --env ZOOKEEPER_IP=zookeeper ches/kafka
36 | 
37 | $ docker run --rm --network kafka-net ches/kafka \
38 | >   kafka-topics.sh --create --topic test --replication-factor 1 --partitions 1 --zookeeper zookeeper:2181
39 | Created topic "test".
40 | 
41 | # In separate terminals:
42 | $ docker run --rm --interactive --network kafka-net ches/kafka \
43 | >   kafka-console-producer.sh --topic test --broker-list kafka:9092
44 | 
45 | 
46 | $ docker run --rm --network kafka-net ches/kafka \
47 | >   kafka-console-consumer.sh --topic test --from-beginning --bootstrap-server kafka:9092
48 | ```
49 | 
50 | ### Volumes
51 | 
52 | The container exposes two volumes that you may wish to bind-mount, or process
53 | elsewhere with `--volumes-from`:
54 | 
55 | - `/data`: Path where Kafka's data is stored (`log.dirs` in Kafka configuration)
56 | - `/logs`: Path where Kafka's logs (`INFO` level) will be written, via log4j
57 | 
58 | ### Ports and Linking
59 | 
60 | The container exposes two ports:
61 | 
62 | - `9092`: Kafka's standard broker communication
63 | - `7203`: JMX, for e.g. a jconsole or VisualVM connection
64 | 
65 | Kafka requires Apache ZooKeeper. You can satisfy the dependency by simply
66 | linking another container that exposes ZooKeeper on its standard port of 2181,
67 | **ensuring** that you link using an alias of `zookeeper`.
68 | 
69 | Alternatively, you may configure a specific address for Kafka to find ZK, as
70 | the example above does with `ZOOKEEPER_IP`. See the Configuration section
71 | below.
72 | 
73 | ### A more complex local development setup
74 | 
75 | This example shows more configuration options and assumes that you wish to run a
76 | development environment with Kafka ports mapped directly to localhost, for
77 | instance if you're writing a producer or consumer and want to avoid rebuilding a
78 | container for it to run in as you iterate. This requires that localhost is your
79 | Docker host, i.e. your workstation runs Linux. If you're using something like
80 | boot2docker, substitute the value of `boot2docker ip` below.
81 | 
82 | ```bash
83 | $ mkdir -p kafka-ex/{data,logs} && cd kafka-ex
84 | $ docker run -d --name zookeeper --publish 2181:2181 zookeeper:3.4
85 | $ docker run -d \
86 |     --hostname localhost \
87 |     --name kafka \
88 |     --volume "$(pwd)/data":/data --volume "$(pwd)/logs":/logs \
89 |     --publish 9092:9092 --publish 7203:7203 \
90 |     --env KAFKA_ADVERTISED_HOST_NAME=127.0.0.1 --env ZOOKEEPER_IP=127.0.0.1 \
91 |     ches/kafka
92 | ```
93 | 
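The exposed volumes also keep one-off maintenance tasks out of the broker
container itself. A small sketch, assuming the `kafka` container from the
examples above (the `ubuntu:14.04` image and the archive name are arbitrary
illustrative choices, not anything this image requires):

```bash
# Tail the broker's log4j output from a throwaway client container
docker run --rm --volumes-from kafka ubuntu:14.04 tail -n 50 /logs/server.log

# Snapshot the data volume to a tarball on the host
docker run --rm --volumes-from kafka ubuntu:14.04 tar czf - /data > kafka-data.tar.gz
```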
94 | Configuration
95 | -------------
96 | 
97 | Some parameters of Kafka configuration can be set through environment variables
98 | when running the container (`docker run -e VAR=value`). These are shown here
99 | with their default values, if any (a multi-broker sketch follows the list):
100 | 
101 | - `KAFKA_BROKER_ID=0`
102 | 
103 |   Maps to Kafka's `broker.id` setting. Must be a unique integer for each broker
104 |   in a cluster.
105 | - `KAFKA_PORT=9092`
106 | 
107 |   Maps to Kafka's `port` setting. The port that the broker service listens on.
108 |   You will need to explicitly publish a new port from container instances if you
109 |   change this.
110 | - `KAFKA_ADVERTISED_HOST_NAME=`
111 | 
112 |   Maps to Kafka's `advertised.host.name` setting. Kafka brokers gossip the list
113 |   of brokers in the cluster to relieve producers from depending on a ZooKeeper
114 |   library. This setting should reflect the address at which producers can reach
115 |   the broker on the network, i.e. if you build a cluster consisting of multiple
116 |   physical Docker hosts, you will need to set this to the hostname of the Docker
117 |   *host's* interface where you forward the container `KAFKA_PORT`.
118 | - `KAFKA_ADVERTISED_PORT=9092`
119 | 
120 |   As above, for the port part of the advertised address. Maps to Kafka's
121 |   `advertised.port` setting. If you run multiple broker containers on a single
122 |   Docker host and need them to be accessible externally, this should be set to
123 |   the port that you forward to on the Docker host.
124 | - `KAFKA_DEFAULT_REPLICATION_FACTOR=1`
125 | 
126 |   Maps to Kafka's `default.replication.factor` setting. The default replication
127 |   factor for automatically created topics.
128 | - `KAFKA_NUM_PARTITIONS=1`
129 | 
130 |   Maps to Kafka's `num.partitions` setting. The default number of log partitions
131 |   per topic.
132 | - `KAFKA_AUTO_CREATE_TOPICS_ENABLE=true`
133 | 
134 |   Maps to Kafka's `auto.create.topics.enable` setting.
135 | - `KAFKA_INTER_BROKER_PROTOCOL_VERSION`
136 | 
137 |   Maps to Kafka's `inter.broker.protocol.version` setting. If you have a cluster
138 |   that runs brokers with different Kafka versions, make sure they communicate
139 |   with the same protocol version.
140 | - `KAFKA_LOG_MESSAGE_FORMAT_VERSION`
141 | 
142 |   Maps to Kafka's `log.message.format.version` setting. Specifies the message
143 |   format version that your cluster uses when communicating with its consumers.
144 | - `KAFKA_LOG_RETENTION_HOURS=168`
145 | 
146 |   Maps to Kafka's `log.retention.hours` setting. The number of hours to keep a
147 |   log file before deleting it.
148 | 
149 | - `KAFKA_MESSAGE_MAX_BYTES`
150 | 
151 |   Maps to Kafka's `message.max.bytes` setting. The maximum size of a message
152 |   that the server can receive. Default: 1000012
153 | 
154 | - `KAFKA_REPLICA_FETCH_MAX_BYTES`
155 | 
156 |   Maps to Kafka's `replica.fetch.max.bytes` setting. The number of bytes of
157 |   messages to attempt to fetch for each partition. This is not an absolute
158 |   maximum; if the first message in the first non-empty partition of the fetch
159 |   is larger than this value, the message will still be returned to ensure that
160 |   progress can be made. The maximum message size accepted by the broker is
161 |   defined via `message.max.bytes` (broker config) or `max.message.bytes` (topic
162 |   config). Default: 1048576
163 | 
164 | - `JAVA_RMI_SERVER_HOSTNAME=$KAFKA_ADVERTISED_HOST_NAME`
165 | 
166 |   Maps to the `java.rmi.server.hostname` JVM property, which is used to bind the
167 |   interface that will accept remote JMX connections. Like
168 |   `KAFKA_ADVERTISED_HOST_NAME`, it may be necessary to set this to a reachable
169 |   address of *the Docker host* if you wish to connect a JMX client from outside
170 |   of Docker.
171 | - `ZOOKEEPER_IP=`
172 | 
173 |   **Required** unless a container publishing port 2181 is linked with the alias
174 |   `zookeeper`, or `ZOOKEEPER_CONNECTION_STRING` is used instead. Used in
175 |   constructing Kafka's `zookeeper.connect` setting.
176 | - `ZOOKEEPER_PORT=2181`
177 | 
178 |   Used in constructing Kafka's `zookeeper.connect` setting.
179 | - `ZOOKEEPER_CONNECTION_STRING=`
180 | 
181 |   A string of host:port pairs for connecting to a ZooKeeper cluster. This
182 |   setting overrides `ZOOKEEPER_IP` and `ZOOKEEPER_PORT`.
183 | - `ZOOKEEPER_CHROOT`, ex: `/v0_8_1`
184 | 
185 |   ZooKeeper root path used in constructing Kafka's `zookeeper.connect` setting.
186 |   This is blank by default, which means Kafka will use the ZK root `/`. You
187 |   should set this if the ZK instance/cluster is shared by other services, or to
188 |   accommodate Kafka upgrades that change schema. Starting with Kafka 0.8.2, the
189 |   broker will create the path in ZK automatically; with earlier versions, you
190 |   must ensure it is created before starting brokers.
191 | 
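To illustrate how these variables compose, here is the multi-broker sketch
mentioned above: three brokers on one Docker host, reusing the `kafka-net`
network and `zookeeper` container from the Quick Start. The advertised address
`203.0.113.10` is a placeholder; substitute an address of your Docker host that
clients can actually reach. This is a sketch under those assumptions, not a
production recipe:

```bash
# Each broker needs a unique broker.id and its own published port; all of them
# advertise the Docker host's address so external clients can connect.
for i in 1 2 3; do
  docker run -d --name "kafka-$i" --network kafka-net \
    --publish "909${i}:909${i}" \
    --env KAFKA_BROKER_ID="$i" \
    --env KAFKA_PORT="909${i}" \
    --env KAFKA_ADVERTISED_PORT="909${i}" \
    --env KAFKA_ADVERTISED_HOST_NAME=203.0.113.10 \
    --env ZOOKEEPER_IP=zookeeper \
    ches/kafka
done
```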
192 | JMX
193 | ---
194 | 
195 | Remote JMX access can be a bit of a pain to set up. The start script for this
196 | container tries to make it as painless as possible, but it's important to
197 | understand that if you want to connect a client like VisualVM from somewhere
198 | other than another Docker container (e.g. directly from your host OS in
199 | development), then you'll need to configure RMI to be addressed *as the Docker
200 | host IP or hostname*. If you have set `KAFKA_ADVERTISED_HOST_NAME`, that value
201 | will be used and is probably what you want. If not (you're only using other
202 | containers to talk to Kafka brokers) or you need to override it for some
203 | reason, then you can instead set `JAVA_RMI_SERVER_HOSTNAME`.
204 | 
205 | In practice, for example, if your Docker host is a VirtualBox VM run by Docker
206 | Machine, a `run` command like this should allow you to connect VisualVM from
207 | your host OS to `$(docker-machine ip docker-vm):7203`:
208 | 
209 |     $ docker run -d --name kafka -p 7203:7203 \
210 |         --link zookeeper:zookeeper \
211 |         --env JAVA_RMI_SERVER_HOSTNAME=$(docker-machine ip docker-vm) \
212 |         ches/kafka
213 | 
214 | Note that it is fussy about the port as well—it may not work if the same port
215 | number is not used inside the container and on the host (any advice for
216 | workarounds is welcome).
217 | 
218 | Finally, please note that by default remote JMX has authentication and SSL
219 | turned off (these settings are taken from Kafka's own default start scripts).
220 | If you expose the JMX hostname/port from the Docker host in a production
221 | environment, you should make certain that access is locked down appropriately
222 | with firewall rules or similar. A more advisable setup in a Docker setting
223 | would be to run a metrics collector in another container, and link it to the
224 | Kafka container(s).
225 | 
226 | If you need finer-grained configuration, you can totally control the relevant
227 | Java system properties by setting `KAFKA_JMX_OPTS` yourself—see `start.sh`.
228 | 
229 | Fork Legacy
230 | -----------
231 | 
232 | This image/repo was originally forked from [relateiq/kafka]. My original
233 | motivations for forking were:
234 | 
235 | - Change the Kafka binary source to an official Apache artifact. RelateIQ's was
236 |   on a private S3 bucket, and this opaqueness is not suitable for a publicly-shared image for reasons of trust.
238 | - Changes described in [this pull request](https://github.com/relateiq/docker-kafka/pull/4). 239 | 240 | After a period of unresponsiveness from upstream on pull requests and my repo 241 | tallying far more downloads on Docker Hub, I have made further updates and 242 | changes with the expectation of maintaining independently from here on. This 243 | project's changelog file describes these in detail. 244 | 245 | 246 | [Docker]: http://www.docker.io 247 | [Kafka]: http://kafka.apache.org 248 | [on Docker Hub]: https://hub.docker.com/r/ches/kafka/ 249 | [relateiq/kafka]: https://github.com/relateiq/docker-kafka 250 | [the Kafka Quick Start]: http://kafka.apache.org/documentation.html#quickstart 251 | 252 | -------------------------------------------------------------------------------- /config/consumer.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Zookeeper connection string 17 | # comma separated host:port pairs, each corresponding to a zk 18 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" 19 | zookeeper.connect=127.0.0.1:2181 20 | 21 | # timeout in ms for connecting to zookeeper 22 | zookeeper.connection.timeout.ms=1000000 23 | 24 | #consumer group id 25 | group.id=test-consumer-group 26 | 27 | #consumer timeout 28 | #consumer.timeout.ms=5000 29 | -------------------------------------------------------------------------------- /config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | kafka.logs.dir=/logs 17 | 18 | log4j.rootLogger=INFO, stdout 19 | 20 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 23 | 24 | log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender 25 | log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH 26 | log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log 27 | log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout 28 | log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 29 | 30 | log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender 31 | log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH 32 | log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log 33 | log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout 34 | log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 35 | 36 | log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender 37 | log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH 38 | log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log 39 | log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout 40 | log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 41 | 42 | log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender 43 | log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH 44 | log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log 45 | log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout 46 | log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 47 | 48 | log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender 49 | log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH 50 | log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log 51 | log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout 52 | log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n 53 | 54 | # Turn on all our debugging info 55 | #log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender 56 | #log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender 57 | #log4j.logger.kafka.perf=DEBUG, kafkaAppender 58 | #log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender 59 | #log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG 60 | log4j.logger.kafka=INFO, kafkaAppender 61 | 62 | log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender 63 | log4j.additivity.kafka.network.RequestChannel$=false 64 | 65 | #log4j.logger.kafka.network.Processor=TRACE, requestAppender 66 | #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender 67 | #log4j.additivity.kafka.server.KafkaApis=false 68 | log4j.logger.kafka.request.logger=WARN, requestAppender 69 | log4j.additivity.kafka.request.logger=false 70 | 71 | log4j.logger.kafka.controller=TRACE, controllerAppender 72 | log4j.additivity.kafka.controller=false 73 | 74 | log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender 75 | log4j.additivity.kafka.log.LogCleaner=false 76 | 77 | log4j.logger.state.change.logger=TRACE, stateChangeAppender 78 | log4j.additivity.state.change.logger=false 79 | -------------------------------------------------------------------------------- /config/producer.properties: 
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # see kafka.producer.ProducerConfig for more details
16 | 
17 | ############################# Producer Basics #############################
18 | 
19 | # list of brokers used for bootstrapping knowledge about the rest of the cluster
20 | # format: host1:port1,host2:port2 ...
21 | metadata.broker.list=localhost:9092
22 | 
23 | # name of the partitioner class for partitioning events; default partition spreads data randomly
24 | #partitioner.class=
25 | 
26 | # specifies whether the messages are sent asynchronously (async) or synchronously (sync)
27 | producer.type=sync
28 | 
29 | # specify the compression codec for all data generated: none, gzip, snappy.
30 | # the old config values work as well: 0, 1, 2 for none, gzip, snappy, respectively
31 | compression.codec=snappy
32 | 
33 | # message encoder
34 | serializer.class=
35 | 
36 | # allow topic level compression
37 | #compressed.topics=
38 | 
39 | ############################# Async Producer #############################
40 | # maximum time, in milliseconds, for buffering data on the producer queue
41 | #queue.buffering.max.ms=
42 | 
43 | # the maximum size of the blocking queue for buffering on the producer
44 | #queue.buffering.max.messages=
45 | 
46 | # Timeout for event enqueue:
47 | # 0: events will be enqueued immediately or dropped if the queue is full
48 | # negative: enqueue will block indefinitely if the queue is full
49 | # positive: enqueue will block up to this many milliseconds if the queue is full
50 | #queue.enqueue.timeout.ms=
51 | 
52 | # the number of messages batched at the producer
53 | #batch.num.messages=
54 | 
--------------------------------------------------------------------------------
/config/server.properties.template:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id={{KAFKA_BROKER_ID}} 21 | auto.leader.rebalance.enable=true 22 | 23 | # Replication 24 | auto.create.topics.enable={{KAFKA_AUTO_CREATE_TOPICS_ENABLE}} 25 | default.replication.factor={{KAFKA_DEFAULT_REPLICATION_FACTOR}} 26 | 27 | # Hostname the broker will advertise to consumers. If not set, kafka will use the value returned 28 | # from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost 29 | # may not be what you want. 30 | advertised.host.name={{KAFKA_ADVERTISED_HOST_NAME}} 31 | 32 | # Enable topic deletion 33 | delete.topic.enable={{KAFKA_DELETE_TOPIC_ENABLE}} 34 | 35 | ############################# Socket Server Settings ############################# 36 | 37 | # The port the socket server listens on 38 | port={{KAFKA_PORT}} 39 | advertised.port={{KAFKA_ADVERTISED_PORT}} 40 | 41 | ############################# Log Basics ############################# 42 | 43 | # The directory under which to store log files 44 | log.dir=/data 45 | log.dirs=/data 46 | 47 | # The number of logical partitions per topic per server. More partitions allow greater parallelism 48 | # for consumption, but also mean more files. 49 | num.partitions={{KAFKA_NUM_PARTITIONS}} 50 | 51 | ############################# Log Retention Policy ############################# 52 | 53 | # The following configurations control the disposal of log segments. The policy can 54 | # be set to delete segments after a period of time, or after a given size has accumulated. 55 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 56 | # from the end of the log. 57 | 58 | # The minimum age of a log file to be eligible for deletion 59 | log.retention.hours={{KAFKA_LOG_RETENTION_HOURS}} 60 | 61 | ############################# Zookeeper ############################# 62 | 63 | # Zk connection string (see zk docs for details). 64 | # This is a comma separated host:port pairs, each corresponding to a zk 65 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 66 | # You can also append an optional chroot string to the urls to specify the 67 | # root directory for all kafka znodes. 68 | zookeeper.connect={{ZOOKEEPER_CONNECTION_STRING}}{{ZOOKEEPER_CHROOT}} 69 | zookeeper.connection.timeout.ms={{ZOOKEEPER_CONNECTION_TIMEOUT_MS}} 70 | zookeeper.session.timeout.ms={{ZOOKEEPER_SESSION_TIMEOUT_MS}} 71 | 72 | ############################# Additional Broker Settings ####################### 73 | controlled.shutdown.enable=true 74 | group.max.session.timeout.ms={{KAFKA_GROUP_MAX_SESSION_TIMEOUT_MS}} 75 | inter.broker.protocol.version={{KAFKA_INTER_BROKER_PROTOCOL_VERSION}} 76 | log.message.format.version={{KAFKA_LOG_MESSAGE_FORMAT_VERSION}} 77 | message.max.bytes={{KAFKA_MESSAGE_MAX_BYTES}} 78 | replica.fetch.max.bytes={{KAFKA_REPLICA_FETCH_MAX_BYTES}} 79 | 80 | # vim:set filetype=jproperties 81 | -------------------------------------------------------------------------------- /config/tools-log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=WARN, stdout 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | -------------------------------------------------------------------------------- /config/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir=/tmp/zookeeper 17 | # the port at which the clients will connect 18 | clientPort=2181 19 | # disable the per-ip limit on the number of connections since this is a non-production config 20 | maxClientCnxns=0 21 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # If a ZooKeeper container is linked with the alias `zookeeper`, use it. 4 | # You MUST set ZOOKEEPER_IP in env otherwise. 
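# (The ZOOKEEPER_PORT_2181_TCP_* variables checked below are the ones Docker's
# legacy `--link zookeeper:zookeeper` option injects; containers on user-defined
# networks get no such variables, so set ZOOKEEPER_IP yourself in that case.)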
5 | [ -n "$ZOOKEEPER_PORT_2181_TCP_ADDR" ] && ZOOKEEPER_IP=$ZOOKEEPER_PORT_2181_TCP_ADDR
6 | [ -n "$ZOOKEEPER_PORT_2181_TCP_PORT" ] && ZOOKEEPER_PORT=$ZOOKEEPER_PORT_2181_TCP_PORT
7 | 
8 | IP=$(grep "\s${HOSTNAME}$" /etc/hosts | head -n 1 | awk '{print $1}')
9 | 
10 | # Concatenate the IP:PORT for ZooKeeper to allow setting a full connection
11 | # string with multiple ZooKeeper hosts
12 | [ -z "$ZOOKEEPER_CONNECTION_STRING" ] && ZOOKEEPER_CONNECTION_STRING="${ZOOKEEPER_IP}:${ZOOKEEPER_PORT:-2181}"
13 | 
14 | sed \
15 |     -e "s|{{KAFKA_ADVERTISED_HOST_NAME}}|${KAFKA_ADVERTISED_HOST_NAME:-$IP}|g" \
16 |     -e "s|{{KAFKA_ADVERTISED_PORT}}|${KAFKA_ADVERTISED_PORT:-9092}|g" \
17 |     -e "s|{{KAFKA_AUTO_CREATE_TOPICS_ENABLE}}|${KAFKA_AUTO_CREATE_TOPICS_ENABLE:-true}|g" \
18 |     -e "s|{{KAFKA_BROKER_ID}}|${KAFKA_BROKER_ID:-0}|g" \
19 |     -e "s|{{KAFKA_DEFAULT_REPLICATION_FACTOR}}|${KAFKA_DEFAULT_REPLICATION_FACTOR:-1}|g" \
20 |     -e "s|{{KAFKA_DELETE_TOPIC_ENABLE}}|${KAFKA_DELETE_TOPIC_ENABLE:-false}|g" \
21 |     -e "s|{{KAFKA_GROUP_MAX_SESSION_TIMEOUT_MS}}|${KAFKA_GROUP_MAX_SESSION_TIMEOUT_MS:-300000}|g" \
22 |     -e "s|{{KAFKA_INTER_BROKER_PROTOCOL_VERSION}}|${KAFKA_INTER_BROKER_PROTOCOL_VERSION:-$KAFKA_VERSION}|g" \
23 |     -e "s|{{KAFKA_LOG_MESSAGE_FORMAT_VERSION}}|${KAFKA_LOG_MESSAGE_FORMAT_VERSION:-$KAFKA_VERSION}|g" \
24 |     -e "s|{{KAFKA_LOG_RETENTION_HOURS}}|${KAFKA_LOG_RETENTION_HOURS:-168}|g" \
25 |     -e "s|{{KAFKA_NUM_PARTITIONS}}|${KAFKA_NUM_PARTITIONS:-1}|g" \
26 |     -e "s|{{KAFKA_PORT}}|${KAFKA_PORT:-9092}|g" \
27 |     -e "s|{{ZOOKEEPER_CHROOT}}|${ZOOKEEPER_CHROOT:-}|g" \
28 |     -e "s|{{ZOOKEEPER_CONNECTION_STRING}}|${ZOOKEEPER_CONNECTION_STRING}|g" \
29 |     -e "s|{{ZOOKEEPER_CONNECTION_TIMEOUT_MS}}|${ZOOKEEPER_CONNECTION_TIMEOUT_MS:-10000}|g" \
30 |     -e "s|{{ZOOKEEPER_SESSION_TIMEOUT_MS}}|${ZOOKEEPER_SESSION_TIMEOUT_MS:-10000}|g" \
31 |     -e "s|{{KAFKA_MESSAGE_MAX_BYTES}}|${KAFKA_MESSAGE_MAX_BYTES:-1000012}|g" \
32 |     -e "s|{{KAFKA_REPLICA_FETCH_MAX_BYTES}}|${KAFKA_REPLICA_FETCH_MAX_BYTES:-1048576}|g" \
33 |     /kafka/config/server.properties.template > /kafka/config/server.properties
34 | 
35 | # Kafka's built-in start scripts set the first three system properties here, but
36 | # we add two more to make remote JMX easier/possible to access in a Docker
37 | # environment:
38 | #
39 | #  1. RMI port - pinning this makes the JVM use a stable one instead of
40 | #     selecting random high ports each time it starts up.
41 | #  2. RMI hostname - normally set automatically by heuristics that may have
42 | #     hard-to-predict results across environments.
43 | #
44 | # These allow saner configuration for firewalls, EC2 security groups, Docker
45 | # hosts running in a VM with Docker Machine, etc. See:
46 | #
47 | #   https://issues.apache.org/jira/browse/CASSANDRA-7087
48 | if [ -z "$KAFKA_JMX_OPTS" ]; then
49 |     KAFKA_JMX_OPTS="-Dcom.sun.management.jmxremote=true"
50 |     KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
51 |     KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.ssl=false"
52 |     KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
53 |     KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Djava.rmi.server.hostname=${JAVA_RMI_SERVER_HOSTNAME:-$KAFKA_ADVERTISED_HOST_NAME}"
54 |     export KAFKA_JMX_OPTS
55 | fi
56 | 
57 | echo "Starting kafka"
58 | exec /kafka/bin/kafka-server-start.sh /kafka/config/server.properties
59 | 
--------------------------------------------------------------------------------
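As a quick sanity check of the templating above, you can inspect the rendered
file in a running container (the container name `kafka` matches the Quick Start
example):

```bash
# Show a few of the values start.sh substituted into server.properties
docker exec kafka grep -E '^(broker\.id|advertised|zookeeper\.connect)' \
    /kafka/config/server.properties
```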