├── .gitignore
├── .travis.yml
├── src
│   ├── test
│   │   ├── resources
│   │   │   ├── kafka-config.properties
│   │   │   └── test-config.properties
│   │   └── java
│   │       └── nl
│   │           └── minvenj
│   │               └── nfi
│   │                   └── storm
│   │                       └── kafka
│   │                           ├── fail
│   │                           │   ├── UnreliableFailHandlerTest.java
│   │                           │   └── ReliableFailHandlerTest.java
│   │                           ├── util
│   │                           │   ├── KafkaMessageIdTest.java
│   │                           │   └── ConfigUtilsTest.java
│   │                           ├── KafkaSpoutFailurePolicyTest.java
│   │                           ├── KafkaSpoutConstructorTest.java
│   │                           └── KafkaSpoutBufferBehaviourTest.java
│   └── main
│       └── java
│           └── nl
│               └── minvenj
│                   └── nfi
│                       └── storm
│                           └── kafka
│                               ├── fail
│                               │   ├── UnreliableFailHandler.java
│                               │   ├── AbstractFailHandler.java
│                               │   ├── ReliableFailHandler.java
│                               │   └── FailHandler.java
│                               ├── util
│                               │   ├── KafkaMessageId.java
│                               │   └── ConfigUtils.java
│                               └── KafkaSpout.java
├── CONTRIBUTING.md
├── NOTICE
├── README.md
├── pom.xml
└── LICENSE

/.gitignore:
--------------------------------------------------------------------------------
1 | # IDE files
2 | .idea/
3 | *.iml
4 | .project
5 | 
6 | # maven-specific files
7 | target/
8 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | 
3 | jdk:
4 |   - oraclejdk8
5 | 
6 | after_success:
7 |   - mvn jacoco:report coveralls:report
--------------------------------------------------------------------------------
/src/test/resources/kafka-config.properties:
--------------------------------------------------------------------------------
1 | # kafka configuration to connect to zookeeper
2 | zookeeper.connect = non-existent.host:2181
3 | # kafka configuration for consumer operation (-1 is default, indicates indefinite wait for data)
4 | consumer.timeout.ms = 100
--------------------------------------------------------------------------------
/src/test/resources/test-config.properties:
--------------------------------------------------------------------------------
1 | # file to be treated as a Java properties file
2 | key = value
3 | dashed-key = silly value
4 | a.test.property = sillier value
5 | 
6 | # number of properties with a prefix
7 | prefix.key = value
8 | prefix.another.key = another value
9 | # and one with the prefix value not at the front
10 | another.prefix.key = should not match prefix
--------------------------------------------------------------------------------
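The fixture above deliberately includes `another.prefix.key`, whose prefix does not sit at the front of the key. `ConfigUtils` and its test are not part of this excerpt, so the following is only a sketch of the prefix filtering the fixture implies — the helper name and signature are assumptions, not the project's actual API:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public final class PrefixFilterSketch {
    // collect entries whose key starts with the prefix, stripping the prefix from the key;
    // "another.prefix.key" is skipped because its prefix is not at the front
    public static Map<String, String> withPrefix(final Properties properties, final String prefix) {
        final Map<String, String> result = new HashMap<String, String>();
        for (final String name : properties.stringPropertyNames()) {
            if (name.startsWith(prefix)) {
                result.put(name.substring(prefix.length()), properties.getProperty(name));
            }
        }
        return result;
    }
}
```

Run against the fixture with prefix `"prefix."`, this yields exactly two entries (`key` and `another.key`), matching the comments in the file.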
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Kafka spout contribution guidelines
2 | ===================================
3 | The kafka spout is still in heavy development; users should expect changes in behaviour and APIs over time.
4 | Contributions to this project are welcome; please take the following guidelines into account:
5 | 
6 | - Provide both documentation and test code with contributions;
7 | - Try to keep as much of the original API intact as possible;
8 | - Follow conventions of the existing code;
9 | - Code contributed to this project is automatically licensed under the Apache License, Version 2.0.
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright 2013 Netherlands Forensic Institute
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 |     http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | *This project has been archived and is no longer being maintained.*
2 | 
3 | [![Build Status (development branch)](https://img.shields.io/travis/HolmesNL/kafka-spout/develop.svg)](https://travis-ci.org/HolmesNL/kafka-spout)
4 | [![Coverage Status (development branch)](https://img.shields.io/coveralls/HolmesNL/kafka-spout/develop.svg)](https://coveralls.io/r/HolmesNL/kafka-spout?branch=develop)
5 | 
6 | Kafka spout
7 | ===========
8 | A Storm spout implementation that reads messages from a kafka topic and emits them as single-field tuples into a storm topology.
9 | Documentation is available on [the wiki](https://github.com/HolmesNL/kafka-spout/wiki).
10 | 
11 | Development
12 | -----------
13 | This implementation was created by the Netherlands Forensic Institute and is still under development.
14 | Contributions are welcome; please read [the contribution guidelines](./CONTRIBUTING.md).
15 | 
16 | License
17 | -------
18 | This work is licensed under the Apache License, Version 2.0.
19 | See [LICENSE](./LICENSE) for details.
20 | 
--------------------------------------------------------------------------------
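To give the README's one-line description some shape, here is a minimal sketch of wiring the spout into a topology. The `KafkaSpout(String topic)` constructor and the single `"bytes"` output field are confirmed by the tests later in this repository; the bolt and names are illustrative only, and the kafka consumer settings the spout needs (e.g. `zookeeper.connect`) are read from the topology config via `ConfigUtils`, which is not shown in this excerpt:

```java
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

import nl.minvenj.nfi.storm.kafka.KafkaSpout;

public final class ExampleTopology {
    // trivial terminal bolt, for illustration only
    public static class PrinterBolt extends BaseBasicBolt {
        @Override
        public void execute(final Tuple tuple, final BasicOutputCollector collector) {
            System.out.println(tuple.getBinaryByField("bytes").length + " bytes received");
        }

        @Override
        public void declareOutputFields(final OutputFieldsDeclarer declarer) {
            // terminal bolt; declares no output fields
        }
    }

    public static void main(final String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        // read from the "example" kafka topic, emitting single-field ("bytes") tuples
        builder.setSpout("kafka", new KafkaSpout("example"), 1);
        builder.setBolt("print", new PrinterBolt()).shuffleGrouping("kafka");

        // kafka consumer settings (zookeeper.connect etc.) go into the topology config
        final Config config = new Config();
        new LocalCluster().submitTopology("kafka-example", config, builder.createTopology());
    }
}
```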
/src/main/java/nl/minvenj/nfi/storm/kafka/fail/UnreliableFailHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
20 | 
21 | /**
22 |  * {@link FailHandler} implementation making tuple failure unreliable: messages are never replayed and calls to
23 |  * {@link #fail(KafkaMessageId, byte[])} are ignored.
24 |  *
25 |  * @author Netherlands Forensic Institute
26 |  */
27 | public class UnreliableFailHandler extends AbstractFailHandler {
28 |     /**
29 |      * Configuration identifier for the unreliable failure policy ({@code "unreliable"}).
30 |      */
31 |     public static final String IDENTIFIER = "unreliable";
32 | 
33 |     @Override
34 |     public boolean shouldReplay(final KafkaMessageId id) {
35 |         // never replay a message, ignore calls to fail; lose the message in time and space
36 |         return false;
37 |     }
38 | 
39 |     @Override
40 |     public String getIdentifier() {
41 |         return IDENTIFIER;
42 |     }
43 | }
44 | 
--------------------------------------------------------------------------------
/src/main/java/nl/minvenj/nfi/storm/kafka/fail/AbstractFailHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import java.util.Map;
20 | 
21 | import org.apache.storm.spout.SpoutOutputCollector;
22 | import org.apache.storm.task.TopologyContext;
23 | 
24 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
25 | 
26 | /**
27 |  * Abstract convenience implementation of the {@link FailHandler} interface.
28 |  *
29 |  * @author Netherlands Forensic Institute
30 |  */
31 | public abstract class AbstractFailHandler implements FailHandler {
32 |     @Override
33 |     public abstract boolean shouldReplay(final KafkaMessageId id);
34 | 
35 |     @Override
36 |     public void ack(final KafkaMessageId id) {
37 |     }
38 | 
39 |     @Override
40 |     public void fail(final KafkaMessageId id, final byte[] message) {
41 |     }
42 | 
43 |     @Override
44 |     public void open(final Map config, final TopologyContext topology, final SpoutOutputCollector collector) {
45 |     }
46 | 
47 |     @Override
48 |     public void activate() {
49 |     }
50 | 
51 |     @Override
52 |     public void deactivate() {
53 |     }
54 | 
55 |     @Override
56 |     public void close() {
57 |     }
58 | 
59 |     @Override
60 |     public abstract String getIdentifier();
61 | }
62 | 
--------------------------------------------------------------------------------
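`AbstractFailHandler` reduces a custom policy's obligations to `shouldReplay` and `getIdentifier`, since all lifecycle callbacks default to no-ops. A throwaway illustration (not part of the project) of how small that override surface is:

```java
// illustrative only: replay failed messages from partition 0, silently drop all others
final FailHandler partitionZeroOnly = new AbstractFailHandler() {
    @Override
    public boolean shouldReplay(final KafkaMessageId id) {
        return id.getPartition() == 0;
    }

    @Override
    public String getIdentifier() {
        return "partition-zero-only";
    }
};
```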
/src/main/java/nl/minvenj/nfi/storm/kafka/fail/ReliableFailHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
20 | 
21 | /**
22 |  * {@link FailHandler} implementation making tuple failure reliable: messages are always replayed; calls to
23 |  * {@link #fail(KafkaMessageId, byte[])} cause an error.
24 |  *
25 |  * @author Netherlands Forensic Institute
26 |  */
27 | public class ReliableFailHandler extends AbstractFailHandler {
28 |     /**
29 |      * Configuration identifier for the reliable failure policy ({@code "reliable"}).
30 |      */
31 |     public static final String IDENTIFIER = "reliable";
32 | 
33 |     @Override
34 |     public boolean shouldReplay(final KafkaMessageId id) {
35 |         // always replay the message, never call fail
36 |         return true;
37 |     }
38 | 
39 |     /**
40 |      * {@inheritDoc}
41 |      *
42 |      * @throws IllegalStateException always; tuples should always be replayed under the reliable policy.
43 |      */
44 |     @Override
45 |     public void fail(final KafkaMessageId id, final byte[] message) {
46 |         throw new IllegalStateException("reliable failure policy unexpectedly made to deal with message failure");
47 |     }
48 | 
49 |     @Override
50 |     public String getIdentifier() {
51 |         return IDENTIFIER;
52 |     }
53 | }
54 | 
--------------------------------------------------------------------------------
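The two shipped policies are the extremes: always or never replay. As a sketch of the middle ground — built only on the `FailHandler`/`AbstractFailHandler` contract shown in this repository, not a class the project provides — a handler that replays each message a bounded number of times before handing it to `fail` for good:

```java
package nl.minvenj.nfi.storm.kafka.fail;

import java.util.HashMap;
import java.util.Map;

import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;

// hypothetical policy: replay each failed message at most _maxRetries times
public class BoundedRetryFailHandler extends AbstractFailHandler {
    private final int _maxRetries;
    private final Map<KafkaMessageId, Integer> _retries = new HashMap<KafkaMessageId, Integer>();

    public BoundedRetryFailHandler(final int maxRetries) {
        _maxRetries = maxRetries;
    }

    @Override
    public boolean shouldReplay(final KafkaMessageId id) {
        // count this failure and replay until the retry budget is exhausted
        final Integer seen = _retries.get(id);
        final int count = (seen == null ? 0 : seen) + 1;
        _retries.put(id, count);
        return count <= _maxRetries;
    }

    @Override
    public void fail(final KafkaMessageId id, final byte[] message) {
        // message is dropped for good; clean up the retry bookkeeping
        _retries.remove(id);
    }

    @Override
    public void ack(final KafkaMessageId id) {
        // successful processing: forget any earlier failures
        _retries.remove(id);
    }

    @Override
    public String getIdentifier() {
        return "bounded-retry";
    }
}
```

Keeping the retry bookkeeping in the handler keeps the spout itself policy-free, which is the design intent of the `FailHandler` interface.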
/src/test/java/nl/minvenj/nfi/storm/kafka/fail/UnreliableFailHandlerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import static org.junit.Assert.assertFalse;
20 | import static org.mockito.Mockito.mock;
21 | 
22 | import java.util.Map;
23 | 
24 | import org.apache.storm.spout.SpoutOutputCollector;
25 | import org.apache.storm.task.TopologyContext;
26 | import org.junit.Test;
27 | 
28 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
29 | 
30 | public class UnreliableFailHandlerTest {
31 |     @Test
32 |     public void testSideEffects() {
33 |         final FailHandler subject = new UnreliableFailHandler();
34 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
35 | 
36 |         // convenience methods should have no effect
37 |         subject.open(mock(Map.class), mock(TopologyContext.class), mock(SpoutOutputCollector.class));
38 |         subject.activate();
39 |         subject.deactivate();
40 |         subject.close();
41 | 
42 |         // ack should be ignored
43 |         subject.ack(id);
44 |     }
45 | 
46 |     @Test
47 |     public void testShouldReplay() {
48 |         final FailHandler subject = new UnreliableFailHandler();
49 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
50 | 
51 |         // unreliable handler should never tell spout to replay
52 |         assertFalse(subject.shouldReplay(id));
53 |     }
54 | 
55 |     @Test
56 |     public void testFail() {
57 |         final FailHandler subject = new UnreliableFailHandler();
58 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
59 |         final byte[] message = {1, 2, 3, 4};
60 | 
61 |         // failing a message to the unreliable handler should not throw an exception
62 |         subject.fail(id, message);
63 |     }
64 | }
65 | 
--------------------------------------------------------------------------------
/src/test/java/nl/minvenj/nfi/storm/kafka/fail/ReliableFailHandlerTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import static org.junit.Assert.assertTrue;
20 | import static org.mockito.Mockito.mock;
21 | 
22 | import java.util.Map;
23 | 
24 | import org.apache.storm.spout.SpoutOutputCollector;
25 | import org.apache.storm.task.TopologyContext;
26 | import org.junit.Test;
27 | 
28 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
29 | 
30 | /**
31 |  * Tests the simple provided {@link ReliableFailHandler}; its counterpart is covered by {@link UnreliableFailHandlerTest}.
32 |  */
33 | public class ReliableFailHandlerTest {
34 |     @Test
35 |     public void testSideEffects() {
36 |         final FailHandler subject = new ReliableFailHandler();
37 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
38 | 
39 |         // convenience methods should have no effect
40 |         subject.open(mock(Map.class), mock(TopologyContext.class), mock(SpoutOutputCollector.class));
41 |         subject.activate();
42 |         subject.deactivate();
43 |         subject.close();
44 | 
45 |         // ack should be ignored
46 |         subject.ack(id);
47 |     }
48 | 
49 |     @Test
50 |     public void testShouldReplay() {
51 |         final FailHandler subject = new ReliableFailHandler();
52 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
53 | 
54 |         // reliable handler should *always* tell the spout to replay
55 |         assertTrue(subject.shouldReplay(id));
56 |     }
57 | 
58 |     @Test(expected = IllegalStateException.class)
59 |     public void testRefuseFail() {
60 |         final FailHandler subject = new ReliableFailHandler();
61 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
62 |         final byte[] message = {1, 2, 3, 4};
63 | 
64 |         // failing a message to the reliable handler should *always* throw an exception
65 |         subject.fail(id, message);
66 |     }
67 | }
68 | 
--------------------------------------------------------------------------------
/src/main/java/nl/minvenj/nfi/storm/kafka/util/KafkaMessageId.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.util;
18 | 
19 | import java.io.Serializable;
20 | 
21 | /**
22 |  * Convenience class representing an orderable 2-tuple of a kafka message's partition and offset within that partition.
23 |  * The natural order of {@link KafkaMessageId} is identical to the ordering in a kafka partition.
24 |  *
25 |  * @author Netherlands Forensic Institute
26 |  */
27 | public class KafkaMessageId implements Comparable<KafkaMessageId>, Serializable {
28 |     private final int _partition;
29 |     private final long _offset;
30 | 
31 |     public KafkaMessageId(final int partition, final long offset) {
32 |         _partition = partition;
33 |         _offset = offset;
34 |     }
35 | 
36 |     public int getPartition() {
37 |         return _partition;
38 |     }
39 | 
40 |     public long getOffset() {
41 |         return _offset;
42 |     }
43 | 
44 |     /**
45 |      * {@link KafkaMessageId}s are considered equal when both their partition and offset are identical.
46 |      *
47 |      * @param o The object to compare with.
48 |      * @return Whether {@code o} is considered to be equal to this {@link KafkaMessageId}.
49 |      */
50 |     @Override
51 |     public boolean equals(final Object o) {
52 |         if (o instanceof KafkaMessageId) {
53 |             final KafkaMessageId other = (KafkaMessageId) o;
54 |             return other.getPartition() == _partition && other.getOffset() == _offset;
55 |         }
56 |         else {
57 |             return false;
58 |         }
59 |     }
60 | 
61 |     @Override
62 |     public int hashCode() {
63 |         // create a hash code using all bits of both identifying members
64 |         return (31 + _partition) * (int) (_offset ^ (_offset >>> 32));
65 |     }
66 | 
67 |     /**
68 |      * Compares this {@link KafkaMessageId} to {@code id}. Comparison is made numerically, where the partition is
69 |      * considered more significant than the offset within the partition. The resulting ordering of
70 |      * {@link KafkaMessageId} is identical to the ordering in a kafka partition.
71 |      * An instance is considered greater than {@code null}.
72 |      *
73 |      * @param id The {@link KafkaMessageId} to compare with.
74 |      * @return The result of {@code 2 * signum(partition - id.getPartition()) + signum(offset - id.getOffset())} or
75 |      *         {@code 1} if {@code id} is null.
76 |      */
77 |     @Override
78 |     public int compareTo(final KafkaMessageId id) {
79 |         // instance is always > null
80 |         if (id == null) {
81 |             return 1;
82 |         }
83 |         // use signum to perform the comparison, mark _partition more significant than _offset
84 |         return 2 * Integer.signum(_partition - id.getPartition()) + Long.signum(_offset - id.getOffset());
85 |     }
86 | 
87 |     @Override
88 |     public String toString() {
89 |         return "(" + _partition + "," + _offset + ")";
90 |     }
91 | }
92 | 
--------------------------------------------------------------------------------
/src/main/java/nl/minvenj/nfi/storm/kafka/fail/FailHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.fail;
18 | 
19 | import java.io.Serializable;
20 | import java.util.Map;
21 | 
22 | import org.apache.storm.spout.SpoutOutputCollector;
23 | import org.apache.storm.task.TopologyContext;
24 | 
25 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
26 | 
27 | /**
28 |  * Handler interface to implement a user-defined failure policy. Used by the
29 |  * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} to determine whether failed messages should be replayed. Messages not
30 |  * to be replayed are provided to the handler to deal with, at which point the
31 |  * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} will continue as if the message was processed correctly. This makes it
32 |  * the handler's responsibility to implement the failure policy (e.g. log failure, add to error topic on kafka, ...).
33 |  *
34 |  * @author Netherlands Forensic Institute
35 |  */
36 | public interface FailHandler extends Serializable {
37 |     /**
38 |      * Queries the handler whether the message emitted as {@code id} should be replayed.
39 |      * NB: messages that should not be replayed are provided to the handler through {@link #fail}, but are considered
40 |      * processed from the kafka point of view and offsets will be committed as such. The {@link FailHandler} is
41 |      * responsible for dealing with the message when the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} should not emit
42 |      * it again.
43 |      *
44 |      * @param id The failed id.
45 |      * @return Whether the kafka message emitted as {@code id} should be replayed.
46 |      */
47 |     boolean shouldReplay(KafkaMessageId id);
48 | 
49 |     /**
50 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when a tuple is acknowledged by the topology.
51 |      *
52 |      * @param id The message that was acknowledged by the topology.
53 |      */
54 |     void ack(KafkaMessageId id);
55 | 
56 |     /**
57 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when a tuple is failed by the topology and
58 |      * {@link #shouldReplay(nl.minvenj.nfi.storm.kafka.util.KafkaMessageId)} indicates it should *not* be replayed.
59 |      *
60 |      * @param id The failed id.
61 |      * @param message The failed message.
62 |      */
63 |     void fail(KafkaMessageId id, byte[] message);
64 | 
65 |     /**
66 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when
67 |      * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#open(java.util.Map, org.apache.storm.task.TopologyContext, org.apache.storm.spout.SpoutOutputCollector)}
68 |      * is called on it to allow the {@link FailHandler} to update its state.
69 |      *
70 |      * @param config The configuration as passed to
71 |      *               {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#open(java.util.Map, org.apache.storm.task.TopologyContext, org.apache.storm.spout.SpoutOutputCollector)}.
72 |      * @param topology The {@link TopologyContext} as passed to
73 |      *                 {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#open(java.util.Map, org.apache.storm.task.TopologyContext, org.apache.storm.spout.SpoutOutputCollector)}.
74 |      * @param collector The {@link SpoutOutputCollector} as passed to
75 |      *                  {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#open(java.util.Map, org.apache.storm.task.TopologyContext, org.apache.storm.spout.SpoutOutputCollector)}.
76 |      */
77 |     void open(Map config, TopologyContext topology, SpoutOutputCollector collector);
78 | 
79 |     /**
80 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when
81 |      * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#activate()} is called on it.
82 |      */
83 |     void activate();
84 | 
85 |     /**
86 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when
87 |      * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#deactivate()} is called on it.
88 |      */
89 |     void deactivate();
90 | 
91 |     /**
92 |      * Called by the {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} when
93 |      * {@link nl.minvenj.nfi.storm.kafka.KafkaSpout#close()} is called on it.
94 |      */
95 |     void close();
96 | 
97 |     /**
98 |      * Called by {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} on opening the spout to log the failure policy used.
99 |      *
100 |      * @return A short identifier for this type of {@link FailHandler}.
101 |      */
102 |     String getIdentifier();
103 | }
104 | 
--------------------------------------------------------------------------------
/src/test/java/nl/minvenj/nfi/storm/kafka/util/KafkaMessageIdTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka.util;
18 | 
19 | import static org.hamcrest.number.OrderingComparison.comparesEqualTo;
20 | import static org.hamcrest.number.OrderingComparison.greaterThan;
21 | import static org.hamcrest.number.OrderingComparison.lessThan;
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertNotEquals;
24 | import static org.junit.Assert.assertThat;
25 | 
26 | import java.util.Arrays;
27 | import java.util.List;
28 | import java.util.SortedSet;
29 | import java.util.TreeSet;
30 | 
31 | import org.junit.Test;
32 | 
33 | public class KafkaMessageIdTest {
34 |     @Test
35 |     public void testHashCodeEquals() {
36 |         final KafkaMessageId id1 = new KafkaMessageId(1, 1234);
37 |         final KafkaMessageId id2 = new KafkaMessageId(2, 3456);
38 |         final KafkaMessageId id3 = new KafkaMessageId(1, 1234);
39 | 
40 |         // assert id1 equals id3, but not id2
41 |         assertNotEquals(id1, id2);
42 |         assertNotEquals(id1.hashCode(), id2.hashCode());
43 |         assertEquals(id1, id3);
44 |         assertEquals(id1.hashCode(), id3.hashCode());
45 |     }
46 | 
47 |     @Test
48 |     public void testComparison() {
49 |         // test a single subject against equal, smaller and greater values
50 |         final KafkaMessageId subject = new KafkaMessageId(1, 1234);
51 | 
52 |         // test comparisons with null
53 |         assertNotEquals(subject, null);
54 |         assertThat(subject, greaterThan((KafkaMessageId) null));
55 | 
56 |         assertEquals(subject, new KafkaMessageId(1, 1234));
57 |         assertThat(subject, comparesEqualTo(new KafkaMessageId(1, 1234)));
58 | 
59 |         // use partition < 0 for testing (comparison is numerical)
60 |         assertThat(subject, greaterThan(new KafkaMessageId(-1, 1234)));
61 |         assertThat(subject, greaterThan(new KafkaMessageId(-1, 123)));
62 |         assertThat(subject, greaterThan(new KafkaMessageId(-1, 12345)));
63 | 
64 |         assertThat(subject, greaterThan(new KafkaMessageId(0, 0)));
65 |         assertThat(subject, greaterThan(new KafkaMessageId(0, 1234)));
66 |         assertThat(subject, greaterThan(new KafkaMessageId(0, 12345)));
67 |         assertThat(subject, greaterThan(new KafkaMessageId(1, 123)));
68 |         // include test for value < min int
69 |         assertThat(subject, greaterThan(new KafkaMessageId(1, -9876543210L)));
70 | 
71 |         assertThat(subject, lessThan(new KafkaMessageId(2, 0)));
72 |         assertThat(subject, lessThan(new KafkaMessageId(2, 1234)));
73 |         assertThat(subject, lessThan(new KafkaMessageId(2, 12345)));
74 |         assertThat(subject, lessThan(new KafkaMessageId(1, 12345)));
75 |         assertThat(subject, lessThan(new KafkaMessageId(3, 0)));
76 |         assertThat(subject, lessThan(new KafkaMessageId(3, 123)));
77 |         assertThat(subject, lessThan(new KafkaMessageId(3, 1234)));
78 |         assertThat(subject, lessThan(new KafkaMessageId(3, 12345)));
79 |         // include test for value > max int
80 |         assertThat(subject, lessThan(new KafkaMessageId(1, 9876543210L)));
81 |     }
82 | 
83 |     @Test
84 |     public void testOrdering() {
85 |         // use a list of ids out of order, with a single duplicate (1,1234)
86 |         final List<KafkaMessageId> ids = Arrays.asList(
87 |             new KafkaMessageId(1, -9876543210L),
88 |             new KafkaMessageId(1, 1234),
89 |             new KafkaMessageId(0, 1234),
90 |             new KafkaMessageId(2, 1234),
91 |             new KafkaMessageId(3, 0),
92 |             new KafkaMessageId(3, 1234),
93 |             new KafkaMessageId(3, 12345),
94 |             new KafkaMessageId(1, 123),
95 |             new KafkaMessageId(1, 1234),
96 |             new KafkaMessageId(1, 12345),
97 |             new KafkaMessageId(1, 9876543210L)
98 |         );
99 |         final SortedSet<KafkaMessageId> subject = new TreeSet<KafkaMessageId>(ids);
100 | 
101 |         // test the behaviour of a sorted set of message ids is as expected
102 |         assertEquals(10, subject.size()); // ids.size() - 1; a single duplicate was inserted
103 |         assertEquals(0, subject.first().getPartition());
104 |         assertEquals(3, subject.last().getPartition());
105 |         assertEquals(3, subject.subSet(new KafkaMessageId(1, 0), new KafkaMessageId(1, 123456)).size());
106 |     }
107 | 
108 |     @Test
109 |     public void testToStringEquality() {
110 |         final KafkaMessageId id1 = new KafkaMessageId(1, 1234);
111 |         final KafkaMessageId id2 = new KafkaMessageId(1, 1234);
112 | 
113 |         assertEquals(id1.toString(), id2.toString());
114 |     }
115 | }
116 | 
--------------------------------------------------------------------------------
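The `subSet` assertion above works because of how `KafkaMessageId.compareTo` (shown earlier) weighs its two terms: the partition difference contributes ±2 or 0, the offset difference at most ±1, so the partition always dominates. A quick check of two boundary cases from `testComparison`:

```java
final KafkaMessageId subject = new KafkaMessageId(1, 1234);
// a larger offset cannot beat a smaller partition:
// 2 * signum(1 - 0) + signum(1234 - 12345) = 2 - 1 = 1, so subject > (0, 12345)
assert subject.compareTo(new KafkaMessageId(0, 12345)) > 0;
// equal partitions fall back to the offset:
// 2 * signum(1 - 1) + signum(1234 - 12345) = 0 - 1 = -1, so subject < (1, 12345)
assert subject.compareTo(new KafkaMessageId(1, 12345)) < 0;
```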
/src/test/java/nl/minvenj/nfi/storm/kafka/KafkaSpoutFailurePolicyTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka;
18 | 
19 | import static org.hamcrest.core.IsInstanceOf.instanceOf;
20 | import static org.junit.Assert.assertEquals;
21 | import static org.junit.Assert.assertFalse;
22 | import static org.junit.Assert.assertSame;
23 | import static org.junit.Assert.assertThat;
24 | import static org.junit.Assert.assertTrue;
25 | import static org.mockito.Matchers.any;
26 | import static org.mockito.Matchers.eq;
27 | import static org.mockito.Mockito.mock;
28 | import static org.mockito.Mockito.spy;
29 | import static org.mockito.Mockito.verify;
30 | 
31 | import java.util.Map;
32 | 
33 | import org.apache.storm.spout.SpoutOutputCollector;
34 | import org.apache.storm.task.TopologyContext;
35 | import org.junit.Before;
36 | import org.junit.Test;
37 | 
38 | import kafka.javaapi.consumer.ConsumerConnector;
39 | import nl.minvenj.nfi.storm.kafka.fail.FailHandler;
40 | import nl.minvenj.nfi.storm.kafka.fail.ReliableFailHandler;
41 | import nl.minvenj.nfi.storm.kafka.fail.UnreliableFailHandler;
42 | import nl.minvenj.nfi.storm.kafka.util.ConfigUtils;
43 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
44 | 
45 | public class KafkaSpoutFailurePolicyTest {
46 |     protected KafkaSpout _subject;
47 | 
48 |     @Before
49 |     public void setup() {
50 |         _subject = new KafkaSpout();
51 |     }
52 | 
53 |     @Test
54 |     public void testCreateFailHandlerNull() {
55 |         _subject.createFailHandler(null);
56 |         assertSame(ConfigUtils.DEFAULT_FAIL_HANDLER, _subject._failHandler);
57 |     }
58 | 
59 |     @Test
60 |     public void testCreateFailHandlerDelegation() {
61 |         _subject.createFailHandler(UnreliableFailHandler.IDENTIFIER);
62 |         assertThat(_subject._failHandler, instanceOf(UnreliableFailHandler.class));
63 |     }
64 | 
65 |     @Test
66 |     public void testReliableFailure() {
67 |         final FailHandler reliable = spy(new ReliableFailHandler());
68 |         _subject._failHandler = reliable;
69 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
70 |         final byte[] message = {1, 2, 3, 4};
71 |         _subject._inProgress.put(id, message);
72 | 
73 |         // fail the message
74 |         _subject.fail(id);
75 | 
76 |         // verify the decision was delegated to the fail handler and the message was queued for replay
77 |         verify(reliable).shouldReplay(eq(id));
78 |         assertEquals(id, _subject._queue.peek());
79 |         assertTrue(_subject._inProgress.containsKey(id));
80 |     }
81 | 
82 |     @Test
83 |     public void testUnreliableFailure() {
84 |         final FailHandler unreliable = spy(new UnreliableFailHandler());
85 |         _subject._failHandler = unreliable;
86 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
87 |         final byte[] message = {1, 2, 3, 4};
88 |         _subject._inProgress.put(id, message);
89 | 
90 |         // fail the message
91 |         _subject.fail(id);
92 | 
93 |         // verify the decision and message were delegated to the fail handler and the message was not replayed
94 |         verify(unreliable).shouldReplay(eq(id));
95 |         verify(unreliable).fail(eq(id), eq(message));
96 |         assertTrue(_subject._queue.isEmpty());
97 |         assertFalse(_subject._inProgress.containsKey(id));
98 |     }
99 | 
100 |     @Test
101 |     public void testDelegatedCalls() {
102 |         final FailHandler unreliable = spy(new UnreliableFailHandler());
103 |         _subject = new KafkaSpout() {
104 |             @Override
105 |             protected void createConsumer(final Map config) {
106 |                 // do nothing (would connect to zookeeper otherwise)
107 |             }
108 | 
109 |             @Override
110 |             protected void createFailHandler(final String failHandler) {
111 |                 _failHandler = unreliable;
112 |             }
113 |         };
114 |         _subject._failHandler = unreliable;
115 |         _subject._consumer = mock(ConsumerConnector.class);
116 | 
117 |         // call all the methods that should trigger policy notification
118 |         _subject.open(mock(Map.class), mock(TopologyContext.class), mock(SpoutOutputCollector.class));
119 |         verify(unreliable).open(any(Map.class), any(TopologyContext.class), any(SpoutOutputCollector.class));
120 |         _subject.activate();
121 |         verify(unreliable).activate();
122 |         _subject.deactivate();
123 |         verify(unreliable).deactivate();
124 |         _subject.close();
125 |         verify(unreliable).close();
126 | 
127 |         // NB: _subject will have set consumer to null, mock a new one
128 |         _subject._consumer = mock(ConsumerConnector.class);
129 | 
130 |         // simulate an acknowledged message
131 |         final KafkaMessageId id = new KafkaMessageId(1, 1234);
132 |         final byte[] message = {1, 2, 3, 4};
133 |         _subject._inProgress.put(id, message);
134 |         _subject.ack(id);
135 |         verify(unreliable).ack(id);
136 | 
137 |         // failure is tested above
138 |     }
139 | }
140 | 
--------------------------------------------------------------------------------
/src/test/java/nl/minvenj/nfi/storm/kafka/KafkaSpoutConstructorTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka;
18 | 
19 | import static org.junit.Assert.assertEquals;
20 | import static org.mockito.Matchers.argThat;
21 | import static org.mockito.Mockito.doNothing;
22 | import static org.mockito.Mockito.mock;
23 | import static org.mockito.Mockito.spy;
24 | import static org.mockito.Mockito.times;
25 | import static org.mockito.Mockito.verify;
26 | 
27 | import java.nio.ByteBuffer;
28 | import java.util.Arrays;
29 | import java.util.HashMap;
30 | import java.util.List;
31 | import java.util.Map;
32 | 
33 | import org.junit.Test;
34 | import org.mockito.ArgumentMatcher;
35 | 
36 | import org.apache.storm.spout.Scheme;
37 | import org.apache.storm.spout.SpoutOutputCollector;
38 | import org.apache.storm.task.TopologyContext;
39 | import org.apache.storm.topology.OutputFieldsDeclarer;
40 | import org.apache.storm.tuple.Fields;
41 | import nl.minvenj.nfi.storm.kafka.util.ConfigUtils;
42 | 
43 | public class KafkaSpoutConstructorTest {
44 |     /**
45 |      * Using the default constructor, the topic name is read from the storm config.
46 |      */
47 |     @Test
48 |     public void testOpenWithDefaultConstructor() {
49 |         KafkaSpout spout = spy(new KafkaSpout());
50 | 
51 |         TopologyContext topology = mock(TopologyContext.class);
52 |         SpoutOutputCollector collector = mock(SpoutOutputCollector.class);
53 | 
54 |         Map config = new HashMap();
55 |         config.put(ConfigUtils.CONFIG_TOPIC, "topic");
56 |         doNothing().when(spout).createConsumer(config);
57 | 
58 |         spout.open(config, topology, collector);
59 | 
60 |         assertEquals("Wrong Topic Name", "topic", spout._topic);
61 |     }
62 | 
63 |     /**
64 |      * Without a topic name in the storm config, the spout falls back to the default topic name.
65 |      */
66 |     @Test
67 |     public void testOpenWithDefaultTopicName() {
68 |         KafkaSpout spout = spy(new KafkaSpout());
69 | 
70 |         TopologyContext topology = mock(TopologyContext.class);
71 |         SpoutOutputCollector collector = mock(SpoutOutputCollector.class);
72 | 
73 |         Map config = new HashMap();
74 |         doNothing().when(spout).createConsumer(config);
75 | 
76 |         spout.open(config, topology, collector);
77 | 
78 |         assertEquals("Wrong Topic Name", ConfigUtils.DEFAULT_TOPIC, spout._topic);
79 |     }
80 | 
81 |     /**
82 |      * If we use the overloaded constructor, the storm config is not even consulted for the topic name.
83 |      */
84 |     @Test
85 |     public void testOpenWithOverloadedConstructor() {
86 |         KafkaSpout spout = spy(new KafkaSpout("OVERLOAD"));
87 | 
88 |         TopologyContext topology = mock(TopologyContext.class);
89 |         SpoutOutputCollector collector = mock(SpoutOutputCollector.class);
90 | 
91 |         Map config = new HashMap();
92 |         doNothing().when(spout).createConsumer(config);
93 | 
94 |         spout.open(config, topology, collector);
95 |         assertEquals("Wrong Topic Name", "OVERLOAD", spout._topic);
96 |     }
97 | 
98 |     /**
99 |      * If we use the overloaded constructor with a topic name, it does not matter what is in the storm config.
100 |      */
101 |     @Test
102 |     public void testOpenWithOverloadedConstructorAndStormConfig() {
103 |         KafkaSpout spout = spy(new KafkaSpout("OVERLOAD"));
104 | 
105 |         TopologyContext topology = mock(TopologyContext.class);
106 |         SpoutOutputCollector collector = mock(SpoutOutputCollector.class);
107 | 
108 |         Map config = new HashMap();
109 |         config.put(ConfigUtils.CONFIG_TOPIC, "topic");
110 |         doNothing().when(spout).createConsumer(config);
111 | 
112 |         spout.open(config, topology, collector);
113 | 
114 |         assertEquals("Wrong Topic Name", "OVERLOAD", spout._topic);
115 |     }
116 | 
117 |     @Test
118 |     public void testRawSchemeForDefaultConstructor() {
119 |         final KafkaSpout spout = spy(new KafkaSpout());
120 |         final OutputFieldsDeclarer declarer = mock(OutputFieldsDeclarer.class);
121 | 
122 |         spout.declareOutputFields(declarer);
123 | 
124 |         // Fields doesn't implement equals; match it manually
125 |         verify(declarer).declare(argThat(new ArgumentMatcher<Fields>() {
126 |             @Override
127 |             public boolean matches(final Fields fields) {
128 |                 return fields.size() == 1 && fields.get(0).equals("bytes");
129 |             }
130 |         }));
131 |     }
132 | 
133 |     @Test
134 |     public void testDelegateCustomScheme() {
135 |         final Scheme scheme = new Scheme() {
136 |             @Override
137 |             public List<Object> deserialize(final ByteBuffer bytes) {
138 |                 // split the message into its first byte ("head") and the remainder ("tail")
139 |                 final byte[] head = {bytes.get()};
140 |                 final byte[] tail = new byte[bytes.remaining()];
141 |                 bytes.get(tail);
142 | 
143 |                 return Arrays.<Object>asList(head, tail);
144 |             }
145 | 
146 |             @Override
147 |             public Fields getOutputFields() {
148 |                 return new Fields("head", "tail");
149 |             }
150 |         };
151 |         final OutputFieldsDeclarer declarer = mock(OutputFieldsDeclarer.class);
152 | 
153 |         // test for both constructors that accept a scheme
154 |         new KafkaSpout(scheme).declareOutputFields(declarer);
155 |         new KafkaSpout("topic", scheme).declareOutputFields(declarer);
156 | 
157 |         // Fields doesn't implement equals; match it manually
158 |         verify(declarer, times(2)).declare(argThat(new ArgumentMatcher<Fields>() {
159 |             @Override
160 |             public boolean matches(final Fields fields) {
161 |                 return fields.size() == 2 && fields.get(0).equals("head") && fields.get(1).equals("tail");
162 |             }
163 |         }));
164 |     }
165 | }
166 | 
--------------------------------------------------------------------------------
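The inline scheme above exercises the full `Scheme` contract (`deserialize` plus `getOutputFields`). As a more typical example, here is a sketch of a scheme decoding each kafka message as a UTF-8 string — `StringScheme` is illustrative and not part of this project; it plugs into the `KafkaSpout(String, Scheme)` constructor tested above:

```java
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import org.apache.storm.spout.Scheme;
import org.apache.storm.tuple.Fields;

public class StringScheme implements Scheme {
    @Override
    public List<Object> deserialize(final ByteBuffer bytes) {
        // decode the raw kafka message into a single string field
        return Collections.<Object>singletonList(StandardCharsets.UTF_8.decode(bytes).toString());
    }

    @Override
    public Fields getOutputFields() {
        return new Fields("message");
    }
}
```

Wired in as `new KafkaSpout("some-topic", new StringScheme())`, downstream bolts then read the `"message"` field instead of raw `"bytes"`.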
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 | 
7 |     <parent>
8 |         <groupId>org.sonatype.oss</groupId>
9 |         <artifactId>oss-parent</artifactId>
10 |         <version>7</version>
11 |     </parent>
12 | 
13 |     <groupId>nl.minvenj.nfi.storm</groupId>
14 |     <artifactId>kafka-spout</artifactId>
15 |     <version>0.5-SNAPSHOT</version>
16 |     <packaging>jar</packaging>
17 |     <name>${project.groupId}:${project.artifactId}</name>
18 |     <description>Storm spout emitting messages from a Kafka topic</description>
19 |     <url>https://github.com/HolmesNL/kafka-spout</url>
20 | 
21 |     <scm>
22 |         <connection>scm:git:git@github.com:HolmesNL/kafka-spout.git</connection>
23 |         <developerConnection>scm:git:git@github.com:HolmesNL/kafka-spout.git</developerConnection>
24 |         <url>git@github.com:HolmesNL/kafka-spout.git</url>
25 |     </scm>
26 | 
27 |     <developers>
28 |         <developer>
29 |             <name>Mattijs Ugen</name>
30 |             <email>github@holmes.nl</email>
31 |             <organization>Netherlands Forensic Institute</organization>
32 |             <organizationUrl>http://www.forensicinstitute.nl</organizationUrl>
33 |         </developer>
34 |     </developers>
35 | 
36 |     <licenses>
37 |         <license>
38 |             <name>Apache License, Version 2.0</name>
39 |             <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
40 |         </license>
41 |     </licenses>
42 | 
43 |     <properties>
44 |         <maven.compiler.source>1.8</maven.compiler.source>
45 |         <maven.compiler.target>1.8</maven.compiler.target>
46 | 
47 |         <coveralls.version>4.3.0</coveralls.version>
48 |         <hamcrest.version>1.3</hamcrest.version>
49 |         <jacoco.version>0.7.9</jacoco.version>
50 |         <javadoc.plugin.version>2.10.4</javadoc.plugin.version>
51 |         <junit.version>4.12</junit.version>
52 |         <kafka.version>0.11.0.0</kafka.version>
53 |         <mockito.version>2.8.47</mockito.version>
54 |         <slf4j.version>1.7.25</slf4j.version>
55 |         <source.plugin.version>3.0.1</source.plugin.version>
56 |         <storm.version>1.1.0</storm.version>
57 |     </properties>
58 | 
59 |     <dependencies>
60 |         <dependency>
61 |             <groupId>org.apache.storm</groupId>
62 |             <artifactId>storm-core</artifactId>
63 |             <version>${storm.version}</version>
64 |             <scope>provided</scope>
65 |             <exclusions>
66 |                 <exclusion>
67 |                     <groupId>org.slf4j</groupId>
68 |                     <artifactId>log4j-over-slf4j</artifactId>
69 |                 </exclusion>
70 |                 <exclusion>
71 |                     <groupId>ch.qos.logback</groupId>
72 |                     <artifactId>logback-classic</artifactId>
73 |                 </exclusion>
74 |             </exclusions>
75 |         </dependency>
76 |         <dependency>
77 |             <groupId>org.apache.kafka</groupId>
78 |             <artifactId>kafka_2.12</artifactId>
79 |             <version>${kafka.version}</version>
80 |             <exclusions>
81 |                 <exclusion>
82 |                     <groupId>log4j</groupId>
83 |                     <artifactId>log4j</artifactId>
84 |                 </exclusion>
85 |                 <exclusion>
86 |                     <groupId>org.slf4j</groupId>
87 |                     <artifactId>slf4j-simple</artifactId>
88 |                 </exclusion>
89 |             </exclusions>
90 |         </dependency>
91 |         <dependency>
92 |             <groupId>org.slf4j</groupId>
93 |             <artifactId>slf4j-api</artifactId>
94 |             <version>${slf4j.version}</version>
95 |         </dependency>
96 | 
97 |         <dependency>
98 |             <groupId>junit</groupId>
99 |             <artifactId>junit</artifactId>
100 |             <version>${junit.version}</version>
101 |             <scope>test</scope>
102 |         </dependency>
103 |         <dependency>
104 |             <groupId>org.hamcrest</groupId>
105 |             <artifactId>hamcrest-library</artifactId>
106 |             <version>${hamcrest.version}</version>
107 |             <scope>test</scope>
108 |         </dependency>
109 |         <dependency>
110 |             <groupId>org.mockito</groupId>
111 |             <artifactId>mockito-core</artifactId>
112 |             <version>${mockito.version}</version>
113 |             <scope>test</scope>
114 |         </dependency>
115 | 
116 |         <dependency>
117 |             <groupId>org.slf4j</groupId>
118 |             <artifactId>slf4j-log4j12</artifactId>
119 |             <version>${slf4j.version}</version>
120 |             <scope>test</scope>
121 |         </dependency>
122 |     </dependencies>
123 | 
124 |     <build>
125 |         <plugins>
126 |             <plugin>
127 |                 <groupId>org.jacoco</groupId>
128 |                 <artifactId>jacoco-maven-plugin</artifactId>
129 |                 <version>${jacoco.version}</version>
130 |                 <executions>
131 |                     <execution>
132 |                         <id>prepare-agent</id>
133 |                         <goals>
134 |                             <goal>prepare-agent</goal>
135 |                         </goals>
136 |                     </execution>
137 |                 </executions>
138 |             </plugin>
139 |             <plugin>
140 |                 <groupId>org.eluder.coveralls</groupId>
141 |                 <artifactId>coveralls-maven-plugin</artifactId>
142 |                 <version>${coveralls.version}</version>
143 |                 <configuration>
144 |                     <sourceEncoding>UTF-8</sourceEncoding>
145 |                 </configuration>
146 |             </plugin>
147 |             <plugin>
148 |                 <inherited>true</inherited>
149 |                 <groupId>org.apache.maven.plugins</groupId>
150 |                 <artifactId>maven-source-plugin</artifactId>
151 |                 <version>${source.plugin.version}</version>
152 |                 <executions>
153 |                     <execution>
154 |                         <goals>
155 |                             <goal>jar</goal>
156 |                         </goals>
157 |                         <phase>package</phase>
158 |                     </execution>
159 |                 </executions>
160 |             </plugin>
161 |             <plugin>
162 |                 <inherited>true</inherited>
163 |                 <groupId>org.apache.maven.plugins</groupId>
164 |                 <artifactId>maven-javadoc-plugin</artifactId>
165 |                 <version>${javadoc.plugin.version}</version>
166 |                 <executions>
167 |                     <execution>
168 |                         <goals>
169 |                             <goal>jar</goal>
170 |                         </goals>
171 |                         <phase>package</phase>
172 |                     </execution>
173 |                 </executions>
174 |             </plugin>
175 |         </plugins>
176 |     </build>
177 | </project>
178 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity. For the purposes of this definition,
19 |       "control" means (i) the power, direct or indirect, to cause the
20 |       direction or management of such entity, whether by contract or
21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 |       outstanding shares, or (iii) beneficial ownership of such entity.
23 | 
24 |       "You" (or "Your") shall mean an individual or Legal Entity
25 |       exercising permissions granted by this License.
26 | 
27 |       "Source" form shall mean the preferred form for making modifications,
28 |       including but not limited to software source code, documentation
29 |       source, and configuration files.
30 | 
31 |       "Object" form shall mean any form resulting from mechanical
32 |       transformation or translation of a Source form, including but
33 |       not limited to compiled object code, generated documentation,
34 |       and conversions to other media types.
35 | 
36 |       "Work" shall mean the work of authorship, whether in Source or
37 |       Object form, made available under the License, as indicated by a
38 |       copyright notice that is included in or attached to the work
39 |       (an example is provided in the Appendix below).
40 | 
41 |       "Derivative Works" shall mean any work, whether in Source or Object
42 |       form, that is based on (or derived from) the Work and for which the
43 |       editorial revisions, annotations, elaborations, or other modifications
44 |       represent, as a whole, an original work of authorship. For the purposes
45 |       of this License, Derivative Works shall not include works that remain
46 |       separable from, or merely link (or bind by name) to the interfaces of,
47 |       the Work and Derivative Works thereof.
48 | 
49 |       "Contribution" shall mean any work of authorship, including
50 |       the original version of the Work and any modifications or additions
51 |       to that Work or Derivative Works thereof, that is intentionally
52 |       submitted to Licensor for inclusion in the Work by the copyright owner
53 |       or by an individual or Legal Entity authorized to submit on behalf of
54 |       the copyright owner. For the purposes of this definition, "submitted"
55 |       means any form of electronic, verbal, or written communication sent
56 |       to the Licensor or its representatives, including but not limited to
57 |       communication on electronic mailing lists, source code control systems,
58 |       and issue tracking systems that are managed by, or on behalf of, the
59 |       Licensor for the purpose of discussing and improving the Work, but
60 |       excluding communication that is conspicuously marked or otherwise
61 |       designated in writing by the copyright owner as "Not a Contribution."
62 | 
63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
64 |       on behalf of whom a Contribution has been received by Licensor and
65 |       subsequently incorporated within the Work.
66 | 
67 |    2. Grant of Copyright License. Subject to the terms and conditions of
68 |       this License, each Contributor hereby grants to You a perpetual,
69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 |       copyright license to reproduce, prepare Derivative Works of,
71 |       publicly display, publicly perform, sublicense, and distribute the
72 |       Work and such Derivative Works in Source or Object form.
73 | 
74 |    3. Grant of Patent License. Subject to the terms and conditions of
75 |       this License, each Contributor hereby grants to You a perpetual,
76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 |       (except as stated in this section) patent license to make, have made,
78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
79 |       where such license applies only to those patent claims licensable
80 |       by such Contributor that are necessarily infringed by their
81 |       Contribution(s) alone or by combination of their Contribution(s)
82 |       with the Work to which such Contribution(s) was submitted. If You
83 |       institute patent litigation against any entity (including a
84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
85 |       or a Contribution incorporated within the Work constitutes direct
86 |       or contributory patent infringement, then any patent licenses
87 |       granted to You under this License for that Work shall terminate
88 |       as of the date such litigation is filed.
89 | 
90 |    4. Redistribution. You may reproduce and distribute copies of the
91 |       Work or Derivative Works thereof in any medium, with or without
92 |       modifications, and in Source or Object form, provided that You
93 |       meet the following conditions:
94 | 
95 |       (a) You must give any other recipients of the Work or
96 |           Derivative Works a copy of this License; and
97 | 
98 |       (b) You must cause any modified files to carry prominent notices
99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!) The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
--------------------------------------------------------------------------------
/src/test/java/nl/minvenj/nfi/storm/kafka/KafkaSpoutBufferBehaviourTest.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright 2013 Netherlands Forensic Institute
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package nl.minvenj.nfi.storm.kafka;
18 | 
19 | import static org.hamcrest.Matchers.containsString;
20 | import static org.junit.Assert.assertArrayEquals;
21 | import static org.junit.Assert.assertEquals;
22 | import static org.junit.Assert.assertFalse;
23 | import static org.junit.Assert.assertNull;
24 | import static org.junit.Assert.assertThat;
25 | import static org.junit.Assert.assertTrue;
26 | import static org.junit.Assert.fail;
27 | import static org.mockito.ArgumentCaptor.forClass;
28 | import static org.mockito.ArgumentMatchers.anyList;
29 | import static org.mockito.Matchers.any;
30 | import static org.mockito.Matchers.argThat;
31 | import static org.mockito.Matchers.eq;
32 | import static org.mockito.Mockito.mock;
33 | import static org.mockito.Mockito.spy;
34 | import static org.mockito.Mockito.times;
35 | import static org.mockito.Mockito.verify;
36 | import static org.mockito.Mockito.verifyNoMoreInteractions;
37 | import static org.mockito.Mockito.when;
38 | 
39 | import java.nio.ByteBuffer;
40 | import java.util.Arrays;
41 | import java.util.HashMap;
42 | import java.util.List;
43 | import java.util.Map;
44 | import java.util.SortedMap;
45 | 
46 | import kafka.consumer.ConsumerIterator;
47 | import kafka.consumer.ConsumerTimeoutException;
48 | import kafka.consumer.KafkaStream;
49 | import kafka.javaapi.consumer.ConsumerConnector;
50 | import kafka.message.MessageAndMetadata;
51 | import org.apache.storm.spout.RawScheme;
52 | import org.apache.storm.spout.Scheme;
53 | import org.apache.storm.spout.SpoutOutputCollector;
54 | import org.apache.storm.topology.OutputFieldsDeclarer;
55 | import org.apache.storm.tuple.Fields;
56 | import org.junit.Before;
57 | import org.junit.Test;
58 | import org.mockito.ArgumentCaptor;
59 | import org.mockito.ArgumentMatcher;
60 | 
61 | import nl.minvenj.nfi.storm.kafka.util.ConfigUtils;
62 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId;
63 | 
64 | /**
65 |  * Tests the behaviour of the kafka spout with regard to its buffer and acknowledgements / failures.
66 |  *
67 |  * NB: the default buffering behaviour operates on the default failure policy (reliable).
68 | * 69 | * @see KafkaSpoutFailurePolicyTest 70 | */ 71 | public class KafkaSpoutBufferBehaviourTest { 72 | // stream mapping of a single stream that won't provide messages 73 | protected static final Map<String, List<KafkaStream<byte[], byte[]>>> EMPTY_STREAM = new HashMap<String, List<KafkaStream<byte[], byte[]>>>() {{ 74 | final KafkaStream<byte[], byte[]> mockedStream = mock(KafkaStream.class); 75 | when(mockedStream.iterator()).thenReturn(mock(ConsumerIterator.class)); 76 | put("test-topic", Arrays.asList(mockedStream)); 77 | }}; 78 | protected static final Map<String, List<KafkaStream<byte[], byte[]>>> SINGLE_MESSAGE_STREAM = new HashMap<String, List<KafkaStream<byte[], byte[]>>>() {{ 79 | final KafkaStream<byte[], byte[]> mockedStream = mock(KafkaStream.class); 80 | final ConsumerIterator<byte[], byte[]> iterator = mock(ConsumerIterator.class); 81 | final MessageAndMetadata<byte[], byte[]> message = mock(MessageAndMetadata.class); 82 | when(message.partition()).thenReturn(1); 83 | when(message.offset()).thenReturn(1234L); 84 | 85 | // make the iterator indicate a next message available once 86 | when(iterator.hasNext()).thenReturn(true); 87 | when(iterator.next()) 88 | .thenReturn(message) 89 | .thenThrow(ConsumerTimeoutException.class); 90 | when(mockedStream.iterator()).thenReturn(iterator); 91 | put("test-topic", Arrays.asList(mockedStream)); 92 | }}; 93 | private KafkaSpout _subject; 94 | private ConsumerConnector _consumer; 95 | 96 | @Before 97 | public void setup() { 98 | final Scheme scheme = spy(new RawScheme()); 99 | // main test subject 100 | _subject = new KafkaSpout(scheme); 101 | 102 | // assign the topic to be used for stream retrieval 103 | _subject._topic = "test-topic"; 104 | // use a buffer size higher than the expected amount of messages available 105 | _subject._bufSize = 4; 106 | // assign the default FailHandler 107 | _subject._failHandler = ConfigUtils.DEFAULT_FAIL_HANDLER; 108 | // mocked consumer to avoid actually contacting zookeeper 109 | _consumer = mock(ConsumerConnector.class); 110 | // ...
but make sure it will return a valid (empty) stream 111 | when(_consumer.createMessageStreams(any(Map.class))).thenReturn(EMPTY_STREAM); 112 | // assign the consumer to the test subject 113 | _subject._consumer = _consumer; 114 | // provide a mocked collector to be able to check for emitted values 115 | _subject._collector = mock(SpoutOutputCollector.class); 116 | } 117 | 118 | @Test 119 | public void testDeclarations() { 120 | final OutputFieldsDeclarer declarer = mock(OutputFieldsDeclarer.class); 121 | 122 | _subject.declareOutputFields(declarer); 123 | // verify the spout declares to output single-field tuples 124 | verify(declarer).declare(argThat(new ArgumentMatcher<Fields>() { 125 | @Override 126 | public boolean matches(final Fields fields) { 127 | return fields.size() == 1 && fields.get(0).equals("bytes"); 128 | } 129 | })); 130 | 131 | // verify the spout will not provide component configuration 132 | assertNull(_subject.getComponentConfiguration()); 133 | } 134 | 135 | @Test 136 | public void testInitiallyEmpty() { 137 | assertTrue(_subject._queue.isEmpty()); 138 | assertTrue(_subject._inProgress.isEmpty()); 139 | } 140 | 141 | @Test 142 | public void testRefillOnEmpty() { 143 | // request activity from subject 144 | _subject.nextTuple(); 145 | 146 | // verify that subject requested more messages from the kafka consumer 147 | verify(_consumer).createMessageStreams(any(Map.class)); 148 | } 149 | 150 | @Test(expected = IllegalStateException.class) 151 | public void testRefuseRefillOnNonEmptyBuffer() { 152 | _subject._queue.add(new KafkaMessageId(1, 1234)); 153 | _subject.fillBuffer(); 154 | } 155 | 156 | @Test(expected = IllegalStateException.class) 157 | public void testRefuseRefillOnNonEmptyPending() { 158 | _subject._inProgress.put(new KafkaMessageId(1, 1234), new byte[0]); 159 | _subject.fillBuffer(); 160 | } 161 | 162 | @Test(expected = IllegalStateException.class) 163 | public void testRefuseRefillOnNonEmptyBoth() { 164 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 165 | _subject._queue.add(id); 166 | _subject._inProgress.put(id, new byte[0]); 167 | _subject.fillBuffer(); 168 | } 169 | 170 | @Test 171 | public void testRefillBothOnMessageAvailable() { 172 | // NB: update the consumer mock for this test to return the single message stream 173 | when(_consumer.createMessageStreams(any(Map.class))).thenReturn(SINGLE_MESSAGE_STREAM); 174 | 175 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 176 | // test whether the single message in the stream is added to both the buffer and pending 177 | _subject.fillBuffer(); 178 | assertEquals(1, _subject._queue.size()); 179 | assertEquals(id, _subject._queue.peek()); 180 | assertEquals(1, _subject._inProgress.size()); 181 | assertEquals(id, _subject._inProgress.firstKey()); 182 | } 183 | 184 | @Test 185 | public void testEmitOnAvailable() { 186 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 187 | final byte[] message = {5, 6, 7, 8}; 188 | _subject._queue.add(id); 189 | _subject._inProgress.put(id, message); 190 | 191 | // request to emit message and id 192 | _subject.nextTuple(); 193 | 194 | final ArgumentCaptor<ByteBuffer> bufferCaptor = forClass(ByteBuffer.class); 195 | final ArgumentCaptor<List> valuesCaptor = forClass(List.class); 196 | 197 | // subject should have emitted a Values object identified by id 198 | verify(_subject._serializationScheme).deserialize(bufferCaptor.capture()); 199 | assertArrayEquals(message, bufferCaptor.getValue().array()); 200 | verify(_subject._collector).emit(valuesCaptor.capture(), eq(id)); 201 | assertArrayEquals(message, (byte[]) valuesCaptor.getValue().get(0)); 202 | } 203 | 204 | @Test 205 | public void testEmitOneAtATime() { 206 | final KafkaMessageId id1 = new KafkaMessageId(1, 1234); 207 | final KafkaMessageId id2 = new KafkaMessageId(1, 1235); 208 | final byte[] message1 = {5, 6, 7, 8}; 209 | final byte[] message2 = {9, 0, 1, 2}; 210 | _subject._queue.add(id1); 211 | _subject._queue.add(id2); 212 | _subject._inProgress.put(id1, message1); 213 | _subject._inProgress.put(id2, message2); 214 | 215 | _subject.nextTuple(); 216 | 217 | final ArgumentCaptor<List> captor = forClass(List.class); 218 | 219 | // subject should have emitted only the first message 220 | verify(_subject._collector).emit(captor.capture(), eq(id1)); 221 | assertArrayEquals(message1, (byte[]) captor.getValue().get(0)); 222 | verifyNoMoreInteractions(_subject._collector); 223 | } 224 | 225 | @Test 226 | public void testIllegalQueueState() { 227 | // queue a single id with no corresponding message 228 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 229 | _subject._queue.add(id); 230 | 231 | try { 232 | _subject.nextTuple(); 233 | fail("illegal queue state didn't trigger error"); 234 | } 235 | catch (final IllegalStateException e) { 236 | assertThat(e.getMessage(), containsString(id.toString())); 237 | } 238 | } 239 | 240 | @Test 241 | public void testAck() { 242 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 243 | _subject._queue.add(id); 244 | _subject._inProgress.put(id, new byte[0]); 245 | 246 | _subject.nextTuple(); 247 | // verify that the message left buffer but not pending 248 | assertTrue(_subject._queue.isEmpty()); 249 | assertTrue(_subject._inProgress.containsKey(id)); 250 | 251 | _subject.ack(id); 252 | // verify that the buffer is still empty and the key is no longer in pending 253 | assertTrue(_subject._queue.isEmpty()); 254 | assertFalse(_subject._inProgress.containsKey(id)); 255 | 256 | // verify that a non-KafkaMessageId argument is ignored 257 | final SortedMap<KafkaMessageId, byte[]> spy = spy(_subject._inProgress); 258 | _subject.ack(new Object()); 259 | verifyNoMoreInteractions(spy); 260 | } 261 | 262 | @Test 263 | public void testFail() { 264 | final KafkaMessageId id = new KafkaMessageId(1, 1234); 265 | _subject._queue.add(id); 266 | _subject._inProgress.put(id, new byte[0]); 267 | 268 | _subject.nextTuple(); 269 | // verify that the message left buffer but not pending 270 | assertTrue(_subject._queue.isEmpty()); 271 | assertTrue(_subject._inProgress.containsKey(id)); 272 | 273 | _subject.fail(id); 274 | // verify that the buffer is no longer empty and id is still pending 275 | assertFalse(_subject._queue.isEmpty()); 276 | assertTrue(_subject._inProgress.containsKey(id)); 277 | 278 | _subject.nextTuple(); 279 | // verify that the buffer is once again empty and the id has been emitted twice 280 | assertTrue(_subject._queue.isEmpty()); 281 | verify(_subject._collector, times(2)).emit(anyList(), eq(id)); 282 | 283 | // verify that a non-KafkaMessageId argument is ignored 284 | final SortedMap<KafkaMessageId, byte[]> spy = spy(_subject._inProgress); 285 | _subject.fail(new Object()); 286 | verifyNoMoreInteractions(spy); 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /src/main/java/nl/minvenj/nfi/storm/kafka/KafkaSpout.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Netherlands Forensic Institute 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file
except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package nl.minvenj.nfi.storm.kafka; 18 | 19 | import static java.nio.ByteBuffer.wrap; 20 | 21 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.CONFIG_FAIL_HANDLER; 22 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.DEFAULT_FAIL_HANDLER; 23 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.createFailHandlerFromString; 24 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.createKafkaConfig; 25 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.getMaxBufSize; 26 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.getTopic; 27 | 28 | import java.util.Collections; 29 | import java.util.LinkedList; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Properties; 33 | import java.util.Queue; 34 | import java.util.SortedMap; 35 | import java.util.TreeMap; 36 | 37 | import org.apache.storm.spout.RawScheme; 38 | import org.apache.storm.spout.Scheme; 39 | import org.apache.storm.spout.SpoutOutputCollector; 40 | import org.apache.storm.task.TopologyContext; 41 | import org.apache.storm.topology.IRichSpout; 42 | import org.apache.storm.topology.OutputFieldsDeclarer; 43 | import org.slf4j.Logger; 44 | import org.slf4j.LoggerFactory; 45 | 46 | import kafka.consumer.Consumer; 47 | import kafka.consumer.ConsumerConfig; 48 | import kafka.consumer.ConsumerIterator; 49 | import kafka.consumer.ConsumerTimeoutException; 50 | import kafka.consumer.KafkaStream; 51 | import kafka.javaapi.consumer.ConsumerConnector; 52 | import kafka.message.InvalidMessageException; 53 | import kafka.message.MessageAndMetadata; 54 | import nl.minvenj.nfi.storm.kafka.fail.FailHandler; 55 | import nl.minvenj.nfi.storm.kafka.util.ConfigUtils; 56 | import nl.minvenj.nfi.storm.kafka.util.KafkaMessageId; 57 | 58 | /** 59 | * Storm spout reading messages from kafka, emitting them as single field tuples. 60 | * 61 | * Implementation tracks a queue of message ids (partition and offset) and a set of those ids that are pending to be 62 | * acknowledged by the topology. The buffer will only be populated with new messages when *all* messages from the buffer 63 | * have been acknowledged because the {@link ConsumerConnector} allows committing of the currently processed offset only 64 | * through {@link kafka.javaapi.consumer.ConsumerConnector#commitOffsets()}, which commits *all* offsets that have been 65 | * read, which does not necessarily correspond to the offsets that were successfully processed by the storm topology. 66 | * Optimizing this behaviour is work for the (near) future. 67 | * 68 | * Aside from the properties used to configure the kafka consumer, the kafka spout reads the following configuration 69 | * parameters in storm configuration: 70 | * <ul>
71 | *     <li>{@code kafka.spout.topic}: the kafka topic to read messages from (default {@code storm});</li> 72 | *     <li>{@code kafka.spout.fail.handler}: the policy to be used when messages fail, whether to replay them, default 73 | *     {@code "reliable"} (either {@code "reliable"}, {@code "unreliable"} or a fully qualified class name of an 74 | *     implementation of {@link FailHandler});</li> 75 | *     <li>{@code kafka.spout.consumer.group}: The kafka consumer group id.</li> 76 | *     <li>{@code kafka.spout.buffer.size.max}: The maximum number of kafka messages to buffer.</li> 77 | * </ul>
78 | * 79 | * @author Netherlands Forensic Institute 80 | */ 81 | public class KafkaSpout implements IRichSpout { 82 | private static final long serialVersionUID = -1L; 83 | private static final Logger LOG = LoggerFactory.getLogger(KafkaSpout.class); 84 | protected final Scheme _serializationScheme; 85 | /** 86 | * Collection of messages being processed by the topology (either waiting to be emitted or waiting to be 87 | * acknowledged). Processed message offset is committed when this becomes empty. 88 | * 89 | * @see #fillBuffer() 90 | */ 91 | protected final SortedMap<KafkaMessageId, byte[]> _inProgress = new TreeMap<KafkaMessageId, byte[]>(); 92 | /** 93 | * Queue of messages waiting to be emitted by this spout. 94 | * 95 | * @see #fillBuffer() 96 | */ 97 | protected final Queue<KafkaMessageId> _queue = new LinkedList<KafkaMessageId>(); 98 | protected String _topic; 99 | protected int _bufSize; 100 | protected FailHandler _failHandler; 101 | protected ConsumerIterator<byte[], byte[]> _iterator; 102 | protected transient SpoutOutputCollector _collector; 103 | protected transient ConsumerConnector _consumer; 104 | 105 | /** 106 | * Creates a new kafka spout to be submitted in a storm topology. Configuration is read from storm config when the 107 | * spout is opened. Uses a {@link RawScheme} to serialize messages from kafka as a single {@code byte[]}. 108 | */ 109 | public KafkaSpout() { 110 | _serializationScheme = new RawScheme(); 111 | } 112 | 113 | /** 114 | * Creates a new kafka spout to be submitted in a storm topology with the provided {@link Scheme}. This impacts 115 | * output fields (see {@link #declareOutputFields(OutputFieldsDeclarer)}). Configuration is read from storm config 116 | * when the spout is opened. 117 | * 118 | * @param serializationScheme The serialization to apply to messages read from kafka. 119 | */ 120 | public KafkaSpout(final Scheme serializationScheme) { 121 | _serializationScheme = serializationScheme; 122 | } 123 | 124 | /** 125 | * Creates a new kafka spout to be submitted in a storm topology. Configuration is read from storm config when the 126 | * spout is opened. 127 | * 128 | * @param topic The kafka topic to read messages from. 129 | */ 130 | public KafkaSpout(final String topic) { 131 | this(); 132 | _topic = topic; 133 | } 134 | 135 | /** 136 | * Creates a new kafka spout to be submitted in a storm topology with the provided {@link Scheme}. This impacts 137 | * output fields (see {@link #declareOutputFields(OutputFieldsDeclarer)}). Configuration is read from storm config 138 | * when the spout is opened. 139 | * 140 | * @param topic The kafka topic to read messages from. 141 | * @param serializationScheme The serialization to apply to messages read from kafka. 142 | */ 143 | public KafkaSpout(final String topic, final Scheme serializationScheme) { 144 | this(serializationScheme); 145 | _topic = topic; 146 | } 147 | 148 | /** 149 | * Convenience method assigning a {@link FailHandler} instance to this kafka spout. If the configured value is 150 | * {@code null}, {@link ConfigUtils#DEFAULT_FAIL_HANDLER} will be used, otherwise the creation is delegated to 151 | * {@link ConfigUtils#createFailHandlerFromString(String)}. 152 | * 153 | * @param failHandler The configuration value for the failure policy. 154 | */ 155 | protected void createFailHandler(final String failHandler) { 156 | if (failHandler == null) { 157 | _failHandler = DEFAULT_FAIL_HANDLER; 158 | } 159 | else { 160 | _failHandler = createFailHandlerFromString(failHandler); 161 | } 162 | } 163 | 164 | /** 165 | * Ensures an initialized kafka {@link ConsumerConnector} is present.
166 | * 167 | * @param config The storm configuration passed to {@link #open(Map, TopologyContext, SpoutOutputCollector)}. 168 | * @throws IllegalArgumentException When a required configuration parameter is missing or a sanity check fails. 169 | */ 170 | protected void createConsumer(final Map config) { 171 | final Properties consumerConfig = createKafkaConfig(config); 172 | 173 | LOG.info("connecting kafka client to zookeeper at {} as client group {}", 174 | consumerConfig.getProperty("zookeeper.connect"), 175 | consumerConfig.getProperty("group.id")); 176 | _consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerConfig)); 177 | } 178 | 179 | /** 180 | * Refills the buffer with messages from the configured kafka topic if available. 181 | * 182 | * @return Whether the buffer contains messages to be emitted after this call. 183 | * @throws IllegalStateException When current buffer is not empty or messages not acknowledged by topology. 184 | */ 185 | protected boolean fillBuffer() { 186 | if (!_inProgress.isEmpty() || !_queue.isEmpty()) { 187 | throw new IllegalStateException("cannot fill buffer when buffer or pending messages are non-empty"); 188 | } 189 | 190 | if (_iterator == null) { 191 | // create a stream of messages from _consumer using the streams as defined on construction 192 | final Map<String, List<KafkaStream<byte[], byte[]>>> streams = _consumer.createMessageStreams(Collections.singletonMap(_topic, 1)); 193 | _iterator = streams.get(_topic).get(0).iterator(); 194 | } 195 | 196 | // We'll iterate the stream in a try-clause; kafka stream will poll its client channel for the next message, 197 | // throwing a ConsumerTimeoutException when the configured timeout is exceeded. 198 | try { 199 | int size = 0; 200 | while (size < _bufSize && _iterator.hasNext()) { 201 | final MessageAndMetadata<byte[], byte[]> message = _iterator.next(); 202 | final KafkaMessageId id = new KafkaMessageId(message.partition(), message.offset()); 203 | _inProgress.put(id, message.message()); 204 | size++; 205 | } 206 | } 207 | catch (final InvalidMessageException e) { 208 | LOG.warn(e.getMessage(), e); 209 | } 210 | catch (final ConsumerTimeoutException e) { 211 | // ignore, storm will call nextTuple again at some point in the near future 212 | // timeout does *not* mean that no messages were read (state is checked below) 213 | } 214 | 215 | if (_inProgress.size() > 0) { 216 | // set _queue to all currently pending kafka message ids 217 | _queue.addAll(_inProgress.keySet()); 218 | LOG.debug("buffer now has {} messages to be emitted", _queue.size()); 219 | // message(s) appended to buffer 220 | return true; 221 | } 222 | else { 223 | // no messages appended to buffer 224 | return false; 225 | } 226 | } 227 | 228 | @Override 229 | public void declareOutputFields(final OutputFieldsDeclarer declarer) { 230 | // delegate fields mapping to specified scheme (single field "bytes" by default) 231 | declarer.declare(_serializationScheme.getOutputFields()); 232 | } 233 | 234 | @Override 235 | public Map getComponentConfiguration() { 236 | return null; 237 | } 238 | 239 | @Override 240 | public void open(final Map config, final TopologyContext topology, final SpoutOutputCollector collector) { 241 | _collector = collector; 242 | 243 | if (_topic == null) { 244 | _topic = getTopic((Map) config); 245 | } 246 | 247 | _bufSize = getMaxBufSize((Map) config); 248 | 249 | createFailHandler((String) config.get(CONFIG_FAIL_HANDLER)); 250 | 251 | // ensure availability of kafka consumer 252 | createConsumer((Map) config); 253 | 254 | // inform the failure policy
of spout being opened 255 | _failHandler.open(config, topology, collector); 256 | 257 | LOG.info("kafka spout opened, reading from topic {}, using failure policy {}", _topic, _failHandler.getIdentifier()); 258 | } 259 | 260 | @Override 261 | public void close() { 262 | // reset state by setting members to null 263 | _collector = null; 264 | _iterator = null; 265 | 266 | if (_consumer != null) { 267 | try { 268 | _consumer.shutdown(); 269 | } 270 | finally { 271 | _consumer = null; 272 | } 273 | } 274 | 275 | _failHandler.close(); 276 | } 277 | 278 | @Override 279 | public void activate() { 280 | _failHandler.activate(); 281 | } 282 | 283 | @Override 284 | public void deactivate() { 285 | _failHandler.deactivate(); 286 | } 287 | 288 | @Override 289 | public void nextTuple() { 290 | // next tuple available when _queue contains ids or fillBuffer() is allowed and indicates more messages were available 291 | // see class documentation for implementation note on the rationale behind this condition 292 | if (!_queue.isEmpty() || (_inProgress.isEmpty() && fillBuffer())) { 293 | final KafkaMessageId nextId = _queue.poll(); 294 | if (nextId != null) { 295 | final byte[] message = _inProgress.get(nextId); 296 | // the next id from buffer should correspond to a message in the pending map 297 | if (message == null) { 298 | throw new IllegalStateException("no pending message for next id " + nextId); 299 | } 300 | // use specified scheme to deserialize messages (single-field Values by default) 301 | _collector.emit(_serializationScheme.deserialize(wrap(message)), nextId); 302 | LOG.debug("emitted kafka message id {} ({} bytes payload)", nextId, message.length); 303 | } 304 | } 305 | } 306 | 307 | @Override 308 | public void ack(final Object o) { 309 | if (o instanceof KafkaMessageId) { 310 | final KafkaMessageId id = (KafkaMessageId) o; 311 | // message corresponding to o is no longer pending 312 | _inProgress.remove(id); 313 | LOG.debug("kafka message {} acknowledged", id); 314 | if (_inProgress.isEmpty()) { 315 | // commit offsets to zookeeper when pending is now empty 316 | // (buffer will be filled on next call to nextTuple()) 317 | LOG.debug("all pending messages acknowledged, committing client offsets"); 318 | _consumer.commitOffsets(); 319 | } 320 | // notify fail handler of tuple success 321 | _failHandler.ack(id); 322 | } 323 | } 324 | 325 | @Override 326 | public void fail(final Object o) { 327 | if (o instanceof KafkaMessageId) { 328 | final KafkaMessageId id = (KafkaMessageId) o; 329 | // delegate decision of replaying the message to failure policy 330 | if (_failHandler.shouldReplay(id)) { 331 | LOG.debug("kafka message id {} failed in topology, adding to buffer again", id); 332 | _queue.add(id); 333 | } 334 | else { 335 | LOG.debug("kafka message id {} failed in topology, delegating failure to policy", id); 336 | // remove message from pending; _failHandler will take action if needed 337 | _failHandler.fail(id, _inProgress.remove(id)); 338 | } 339 | } 340 | } 341 | } 342 | -------------------------------------------------------------------------------- /src/test/java/nl/minvenj/nfi/storm/kafka/util/ConfigUtilsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Netherlands Forensic Institute 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package nl.minvenj.nfi.storm.kafka.util; 18 | 19 | import static org.hamcrest.Matchers.containsString; 20 | import static org.junit.Assert.assertEquals; 21 | import static org.junit.Assert.assertNotEquals; 22 | import static org.junit.Assert.assertNull; 23 | import static org.junit.Assert.assertThat; 24 | import static org.junit.Assert.assertTrue; 25 | import static org.junit.Assert.fail; 26 | 27 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.CONFIG_BUFFER_MAX_MESSAGES; 28 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.CONFIG_FILE; 29 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.CONFIG_GROUP; 30 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.CONFIG_TOPIC; 31 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.DEFAULT_BUFFER_MAX_MESSAGES; 32 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.DEFAULT_GROUP; 33 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.DEFAULT_TOPIC; 34 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.checkConfigSanity; 35 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.configFromPrefix; 36 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.configFromResource; 37 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.createFailHandlerFromString; 38 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.createKafkaConfig; 39 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.getMaxBufSize; 40 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.getStormZookeepers; 41 | import static nl.minvenj.nfi.storm.kafka.util.ConfigUtils.getTopic; 42 | 43 | import java.util.Arrays; 44 | import java.util.Collections; 45 | import java.util.HashMap; 46 | import java.util.Map; 47 | import java.util.Properties; 48 | 49 | import org.apache.storm.Config; 50 | import org.junit.Test; 51 | 52 | import nl.minvenj.nfi.storm.kafka.fail.AbstractFailHandler; 53 | import nl.minvenj.nfi.storm.kafka.fail.FailHandler; 54 | import nl.minvenj.nfi.storm.kafka.fail.ReliableFailHandler; 55 | import nl.minvenj.nfi.storm.kafka.fail.UnreliableFailHandler; 56 | 57 | public class ConfigUtilsTest { 58 | @Test 59 | public void testPolicyIdentifierSanity() { 60 | // check sanity on policy identifiers 61 | assertNotEquals(ReliableFailHandler.IDENTIFIER, UnreliableFailHandler.IDENTIFIER); 62 | assertNotEquals(new ReliableFailHandler().getIdentifier(), new UnreliableFailHandler().getIdentifier()); 63 | } 64 | 65 | @Test 66 | public void testCreateKafkaConfigFromResource() { 67 | final Map stormConfig = new HashMap() {{ 68 | put(CONFIG_FILE, "kafka-config.properties"); 69 | }}; 70 | 71 | final Properties config = createKafkaConfig(stormConfig); 72 | 73 | // assert the values in the kafka-config file are present and have been read correctly 74 | assertEquals("non-existent.host:2181", config.getProperty("zookeeper.connect")); 75 | assertEquals("100", config.getProperty("consumer.timeout.ms")); 76 | } 77 | 78 | @Test 79 | public void testCreateKafkaConfigFromStorm() { 80 | final Map stormConfig = new HashMap() {{ 81 | 
put("kafka.zookeeper.connect", "non-existent.host:2181"); 82 | put("kafka.consumer.timeout.ms", "100"); 83 | put("kafka.property.that.makes.little.sense", "nonsense"); 84 | }}; 85 | 86 | final Properties config = createKafkaConfig(stormConfig); 87 | 88 | // assert existence of values for keys without the prefix 89 | assertEquals("non-existent.host:2181", config.getProperty("zookeeper.connect")); 90 | assertEquals("nonsense", config.getProperty("property.that.makes.little.sense")); 91 | 92 | // assert that required keys have been added 93 | assertEquals(config.getProperty("auto.commit.enable"), "false"); 94 | } 95 | 96 | @Test 97 | public void testCreateKafkaConfigZookeeperOverride() { 98 | final Map stormConfig = new HashMap() {{ 99 | put(Config.STORM_ZOOKEEPER_SERVERS, Arrays.asList("non-existent.host")); 100 | put(Config.STORM_ZOOKEEPER_PORT, 2181); 101 | put("kafka.consumer.timeout.ms", "100"); 102 | }}; 103 | 104 | final Properties config = createKafkaConfig(stormConfig); 105 | 106 | // assert that the value used for kafka (zookeeper.connect) is derived from the value configured for storm 107 | assertEquals("non-existent.host:2181", config.getProperty("zookeeper.connect")); 108 | } 109 | 110 | @Test 111 | public void testCreateKafkaConfigMissingZookeeper() { 112 | final Map stormConfig = new HashMap() {{ 113 | put("kafka.consumer.timeout.ms", "100"); 114 | }}; 115 | 116 | try { 117 | createKafkaConfig(stormConfig); 118 | fail("missing zookeeper configuration not detected"); 119 | } 120 | catch (final IllegalArgumentException e) { 121 | assertTrue(e.getMessage().contains("zookeeper.connect")); 122 | } 123 | } 124 | 125 | @Test 126 | public void testCreateKafkaConfigGroupId() { 127 | final Map stormConfig = new HashMap() {{ 128 | put("kafka.zookeeper.connect", "non-existent.host:2181"); 129 | put("kafka.consumer.timeout.ms", "100"); 130 | }}; 131 | Properties config = createKafkaConfig(stormConfig); 132 | // verify no group.id defaults to DEFAULT_GROUP 133 | assertEquals(DEFAULT_GROUP, config.get("group.id")); 134 | 135 | stormConfig.put("kafka.group.id", ""); 136 | config = createKafkaConfig(stormConfig); 137 | // verify empty group.id also defaults to DEFAULT_GROUP 138 | assertEquals(DEFAULT_GROUP, config.get("group.id")); 139 | 140 | stormConfig.remove("kafka.group.id"); 141 | stormConfig.put(CONFIG_GROUP, "group-id"); 142 | config = createKafkaConfig(stormConfig); 143 | // verify empty group.id also defaults to DEFAULT_GROUP 144 | assertEquals("group-id", config.get("group.id")); 145 | } 146 | 147 | @Test 148 | public void testSanityCheckSuccess() { 149 | final Properties properties = new Properties(); 150 | properties.setProperty("consumer.timeout.ms", "35"); 151 | properties.setProperty("auto.commit.enable", "false"); 152 | 153 | // check sanity (should not raise exception 154 | checkConfigSanity(properties); 155 | } 156 | 157 | @Test 158 | public void testSanityCheckFailureTimeout() { 159 | final Properties properties = new Properties(); 160 | // set blocking operation of consumer 161 | properties.setProperty("consumer.timeout.ms", "-1"); 162 | // set valid value for auto-commit 163 | properties.setProperty("auto.commit.enable", "false"); 164 | 165 | try { 166 | checkConfigSanity(properties); 167 | } 168 | catch (final IllegalArgumentException e) { 169 | // this is expected, blocking consumer config should be rejected 170 | } 171 | } 172 | 173 | @Test 174 | public void testSanityCheckFailureAutoCommit() { 175 | final Properties properties = new Properties(); 176 | // set 
auto-commit 177 | properties.setProperty("auto.commit.enable", "true"); 178 | // set valid value for timeout 179 | properties.setProperty("consumer.timeout.ms", "35"); 180 | 181 | try { 182 | checkConfigSanity(properties); 183 | } 184 | catch (final IllegalArgumentException e) { 185 | // this is expected, auto-committing config should be rejected 186 | } 187 | } 188 | 189 | @Test 190 | public void testConfigFromResource() { 191 | // load from file in resources 192 | final Properties fromFile = configFromResource("test-config.properties"); 193 | 194 | assertEquals(6, fromFile.size()); 195 | assertEquals("value", fromFile.getProperty("key")); 196 | assertEquals("silly value", fromFile.getProperty("dashed-key")); 197 | assertEquals("sillier value", fromFile.getProperty("a.test.property")); 198 | } 199 | 200 | @Test(expected = IllegalArgumentException.class) 201 | public void testConfigFromResourceMissing() { 202 | // file should not exist 203 | final Properties fromFile = configFromResource("non-existent-file.properties"); 204 | fail("loaded non-existent-file.properties from classpath"); 205 | } 206 | 207 | @Test 208 | public void testConfigFromPrefix() { 209 | final Properties fromFile = configFromResource("test-config.properties"); 210 | final Properties prefixed = configFromPrefix((Map) fromFile, "prefix."); 211 | 212 | // test file contains two prefixed keys, should be available without prefix 213 | assertEquals(2, prefixed.size()); 214 | assertEquals("value", prefixed.getProperty("key")); 215 | assertEquals("another value", prefixed.getProperty("another.key")); 216 | } 217 | 218 | @Test 219 | public void testGetStormZookeepers() { 220 | final Map stormConfig = new HashMap() {{ 221 | put(Config.STORM_ZOOKEEPER_SERVERS, Arrays.asList("non-existent.host", "zookeeper.example.net")); 222 | put(Config.STORM_ZOOKEEPER_PORT, 1234); 223 | }}; 224 | 225 | // result should be ":,:" 226 | final String zookeepers = getStormZookeepers(stormConfig); 227 | assertThat(zookeepers, containsString("non-existent")); 228 | assertThat(zookeepers, containsString("example.net")); 229 | assertThat(zookeepers, containsString(":1234")); 230 | } 231 | 232 | @Test 233 | public void testGetStormZookeepersFail() { 234 | // port is not a number 235 | String zookeepers = getStormZookeepers(new HashMap() {{ 236 | put(Config.STORM_ZOOKEEPER_SERVERS, Arrays.asList("non-existent.host", "zookeeper.example.net")); 237 | put(Config.STORM_ZOOKEEPER_PORT, "not a number"); 238 | }}); 239 | assertNull(zookeepers); 240 | 241 | // servers is missing 242 | zookeepers = getStormZookeepers(Collections.singletonMap(Config.STORM_ZOOKEEPER_PORT, (Object) "1234")); 243 | assertNull(zookeepers); 244 | 245 | // no configuration keys 246 | zookeepers = getStormZookeepers(new HashMap()); 247 | assertNull(zookeepers); 248 | } 249 | 250 | @Test 251 | public void testCreateFailHandlerFromString() { 252 | assertTrue(createFailHandlerFromString("reliable") instanceof ReliableFailHandler); 253 | assertTrue(createFailHandlerFromString("unreliable") instanceof UnreliableFailHandler); 254 | // load from class name known to be a FailHandler implementation 255 | assertTrue(createFailHandlerFromString(TestFailHandler.class.getName()) instanceof FailHandler); 256 | } 257 | 258 | @Test 259 | public void testCreateFailHandlerFromStringFail() { 260 | try { 261 | // class cannot be loaded, should yield nested ClassNotFoundException 262 | createFailHandlerFromString("net.example.AbstractClassThatDoesNotActuallyExistImplFactory"); 263 | fail("created fail handler 
from non-existing class name"); 264 | } 265 | catch (final IllegalArgumentException e) { 266 | assertTrue(e.getCause() instanceof ClassNotFoundException); 267 | } 268 | 269 | try { 270 | // class cannot be instantiated 271 | createFailHandlerFromString(FailHandler.class.getName()); 272 | fail("created fail handler from interface only"); 273 | } 274 | catch (final IllegalArgumentException e) { 275 | assertTrue(e.getCause() instanceof InstantiationException); 276 | } 277 | 278 | try { 279 | createFailHandlerFromString(PrivateFailHandler.class.getName()); 280 | fail("created fail handler from broken test class"); 281 | } 282 | catch (final IllegalArgumentException e) { 283 | assertTrue(e.getCause() instanceof IllegalAccessException); 284 | } 285 | 286 | try { 287 | // class cannot be cast to FailHandler, should yield nested ClassCastException 288 | createFailHandlerFromString(ConfigUtilsTest.class.getName()); 289 | fail("created fail handler from class not implementing FailHandler"); 290 | } 291 | catch (final IllegalArgumentException e) { 292 | assertTrue(e.getCause() instanceof ClassCastException); 293 | } 294 | } 295 | 296 | @Test 297 | public void testGetMaxBufSize() { 298 | // use a value not equal to the default 299 | Map stormConfig = Collections.singletonMap(CONFIG_BUFFER_MAX_MESSAGES, (Object) (DEFAULT_BUFFER_MAX_MESSAGES * 2)); 300 | assertEquals(DEFAULT_BUFFER_MAX_MESSAGES * 2, getMaxBufSize(stormConfig)); 301 | 302 | // assert use of default on missing value 303 | assertEquals(DEFAULT_BUFFER_MAX_MESSAGES, getMaxBufSize(new HashMap())); 304 | 305 | // assert use of default on invalid value 306 | stormConfig = Collections.singletonMap(CONFIG_BUFFER_MAX_MESSAGES, (Object) "not a number"); 307 | assertEquals(DEFAULT_BUFFER_MAX_MESSAGES, getMaxBufSize(stormConfig)); 308 | } 309 | 310 | @Test 311 | public void testGetTopic() { 312 | // assert use of default on missing value 313 | assertEquals(DEFAULT_TOPIC, getTopic(new HashMap())); 314 | 315 | Map stormConfig = Collections.singletonMap(CONFIG_TOPIC, (Object) ""); 316 | // assert use of default on empty value 317 | assertEquals(DEFAULT_TOPIC, getTopic(stormConfig)); 318 | 319 | // assert use of default on trimmed empty value 320 | stormConfig = Collections.singletonMap(CONFIG_TOPIC, (Object) " "); 321 | assertEquals(DEFAULT_TOPIC, getTopic(stormConfig)); 322 | 323 | // assert use of configured value 324 | stormConfig = Collections.singletonMap(CONFIG_TOPIC, (Object) "test-topic"); 325 | assertEquals("test-topic", getTopic(stormConfig)); 326 | 327 | // assert configured value is trimmed 328 | stormConfig = Collections.singletonMap(CONFIG_TOPIC, (Object) " test-topic "); 329 | assertEquals("test-topic", getTopic(stormConfig)); 330 | } 331 | 332 | /** 333 | * Dummy implementation of FailHandler. 334 | */ 335 | protected static class TestFailHandler extends AbstractFailHandler { 336 | @Override 337 | public boolean shouldReplay(final KafkaMessageId id) { 338 | return false; 339 | } 340 | 341 | @Override 342 | public String getIdentifier() { 343 | return "test"; 344 | } 345 | } 346 | 347 | /** 348 | * Broken implementation of FailHandler (private constructor should break instantiation through reflection). 
349 | */ 350 | protected static class PrivateFailHandler extends AbstractFailHandler { 351 | private PrivateFailHandler() { 352 | } 353 | 354 | @Override 355 | public boolean shouldReplay(KafkaMessageId id) { 356 | return false; 357 | } 358 | 359 | @Override 360 | public String getIdentifier() { 361 | return "test"; 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /src/main/java/nl/minvenj/nfi/storm/kafka/util/ConfigUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Netherlands Forensic Institute 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package nl.minvenj.nfi.storm.kafka.util; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Properties; 25 | 26 | import org.apache.storm.Config; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import nl.minvenj.nfi.storm.kafka.fail.FailHandler; 31 | import nl.minvenj.nfi.storm.kafka.fail.ReliableFailHandler; 32 | import nl.minvenj.nfi.storm.kafka.fail.UnreliableFailHandler; 33 | 34 | /** 35 | * Utilities for {@link nl.minvenj.nfi.storm.kafka.KafkaSpout} regarding its configuration and reading values from the 36 | * storm configuration. 37 | * 38 | * @author Netherlands Forensic Institute 39 | */ 40 | public class ConfigUtils { 41 | /** 42 | * Configuration key prefix in storm config for kafka configuration parameters ({@code "kafka."}). The prefix is 43 | * stripped from all keys that use it and passed to kafka (see class documentation for additional required values). 44 | * 45 | * @see kafka documentation 46 | */ 47 | public static final String CONFIG_PREFIX = "kafka."; 48 | /** 49 | * Storm configuration key pointing to a file containing kafka configuration ({@code "kafka.config"}). 50 | */ 51 | public static final String CONFIG_FILE = "kafka.config"; 52 | /** 53 | * Storm configuration key used to determine the kafka topic to read from ({@code "kafka.spout.topic"}). 54 | */ 55 | public static final String CONFIG_TOPIC = "kafka.spout.topic"; 56 | /** 57 | * Default kafka topic to read from ({@code "storm"}). 58 | */ 59 | public static final String DEFAULT_TOPIC = "storm"; 60 | /** 61 | * Storm configuration key used to determine the failure policy to use ({@code "kafka.spout.fail.handler"}). 62 | */ 63 | public static final String CONFIG_FAIL_HANDLER = "kafka.spout.fail.handler"; 64 | /** 65 | * Default failure policy instance (a {@link ReliableFailHandler} instance). 66 | */ 67 | public static final FailHandler DEFAULT_FAIL_HANDLER = new ReliableFailHandler(); 68 | /** 69 | * Storm configuration key used to determine the kafka consumer group id to use ({@code "kafka.spout.consumer.group"}).
70 | */ 71 | public static final String CONFIG_GROUP = "kafka.spout.consumer.group"; 72 | /** 73 | * Default kafka consumer group id ({@code "kafka_spout"}). 74 | */ 75 | public static final String DEFAULT_GROUP = "kafka_spout"; 76 | /** 77 | * Storm configuration key used to determine the maximum number of messages to buffer 78 | * ({@code "kafka.spout.buffer.size.max"}). 79 | */ 80 | public static final String CONFIG_BUFFER_MAX_MESSAGES = "kafka.spout.buffer.size.max"; 81 | /** 82 | * Default maximum buffer size in number of messages ({@code 1024}). 83 | */ 84 | public static final int DEFAULT_BUFFER_MAX_MESSAGES = 1024; 85 | private static final Logger LOG = LoggerFactory.getLogger(ConfigUtils.class); 86 | 87 | /** 88 | * Reads configuration from a classpath resource stream obtained from the current thread's context class loader 89 | * through {@link ClassLoader#getResourceAsStream(String)}. 90 | * 91 | * @param resource The resource to be read. 92 | * @return A {@link java.util.Properties} object read from the specified resource. 93 | * @throws IllegalArgumentException When the configuration file could not be found or another I/O error occurs. 94 | */ 95 | public static Properties configFromResource(final String resource) { 96 | InputStream input = Thread.currentThread().getContextClassLoader().getResourceAsStream(resource); 97 | if (input == null) { 98 | // non-existent resource will *not* throw an exception, do this anyway 99 | throw new IllegalArgumentException("configuration file '" + resource + "' not found on classpath"); 100 | } 101 | 102 | final Properties config = new Properties(); 103 | try { 104 | config.load(input); 105 | } 106 | catch (final IOException e) { 107 | throw new IllegalArgumentException("reading configuration from '" + resource + "' failed", e); 108 | } 109 | return config; 110 | } 111 | 112 | /** 113 | * Creates a {@link Properties} object containing the consumer configuration for the kafka spout. 114 | * 115 | * @param config The storm configuration mapping. 116 | * @return Configuration for a kafka consumer encoded as a {@link Properties} object. 117 | * @throws IllegalArgumentException When required configuration parameters are missing or sanity checks fail.
118 | */ 119 | public static Properties createKafkaConfig(final Map<String, Object> config) { 120 | final Properties consumerConfig; 121 | if (config.get(CONFIG_FILE) != null) { 122 | final String configFile = String.valueOf(config.get(CONFIG_FILE)); 123 | // read values from separate config file 124 | LOG.info("loading kafka configuration from {}", configFile); 125 | consumerConfig = configFromResource(configFile); 126 | } 127 | else { 128 | // configuration file not set, read values from storm config with kafka prefix 129 | LOG.info("reading kafka configuration from storm config using prefix '{}'", CONFIG_PREFIX); 130 | consumerConfig = configFromPrefix(config, CONFIG_PREFIX); 131 | } 132 | 133 | // zookeeper connection string is critical, try to make sure it's present 134 | if (!consumerConfig.containsKey("zookeeper.connect")) { 135 | final String zookeepers = getStormZookeepers(config); 136 | if (zookeepers != null) { 137 | consumerConfig.setProperty("zookeeper.connect", zookeepers); 138 | LOG.info("no explicit zookeeper configured for kafka, falling back on storm's zookeeper ({})", zookeepers); 139 | } 140 | else { 141 | // consumer will fail to start without zookeeper.connect 142 | throw new IllegalArgumentException("required kafka configuration key 'zookeeper.connect' not found"); 143 | } 144 | } 145 | 146 | // group id string is critical, try to make sure it's present 147 | if (!consumerConfig.containsKey("group.id") || String.valueOf(consumerConfig.get("group.id")).isEmpty()) { 148 | final Object groupId = config.get(CONFIG_GROUP); 149 | if (groupId != null && !String.valueOf(groupId).isEmpty()) { 150 | consumerConfig.setProperty("group.id", String.valueOf(groupId)); 151 | } 152 | else { 153 | consumerConfig.setProperty("group.id", DEFAULT_GROUP); 154 | LOG.info("kafka consumer group id not configured or empty, using default ({})", DEFAULT_GROUP); 155 | } 156 | } 157 | 158 | // auto-committing offsets to zookeeper should be disabled 159 | if (!consumerConfig.containsKey("auto.commit.enable")) { 160 | consumerConfig.setProperty("auto.commit.enable", "false"); 161 | } 162 | 163 | // check configuration sanity before returning 164 | checkConfigSanity(consumerConfig); 165 | return consumerConfig; 166 | } 167 | 168 | /** 169 | * Reads a configuration subset from storm's configuration, stripping {@code prefix} from keys using it. 170 | * 171 | * @param base Storm's configuration mapping. 172 | * @param prefix The prefix to match and strip from the beginning. 173 | * @return A {@link Properties} object created from storm's configuration. 174 | */ 175 | public static Properties configFromPrefix(final Map<String, Object> base, final String prefix) { 176 | final Properties config = new Properties(); 177 | // load configuration from base, stripping prefix 178 | for (Map.Entry<String, Object> entry : base.entrySet()) { 179 | if (entry.getKey().startsWith(prefix)) { 180 | config.setProperty(entry.getKey().substring(prefix.length()), String.valueOf(entry.getValue())); 181 | } 182 | } 183 | 184 | return config; 185 | } 186 | 187 | /** 188 | * Creates a zookeeper connect string usable for the kafka configuration property {@code "zookeeper.connect"} from 189 | * storm's configuration map by looking up the {@link org.apache.storm.Config#STORM_ZOOKEEPER_SERVERS} and 190 | * {@link org.apache.storm.Config#STORM_ZOOKEEPER_PORT} values. Returns null when this procedure fails. 191 | * 192 | * @param stormConfig Storm's configuration map. 193 | * @return A zookeeper connect string if it can be created from storm's config or null.
194 | */ 195 | public static String getStormZookeepers(final Map stormConfig) { 196 | final Object stormZookeepers = stormConfig.get(Config.STORM_ZOOKEEPER_SERVERS); 197 | final Object stormZookeepersPort = stormConfig.get(Config.STORM_ZOOKEEPER_PORT); 198 | if (stormZookeepers instanceof List && stormZookeepersPort instanceof Number) { 199 | // join the servers and the port together to a single zookeeper connection string for kafka 200 | final StringBuilder zookeepers = new StringBuilder(); 201 | final int port = ((Number) stormZookeepersPort).intValue(); 202 | 203 | for (final Iterator iterator = ((List) stormZookeepers).iterator(); iterator.hasNext(); ) { 204 | zookeepers.append(String.valueOf(iterator.next())); 205 | zookeepers.append(':'); 206 | zookeepers.append(port); 207 | if (iterator.hasNext()) { 208 | zookeepers.append(','); 209 | } 210 | } 211 | return zookeepers.toString(); 212 | } 213 | 214 | // no valid zookeeper configuration found 215 | return null; 216 | } 217 | 218 | /** 219 | * Creates a {@link FailHandler} implementation from a string argument. If the argument fails to qualify as either 220 | * {@link ReliableFailHandler#IDENTIFIER} or {@link UnreliableFailHandler#IDENTIFIER}, the argument is interpreted 221 | * as a class name and instantiated through {@code Class.forName(failHandler).newInstance()}. 222 | * 223 | * @param failHandler A fail handler identifier or class name. 224 | * @return A {@link FailHandler} instance. 225 | * @throws IllegalArgumentException When instantiating {@code failHandler} fails or is not a {@link FailHandler}. 226 | */ 227 | public static FailHandler createFailHandlerFromString(final String failHandler) { 228 | // determine fail handler implementation from string value 229 | if (failHandler.equalsIgnoreCase(ReliableFailHandler.IDENTIFIER)) { 230 | return new ReliableFailHandler(); 231 | } 232 | else if (failHandler.equalsIgnoreCase(UnreliableFailHandler.IDENTIFIER)) { 233 | return new UnreliableFailHandler(); 234 | } 235 | else { 236 | // create fail handler using parameter as identifier or class name 237 | try { 238 | return (FailHandler) Class.forName(failHandler).newInstance(); 239 | } 240 | catch (final ClassNotFoundException e) { 241 | throw new IllegalArgumentException("failed to instantiate FailHandler instance from argument " + 242 | failHandler, e); 243 | } 244 | catch (final InstantiationException e) { 245 | throw new IllegalArgumentException("failed to instantiate FailHandler instance from argument " + 246 | failHandler, e); 247 | } 248 | catch (final IllegalAccessException e) { 249 | throw new IllegalArgumentException("failed to instantiate FailHandler instance from argument " + 250 | failHandler, e); 251 | } 252 | catch (final ClassCastException e) { 253 | throw new IllegalArgumentException("instance from argument " + failHandler + 254 | " does not implement FailHandler", e); 255 | } 256 | } 257 | } 258 | 259 | /** 260 | * Retrieves the maximum buffer size to be used from storm's configuration map, or the 261 | * {@link #DEFAULT_BUFFER_MAX_MESSAGES} if no such value was found using {@link #CONFIG_BUFFER_MAX_MESSAGES}. 262 | * 263 | * @param stormConfig Storm's configuration map. 264 | * @return The maximum buffer size to use. 
265 | */ 266 | public static int getMaxBufSize(final Map stormConfig) { 267 | final Object value = stormConfig.get(CONFIG_BUFFER_MAX_MESSAGES); 268 | if (value != null) { 269 | try { 270 | return Integer.parseInt(String.valueOf(value).trim()); 271 | } 272 | catch (final NumberFormatException e) { 273 | LOG.warn("invalid value for '{}' in storm config ({}); falling back to default ({})", CONFIG_BUFFER_MAX_MESSAGES, value, DEFAULT_BUFFER_MAX_MESSAGES); 274 | } 275 | } 276 | 277 | return DEFAULT_BUFFER_MAX_MESSAGES; 278 | } 279 | 280 | /** 281 | * Retrieves the topic to be consumed from storm's configuration map, or the {@link #DEFAULT_TOPIC} if no 282 | * (non-empty) value was found using {@link #CONFIG_TOPIC}. 283 | * 284 | * @param stormConfig Storm's configuration map. 285 | * @return The topic to be consumed. 286 | */ 287 | public static String getTopic(final Map stormConfig) { 288 | if (stormConfig.containsKey(CONFIG_TOPIC)) { 289 | // get configured topic from config as string, removing whitespace from both ends 290 | final String topic = String.valueOf(stormConfig.get(CONFIG_TOPIC)).trim(); 291 | if (topic.length() > 0) { 292 | return topic; 293 | } 294 | else { 295 | LOG.warn("configured topic found in storm config is empty, defaulting to topic '{}'", DEFAULT_TOPIC); 296 | return DEFAULT_TOPIC; 297 | } 298 | } 299 | else { 300 | LOG.warn("no configured topic found in storm config, defaulting to topic '{}'", DEFAULT_TOPIC); 301 | return DEFAULT_TOPIC; 302 | } 303 | } 304 | 305 | /** 306 | * Checks the sanity of a kafka consumer configuration for use in storm. 307 | * 308 | * @param config The configuration parameters to check. 309 | * @throws IllegalArgumentException When a sanity check fails. 310 | */ 311 | public static void checkConfigSanity(final Properties config) { 312 | // auto-committing offsets should be disabled 313 | final Object autoCommit = config.getProperty("auto.commit.enable"); 314 | if (autoCommit == null || Boolean.parseBoolean(String.valueOf(autoCommit))) { 315 | throw new IllegalArgumentException("kafka configuration 'auto.commit.enable' should be set to false for operation in storm"); 316 | } 317 | 318 | // consumer timeout should not block calls indefinitely 319 | final Object consumerTimeout = config.getProperty("consumer.timeout.ms"); 320 | if (consumerTimeout == null || Integer.parseInt(String.valueOf(consumerTimeout)) < 0) { 321 | throw new IllegalArgumentException("kafka configuration value for 'consumer.timeout.ms' is not suitable for operation in storm"); 322 | } 323 | } 324 | } 325 | --------------------------------------------------------------------------------
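Example usage (editor's sketch, not a file in this repository): the snippet below shows how the spout and the storm configuration keys documented in ConfigUtils might be wired into a topology. TopologyBuilder, Config and LocalCluster are standard storm APIs; the topology name, topic, parallelism and bolt wiring are made-up placeholders, and the configuration values follow the sanity checks above (zookeeper.connect is required, consumer.timeout.ms must be non-negative, and auto.commit.enable is forced to false when absent).

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;

import nl.minvenj.nfi.storm.kafka.KafkaSpout;

public class KafkaSpoutExample {
    public static void main(final String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        // the spout emits single-field ("bytes") tuples read from the configured kafka topic
        builder.setSpout("kafka-spout", new KafkaSpout(), 1);
        // ... attach bolts consuming the "bytes" field to "kafka-spout" here ...

        final Config config = new Config();
        // kafka consumer configuration, read from storm config with the "kafka." prefix stripped
        config.put("kafka.zookeeper.connect", "localhost:2181");
        config.put("kafka.consumer.timeout.ms", "100");
        // kafka spout configuration (defaults: topic "storm", policy "reliable", buffer 1024)
        config.put("kafka.spout.topic", "example-topic");
        config.put("kafka.spout.fail.handler", "reliable");
        config.put("kafka.spout.buffer.size.max", 512);

        // run on a local in-process cluster for testing; a production topology
        // would be submitted through StormSubmitter instead
        new LocalCluster().submitTopology("kafka-spout-example", config, builder.createTopology());
    }
}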