├── esper+storm+kafka ├── doc │ └── dataView.png ├── src │ ├── main │ │ ├── java │ │ │ ├── IFeedItemProvider.java │ │ │ ├── JsonHelper.java │ │ │ ├── RandomSentenceSpout.java │ │ │ ├── WordCountTopology.java │ │ │ ├── SentimentClassifier.java │ │ │ ├── EmitSpecialWordGivenProbabilitySpout.java │ │ │ ├── TopologyInitializer.java │ │ │ ├── TwitterFeedItemProvider.java │ │ │ ├── RandomSentenceGenerator.java │ │ │ ├── ExternalFeedToKafkaAdapterSpout.java │ │ │ ├── KafkaOutputBolt.java │ │ │ ├── EsperFilteredTwitterFeedTopology.java │ │ │ └── ServerAndThreadCoordinationUtils.java │ │ └── assembly │ │ │ └── dep.xml │ └── test │ │ └── java │ │ ├── TestFeedItemProvider.java │ │ ├── VerboseCollectorBolt.java │ │ ├── SentenceSpout.java │ │ ├── KafkaOutputBoltTest.java │ │ ├── ExternalFeedToKafkaAdapterSpoutTest.java │ │ ├── StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.java │ │ ├── KafkaMessageConsumer.java │ │ ├── VerifyItemsFromFeedAreSentToMockKafkaProducer.java │ │ ├── ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.java │ │ ├── FacebookFeedItemProvider.java │ │ └── AbstractStormWithKafkaTest.java ├── README.md ├── pom.xml └── zookeeper.out ├── .gitignore ├── kafka-0.8.x ├── README.md ├── pom.xml └── src │ └── main │ └── java │ └── TestKafkaProducer.java ├── README.md ├── storm+kafka ├── src │ └── main │ │ └── java │ │ ├── VerboseCollectorBolt.java │ │ ├── RandomSentenceGenerator.java │ │ ├── KafkaProducer.java │ │ ├── ServerAndThreadCoordinationUtils.java │ │ └── TestTopology.java ├── pom.xml └── README.md └── kafka ├── pom.xml ├── src └── main │ └── java │ └── TestKafkaProducer.java └── README.md /esper+storm+kafka/doc/dataView.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buildlackey/cep/HEAD/esper+storm+kafka/doc/dataView.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Eclipse Junk 2 | .classpath 3 | .project 4 | .settings/ 5 | 6 | # Intellij Junk 7 | .idea/ 8 | *.iml 9 | *.iws 10 | 11 | # Maven Junk 12 | log/ 13 | target/ 14 | 15 | 16 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/IFeedItemProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/1/13 4 | * Time: 9:58 PM 5 | */ 6 | 7 | import java.io.Serializable; 8 | 9 | public interface IFeedItemProvider extends Serializable { 10 | Runnable getRunnableTask(); 11 | Object getNextItemIfAvailable(); 12 | } 13 | -------------------------------------------------------------------------------- /kafka-0.8.x/README.md: -------------------------------------------------------------------------------- 1 | # Kafka Spout Example 2 | 3 | 4 | A Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers - Kafka 0.8.* 5 | 6 | 7 | ## Description 8 | 9 | This example updates the code in ../kafka (which worked against 0.7.*) to the latest version at the 10 | time of this writing. 11 | 12 | Please see comments in the earlier version, here: 13 | https://github.com/buildlackey/cep/blob/master/kafka/README.md 14 | 15 | 16 | The previous document basically applies to this version, although some source code level 17 | statements have been tweaked to conform to the newer API. 
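For orientation, here is a minimal sketch of sending a message with the 0.8.x Java producer API — the same configuration settings and calls used by the producers elsewhere in this repository. The class name, broker address, and topic name below are placeholders, not part of this module:

    import java.util.Properties;

    import kafka.javaapi.producer.Producer;
    import kafka.producer.KeyedMessage;
    import kafka.producer.ProducerConfig;

    // Minimal 0.8.x producer sketch (illustrative only; broker and topic are placeholders).
    public class MinimalProducerSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("metadata.broker.list", "localhost:9092");             // 0.8.x-style producer config, as used by the tests in this repo
            props.put("serializer.class", "kafka.serializer.StringEncoder");
            props.put("request.required.acks", "1");

            Producer<String, String> producer =
                    new Producer<String, String>(new ProducerConfig(props));
            producer.send(new KeyedMessage<String, String>("someTopic", "hello from the 0.8.x producer"));
            producer.close();
        }
    }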
18 | 19 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/assembly/dep.xml: -------------------------------------------------------------------------------- 1 | 2 | jar 3 | 4 | jar 5 | 6 | false 7 | 8 | 9 | 10 | / 11 | true 12 | 13 | true 14 | runtime 15 | 16 | 17 | storm:storm 18 | ch.qos.logback:logback-classic 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/JsonHelper.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.tuple.Fields; 2 | import backtype.storm.tuple.Tuple; 3 | import com.google.gson.Gson; 4 | 5 | import java.io.Serializable; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | public class JsonHelper implements Serializable { 11 | 12 | public static String toJson(Tuple input) { 13 | Fields fields = input.getFields(); 14 | List fieldNames = fields.toList(); 15 | 16 | Map tupleAsMap = new HashMap(); 17 | for (String fieldName : fieldNames) { 18 | tupleAsMap.put(fieldName, input.getValueByField(fieldName)); 19 | } 20 | 21 | String json = new Gson().toJson(tupleAsMap); 22 | System.out.println("====++++++++++++++++++++++++++::> tuple as Json:" + json); 23 | return json; 24 | } 25 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | This project includes: 4 | 5 | unit tests and sample programs that illustrate how to 6 | develop complex event processing (CEP) applications on top of Storm, Kafka 7 | and Esper. 8 | 9 | a Wiki containing notes on best practices and guidelines for using 10 | the above frameworks for CEP development. 11 | 12 | 13 | 14 | Subdirectories: 15 | 16 | 17 | 18 | 19 | kafka 20 | 21 | includes 22 | 23 | * A Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers 24 | 25 | (for Kafka 0.7.x) 26 | 27 | 28 | kafka-0.8.x 29 | 30 | includes 31 | 32 | * A Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers 33 | 34 | (for Kafka 0.8.x) 35 | 36 | storm+kafka 37 | 38 | includes 39 | 40 | * An Integration test that pushes messages into Kafka and retrieves those messages with a Kafka/Storm spout. 41 | 42 | 43 | 44 | esper+storm+kafka 45 | 46 | includes 47 | 48 | 49 | * An Integration test that pushes messages into Kafka, pulls them back (via Kafka Spout), filters them using Esper, and dumps them back out to another Kafka Spout (via KafkaOutputBolt). 
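As a taste of what the Esper filtering step looks like, here is the bolt construction used by TopologyInitializer further down in this repository, shown as a standalone sketch (the wrapper class name is made up; "kafkaSpout" is the id of the spout feeding the bolt, and the EPL statement is the one the integration test uses):

    import org.tomdz.storm.esper.EsperBolt;

    // Sketch of the Esper filtering bolt wired up by TopologyInitializer (see that class below).
    // Tuples arriving from the component with id "kafkaSpout" carry a single field "str";
    // the EPL statement batches them in pairs and keeps only strings containing "at".
    public class EsperFilterSketch {
        public static EsperBolt createEsperBolt() {
            String esperQuery =
                "select str as found from OneWordMsg.win:length_batch(2) where str like '%at%'";
            return new EsperBolt.Builder()
                .inputs().aliasComponent("kafkaSpout")
                         .withFields("str").ofType(String.class).toEventType("OneWordMsg")
                .outputs().onDefaultStream().emit("found")
                .statements().add(esperQuery)
                .build();
        }
    }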
50 | 51 | 52 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/TestFeedItemProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/4/13 4 | * Time: 6:01 PM 5 | */ 6 | 7 | 8 | import java.util.concurrent.ConcurrentLinkedQueue; 9 | 10 | class TestFeedItemProvider implements IFeedItemProvider { 11 | ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue(); 12 | 13 | private String[] sentences = ExternalFeedToKafkaAdapterSpoutTest.sentences; // default 14 | 15 | TestFeedItemProvider() {} 16 | 17 | TestFeedItemProvider(String[] sentences) { 18 | this.sentences = sentences; 19 | } 20 | 21 | @Override 22 | public Runnable getRunnableTask() { 23 | return new Runnable() { 24 | @Override 25 | public void run() { 26 | for (String sentence : sentences) { 27 | itemQueue.offer(sentence); 28 | } 29 | try { 30 | Thread.sleep(1000 * 100); 31 | } catch (InterruptedException e) { 32 | e.printStackTrace(); // do something more meaningful here? 33 | } 34 | } 35 | }; 36 | } 37 | 38 | @Override 39 | public Object getNextItemIfAvailable() { 40 | return itemQueue.poll(); 41 | } 42 | } -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.spout.SpoutOutputCollector; 2 | import backtype.storm.task.TopologyContext; 3 | import backtype.storm.topology.OutputFieldsDeclarer; 4 | import backtype.storm.topology.base.BaseRichSpout; 5 | import backtype.storm.tuple.Fields; 6 | import backtype.storm.tuple.Values; 7 | import backtype.storm.utils.Utils; 8 | 9 | import java.util.Map; 10 | import java.util.Random; 11 | 12 | public class RandomSentenceSpout extends BaseRichSpout { 13 | SpoutOutputCollector _collector; 14 | Random _rand; 15 | 16 | 17 | @Override 18 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 19 | _collector = collector; 20 | _rand = new Random(); 21 | } 22 | 23 | @Override 24 | public void nextTuple() { 25 | Utils.sleep(100); 26 | String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away", 27 | "four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" }; 28 | String sentence = sentences[_rand.nextInt(sentences.length)]; 29 | System.out.println("EMITTING+++++++++++++++++++++++++++++++>"+sentence); 30 | _collector.emit(new Values(sentence)); 31 | } 32 | 33 | @Override 34 | public void ack(Object id) { 35 | } 36 | 37 | @Override 38 | public void fail(Object id) { 39 | } 40 | 41 | @Override 42 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 43 | declarer.declare(new Fields("word")); 44 | } 45 | 46 | } -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/WordCountTopology.java: -------------------------------------------------------------------------------- 1 | 2 | import backtype.storm.Config; 3 | import backtype.storm.LocalCluster; 4 | import backtype.storm.StormSubmitter; 5 | import backtype.storm.task.ShellBolt; 6 | import backtype.storm.topology.BasicOutputCollector; 7 | import backtype.storm.topology.IRichBolt; 8 | import backtype.storm.topology.OutputFieldsDeclarer; 9 | import backtype.storm.topology.TopologyBuilder; 10 | import 
backtype.storm.topology.base.BaseBasicBolt; 11 | import backtype.storm.tuple.Fields; 12 | import backtype.storm.tuple.Tuple; 13 | import backtype.storm.tuple.Values; 14 | 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | 18 | /** 19 | * This topology demonstrates Storm's stream groupings and multilang capabilities. 20 | */ 21 | public class WordCountTopology { 22 | 23 | 24 | public static void main(String[] args) throws Exception { 25 | 26 | TopologyBuilder builder = new TopologyBuilder(); 27 | 28 | builder.setSpout("spout", new RandomSentenceSpout(), 5); 29 | 30 | 31 | Config conf = new Config(); 32 | conf.setDebug(true); 33 | 34 | 35 | if (args != null && args.length > 0) { 36 | conf.setNumWorkers(2); 37 | 38 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 39 | } 40 | else { 41 | conf.setMaxTaskParallelism(3); 42 | 43 | LocalCluster cluster = new LocalCluster(); 44 | cluster.submitTopology("word-count", conf, builder.createTopology()); 45 | 46 | Thread.sleep(20000); 47 | 48 | cluster.shutdown(); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/VerboseCollectorBolt.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.topology.BasicOutputCollector; 2 | import backtype.storm.topology.OutputFieldsDeclarer; 3 | import backtype.storm.topology.base.BaseBasicBolt; 4 | import backtype.storm.tuple.Tuple; 5 | 6 | import java.util.concurrent.CountDownLatch; 7 | 8 | public class VerboseCollectorBolt extends BaseBasicBolt { 9 | 10 | ; 11 | private int expectedNumMessages; 12 | private int countReceivedMessages = 0; 13 | 14 | VerboseCollectorBolt(int expectedNumMessages) { 15 | this.expectedNumMessages = expectedNumMessages; 16 | } 17 | 18 | 19 | 20 | public void prepare(java.util.Map stormConf, backtype.storm.task.TopologyContext context) { 21 | 22 | } 23 | 24 | 25 | @Override 26 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 27 | } 28 | 29 | @Override 30 | public void execute(Tuple tuple, BasicOutputCollector collector) { 31 | final String msg = tuple.toString(); 32 | 33 | countReceivedMessages++; 34 | String info = " recvd: " + countReceivedMessages + " expected: " + expectedNumMessages; 35 | System.out.println(info + " >>>>>>>>>>>>>" + msg); 36 | 37 | if (countReceivedMessages == expectedNumMessages) { 38 | System.out.println(" +++++++++++++++++++++ MARKING"); 39 | StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.finishedCollecting = true; 40 | } 41 | 42 | if (countReceivedMessages > expectedNumMessages) { 43 | System.out.print("Fatal error: too many messages received"); 44 | System.exit(-1); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /storm+kafka/src/main/java/VerboseCollectorBolt.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.topology.BasicOutputCollector; 2 | import backtype.storm.topology.OutputFieldsDeclarer; 3 | import backtype.storm.topology.base.BaseBasicBolt; 4 | import backtype.storm.tuple.Tuple; 5 | 6 | import java.util.concurrent.CountDownLatch; 7 | 8 | public class VerboseCollectorBolt extends BaseBasicBolt { 9 | 10 | ; 11 | private int expectedNumMessages; 12 | private int countReceivedMessages = 0; 13 | 14 | VerboseCollectorBolt(int expectedNumMessages) { 15 | this.expectedNumMessages = expectedNumMessages; 16 | } 17 | 18 | 19 | 20 | public void 
prepare(java.util.Map stormConf, backtype.storm.task.TopologyContext context) { 21 | 22 | } 23 | 24 | 25 | @Override 26 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 27 | } 28 | 29 | @Override 30 | public void execute(Tuple tuple, BasicOutputCollector collector) { 31 | final String msg = tuple.toString(); 32 | 33 | countReceivedMessages++; 34 | String info = " recvd: " + countReceivedMessages + " expected: " + expectedNumMessages; 35 | System.out.println(info + " >>>>>>>>>>>>>" + msg); 36 | 37 | TestTopology.recordRecievedMessage(msg); 38 | if (countReceivedMessages == expectedNumMessages) { 39 | System.out.println(" +++++++++++++++++++++ MARKING"); 40 | TestTopology.finishedCollecting = true; 41 | } 42 | 43 | if (countReceivedMessages > expectedNumMessages) { 44 | System.out.print("Fatal error: too many messages received"); 45 | System.exit(-1); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/SentimentClassifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/10/13 4 | * Time: 7:57 PM 5 | */ 6 | 7 | 8 | import com.aliasi.classify.ConditionalClassification; 9 | import com.aliasi.classify.LMClassifier; 10 | import com.aliasi.util.AbstractExternalizable; 11 | import sun.misc.IOUtils; 12 | 13 | import java.io.BufferedReader; 14 | import java.io.File; 15 | import java.io.FileReader; 16 | import java.io.IOException; 17 | 18 | public class SentimentClassifier { 19 | String[] categories; 20 | LMClassifier clazz; 21 | 22 | public static void main(String[] args) throws IOException { 23 | SentimentClassifier classifier = new SentimentClassifier(); 24 | 25 | File tweets = new File("/tmp/tweets"); 26 | BufferedReader br = new BufferedReader(new FileReader(tweets)); 27 | String line; 28 | while ((line = br.readLine()) != null) { 29 | String classification = classifier.classify(line); 30 | System.out.println(classification + ": | " + line); 31 | } 32 | br.close(); 33 | } 34 | 35 | public SentimentClassifier() { 36 | try { 37 | File serializedClassifier = new File("/home/chris/esper/TwitterSentiment-master/classifier.obj"); 38 | clazz = (LMClassifier) AbstractExternalizable.readObject(serializedClassifier); 39 | categories = clazz.categories(); 40 | } catch (Exception e) { 41 | e.printStackTrace(); 42 | } 43 | } 44 | 45 | public String classify(String text) { 46 | ConditionalClassification classification = clazz.classify(text); 47 | return classification.bestCategory(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/SentenceSpout.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/31/13 4 | * Time: 2:16 PM 5 | */ 6 | 7 | 8 | import backtype.storm.spout.SpoutOutputCollector; 9 | import backtype.storm.task.TopologyContext; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.base.BaseRichSpout; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Values; 14 | import backtype.storm.utils.Utils; 15 | 16 | import java.util.Map; 17 | 18 | public class SentenceSpout extends BaseRichSpout { 19 | private transient SpoutOutputCollector collector; 20 | 21 | private static String[] sentences; 22 | private int sentenceIndex = 0; 23 | 24 | SentenceSpout(String[] sentences) { 25 | this.sentences = 
sentences; 26 | } 27 | 28 | @Override 29 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 30 | this.collector = collector; 31 | } 32 | 33 | @Override 34 | public void nextTuple() { 35 | try { 36 | Thread.sleep(10); 37 | } catch (InterruptedException e) { 38 | e.printStackTrace(); 39 | } 40 | if (sentenceIndex < sentences.length) { 41 | String sentence = sentences[sentenceIndex]; 42 | System.out.println("+++++++++++++++++ >>> output sentence: " + sentence); 43 | collector.emit(new Values(sentence)); 44 | sentenceIndex++; 45 | } 46 | } 47 | 48 | @Override 49 | public void ack(Object id) { 50 | } 51 | 52 | @Override 53 | public void fail(Object id) { 54 | } 55 | 56 | @Override 57 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 58 | declarer.declare(new Fields("sentence")); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/KafkaOutputBoltTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.generated.StormTopology; 9 | import backtype.storm.topology.IRichSpout; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import org.testng.annotations.Test; 12 | 13 | import java.io.IOException; 14 | 15 | 16 | public class KafkaOutputBoltTest extends AbstractStormWithKafkaTest { 17 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 10 /* seconds */; 18 | protected static final int SECOND = 1000; 19 | 20 | private static String[] sentences = new String[]{ 21 | "one dog9 - saw the fox over the moon", 22 | "two cats9 - saw the fox over the moon", 23 | "four bears9 - saw the fox over the moon", 24 | "five goats9 - saw the fox over the moon", 25 | "SHUTDOWN", 26 | }; 27 | 28 | 29 | @Test 30 | public void runTestWithTopology() throws IOException { 31 | submitTopology(); 32 | verifyResults(null, -1); 33 | 34 | } 35 | 36 | 37 | protected StormTopology createTopology() { 38 | TopologyBuilder builder = new TopologyBuilder(); 39 | IRichSpout spout = new SentenceSpout(sentences); 40 | KafkaOutputBolt kafkaOutputBolt = 41 | new KafkaOutputBolt(BROKER_CONNECT_STRING, getTopicName(), null, false); 42 | 43 | builder.setSpout("sentenceSpout", spout); 44 | builder.setBolt("kafkaOutputBolt", kafkaOutputBolt, 1) 45 | .shuffleGrouping("sentenceSpout"); 46 | 47 | return builder.createTopology(); 48 | } 49 | 50 | 51 | protected int getMaxAllowedToRunMillisecs() { 52 | return KafkaOutputBoltTest.MAX_ALLOWED_TO_RUN_MILLISECS; 53 | } 54 | } 55 | 56 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/ExternalFeedToKafkaAdapterSpoutTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.generated.StormTopology; 9 | import backtype.storm.topology.IRichSpout; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import org.testng.annotations.Test; 12 | 13 | import java.io.IOException; 14 | 15 | /** 16 | * In this test messages from an external feed (a hard coded array of strings) are dumped into a 17 | * Kafka topic by an instance of ExternalFeedToKafkaAdapterSpout. 
We then use an instance of 18 | * KafkaMessageConsumer to pull those messages off the topic, and verify that what we 19 | * got is equal to what we expect. 20 | */ 21 | @Test 22 | public class ExternalFeedToKafkaAdapterSpoutTest extends AbstractStormWithKafkaTest { 23 | 24 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 30 /* seconds */; 25 | protected static final int SECOND = 1000; 26 | 27 | 28 | @Test 29 | public void runTestWithTopology() throws IOException { 30 | submitTopology(); 31 | try { 32 | Thread.sleep(1000 * 5); 33 | } catch (InterruptedException e) { 34 | e.printStackTrace(); // do something more meaningful here? 35 | } 36 | verifyResults(null, -1); 37 | 38 | } 39 | 40 | @Override 41 | protected StormTopology createTopology() { 42 | TopologyBuilder builder = new TopologyBuilder(); 43 | IRichSpout feedSpout = 44 | new ExternalFeedToKafkaAdapterSpout( 45 | new TestFeedItemProvider(), 46 | BROKER_CONNECT_STRING, 47 | getTopicName(), null); 48 | builder.setSpout("externalFeedSpout", feedSpout); 49 | 50 | 51 | return builder.createTopology(); 52 | } 53 | 54 | 55 | protected int getMaxAllowedToRunMillisecs() { 56 | return ExternalFeedToKafkaAdapterSpoutTest.MAX_ALLOWED_TO_RUN_MILLISECS; 57 | } 58 | } 59 | 60 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/EmitSpecialWordGivenProbabilitySpout.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.spout.SpoutOutputCollector; 2 | import backtype.storm.task.TopologyContext; 3 | import backtype.storm.topology.OutputFieldsDeclarer; 4 | import backtype.storm.topology.base.BaseRichSpout; 5 | import backtype.storm.tuple.Fields; 6 | 7 | import java.util.Map; 8 | 9 | import static backtype.storm.utils.Utils.tuple; 10 | 11 | public class EmitSpecialWordGivenProbabilitySpout extends BaseRichSpout 12 | { 13 | private static final long serialVersionUID = 1L; 14 | 15 | private final String specialWord; 16 | private final int sleepMillisecsAfterEmission; 17 | private final double defaultWordEmissionProbability; 18 | private transient SpoutOutputCollector collector; 19 | 20 | public EmitSpecialWordGivenProbabilitySpout(String specialWord, double emissionProbability, int timesPerSec) { 21 | if (emissionProbability < 0 || emissionProbability > 1.0 ) { 22 | throw new IllegalArgumentException("Probability must be between 0 and 1.0"); 23 | } 24 | this.specialWord = specialWord; 25 | this.defaultWordEmissionProbability = 1.0 - emissionProbability; 26 | this.sleepMillisecsAfterEmission = 1000 / timesPerSec; 27 | } 28 | 29 | @Override 30 | public void declareOutputFields(OutputFieldsDeclarer declarer) 31 | { 32 | declarer.declare(new Fields("word")); 33 | 34 | } 35 | 36 | @Override 37 | public void nextTuple() 38 | { 39 | String wordToEmit = "default"; 40 | if (Math.random() > defaultWordEmissionProbability) { 41 | wordToEmit = specialWord + Math.floor ((Math.random() * 100)); 42 | } 43 | collector.emit( tuple(wordToEmit)); 44 | System.out.println("+++++emitted: " + wordToEmit); 45 | 46 | try { 47 | Thread.sleep(sleepMillisecsAfterEmission); 48 | } catch (InterruptedException e) { 49 | System.out.println("Fatal error"); 50 | System.exit(-1); 51 | } 52 | } 53 | 54 | @Override 55 | public void open(@SuppressWarnings("rawtypes") Map conf, 56 | TopologyContext context, 57 | SpoutOutputCollector collector) 58 | { 59 | this.collector = collector; 60 | } 61 | 62 | @Override 63 | public void close() {} 64 | 65 | @Override 
66 | public void ack(Object msgId) {} 67 | 68 | @Override 69 | public void fail(Object msgId) {} 70 | } 71 | -------------------------------------------------------------------------------- /esper+storm+kafka/README.md: -------------------------------------------------------------------------------- 1 | # Kafka Spout Example 2 | 3 | 4 | 5 | Example Illustrating a Kafka Consumer Spout, a Kafka Producer Bolt, and an Esper Streaming Query Bolt 6 | 7 | ## Description 8 | 9 | 10 | The test class 'ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest' illustrates how to wire up Kafka, Storm and Esper. In this test, ExternalFeedToKafkaAdapterSpout pushes messages into a topic. These messages are then routed into an EsperBolt, which uses the Esper query language to do some simple filtering. We then route the filtered messages to a KafkaOutputBolt, which dumps them onto a second topic. We use an instance of KafkaMessageConsumer to pull those messages off the second topic, and we verify that what we got is equal to what we expect. 11 | 12 | We use Thomas Dudziak's storm-esper library to bind an Esper query processing engine instance to a Storm bolt (more info on that library is available here: http://tomdzk.wordpress.com/2011/09/28/storm-esper). 13 | 14 | 15 | 16 | A list of the main components involved in this example follows: 17 | 18 | KafkaOutputBolt 19 | 20 | A first-pass implementation of a generic Kafka output bolt that takes whatever tuple it 21 | receives, JSON-ifies it, and dumps it on the Kafka topic that is configured in the 22 | constructor. 23 | 24 | ExternalFeedToKafkaAdapterSpout 25 | 26 | Accepts an IFeedItemProvider instance (running on a separate thread spawned by this 27 | adapter spout) that is responsible for acquiring data from an external source; the data 28 | is then transferred to the adapter spout to be deposited on a Kafka topic (the name 29 | of which is set as an argument to the adapter spout constructor). 30 | 31 | 32 | 33 | Testing Support: 34 | 35 | AbstractStormWithKafkaTest 36 | 37 | Simplifies testing of Storm components that consume or produce data items from or to Kafka. 38 | Operates via a 'template method' series of steps, wherein the BeforeClass method sets up a 39 | Storm LocalCluster, waits for the zookeeper instance started by that cluster to 'boot up', 40 | then starts an in-process Kafka server using that zookeeper, and finally creates a topic whose 41 | name is derived from the name of the test class. 42 | 43 | Subclasses only need to implement the abstract createTopology() method (and perhaps 44 | override 'verifyResults()', which is currently somewhat hard-coded to our first two subclasses of 45 | this base class); a minimal subclass is sketched below. 
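To make the template-method flow concrete, here is a minimal sketch of a subclass, modeled on KafkaOutputBoltTest and ExternalFeedToKafkaAdapterSpoutTest shown later in this repository. The class name and the example sentences are invented for illustration; the helper calls (submitTopology, verifyResults, getTopicName, BROKER_CONNECT_STRING) come from the existing subclasses:

    import backtype.storm.generated.StormTopology;
    import backtype.storm.topology.TopologyBuilder;
    import org.testng.annotations.Test;

    import java.io.IOException;

    // Hypothetical subclass illustrating the template-method flow of AbstractStormWithKafkaTest.
    public class ExampleKafkaTopologyTest extends AbstractStormWithKafkaTest {

        @Test
        public void runTestWithTopology() throws IOException {
            submitTopology();          // base class: starts the LocalCluster, in-process Kafka, and this topology
            verifyResults(null, -1);   // base class: compares messages pulled off the topic with expectations
        }

        @Override
        protected StormTopology createTopology() {
            // Route a fixed set of sentences straight into a KafkaOutputBolt writing to this test's topic.
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout("sentenceSpout",
                    new SentenceSpout(new String[]{"a cat sat on the mat", "SHUTDOWN"}));
            builder.setBolt("kafkaOutputBolt",
                    new KafkaOutputBolt(BROKER_CONNECT_STRING, getTopicName(), null, false), 1)
                   .shuffleGrouping("sentenceSpout");
            return builder.createTopology();
        }

        protected int getMaxAllowedToRunMillisecs() {
            return 1000 * 20;
        }
    }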
46 | 47 | 48 | 49 | 50 | 51 | 52 | ## Building and Running 53 | 54 | After downloading the project, cd to the directory in which this README is located, then issue the 2 commands below 55 | 56 | mvn clean compile test 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/TopologyInitializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/12/13 4 | * Time: 4:58 PM 5 | */ 6 | 7 | 8 | import backtype.storm.generated.StormTopology; 9 | import backtype.storm.spout.SchemeAsMultiScheme; 10 | import backtype.storm.topology.IRichSpout; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import org.tomdz.storm.esper.EsperBolt; 13 | import storm.kafka.*; 14 | 15 | public class TopologyInitializer { 16 | public static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1; 17 | public static int STORM_KAFKA_FROM_READ_FROM_START = -2; 18 | 19 | public static StormTopology createTopology(String zookeeperConnectString, 20 | String kafkaBrokerConnectString, 21 | String inputTopic, 22 | String outputTopic, 23 | IFeedItemProvider feedItemProvider, 24 | boolean kafkaOutputBoltRawMode) { 25 | TopologyBuilder builder = new TopologyBuilder(); 26 | IRichSpout feedSpout = 27 | new ExternalFeedToKafkaAdapterSpout( 28 | feedItemProvider, 29 | kafkaBrokerConnectString, 30 | inputTopic, null); 31 | EsperBolt esperBolt = createEsperBolt(); 32 | KafkaOutputBolt kafkaOutputBolt = 33 | new KafkaOutputBolt(kafkaBrokerConnectString, outputTopic, null, kafkaOutputBoltRawMode); 34 | 35 | builder.setSpout("externalFeedSpout", feedSpout); // these spouts are bound together by shared topic 36 | builder.setSpout("kafkaSpout", createKafkaSpout(zookeeperConnectString, inputTopic)); 37 | 38 | builder.setBolt("esperBolt", esperBolt, 1) 39 | .shuffleGrouping("kafkaSpout"); 40 | builder.setBolt("kafkaOutputBolt", kafkaOutputBolt, 1) 41 | .shuffleGrouping("esperBolt"); 42 | return builder.createTopology(); 43 | } 44 | 45 | public static EsperBolt createEsperBolt() { 46 | String esperQuery= 47 | "select str as found from OneWordMsg.win:length_batch(2) where str like '%at%'"; 48 | EsperBolt esperBolt = new EsperBolt.Builder() 49 | .inputs().aliasComponent("kafkaSpout"). 
50 | withFields("str").ofType(String.class).toEventType("OneWordMsg") 51 | .outputs().onDefaultStream().emit("found") 52 | .statements().add(esperQuery) 53 | .build(); 54 | return esperBolt; 55 | } 56 | 57 | public static KafkaSpout createKafkaSpout(String zkConnect, String topicName) { 58 | BrokerHosts brokerHosts = new ZkHosts(zkConnect); 59 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, topicName, "", "storm"); 60 | kafkaConfig.forceStartOffsetTime(STORM_KAFKA_FROM_READ_FROM_START); 61 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); 62 | return new KafkaSpout(kafkaConfig); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.generated.StormTopology; 9 | import backtype.storm.spout.SchemeAsMultiScheme; 10 | import backtype.storm.topology.IRichSpout; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import org.testng.annotations.Test; 13 | import storm.kafka.*; 14 | 15 | import java.io.IOException; 16 | 17 | 18 | /** 19 | * This test builds on ExternalFeedToKafkaAdapterSpoutTest. The external feed messages are dumped 20 | * into a Kafka topic by ExternalFeedToKafkaAdapterSpout as in the first test. We add the second step 21 | * of pulling the messages from the topic by a KafkaSpout and making sure those messages are what 22 | * we expect. To clarify: ExternalFeedToKafkaAdapterSpout pushes messages into a topic, and KafkaSpout 23 | * pulls messages out of a topic. 24 | */ 25 | @Test 26 | public class StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest extends AbstractStormWithKafkaTest { 27 | protected static volatile boolean finishedCollecting = false; 28 | 29 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 20 /* seconds */; 30 | protected static final int SECOND = 1000; 31 | 32 | private static int STORM_KAFKA_FROM_READ_FROM_START = -2; 33 | private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1; 34 | 35 | 36 | @Test 37 | public void runTestWithTopology() throws IOException { 38 | System.out.println("topic: " + getTopicName()); 39 | submitTopology(); 40 | waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt(); 41 | verifyResults(null, -1); 42 | 43 | } 44 | 45 | private void waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt() { 46 | while (!finishedCollecting) { 47 | try { 48 | Thread.sleep(500); 49 | } catch (InterruptedException e) { 50 | e.printStackTrace(); 51 | } 52 | } 53 | System.out.println("DONE"); 54 | } 55 | 56 | @Override 57 | protected StormTopology createTopology() { 58 | TopologyBuilder builder = new TopologyBuilder(); 59 | IRichSpout feedSpout = 60 | new ExternalFeedToKafkaAdapterSpout( 61 | new TestFeedItemProvider(), 62 | BROKER_CONNECT_STRING, 63 | getTopicName(), null); 64 | builder.setSpout("externalFeedSpout", feedSpout); 65 | builder.setSpout("kafkaSpout", createKafkaSpout()); 66 | VerboseCollectorBolt bolt = new VerboseCollectorBolt(5); 67 | builder.setBolt("collector", bolt).shuffleGrouping("kafkaSpout"); 68 | 69 | return builder.createTopology(); 70 | } 71 | 72 | 73 | private KafkaSpout createKafkaSpout() { 74 | BrokerHosts brokerHosts = new ZkHosts(getZkConnect()); 75 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, getTopicName(), "", "storm"); 76 | 
kafkaConfig.forceStartOffsetTime(STORM_KAFKA_FROM_READ_FROM_START); 77 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); 78 | return new KafkaSpout(kafkaConfig); 79 | } 80 | 81 | 82 | protected int getMaxAllowedToRunMillisecs() { 83 | return StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.MAX_ALLOWED_TO_RUN_MILLISECS; 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/KafkaMessageConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/31/13 4 | * Time: 2:37 PM 5 | */ 6 | 7 | 8 | import com.google.common.collect.ImmutableMap; 9 | import kafka.consumer.ConsumerConfig; 10 | import kafka.consumer.ConsumerIterator; 11 | import kafka.consumer.KafkaStream; 12 | import kafka.javaapi.consumer.ConsumerConnector; 13 | import kafka.serializer.StringDecoder; 14 | import kafka.utils.VerifiableProperties; 15 | 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | import java.util.Map; 19 | import java.util.Properties; 20 | 21 | 22 | /** 23 | * Uses Kafka high level consumer API to read from the topic passed in as a constructor argument and 24 | * accumulates all messages read in so that after the test the received messages can be obtained by a 25 | * call to getMessagesReceived(). This enables test driver code to verify that sent messages actually 26 | * equal received messages. 27 | */ 28 | public class KafkaMessageConsumer { 29 | private final String zkConnect; 30 | 31 | private List messagesReceived = new ArrayList(); 32 | private final String topic; 33 | private final String groupId = "KafkaMessageConsumer." + Math.random(); 34 | 35 | public KafkaMessageConsumer(String zkConnect, String topic) { 36 | this.zkConnect = zkConnect; 37 | this.topic = topic; 38 | } 39 | 40 | public List consumeMessages() { 41 | String ttt = topic; 42 | System.out.println("topic in kafka consumer: " + topic); 43 | try { 44 | final ConsumerConnector consumer = 45 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); 46 | final Map topicCountMap = ImmutableMap.of(topic, 1); 47 | final Map>> consumerMap; 48 | 49 | StringDecoder decoder = new StringDecoder(new VerifiableProperties()); 50 | consumerMap = consumer.createMessageStreams(topicCountMap, decoder, decoder); 51 | 52 | final KafkaStream stream = consumerMap.get(topic).get(0); 53 | final ConsumerIterator iterator = stream.iterator(); 54 | while (iterator.hasNext()) { 55 | String msg = iterator.next().message(); 56 | msg = ( msg == null ? "" : msg ); 57 | System.out.println("got message" + msg); 58 | messagesReceived.add(msg); 59 | if (msg.contains("SHUTDOWN")) { 60 | consumer.shutdown(); 61 | return messagesReceived; 62 | } 63 | } 64 | } catch (Exception e) { 65 | e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 
66 | } 67 | return messagesReceived; 68 | } 69 | 70 | public List getMessagesReceived() { 71 | return messagesReceived; 72 | } 73 | 74 | 75 | private ConsumerConfig createConsumerConfig() { 76 | Properties props = new Properties(); 77 | props.put("zookeeper.connect", zkConnect); 78 | props.put("group.id", groupId); 79 | props.put("zk.sessiontimeout.ms", "400"); 80 | props.put("fetch.min.bytes", "1"); 81 | props.put("auto.offset.reset", "smallest"); 82 | props.put("zk.synctime.ms", "200"); 83 | props.put("autocommit.interval.ms", "1000"); 84 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 85 | 86 | return new ConsumerConfig(props); 87 | } 88 | 89 | } 90 | 91 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/TwitterFeedItemProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/10/13 4 | * Time: 1:19 PM 5 | */ 6 | 7 | 8 | import twitter4j.*; 9 | import twitter4j.conf.Configuration; 10 | import twitter4j.conf.ConfigurationBuilder; 11 | 12 | import java.io.IOException; 13 | import java.util.List; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | 16 | 17 | public class TwitterFeedItemProvider implements IFeedItemProvider { 18 | private final ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue(); 19 | 20 | private final String oAuthConsumerKey; 21 | private final String oAuthConsumerSecret; 22 | private final String oAuthAccessToken; 23 | private final String oAuthAccessTokenSecret; 24 | private final String[] searchTerms; 25 | 26 | 27 | public class TwitterListener implements StatusListener { 28 | @Override 29 | public void onStatus(Status status) { 30 | String text = status.getText(); 31 | if (status.isRetweet()) { 32 | text = status.getRetweetedStatus().getText(); 33 | } 34 | itemQueue.offer(text); 35 | } 36 | 37 | @Override 38 | public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { 39 | } 40 | 41 | @Override 42 | public void onTrackLimitationNotice(int numberOfLimitedStatuses) { 43 | } 44 | 45 | @Override 46 | public void onScrubGeo(long userId, long upToStatusId) { 47 | } 48 | 49 | @Override 50 | public void onStallWarning(StallWarning warning) { 51 | } 52 | 53 | @Override 54 | public void onException(Exception ex) { 55 | ex.printStackTrace(); 56 | } 57 | } 58 | 59 | /* 60 | 61 | TwitterFeedItemProvider(List searchTermsList) { 62 | this.searchTerms = searchTermsList.toArray(new String[searchTermsList.size()]); 63 | } 64 | */ 65 | 66 | TwitterFeedItemProvider(final String oAuthConsumerKey, 67 | final String oAuthConsumerSecret, 68 | final String oAuthAccessToken, 69 | final String oAuthAccessTokenSecret, 70 | String... 
terms) { 71 | this.oAuthConsumerKey = oAuthConsumerKey; 72 | this.oAuthConsumerSecret = oAuthConsumerSecret; 73 | this.oAuthAccessToken = oAuthAccessToken; 74 | this.oAuthAccessTokenSecret = oAuthAccessTokenSecret; 75 | 76 | this.searchTerms = terms; 77 | } 78 | 79 | @Override 80 | public Runnable getRunnableTask() { 81 | return new Runnable() { 82 | @Override 83 | public void run() { 84 | TwitterStream twitterStream = getTwitterStream(); 85 | twitterStream.addListener(new TwitterListener()); 86 | long[] followArray = new long[0]; 87 | twitterStream.filter(new FilterQuery(0, followArray, searchTerms)); 88 | } 89 | }; 90 | } 91 | 92 | private TwitterStream getTwitterStream() { 93 | TwitterStream twitterStream; 94 | ConfigurationBuilder builder = new ConfigurationBuilder(); 95 | builder.setOAuthConsumerKey(oAuthConsumerKey); 96 | builder.setOAuthConsumerSecret(oAuthConsumerSecret); 97 | builder.setOAuthAccessToken(oAuthAccessToken); 98 | builder.setOAuthAccessTokenSecret(oAuthAccessTokenSecret); 99 | 100 | Configuration conf = builder.build(); 101 | 102 | twitterStream = new TwitterStreamFactory(conf).getInstance(); 103 | return twitterStream; 104 | } 105 | 106 | @Override 107 | public Object getNextItemIfAvailable() { 108 | return itemQueue.poll(); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/VerifyItemsFromFeedAreSentToMockKafkaProducer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/2/13 4 | * Time: 5:58 PM 5 | */ 6 | 7 | 8 | import kafka.javaapi.producer.Producer; 9 | import kafka.producer.KeyedMessage; 10 | import kafka.producer.ProducerConfig; 11 | import org.easymock.Capture; 12 | import org.easymock.EasyMock; 13 | import org.testng.annotations.*; 14 | import org.testng.TestNG; 15 | import org.testng.TestListenerAdapter; 16 | 17 | import static org.easymock.EasyMock.*; 18 | 19 | import org.slf4j.*; 20 | 21 | import java.util.List; 22 | import java.util.Properties; 23 | import java.util.concurrent.ConcurrentLinkedQueue; 24 | 25 | public class VerifyItemsFromFeedAreSentToMockKafkaProducer { 26 | private static String TOPIC = "someTopic"; 27 | private static String MESSAGE = "i-am-a-message"; 28 | 29 | private static class TestItemProvider implements IFeedItemProvider { 30 | ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue(); 31 | 32 | @Override 33 | public Runnable getRunnableTask() { 34 | return new Runnable() { 35 | @Override 36 | public void run() { 37 | try { 38 | Thread.sleep(10); 39 | } catch (InterruptedException e) { 40 | e.printStackTrace(); 41 | } 42 | 43 | itemQueue.offer(MESSAGE); 44 | } 45 | }; 46 | } 47 | 48 | @Override 49 | public Object getNextItemIfAvailable() { 50 | return itemQueue.poll(); 51 | } 52 | } 53 | 54 | 55 | @Test(enabled = true) 56 | public void testEqualsOfKeyedMessage() { 57 | KeyedMessage 58 | data1 = 59 | new KeyedMessage("foo", "bar"); 60 | KeyedMessage 61 | data2 = 62 | new KeyedMessage( 63 | new String("foo".getBytes()), new String("bar".getBytes())); 64 | 65 | assert data1.equals(data2); 66 | } 67 | 68 | 69 | @Test(enabled = true) 70 | public void testItemsProducedByFeedProviderAreSentToKafka() { 71 | Capture> capturedArgument = 72 | new Capture> (); 73 | 74 | 75 | @SuppressWarnings("unchecked") 76 | Producer producer = createMock(Producer.class); 77 | producer.send(capture(capturedArgument)); 78 | expectLastCall(); 79 | 80 | ExternalFeedToKafkaAdapterSpout spout = 
81 | EasyMock.createMockBuilder(ExternalFeedToKafkaAdapterSpout.class). 82 | addMockedMethod("setupProducer").createMock(); 83 | expect(spout.setupProducer()).andReturn(producer); 84 | 85 | 86 | replay(producer); 87 | replay(spout); 88 | 89 | 90 | 91 | verifyNextTupleReceivesItemFromProviderAndSendsToKafkaProducer(spout); 92 | 93 | verify(producer); 94 | verify(spout); 95 | 96 | KeyedMessage got = capturedArgument.getValue(); 97 | assert got.message().contains(MESSAGE); 98 | 99 | } 100 | 101 | private void verifyNextTupleReceivesItemFromProviderAndSendsToKafkaProducer( 102 | ExternalFeedToKafkaAdapterSpout spout) 103 | { 104 | spout.setFeedProvider(new TestItemProvider()); 105 | spout.setTopicName(TOPIC); 106 | spout.open(null, null, null); 107 | 108 | for (int i = 0; i < 10; i++) { 109 | try { 110 | Thread.sleep(10); 111 | } catch (InterruptedException e) { 112 | e.printStackTrace(); // do something more meaningful here? 113 | } 114 | spout.nextTuple(); 115 | } 116 | } 117 | 118 | } 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /storm+kafka/src/main/java/RandomSentenceGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/27/13 4 | * Time: 11:25 PM 5 | */ 6 | 7 | import java.io.BufferedReader; 8 | import java.io.IOException; 9 | import java.io.InputStreamReader; 10 | import java.util.Hashtable; 11 | import java.util.Random; 12 | import java.util.Vector; 13 | 14 | /* not currently used */ 15 | 16 | public class RandomSentenceGenerator { 17 | 18 | // Hashmap 19 | public static Hashtable> markovChain = new Hashtable>(); 20 | static Random rnd = new Random(); 21 | 22 | private static String[] sentences = new String[]{ 23 | "one king took the fox over the car.", 24 | "two queens bent the fox under the bed.", 25 | "four bears mined the pig into the house.", 26 | "Joe goats rolled the boat over the lodge.", 27 | }; 28 | 29 | 30 | RandomSentenceGenerator() { 31 | // Create the first two entries (k:_start, k:_end) 32 | markovChain.put("_start", new Vector()); 33 | markovChain.put("_end", new Vector()); 34 | } 35 | 36 | String next() { 37 | int index = Math.abs(rnd.nextInt() % sentences.length); 38 | addWords(sentences[index]); 39 | return generateSentence(); 40 | } 41 | 42 | 43 | /* 44 | * Main constructor 45 | */ 46 | public static void main(String[] args) throws IOException { 47 | RandomSentenceGenerator generator = new RandomSentenceGenerator(); 48 | 49 | while(true) { 50 | System.out.println("sentence: " + generator.next()); 51 | } 52 | } 53 | 54 | /* 55 | * Add words 56 | */ 57 | public static void addWords(String phrase) { 58 | // put each word into an array 59 | String[] words = phrase.split(" "); 60 | 61 | // Loop through each word, check if it's already added 62 | // if its added, then get the suffix vector and add the word 63 | // if it hasn't been added then add the word to the list 64 | // if its the first or last word then select the _start / _end key 65 | 66 | for (int i=0; i startWords = markovChain.get("_start"); 71 | startWords.add(words[i]); 72 | 73 | Vector suffix = markovChain.get(words[i]); 74 | if (suffix == null) { 75 | suffix = new Vector(); 76 | suffix.add(words[i+1]); 77 | markovChain.put(words[i], suffix); 78 | } 79 | 80 | } else if (i == words.length-1) { 81 | Vector endWords = markovChain.get("_end"); 82 | endWords.add(words[i]); 83 | 84 | } else { 85 | Vector suffix = markovChain.get(words[i]); 86 | if (suffix == 
null) { 87 | suffix = new Vector(); 88 | suffix.add(words[i+1]); 89 | markovChain.put(words[i], suffix); 90 | } else { 91 | suffix.add(words[i+1]); 92 | markovChain.put(words[i], suffix); 93 | } 94 | } 95 | } 96 | } 97 | 98 | 99 | /* 100 | * Generate a markov phrase 101 | */ 102 | public static String generateSentence() { 103 | 104 | // Vector to hold the phrase 105 | Vector newPhrase = new Vector(); 106 | 107 | // String for the next word 108 | String nextWord = ""; 109 | 110 | // Select the first word 111 | Vector startWords = markovChain.get("_start"); 112 | int startWordsLen = startWords.size(); 113 | nextWord = startWords.get(rnd.nextInt(startWordsLen)); 114 | newPhrase.add(nextWord); 115 | 116 | // Keep looping through the words until we've reached the end 117 | while (nextWord.charAt(nextWord.length()-1) != '.') { 118 | Vector wordSelection = markovChain.get(nextWord); 119 | int wordSelectionLen = wordSelection.size(); 120 | nextWord = wordSelection.get(rnd.nextInt(wordSelectionLen)); 121 | newPhrase.add(nextWord); 122 | } 123 | 124 | String retval = newPhrase.toString().replaceAll(",", ""); 125 | return retval; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/RandomSentenceGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/27/13 4 | * Time: 11:25 PM 5 | */ 6 | 7 | import java.io.BufferedReader; 8 | import java.io.IOException; 9 | import java.io.InputStreamReader; 10 | import java.util.Hashtable; 11 | import java.util.Random; 12 | import java.util.Vector; 13 | 14 | /* not currently used */ 15 | 16 | public class RandomSentenceGenerator { 17 | 18 | // Hashmap 19 | public static Hashtable> markovChain = new Hashtable>(); 20 | static Random rnd = new Random(); 21 | 22 | private static String[] sentences = new String[]{ 23 | "one king took the fox over the car.", 24 | "two queens bent the fox under the bed.", 25 | "four bears mined the pig into the house.", 26 | "Joe goats rolled the boat over the lodge.", 27 | }; 28 | 29 | 30 | RandomSentenceGenerator() { 31 | // Create the first two entries (k:_start, k:_end) 32 | markovChain.put("_start", new Vector()); 33 | markovChain.put("_end", new Vector()); 34 | } 35 | 36 | String next() { 37 | int index = Math.abs(rnd.nextInt() % sentences.length); 38 | addWords(sentences[index]); 39 | return generateSentence(); 40 | } 41 | 42 | 43 | /* 44 | * Main constructor 45 | */ 46 | public static void main(String[] args) throws IOException { 47 | RandomSentenceGenerator generator = new RandomSentenceGenerator(); 48 | 49 | while(true) { 50 | System.out.println("sentence: " + generator.next()); 51 | } 52 | } 53 | 54 | /* 55 | * Add words 56 | */ 57 | public static void addWords(String phrase) { 58 | // put each word into an array 59 | String[] words = phrase.split(" "); 60 | 61 | // Loop through each word, check if it's already added 62 | // if its added, then get the suffix vector and add the word 63 | // if it hasn't been added then add the word to the list 64 | // if its the first or last word then select the _start / _end key 65 | 66 | for (int i=0; i startWords = markovChain.get("_start"); 71 | startWords.add(words[i]); 72 | 73 | Vector suffix = markovChain.get(words[i]); 74 | if (suffix == null) { 75 | suffix = new Vector(); 76 | suffix.add(words[i+1]); 77 | markovChain.put(words[i], suffix); 78 | } 79 | 80 | } else if (i == words.length-1) { 81 | Vector endWords = 
markovChain.get("_end"); 82 | endWords.add(words[i]); 83 | 84 | } else { 85 | Vector suffix = markovChain.get(words[i]); 86 | if (suffix == null) { 87 | suffix = new Vector(); 88 | suffix.add(words[i+1]); 89 | markovChain.put(words[i], suffix); 90 | } else { 91 | suffix.add(words[i+1]); 92 | markovChain.put(words[i], suffix); 93 | } 94 | } 95 | } 96 | } 97 | 98 | 99 | /* 100 | * Generate a markov phrase 101 | */ 102 | public static String generateSentence() { 103 | 104 | // Vector to hold the phrase 105 | Vector newPhrase = new Vector(); 106 | 107 | // String for the next word 108 | String nextWord = ""; 109 | 110 | // Select the first word 111 | Vector startWords = markovChain.get("_start"); 112 | int startWordsLen = startWords.size(); 113 | nextWord = startWords.get(rnd.nextInt(startWordsLen)); 114 | newPhrase.add(nextWord); 115 | 116 | // Keep looping through the words until we've reached the end 117 | while (nextWord.charAt(nextWord.length()-1) != '.') { 118 | Vector wordSelection = markovChain.get(nextWord); 119 | int wordSelectionLen = wordSelection.size(); 120 | nextWord = wordSelection.get(rnd.nextInt(wordSelectionLen)); 121 | newPhrase.add(nextWord); 122 | } 123 | 124 | String retval = newPhrase.toString().replaceAll(",", ""); 125 | return retval; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /storm+kafka/src/main/java/KafkaProducer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/28/13 4 | * Time: 6:07 PM 5 | */ 6 | 7 | 8 | import java.util.concurrent.CountDownLatch; 9 | /* 10 | * Author: cbedford 11 | * Date: 10/22/13 12 | * Time: 8:50 PM 13 | */ 14 | 15 | 16 | import com.google.common.io.Files; 17 | import kafka.admin.CreateTopicCommand; 18 | import kafka.javaapi.producer.Producer; 19 | import kafka.producer.KeyedMessage; 20 | import kafka.producer.ProducerConfig; 21 | import kafka.server.KafkaConfig; 22 | import kafka.server.KafkaServer; 23 | import kafka.utils.MockTime; 24 | 25 | import java.io.File; 26 | import java.util.Properties; 27 | 28 | 29 | public class KafkaProducer { 30 | 31 | private KafkaServer kafkaServer = null; 32 | private final String topicName; 33 | 34 | 35 | CountDownLatch topologyStartedLatch; 36 | public CountDownLatch producerFinishedInitialBatchLatch = new CountDownLatch(1); 37 | 38 | 39 | Producer producer; 40 | 41 | private String[] sentences; 42 | 43 | KafkaProducer(String[] sentences, String topicName, CountDownLatch topologyStartedLatch) { 44 | this.sentences = sentences; 45 | this.topicName = topicName; 46 | this.topologyStartedLatch = topologyStartedLatch; 47 | } 48 | 49 | public Thread startProducer() { 50 | Thread sender = new Thread( 51 | new Runnable() { 52 | @Override 53 | public void run() { 54 | emitBatch(); 55 | ServerAndThreadCoordinationUtils. 56 | countDown(producerFinishedInitialBatchLatch); 57 | ServerAndThreadCoordinationUtils. 
58 | await(topologyStartedLatch); 59 | emitBatch(); // emit second batch after we know topology is up 60 | } 61 | }, 62 | "producerThread" 63 | ); 64 | sender.start(); 65 | return sender; 66 | } 67 | 68 | private void emitBatch() { 69 | Properties props = new Properties(); 70 | props.put("metadata.broker.list", "localhost:9092"); 71 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 72 | props.put("request.required.acks", "1"); 73 | ProducerConfig config = new ProducerConfig(props); 74 | Producer producer = new Producer(config); 75 | 76 | for (String sentence : sentences) { 77 | KeyedMessage data = 78 | new KeyedMessage(topicName, sentence); 79 | producer.send(data); 80 | } 81 | producer.close(); 82 | 83 | } 84 | 85 | public void createTopic(String topicName) { 86 | String[] arguments = new String[8]; 87 | arguments[0] = "--zookeeper"; 88 | arguments[1] = "localhost:2000"; 89 | arguments[2] = "--replica"; 90 | arguments[3] = "1"; 91 | arguments[4] = "--partition"; 92 | arguments[5] = "1"; 93 | arguments[6] = "--topic"; 94 | arguments[7] = topicName; 95 | 96 | CreateTopicCommand.main(arguments); 97 | } 98 | 99 | public void startKafkaServer() { 100 | File tmpDir = Files.createTempDir(); 101 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1); 102 | KafkaConfig kafkaConfig = new KafkaConfig(props); 103 | 104 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime()); 105 | kafkaServer.startup(); 106 | } 107 | 108 | public void shutdown() { 109 | kafkaServer.shutdown(); 110 | } 111 | 112 | private Properties createProperties(String logDir, int port, int brokerId) { 113 | Properties properties = new Properties(); 114 | properties.put("port", port + ""); 115 | properties.put("broker.id", brokerId + ""); 116 | properties.put("log.dir", logDir); 117 | properties.put("zookeeper.connect", "localhost:2000"); // Uses zookeeper created by LocalCluster 118 | return properties; 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /storm+kafka/src/main/java/ServerAndThreadCoordinationUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/28/13 4 | * Time: 2:20 PM 5 | */ 6 | 7 | 8 | import java.io.*; 9 | import java.net.Socket; 10 | import java.util.Date; 11 | import java.util.Timer; 12 | import java.util.TimerTask; 13 | import java.util.concurrent.CountDownLatch; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | public class ServerAndThreadCoordinationUtils { 17 | 18 | public static void setMaxTimeToRunTimer(int millisecs) { 19 | Date timeLimit = 20 | new Date(new Date().getTime() + millisecs); 21 | Timer timer = new Timer(); 22 | 23 | timer.schedule(new TimerTask() { 24 | 25 | @Override 26 | public void run() { 27 | System.out.println("aborting test ! 
Took too long"); 28 | System.exit(-1); 29 | } 30 | }, timeLimit); 31 | } 32 | 33 | public static void pauseUntil() { 34 | boolean fileExists = false; 35 | while (!fileExists) { 36 | File pauseFile = new File("/tmp/go"); 37 | if (!pauseFile.exists()) { 38 | try { 39 | Thread.sleep(500); 40 | } catch (InterruptedException e) { 41 | e.printStackTrace(); 42 | } 43 | } else { 44 | fileExists = true; 45 | } 46 | } 47 | 48 | } 49 | 50 | 51 | public static String send4LetterWord(String host, int port, String cmd) 52 | throws IOException { 53 | System.out.println("connecting to " + host + " " + port); 54 | Socket sock = new Socket(host, port); 55 | BufferedReader reader = null; 56 | try { 57 | OutputStream outstream = sock.getOutputStream(); 58 | outstream.write(cmd.getBytes()); 59 | outstream.flush(); 60 | // this replicates NC - close the output stream before reading 61 | sock.shutdownOutput(); 62 | 63 | reader = 64 | new BufferedReader( 65 | new InputStreamReader(sock.getInputStream())); 66 | StringBuilder sb = new StringBuilder(); 67 | String line; 68 | while ((line = reader.readLine()) != null) { 69 | sb.append(line + "\n"); 70 | } 71 | return sb.toString(); 72 | } finally { 73 | sock.close(); 74 | if (reader != null) { 75 | reader.close(); 76 | } 77 | } 78 | } 79 | 80 | public static boolean waitForServerUp(String host, int port, long timeout) { 81 | long start = System.currentTimeMillis(); 82 | while (true) { 83 | try { 84 | // if there are multiple hostports, just take the first one 85 | String result = send4LetterWord(host, port, "stat"); 86 | System.out.println("result of send: " + result); 87 | if (result.startsWith("Zookeeper version:")) { 88 | return true; 89 | } 90 | } catch (IOException e) { 91 | // ignore as this is expected 92 | System.out.println("server " + host + ":" + port + " not up " + e); 93 | } 94 | 95 | if (System.currentTimeMillis() > start + timeout) { 96 | break; 97 | } 98 | try { 99 | Thread.sleep(250); 100 | } catch (InterruptedException e) { 101 | // ignore 102 | } 103 | } 104 | return false; 105 | } 106 | 107 | public static void await(CountDownLatch latch) { 108 | try { 109 | latch.await(); 110 | } catch (InterruptedException e) { 111 | e.printStackTrace(); 112 | System.out.println("FATAL ERROR"); 113 | System.exit(-1); 114 | } 115 | } 116 | 117 | 118 | public static void countDown(CountDownLatch latch) { 119 | try { 120 | latch.countDown(); 121 | } catch (Exception e) { 122 | e.printStackTrace(); 123 | System.out.println("FATAL ERROR"); 124 | System.exit(-1); 125 | } 126 | } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/ExternalFeedToKafkaAdapterSpout.java: -------------------------------------------------------------------------------- 1 | import backtype.storm.spout.SpoutOutputCollector; 2 | import backtype.storm.task.TopologyContext; 3 | import backtype.storm.topology.OutputFieldsDeclarer; 4 | import backtype.storm.topology.base.BaseRichSpout; 5 | import backtype.storm.tuple.Fields; 6 | import com.google.common.collect.ImmutableMap; 7 | import com.google.gson.Gson; 8 | import kafka.javaapi.producer.Producer; 9 | import kafka.producer.KeyedMessage; 10 | import kafka.producer.ProducerConfig; 11 | 12 | import java.util.Map; 13 | import java.util.Properties; 14 | 15 | /** 16 | * A 17 | */ 18 | public class ExternalFeedToKafkaAdapterSpout extends BaseRichSpout { 19 | private static final long serialVersionUID = 1L; 20 | public static final String RECORD = "record"; 21 | 22 | 23 | 
private String brokerConnectString; 24 | 25 | private String topicName; 26 | private String serializerClass; 27 | 28 | private transient SpoutOutputCollector collector; 29 | private transient TopologyContext context; 30 | private transient Producer producer; 31 | 32 | private IFeedItemProvider feedProvider; 33 | 34 | public ExternalFeedToKafkaAdapterSpout(IFeedItemProvider feedProvider, 35 | String brokerConnectString, 36 | String topicName, 37 | String serializerClass) { 38 | this.feedProvider = feedProvider; 39 | this.brokerConnectString = brokerConnectString; 40 | this.topicName = topicName; 41 | if (serializerClass == null) { 42 | serializerClass = "kafka.serializer.StringEncoder"; 43 | } 44 | this.serializerClass = serializerClass; 45 | } 46 | 47 | 48 | public void setFeedProvider(IFeedItemProvider feedProvider) { // mainly for testing 49 | this.feedProvider = feedProvider; 50 | } 51 | 52 | public void setTopicName(String topicName) { // mainly for testing 53 | this.topicName = topicName; 54 | } 55 | 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 59 | declarer.declare(new Fields(RECORD)); 60 | } 61 | 62 | 63 | @Override 64 | public void open(@SuppressWarnings("rawtypes") Map conf, 65 | TopologyContext context, 66 | SpoutOutputCollector collector) { 67 | this.collector = collector; 68 | this.context = context; 69 | 70 | producer = setupProducer(); 71 | 72 | Thread feedProviderThread = 73 | new Thread(feedProvider.getRunnableTask(), "feedProviderThread"); 74 | feedProviderThread.start(); 75 | } 76 | 77 | 78 | @Override 79 | public void nextTuple() { 80 | try { 81 | Thread.sleep(10); 82 | } catch (InterruptedException e) { 83 | e.printStackTrace(); 84 | } 85 | 86 | Object feedItem = feedProvider.getNextItemIfAvailable(); 87 | 88 | if (feedItem != null) { 89 | System.out.println(">>>->>feed item is: " + feedItem); 90 | final Map itemAsMap = ImmutableMap.of(RECORD, feedItem); 91 | try { 92 | String itemAsJson = new Gson().toJson(itemAsMap); 93 | KeyedMessage data = 94 | new KeyedMessage(topicName, itemAsJson); 95 | producer.send(data); 96 | } catch (Exception e) { 97 | throw new RuntimeException("Conversion to json failed: " + feedItem); 98 | } 99 | 100 | } else { 101 | try { 102 | Thread.sleep(10); 103 | } catch (InterruptedException e) { 104 | e.printStackTrace(); // do something more meaningful here? 105 | } 106 | } 107 | 108 | } 109 | 110 | // should be private, but have not gotten PowerMock unit testing to work yet. 
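    // Note on setupProducer() below: "serializer.class" is set twice, so the second
    // put("serializer.class", "kafka.serializer.StringEncoder") overrides whatever serializerClass
    // value was passed to the constructor.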
111 | protected Producer setupProducer() { 112 | Properties props = new Properties(); 113 | props.put("metadata.broker.list", brokerConnectString); 114 | props.put("serializer.class", serializerClass); 115 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 116 | props.put("producer.type", "sync"); 117 | props.put("batch.size", "1"); 118 | 119 | ProducerConfig config = new ProducerConfig(props); 120 | return new Producer(config); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/KafkaOutputBolt.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/30/13 4 | * Time: 9:39 PM 5 | */ 6 | 7 | 8 | import backtype.storm.task.OutputCollector; 9 | import backtype.storm.task.TopologyContext; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import com.google.gson.Gson; 15 | import kafka.javaapi.producer.Producer; 16 | import kafka.producer.KeyedMessage; 17 | import kafka.producer.ProducerConfig; 18 | 19 | import java.io.IOException; 20 | import java.util.HashMap; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.Properties; 24 | 25 | /** 26 | * A first pass implementation of a generic Kafka Output Bolt that takes whatever tuple it 27 | * recieves, JSON-ifies it, and dumps it on the Kafka topic that is configured in the 28 | * constructor. By default the JSON-ification algorithms works such that the Json object's 29 | * attribute names are the field names of the tuples (currently only 1-tuples are supported). 30 | * In other words, the JSON-ified value is contructed as a map with key names derived from 31 | * tuple field names and corresponding values set as the JSON-ified tuple object. 32 | * 33 | * However, if the KafkaOutputBolt constructor is called with rawMode=true, then for a 1-tuple 34 | * we will assume the tuple value is a valid JSON string. TODO - we will eventually support 35 | * tuples of length 2 and greater, at which point raw mode will boil down to putting the 'raw' 36 | * valid JSON strings given by the i-th element of each tuple into an array. 
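 * For instance, in default (non-raw) mode a 1-tuple declared with the single field "word" and
 * carrying the value "cat" is written to the topic as the JSON string {"word":"cat"}.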
37 | */ 38 | public class KafkaOutputBolt extends BaseRichBolt { 39 | private static final long serialVersionUID = 1L; 40 | private final boolean rawMode; 41 | 42 | private String brokerConnectString; 43 | private String topicName; 44 | private String serializerClass; 45 | 46 | private transient Producer producer; 47 | private transient OutputCollector collector; 48 | private transient TopologyContext context; 49 | 50 | public KafkaOutputBolt(String brokerConnectString, 51 | String topicName, 52 | String serializerClass, 53 | boolean rawMode) { 54 | if (serializerClass == null) { 55 | serializerClass = "kafka.serializer.StringEncoder"; 56 | } 57 | this.brokerConnectString = brokerConnectString; 58 | this.serializerClass = serializerClass; 59 | this.topicName = topicName; 60 | this.rawMode = rawMode; 61 | } 62 | 63 | @Override 64 | public void prepare(Map stormConf, 65 | TopologyContext context, 66 | OutputCollector collector) { 67 | Properties props = new Properties(); 68 | props.put("metadata.broker.list", brokerConnectString); 69 | props.put("serializer.class", serializerClass); 70 | props.put("producer.type", "sync"); 71 | props.put("batch.size", "1"); 72 | 73 | ProducerConfig config = new ProducerConfig(props); 74 | producer = new Producer(config); 75 | 76 | this.context = context; 77 | this.collector = collector; 78 | } 79 | 80 | @Override 81 | public void execute(Tuple input) { 82 | String tupleAsJson = null; 83 | try { 84 | if (rawMode) { 85 | tupleAsJson = input.getString(0); 86 | 87 | } else { 88 | tupleAsJson = JsonHelper.toJson(input); 89 | } 90 | KeyedMessage data = 91 | new KeyedMessage(topicName, tupleAsJson); 92 | producer.send(data); 93 | collector.ack(input); 94 | } catch (Exception e) { 95 | collector.fail(input); 96 | } 97 | } 98 | 99 | @Override 100 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 101 | 102 | } 103 | 104 | public static Producer initProducer() throws IOException { 105 | Properties props = new Properties(); 106 | props.put("metadata.broker.list", "localhost:9092"); 107 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 108 | props.put("producer.type", "async"); 109 | props.put("batch.size", "1"); 110 | ProducerConfig config = new ProducerConfig(props); 111 | 112 | return new Producer(config); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.buildlackey 6 | kafka-producer-to-consumer-example 7 | kafka-producer-to-consumer-example 8 | 1.0 9 | jar 10 | Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers 11 | 12 | 13 | 14 | leadLackey 15 | Chris Bedford 16 | 17 | 18 | 19 | 20 | 21 | 22 | storm 23 | storm 24 | 0.9.0-wip17 25 | 26 | 27 | storm 28 | storm-core 29 | 0.9.0-wip17 30 | 31 | 32 | storm 33 | storm-kafka 34 | 0.9.0-wip16a-scala292 35 | 36 | 37 | com.netflix.curator 38 | curator-test 39 | 1.2.5 40 | 41 | 42 | 43 | org.slf4j 44 | slf4j-log4j12 45 | 46 | 47 | log4j 48 | log4j 49 | 50 | 51 | 52 | 53 | 54 | 55 | 2.2.1 56 | 57 | 58 | 59 | 60 | 61 | org.apache.maven.plugins 62 | maven-enforcer-plugin 63 | 1.1.1 64 | 65 | 66 | enforce-versions 67 | 68 | enforce 69 | 70 | 71 | 72 | 73 | 2.2.1 74 | 75 | 76 | 1.7 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | org.apache.maven.plugins 86 | maven-compiler-plugin 87 | 3.1 88 | 89 | 1.7 90 | 1.7 91 | 92 | 93 | 94 | 95 | org.apache.maven.plugins 96 | maven-jar-plugin 97 
| 2.4 98 | 99 | 100 | org.apache.maven.plugins 101 | maven-source-plugin 102 | 2.2 103 | 104 | true 105 | 106 | 107 | 108 | create-source-jar 109 | 110 | jar-no-fork 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/EsperFilteredTwitterFeedTopology.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.Config; 9 | import backtype.storm.StormSubmitter; 10 | import backtype.storm.generated.AlreadyAliveException; 11 | import backtype.storm.generated.InvalidTopologyException; 12 | import backtype.storm.generated.StormTopology; 13 | 14 | import java.io.IOException; 15 | 16 | 17 | /** 18 | * This example pulls tweets from twitter and runs them from a filter written in Esper query language (EQL). Our 19 | * ExternalFeedToKafkaAdapterSpout pushes messages into a topic. These messages are then routed into an EsperBolt which 20 | * uses EQL to do some simple filtering, We then route the filtered messages to a KafkaOutputBolt which 21 | * dumps the filtered messages on a second topic. 22 | */ 23 | public class EsperFilteredTwitterFeedTopology { 24 | 25 | private final String outputTopic = this.getClass().getSimpleName() + "_output"; 26 | private final String firstTopic = this.getClass().getSimpleName() + "_input"; 27 | 28 | private final String oAuthConsumerKey; 29 | private final String oAuthConsumerSecret; 30 | private final String oAuthAccessToken; 31 | private final String oAuthAccessTokenSecret; 32 | private final String brokerConnectString; // kakfa broker server/port info 33 | private final String searchTerm; // twitter feed filter search term 34 | 35 | 36 | public EsperFilteredTwitterFeedTopology( 37 | final String oAuthConsumerKey, 38 | final String oAuthConsumerSecret, 39 | final String oAuthAccessToken, 40 | final String oAuthAccessTokenSecret, 41 | final String brokerConnectString, 42 | final String searchTerm) { 43 | this.oAuthConsumerKey = oAuthConsumerKey; 44 | this.oAuthConsumerSecret = oAuthConsumerSecret; 45 | this.oAuthAccessToken = oAuthAccessToken; 46 | this.oAuthAccessTokenSecret = oAuthAccessTokenSecret; 47 | this.brokerConnectString = brokerConnectString; 48 | this.searchTerm = searchTerm; 49 | } 50 | 51 | public static void main(String[] args) throws InvalidTopologyException, AlreadyAliveException, IOException { 52 | if (args.length != 6) { 53 | throw new RuntimeException("USAGE: " 54 | + " " 55 | + " " 56 | + " " 57 | + "" 58 | + "" 59 | + " " 60 | ); 61 | } 62 | 63 | final String oAuthConsumerKey = args[0]; 64 | final String oAuthConsumerSecret = args[1]; 65 | final String oAuthAccessToken = args[2]; 66 | final String oAuthAccessTokenSecret = args[3]; 67 | final String brokerConnectString = args[4]; 68 | final String searchTerm = args[5]; 69 | 70 | 71 | EsperFilteredTwitterFeedTopology topology = new EsperFilteredTwitterFeedTopology( 72 | oAuthConsumerKey, 73 | oAuthConsumerSecret, 74 | oAuthAccessToken, 75 | oAuthAccessTokenSecret, 76 | brokerConnectString, 77 | searchTerm 78 | ); 79 | topology.submitTopology(); 80 | 81 | } 82 | 83 | public String getTopicName() { // input topic 84 | return firstTopic; 85 | } 86 | 87 | public String getSecondTopicName() { // output topic 88 | return outputTopic; 89 | } 90 | 91 | protected String getZkConnect() { // Uses zookeeper created by LocalCluster 92 | return "localhost:2181"; 93 | 
} 94 | 95 | 96 | public void submitTopology() throws IOException, AlreadyAliveException, InvalidTopologyException { 97 | System.out.println("topic: " + getTopicName() + "second topic:" + getSecondTopicName()); 98 | final Config conf = getDebugConfigForStormTopology(); 99 | conf.setNumWorkers(2); 100 | StormSubmitter.submitTopology(this.getClass().getSimpleName(), conf, createTopology()); 101 | } 102 | 103 | protected StormTopology createTopology() { 104 | TwitterFeedItemProvider feedItemProvider = new TwitterFeedItemProvider( 105 | oAuthConsumerKey, 106 | oAuthConsumerSecret, 107 | oAuthAccessToken, 108 | oAuthAccessTokenSecret, 109 | searchTerm); 110 | return TopologyInitializer. 111 | createTopology( 112 | getZkConnect(), 113 | brokerConnectString, 114 | getTopicName(), 115 | getSecondTopicName(), 116 | feedItemProvider, 117 | true); 118 | } 119 | 120 | public static Config getDebugConfigForStormTopology() { 121 | Config config = new Config(); 122 | config.setDebug(true); 123 | config.put(Config.STORM_ZOOKEEPER_CONNECTION_TIMEOUT, 900 * 1000); 124 | config.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 900 * 1000); 125 | return config; 126 | } 127 | } 128 | 129 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.generated.StormTopology; 9 | import org.testng.annotations.BeforeClass; 10 | import org.testng.annotations.Test; 11 | 12 | import java.io.File; 13 | import java.io.IOException; 14 | 15 | 16 | /** 17 | * This test builds on StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest. Our ExternalFeedToKafkaAdapterSpout 18 | * pushes messages into a topic. These messages are then routed into an EsperBolt which uses the Esper query 19 | * language to do some simple filtering, We then route the filtered messages to a KafkaOutputBolt which 20 | * dumps the filtered messages on a second topic. We use an instance of Kafka MessageConsumer to pull those 21 | * messages off the second topic, and we verify that what we got is equal to what we expect. 
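 * In short: ExternalFeedToKafkaAdapterSpout -> first topic -> Kafka spout reading that topic ->
 * EsperBolt (EQL filter) -> KafkaOutputBolt -> second topic -> KafkaMessageConsumer, which performs
 * the verification.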
22 | */ 23 | public class ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest extends AbstractStormWithKafkaTest { 24 | public static final int EXPECTED_COUNT = 6; 25 | protected static volatile boolean finishedCollecting = false; 26 | 27 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 25 /* seconds */; 28 | protected static final int SECOND = 1000; 29 | 30 | private final String secondTopic = this.getClass().getSimpleName() + "topic" + getRandomInteger(1000); 31 | private volatile boolean testPassed = true; // assume the best 32 | 33 | @BeforeClass 34 | protected void deleteFiles() { 35 | deleteSentinelFile("/tmp/before.storm"); 36 | deleteSentinelFile("/tmp/after.storm"); 37 | } 38 | 39 | private void deleteSentinelFile(String pathname) { 40 | File sentinel = new File(pathname); 41 | sentinel.delete(); 42 | if (sentinel.exists()) { 43 | throw new RuntimeException("Could not delete sentinel file"); 44 | } 45 | } 46 | 47 | @Test 48 | public void runTestWithTopology() throws IOException { 49 | System.out.println("topic: " + getTopicName() + "second topic:" + getSecondTopicName()); 50 | //ServerAndThreadCoordinationUtils.pauseUntil("/tmp/before.storm"); 51 | submitTopology(); // The last bolt in this topology will write to second topic 52 | //ServerAndThreadCoordinationUtils.pauseUntil("/tmp/after.storm"); 53 | Thread verifyThread = setupVerifyThreadToListenOnSecondTopic(); 54 | try { 55 | verifyThread.join(); 56 | } catch (InterruptedException e) { 57 | e.printStackTrace(); 58 | } 59 | if (!testPassed) { 60 | throw new RuntimeException("Test did not pass. Got messages: "); 61 | } 62 | } 63 | 64 | @Override 65 | public String getSecondTopicName() { 66 | return secondTopic; 67 | } 68 | 69 | @Override 70 | protected StormTopology createTopology() { 71 | return TopologyInitializer. 72 | createTopology( 73 | getZkConnect(), 74 | BROKER_CONNECT_STRING, 75 | getTopicName(), 76 | getSecondTopicName(), 77 | new TestFeedItemProvider(getTestSentences()), false); 78 | } 79 | 80 | protected int getMaxAllowedToRunMillisecs() { 81 | return ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.MAX_ALLOWED_TO_RUN_MILLISECS; 82 | } 83 | 84 | private void waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt() { 85 | while (!finishedCollecting) { 86 | try { 87 | Thread.sleep(500); 88 | } catch (InterruptedException e) { 89 | e.printStackTrace(); 90 | } 91 | } 92 | System.out.println("DONE"); 93 | } 94 | 95 | 96 | // EXPECTED_COUNT - consumer will see 6 occurrences of cat out of 6 batches of 2 97 | // The shutdown will trigger when we see the first 'cat - SHUTDOWN'. That's why the 98 | // consumer does not see 7 cats. 
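    // Arithmetic: the sentences below contain five plain "cat" entries, and the first
    // "cat - SHUTDOWN" is still counted before the shutdown kicks in, giving 5 + 1 = 6.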
99 | private String[] getTestSentences() { 100 | return new String[]{ 101 | "cat", 102 | "pig", 103 | 104 | "pig", 105 | "pig", 106 | 107 | "pig", 108 | "cat", 109 | 110 | "cat", 111 | "pig", 112 | 113 | "cat", 114 | "cat", 115 | 116 | "cat - SHUTDOWN", 117 | "cat - SHUTDOWN", 118 | }; 119 | 120 | } 121 | 122 | private Thread setupVerifyThreadToListenOnSecondTopic() { 123 | Thread.UncaughtExceptionHandler uncaughtHandler = new Thread.UncaughtExceptionHandler() { 124 | @Override 125 | public void uncaughtException(Thread th, Throwable ex) { 126 | testPassed = false; 127 | } 128 | }; 129 | Thread verifyThread = new Thread( 130 | new Runnable() { 131 | @Override 132 | public void run() { 133 | verifyResults(getSecondTopicName(), EXPECTED_COUNT); 134 | } 135 | }, 136 | "verifyThread" 137 | ); 138 | verifyThread.setUncaughtExceptionHandler(uncaughtHandler); 139 | verifyThread.start(); 140 | return verifyThread; 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /storm+kafka/src/main/java/TestTopology.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/22/13 4 | * Time: 8:50 PM 5 | */ 6 | 7 | 8 | import backtype.storm.Config; 9 | import backtype.storm.LocalCluster; 10 | import backtype.storm.spout.SchemeAsMultiScheme; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import storm.kafka.*; 13 | 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | import java.util.Random; 17 | import java.util.concurrent.CountDownLatch; 18 | 19 | public class TestTopology { 20 | 21 | 22 | final static int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 90 /* seconds */; 23 | 24 | CountDownLatch topologyStartedLatch = new CountDownLatch(1); 25 | 26 | private static int STORM_KAFKA_FROM_READ_FROM_START = -2; 27 | private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1; 28 | private static int readFromMode = STORM_KAFKA_FROM_READ_FROM_START; 29 | private int expectedNumMessages = 8; 30 | 31 | private static final int SECOND = 1000; 32 | private static List messagesReceived = new ArrayList(); 33 | 34 | private LocalCluster cluster = new LocalCluster(); 35 | 36 | private static final String TOPIC_NAME = "big-topix-" + new Random().nextInt(); 37 | volatile static boolean finishedCollecting = false; 38 | 39 | private static String[] sentences = new String[]{ 40 | "one dog9 - saw the fox over the moon", 41 | "two cats9 - saw the fox over the moon", 42 | "four bears9 - saw the fox over the moon", 43 | "five goats9 - saw the fox over the moon", 44 | }; 45 | 46 | private KafkaProducer kafkaProducer = new KafkaProducer(sentences, TOPIC_NAME, topologyStartedLatch); 47 | 48 | 49 | public static void recordRecievedMessage(String msg) { 50 | synchronized (TestTopology.class) { // ensure visibility of list updates between threads 51 | messagesReceived.add(msg); 52 | } 53 | } 54 | 55 | 56 | public static void main(String[] args) { 57 | TestTopology testTopology = new TestTopology(); 58 | 59 | if (args.length == 1 && args[0].equals("--fromCurrent")) { 60 | readFromMode = STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET; 61 | testTopology.expectedNumMessages = 4; 62 | } 63 | 64 | testTopology.runTest(); 65 | } 66 | 67 | private void runTest() { 68 | ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(MAX_ALLOWED_TO_RUN_MILLISECS); 69 | ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * SECOND); // Wait for zookeeper to come up 70 | 71 | 
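        // Ordering matters below: start the embedded Kafka broker and create the topic, start the
        // producer and wait for its initial batch, submit the topology, and only then count down
        // topologyStartedLatch (which was handed to the KafkaProducer at construction time).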
kafkaProducer.startKafkaServer(); 72 | kafkaProducer.createTopic(TOPIC_NAME); 73 | 74 | try { 75 | 76 | 77 | kafkaProducer.startProducer(); 78 | ServerAndThreadCoordinationUtils.await(kafkaProducer.producerFinishedInitialBatchLatch); 79 | 80 | setupKafkaSpoutAndSubmitTopology(); 81 | try { 82 | Thread.sleep(5000); // Would be nice to have a call back inform us when ready 83 | } catch (InterruptedException e) { 84 | e.printStackTrace(); 85 | } 86 | ServerAndThreadCoordinationUtils.countDown(topologyStartedLatch); 87 | 88 | awaitResults(); 89 | } catch (InterruptedException e) { 90 | e.printStackTrace(); 91 | } 92 | 93 | verifyResults(); 94 | shutdown(); 95 | System.out.println("SUCCESSFUL COMPLETION"); 96 | System.exit(0); 97 | } 98 | 99 | 100 | 101 | private void awaitResults() { 102 | while (!finishedCollecting) { 103 | try { 104 | Thread.sleep(500); 105 | } catch (InterruptedException e) { 106 | e.printStackTrace(); 107 | } 108 | } 109 | 110 | // Sleep another couple of seconds in case any more messages than expected come into the bolt. 111 | // In this case the bolt should throw a fatal error 112 | try { 113 | Thread.sleep(2000); 114 | } catch (InterruptedException e) { 115 | e.printStackTrace(); 116 | } 117 | 118 | 119 | System.out.println("after await"); 120 | } 121 | 122 | private void verifyResults() { 123 | synchronized (TestTopology.class) { // ensure visibility of list updates between threads 124 | int count = 0; 125 | for (String msg : messagesReceived) { 126 | if (msg.contains("cat") || msg.contains("dog") || msg.contains("bear") || msg.contains("goat")) { 127 | count++; 128 | } 129 | } 130 | if (count != expectedNumMessages) { 131 | System.out.println(">>>>>>>>>>>>>>>>>>>>FAILURE - Did not receive expected messages"); 132 | System.exit(-1); 133 | } 134 | 135 | } 136 | } 137 | 138 | private void setupKafkaSpoutAndSubmitTopology() throws InterruptedException { 139 | BrokerHosts brokerHosts = new ZkHosts("localhost:2000"); 140 | 141 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, TOPIC_NAME, "", "storm"); 142 | kafkaConfig.forceStartOffsetTime(readFromMode /* either earliest or current offset */); 143 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); 144 | 145 | 146 | TopologyBuilder builder = new TopologyBuilder(); 147 | builder.setSpout("words", new KafkaSpout(kafkaConfig), 1); 148 | VerboseCollectorBolt bolt = new VerboseCollectorBolt(expectedNumMessages); 149 | builder.setBolt("print", bolt).shuffleGrouping("words"); 150 | 151 | 152 | Config config = new Config(); 153 | 154 | cluster.submitTopology("kafka-test", config, builder.createTopology()); 155 | } 156 | 157 | private void shutdown() { 158 | cluster.shutdown(); 159 | kafkaProducer.shutdown(); 160 | } 161 | 162 | 163 | 164 | } 165 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/FacebookFeedItemProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/4/13 4 | * Time: 6:01 PM 5 | */ 6 | 7 | 8 | import com.restfb.Connection; 9 | import com.restfb.DefaultFacebookClient; 10 | import com.restfb.FacebookClient; 11 | import com.restfb.Parameter; 12 | import com.restfb.types.Post; 13 | 14 | import java.text.ParseException; 15 | import java.text.SimpleDateFormat; 16 | import java.util.Date; 17 | import java.util.Iterator; 18 | import java.util.List; 19 | import java.util.concurrent.ConcurrentLinkedQueue; 20 | import 
org.apache.commons.collections.buffer.CircularFifoBuffer; 21 | 22 | 23 | public class FacebookFeedItemProvider implements IFeedItemProvider { 24 | public static final SimpleDateFormat GMT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); 25 | 26 | private static final int TIME_OVERLAP = 1000 * 60; // one minute 27 | private static final int NUM_REMEMBERED_PREVIOUSLY_SEEN_ITEM_IDS = 1000; 28 | 29 | private final String queryString; 30 | private final FacebookClient facebookClient; 31 | private final ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue(); 32 | 33 | private final CircularFifoBuffer prevSeenItemIds = new CircularFifoBuffer(NUM_REMEMBERED_PREVIOUSLY_SEEN_ITEM_IDS); 34 | 35 | 36 | private volatile Date lastQueryTime = new Date(); 37 | //private volatile Date lastQueryTime = parseDate("2013-11-08T19:33:20-0800"); 38 | 39 | 40 | public FacebookFeedItemProvider(String authToken, String queryString) { 41 | facebookClient = new DefaultFacebookClient(authToken); 42 | this.queryString = queryString; 43 | } 44 | 45 | public static void main(String[] args) { 46 | Date startDate = parseDate("2013-11-08T19:33:20-0800"); 47 | 48 | FacebookFeedItemProvider provider = new FacebookFeedItemProvider(args[0], "Rizal"); 49 | Thread thread = new Thread(provider.getRunnableTask(), "facebookFeedItemProviderThread"); 50 | thread.start(); 51 | 52 | //System.out.println("Getting from queue"); 53 | 54 | while (true) { 55 | try { 56 | Thread.sleep(5000); 57 | } catch (InterruptedException e) { 58 | e.printStackTrace(); 59 | } 60 | String item = provider.itemQueue.poll(); 61 | if (item != null) { 62 | System.out.println("+++++++++++++ >>>: " + item); 63 | } else { 64 | //System.out.println("+++++++++++++ no queue item"); 65 | } 66 | } 67 | } 68 | 69 | private static Date parseDate(String dateString) { 70 | Date startDate = null; 71 | try { 72 | startDate = GMT_DATE_FORMAT.parse(dateString); 73 | System.out.println("result of parse is " + getFormattedDate(startDate)); 74 | } catch (ParseException e) { 75 | e.printStackTrace(); 76 | } 77 | return startDate; 78 | } 79 | 80 | 81 | @Override 82 | public Runnable getRunnableTask() { 83 | return new Runnable() { 84 | @Override 85 | public void run() { 86 | while (true) { 87 | // We set updatedLastQueryTime to some time before the time we start our search so we don't 88 | // miss any items posted while the search is being done. This means we can 89 | // double process some items. To avoid this we maintain a bounded queue of previously seen 90 | // item ids. If the number previously seen is more than the buffer bound we might double process, 91 | // but for our demo we won't worry about this. 
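                    // (TIME_OVERLAP is one minute and prevSeenItemIds remembers the last
                    // NUM_REMEMBERED_PREVIOUSLY_SEEN_ITEM_IDS = 1000 post ids; see the constants above.)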
92 | // 93 | //System.out.println("starting query from: " + getFormattedDate(lastQueryTime)); 94 | Date updatedLastQueryTime = new Date( new Date().getTime() - TIME_OVERLAP ); 95 | //Date updatedLastQueryTime = new Date(); 96 | Connection postStream = getPostStream(); 97 | List postList = postStream.getData(); 98 | if (postList.size() > 0) { 99 | for (Post p : postList) { 100 | //System.out.println("Post at : " + getFormattedDate(p.getCreatedTime()) + "\n" + p.getMessage() + " id = " + p.getId()); 101 | enqueueItemIfNotPreviouslySeen(p); 102 | } 103 | } 104 | lastQueryTime = updatedLastQueryTime; 105 | 106 | try { 107 | Thread.sleep(5000); 108 | } catch (InterruptedException e) { 109 | e.printStackTrace(); 110 | } 111 | } 112 | } 113 | 114 | private void enqueueItemIfNotPreviouslySeen(Post p) { 115 | String thisPostId = p.getId(); 116 | boolean sawBefore = false; 117 | 118 | Iterator iter = prevSeenItemIds.iterator(); 119 | while (iter.hasNext()) { 120 | String seenId = (String) iter.next(); 121 | if (thisPostId.equals(seenId)) { 122 | sawBefore = true; 123 | break; 124 | } 125 | } 126 | 127 | if (! sawBefore) { 128 | prevSeenItemIds.add(thisPostId); 129 | itemQueue.offer(p.getMessage()); 130 | } // on the other hand, if we saw it before then we do thing.. .just ignore 131 | } 132 | 133 | }; 134 | } 135 | 136 | 137 | private Connection getPostStream() { 138 | return facebookClient.fetchConnection( 139 | "search", 140 | Post.class, 141 | Parameter.with("q", queryString), 142 | Parameter.with("since", lastQueryTime), 143 | Parameter.with("type", "post")); 144 | } 145 | 146 | 147 | @Override 148 | public Object getNextItemIfAvailable() { 149 | return itemQueue.poll(); 150 | } 151 | 152 | private static String getFormattedDate(Date date) { 153 | String str; 154 | SimpleDateFormat sdf = GMT_DATE_FORMAT; 155 | return sdf.format(date); 156 | } 157 | 158 | } -------------------------------------------------------------------------------- /esper+storm+kafka/src/main/java/ServerAndThreadCoordinationUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/28/13 4 | * Time: 2:20 PM 5 | */ 6 | 7 | 8 | import java.io.*; 9 | import java.net.Socket; 10 | import java.util.Date; 11 | import java.util.Random; 12 | import java.util.Timer; 13 | import java.util.TimerTask; 14 | import java.util.concurrent.CountDownLatch; 15 | 16 | public class ServerAndThreadCoordinationUtils { 17 | 18 | public static void main(String[] args) { 19 | System.out.println("START"); 20 | 21 | Timer timer = ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(1000 *20); 22 | Thread thread = new Thread( 23 | new Runnable() { 24 | @Override 25 | public void run() { 26 | while (true) { 27 | System.out.println("JUNK-" + new Random().nextInt()); 28 | } 29 | } 30 | }, 31 | "threadBoy" 32 | ); 33 | thread.start(); 34 | } 35 | 36 | public static final String SENTINEL_FILE_PATH = "/tmp/go"; 37 | 38 | /** 39 | * Sets up a process termination task that will trigger if the given number of milliseconds 40 | * elapses and the test has not finished yet. We exit the JVM rather than just throwing an 41 | * exception because exceptions might be swallowed in the reams of output that could be produced 42 | * by Kafka and Storm servers that are running on threads that would not be stopped if we limited 43 | * ourselves to just throwing an exception. 
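 * Callers should hold on to the returned Timer and cancel() it once the test completes in time,
 * as AbstractStormWithKafkaTest does in its tearDown() method.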
44 | */ 45 | public static Timer setMaxTimeToRunTimer(int millisecs) { 46 | Date timeLimit = 47 | new Date(new Date().getTime() + millisecs); 48 | Timer timer = new Timer(); 49 | 50 | timer.schedule(new TimerTask() { 51 | 52 | @Override 53 | public void run() { 54 | for (int i = 0; i < 1000; i++) { 55 | System.out.println("aborting test ! Took too long"); 56 | } 57 | System.out.flush(); 58 | System.exit(-1); 59 | } 60 | }, timeLimit); 61 | 62 | return timer; 63 | } 64 | 65 | 66 | /** 67 | * Run in a tight sleep/wake loop until sentinel file (by default '/tmp/go') comes into 68 | * existence. We use this method in cases where we want to pause the flow of a test 69 | * but still be able to look around within zookeeper. If we were to merely pause in the 70 | * debugger then when we tried to connect to zookeeper to look around we would find the 71 | * server to be unresponsive (since the debugger pauses the whole process.) But if we use 72 | * the method below the zookeeper thread will still get some CPU cycles so we can connect to 73 | * it and examine its structure. 74 | */ 75 | public static void pauseUntil(String path) { 76 | if (path == null) { 77 | path = SENTINEL_FILE_PATH; 78 | } 79 | boolean fileExists = false; 80 | while (!fileExists) { 81 | File pauseFile = new File(path); 82 | if (!pauseFile.exists()) { 83 | try { 84 | Thread.sleep(500); 85 | } catch (InterruptedException e) { 86 | e.printStackTrace(); 87 | } 88 | } else { 89 | fileExists = true; 90 | } 91 | } 92 | } 93 | 94 | public static void removePauseSentinelFile() { 95 | File sentinel = new File(SENTINEL_FILE_PATH); 96 | //noinspection ResultOfMethodCallIgnored 97 | sentinel.delete(); 98 | if (sentinel.exists()) { 99 | throw new RuntimeException("Could not delete sentinel file"); 100 | } 101 | 102 | } 103 | 104 | 105 | public static String send4LetterWord(String host, int port, String cmd) 106 | throws IOException { 107 | System.out.println("connecting to " + host + " " + port); 108 | Socket sock = new Socket(host, port); 109 | BufferedReader reader = null; 110 | try { 111 | OutputStream outstream = sock.getOutputStream(); 112 | outstream.write(cmd.getBytes()); 113 | outstream.flush(); 114 | // this replicates NC - close the output stream before reading 115 | sock.shutdownOutput(); 116 | 117 | reader = 118 | new BufferedReader( 119 | new InputStreamReader(sock.getInputStream())); 120 | StringBuilder sb = new StringBuilder(); 121 | String line; 122 | while ((line = reader.readLine()) != null) { 123 | sb.append(line + "\n"); 124 | } 125 | return sb.toString(); 126 | } finally { 127 | sock.close(); 128 | if (reader != null) { 129 | reader.close(); 130 | } 131 | } 132 | } 133 | 134 | public static boolean waitForServerUp(String host, int port, long timeout) { 135 | long start = System.currentTimeMillis(); 136 | while (true) { 137 | try { 138 | // if there are multiple hostports, just take the first one 139 | String result = send4LetterWord(host, port, "stat"); 140 | System.out.println("result of send: " + result); 141 | if (result.startsWith("Zookeeper version:")) { 142 | return true; 143 | } 144 | } catch (IOException e) { 145 | // ignore as this is expected 146 | System.out.println("server " + host + ":" + port + " not up " + e); 147 | } 148 | 149 | if (System.currentTimeMillis() > start + timeout) { 150 | break; 151 | } 152 | try { 153 | Thread.sleep(250); 154 | } catch (InterruptedException e) { 155 | // ignore 156 | } 157 | } 158 | return false; 159 | } 160 | 161 | public static void await(CountDownLatch latch) { 162 | try 
{ 163 | latch.await(); 164 | } catch (InterruptedException e) { 165 | e.printStackTrace(); 166 | System.out.println("FATAL ERROR"); 167 | System.exit(-1); 168 | } 169 | } 170 | 171 | 172 | public static void countDown(CountDownLatch latch) { 173 | try { 174 | latch.countDown(); 175 | } catch (Exception e) { 176 | e.printStackTrace(); 177 | System.out.println("FATAL ERROR"); 178 | System.exit(-1); 179 | } 180 | } 181 | 182 | } 183 | -------------------------------------------------------------------------------- /kafka/src/main/java/TestKafkaProducer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/20/13 4 | * Time: 8:54 PM 5 | */ 6 | 7 | 8 | import com.google.common.collect.ImmutableMap; 9 | import com.google.common.io.Files; 10 | import com.netflix.curator.test.TestingServer; 11 | import kafka.consumer.ConsumerConfig; 12 | import kafka.consumer.ConsumerIterator; 13 | import kafka.consumer.KafkaStream; 14 | import kafka.javaapi.consumer.ConsumerConnector; 15 | import kafka.javaapi.producer.Producer; 16 | import kafka.javaapi.producer.ProducerData; 17 | import kafka.producer.ProducerConfig; 18 | import kafka.serializer.StringDecoder; 19 | import kafka.server.KafkaConfig; 20 | import kafka.server.KafkaServer; 21 | import org.apache.commons.lang.StringUtils; 22 | 23 | import java.io.File; 24 | import java.io.IOException; 25 | import java.util.*; 26 | 27 | 28 | class TestKafkaProducer { 29 | private String topic = ""; 30 | private String zkConnectString = ""; 31 | private List messages = null; 32 | private List messagesReceived = new ArrayList(); 33 | private Producer producer; 34 | private KafkaServer kafkaServer; 35 | private Thread kafkaMessageReceiverThread; 36 | 37 | private static final String RANDOM_GROUP_ID = "RANDOM-GROUP-ID"; 38 | 39 | public static void main(String[] args) { 40 | TestKafkaProducer tkp = null; 41 | 42 | boolean success = false; 43 | 44 | try (TestingServer zookeeperTestServer = new TestingServer()) { 45 | 46 | final String theTopic = "someTopic-" + new Random().nextInt(); 47 | 48 | tkp = new TestKafkaProducer( 49 | theTopic, 50 | "localhost:" + zookeeperTestServer.getPort(), 51 | 10); 52 | 53 | tkp.sendMessages(); 54 | 55 | tkp.consumeMessages(); 56 | tkp.shutdownConsumers(); 57 | tkp.kafkaMessageReceiverThread.join(); 58 | tkp.shutdown(); 59 | 60 | String got = StringUtils.join(tkp.messagesReceived, "+"); 61 | String expected = StringUtils.join(tkp.messages, "+"); 62 | if (got.equals(expected)) { 63 | success = true; 64 | } 65 | } catch (Exception e) { 66 | e.printStackTrace(); 67 | } 68 | if (! success) { 69 | throw new RuntimeException("oh rats... we failed"); 70 | } 71 | System.out.println("SUCCESS -- WE ARE HAPPY !..."); 72 | } 73 | 74 | private void consumeMessages() { 75 | final ConsumerConnector consumer = 76 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); 77 | final Map topicCountMap = ImmutableMap.of(topic, 1); 78 | final Map>> consumerMap; 79 | consumerMap = consumer.createMessageStreams(topicCountMap, new StringDecoder()); 80 | 81 | final KafkaStream stream = consumerMap.get(topic).get(0); 82 | final ConsumerIterator iterator = stream.iterator(); 83 | 84 | kafkaMessageReceiverThread = new Thread( 85 | new Runnable() { 86 | @Override 87 | public void run() { 88 | while (iterator.hasNext()) { 89 | String msg = iterator.next().message(); 90 | msg = msg == null ? 
"" : msg; 91 | System.out.println("got message" + msg); 92 | if (msg.equals("SHUTDOWN")) { 93 | consumer.shutdown(); 94 | return; 95 | } 96 | messagesReceived.add(msg); 97 | } 98 | } 99 | }, 100 | "kafkaMessageReceiverThread" 101 | ); 102 | kafkaMessageReceiverThread.start(); 103 | 104 | } 105 | 106 | 107 | private ConsumerConfig createConsumerConfig() { 108 | Properties props = new Properties(); 109 | props.put("zk.connect", this.zkConnectString); 110 | props.put("groupid", RANDOM_GROUP_ID); 111 | props.put("zk.sessiontimeout.ms", "400"); 112 | props.put("zk.synctime.ms", "200"); 113 | props.put("autocommit.interval.ms", "1000"); 114 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 115 | 116 | return new ConsumerConfig(props); 117 | 118 | } 119 | 120 | public void shutdownConsumers() { 121 | sendMessage("SHUTDOWN"); 122 | } 123 | 124 | 125 | public void shutdown() { 126 | producer.close(); 127 | kafkaServer.shutdown(); 128 | } 129 | 130 | 131 | TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException { 132 | final Random generator = new Random(); 133 | 134 | if (numRandomMessages <= 0) { 135 | throw new RuntimeException("no messages defined for test"); 136 | } 137 | 138 | messages = new ArrayList(); 139 | for (int i = 0; i < numRandomMessages; i++) { 140 | int num1 = Math.abs(generator.nextInt()); 141 | int num2 = Math.abs(generator.nextInt()); 142 | String messageToSend = num1 + ":-(a)-" + num2; 143 | messages.add(messageToSend); 144 | } 145 | 146 | 147 | this.topic = topic; 148 | 149 | this.zkConnectString = zkConnectString; 150 | initProducer(zkConnectString); 151 | } 152 | 153 | 154 | public void sendMessages() throws IOException { 155 | for (String msg : messages) { 156 | sendMessage(msg); 157 | } 158 | } 159 | 160 | private void sendMessage(String msg) { 161 | ProducerData data = new ProducerData(topic, msg); 162 | producer.send(data); 163 | } 164 | 165 | private void initProducer(String zkConnectString) throws IOException { 166 | kafkaServer = startKafkaServer(); 167 | 168 | 169 | Properties props = new Properties(); 170 | props.put("zk.connect", zkConnectString); 171 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 172 | props.put("producer.type", "async"); 173 | props.put("batch.size", "1"); 174 | ProducerConfig config = new ProducerConfig(props); 175 | 176 | producer = new Producer(config); 177 | } 178 | 179 | private KafkaServer startKafkaServer() { 180 | File tmpDir = Files.createTempDir(); 181 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1); 182 | KafkaConfig kafkaConfig = new KafkaConfig(props); 183 | 184 | kafkaServer = new KafkaServer(kafkaConfig); 185 | kafkaServer.startup(); 186 | return kafkaServer; 187 | } 188 | 189 | 190 | private Properties createProperties(String logDir, int port, int brokerId) { 191 | Properties properties = new Properties(); 192 | properties.put("port", port + ""); 193 | properties.put("brokerid", brokerId + ""); 194 | properties.put("log.dir", logDir); 195 | properties.put("zk.connect", this.zkConnectString); 196 | return properties; 197 | } 198 | 199 | } 200 | -------------------------------------------------------------------------------- /kafka-0.8.x/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.buildlackey 6 | kafka-producer-to-consumer-example 7 | kafka-producer-to-consumer-example 8 | 1.0 9 | jar 10 | Simple Kafka Produce/Consumer Example With In-Memory Kafka and 
Zookeeper Test Fixture Servers (Kafka 0.8.x) 11 | 12 | 13 | leadLackey 14 | Chris Bedford 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | org.scala-lang 24 | scala-library 25 | 2.9.3 26 | 27 | 28 | 29 | 30 | 31 | org.apache.kafka 32 | kafka_2.9.2 33 | 0.8.0-beta1 34 | 35 | 36 | com.sun.jmx 37 | jmxri 38 | 39 | 40 | 41 | com.sun.jdmk 42 | jmxtools 43 | 44 | 45 | 46 | javax.jms 47 | jms 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 60 | 61 | com.yammer.metrics 62 | metrics-core 63 | 2.2.0 64 | 65 | 66 | com.101tec 67 | zkclient 68 | 0.3 69 | 70 | 71 | 72 | log4j 73 | log4j 74 | 75 | 76 | 77 | 78 | 79 | 80 | net.sf.jopt-simple 81 | jopt-simple 82 | 4.5 83 | 84 | 85 | 86 | 87 | 88 | com.netflix.curator 89 | curator-test 90 | 1.2.5 91 | 92 | 93 | 94 | org.slf4j 95 | slf4j-log4j12 96 | 97 | 98 | log4j 99 | log4j 100 | 101 | 102 | 103 | 104 | 105 | 106 | org.apache.zookeeper 107 | zookeeper 108 | 3.4.1 109 | 110 | 111 | com.sun.jmx 112 | jmxri 113 | 114 | 115 | 116 | com.sun.jdmk 117 | jmxtools 118 | 119 | 120 | 121 | javax.jms 122 | jms 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | org.tomdz.storm 132 | storm-esper 133 | 0.8.1-SNAPSHOT 134 | 135 | 136 | 137 | org.testng 138 | testng 139 | 6.1.1 140 | test 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 2.2.1 154 | 155 | 156 | 157 | 158 | 159 | org.apache.maven.plugins 160 | maven-enforcer-plugin 161 | 1.1.1 162 | 163 | 164 | enforce-versions 165 | 166 | enforce 167 | 168 | 169 | 170 | 171 | 2.2.1 172 | 173 | 174 | 1.7 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | org.apache.maven.plugins 184 | maven-compiler-plugin 185 | 3.1 186 | 187 | 1.7 188 | 1.7 189 | 190 | 191 | 192 | 193 | org.apache.maven.plugins 194 | maven-jar-plugin 195 | 2.4 196 | 197 | 198 | org.apache.maven.plugins 199 | maven-source-plugin 200 | 2.2 201 | 202 | true 203 | 204 | 205 | 206 | create-source-jar 207 | 208 | jar-no-fork 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | clojars 218 | http://clojars.org/repo/ 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /kafka-0.8.x/src/main/java/TestKafkaProducer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 10/20/13 4 | * Time: 8:54 PM 5 | */ 6 | 7 | 8 | import com.google.common.collect.ImmutableMap; 9 | import com.google.common.io.Files; 10 | import com.netflix.curator.test.TestingServer; 11 | import kafka.consumer.ConsumerConfig; 12 | import kafka.consumer.ConsumerIterator; 13 | import kafka.consumer.KafkaStream; 14 | import kafka.javaapi.consumer.ConsumerConnector; 15 | import kafka.javaapi.producer.Producer; 16 | import kafka.producer.KeyedMessage; 17 | import kafka.producer.ProducerConfig; 18 | import kafka.serializer.StringDecoder; 19 | import kafka.server.KafkaConfig; 20 | import kafka.server.KafkaServer; 21 | import kafka.utils.MockTime; 22 | import kafka.utils.VerifiableProperties; 23 | import org.apache.commons.lang.StringUtils; 24 | 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.util.*; 28 | 29 | 30 | class TestKafkaProducer { 31 | private String topic = ""; 32 | private String zkConnectString = ""; 33 | private List messages = null; 34 | private List messagesReceived = new ArrayList(); 35 | private Producer producer; 36 | private KafkaServer kafkaServer; 37 | private Thread kafkaMessageReceiverThread; 38 | 39 | private static final String RANDOM_GROUP_ID = "RANDOM-GROUP-ID"; 40 | 41 | 
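    // Differences from the 0.7.x version in ../kafka: the producer is configured with
    // "metadata.broker.list" instead of "zk.connect", messages are sent as KeyedMessage rather than
    // ProducerData, StringDecoder now takes a VerifiableProperties argument, and KafkaServer is
    // constructed with an explicit Time instance (MockTime here).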
public static void main(String[] args) { 42 | TestKafkaProducer tkp = null; 43 | 44 | boolean success = false; 45 | 46 | try (TestingServer zookeeperTestServer = new TestingServer()) { 47 | 48 | final String theTopic = "someTopic-" + new Random().nextInt(); 49 | 50 | tkp = new TestKafkaProducer( 51 | theTopic, 52 | "localhost:" + zookeeperTestServer.getPort(), 53 | 4400); 54 | 55 | tkp.sendMessages(); 56 | tkp.consumeMessages(); 57 | 58 | try { // Give consumer some time... 59 | tkp.shutdownConsumers(); 60 | Thread.sleep(1000); 61 | tkp.kafkaMessageReceiverThread.join(); 62 | tkp.shutdown(); 63 | } catch (Exception e) { 64 | System.out.println("Error in shut down. we will ignore it as long as our messages came through"); 65 | e.printStackTrace(); 66 | } 67 | 68 | String got = StringUtils.join(tkp.messagesReceived, "+"); 69 | String expected = StringUtils.join(tkp.messages, "+"); 70 | if (got.equals(expected)) { 71 | success = true; 72 | } 73 | } catch (Exception e) { 74 | e.printStackTrace(); 75 | } 76 | if (! success) { 77 | throw new RuntimeException("oh rats... we failed"); 78 | } 79 | System.out.println("SUCCESS -- WE ARE HAPPY !..."); 80 | } 81 | 82 | private void consumeMessages() { 83 | final ConsumerConnector consumer = 84 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); 85 | final Map topicCountMap = 86 | ImmutableMap.of(topic, 1); 87 | final StringDecoder decoder = 88 | new StringDecoder(new VerifiableProperties()); 89 | final Map>> consumerMap = 90 | consumer.createMessageStreams(topicCountMap, decoder, decoder); 91 | final KafkaStream stream = 92 | consumerMap.get(topic).get(0); 93 | final ConsumerIterator iterator = stream.iterator(); 94 | 95 | kafkaMessageReceiverThread = new Thread( 96 | new Runnable() { 97 | @Override 98 | public void run() { 99 | while (iterator.hasNext()) { 100 | String msg = iterator.next().message(); 101 | msg = msg == null ? "" : msg; 102 | System.out.println("got message" + msg); 103 | if (msg.equals("SHUTDOWN")) { 104 | consumer.shutdown(); 105 | return; 106 | } 107 | messagesReceived.add(msg); 108 | } 109 | } 110 | }, 111 | "kafkaMessageReceiverThread" 112 | ); 113 | kafkaMessageReceiverThread.start(); 114 | 115 | } 116 | 117 | 118 | private ConsumerConfig createConsumerConfig() { 119 | Properties props = new Properties(); 120 | props.put("zookeeper.connect", this.zkConnectString); 121 | props.put("group.id", RANDOM_GROUP_ID); 122 | props.put("zk.sessiontimeout.ms", "400"); 123 | props.put("zk.synctime.ms", "200"); 124 | props.put("autocommit.interval.ms", "1000"); 125 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 126 | 127 | return new ConsumerConfig(props); 128 | 129 | } 130 | 131 | public void shutdownConsumers() { 132 | sendMessage("SHUTDOWN"); 133 | } 134 | 135 | 136 | public void shutdown() { 137 | producer.close(); 138 | try { // Give producer some time... 
139 | Thread.sleep(1000); 140 | } catch (InterruptedException e) { 141 | e.printStackTrace(); 142 | } 143 | 144 | kafkaServer.shutdown(); 145 | kafkaServer.awaitShutdown(); 146 | } 147 | 148 | 149 | TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException { 150 | final Random generator = new Random(); 151 | 152 | if (numRandomMessages <= 0) { 153 | throw new RuntimeException("no messages defined for test"); 154 | } 155 | 156 | messages = new ArrayList(); 157 | for (int i = 0; i < numRandomMessages; i++) { 158 | int num1 = Math.abs(generator.nextInt()); 159 | int num2 = Math.abs(generator.nextInt()); 160 | String messageToSend = num1 + ":-(a)-" + num2; 161 | messages.add(messageToSend); 162 | } 163 | 164 | 165 | this.topic = topic; 166 | 167 | this.zkConnectString = zkConnectString; 168 | initProducer(zkConnectString); 169 | } 170 | 171 | 172 | public void sendMessages() throws IOException { 173 | for (String msg : messages) { 174 | sendMessage(msg); 175 | } 176 | } 177 | 178 | private void sendMessage(String msg) { 179 | KeyedMessage data = new KeyedMessage(topic, msg); 180 | producer.send(data); 181 | } 182 | 183 | private void initProducer(String zkConnectString) throws IOException { 184 | kafkaServer = startKafkaServer(); 185 | Properties props = new Properties(); 186 | props.put("metadata.broker.list", "localhost:9092"); 187 | props.put("serializer.class", "kafka.serializer.StringEncoder"); 188 | props.put("producer.type", "async"); 189 | props.put("batch.size", "1"); 190 | ProducerConfig config = new ProducerConfig(props); 191 | 192 | producer = new Producer(config); 193 | } 194 | 195 | private KafkaServer startKafkaServer() { 196 | File tmpDir = Files.createTempDir(); 197 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1); 198 | KafkaConfig kafkaConfig = new KafkaConfig(props); 199 | 200 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime()); 201 | 202 | kafkaServer.startup(); 203 | return kafkaServer; 204 | } 205 | 206 | 207 | 208 | private Properties createProperties(String logDir, int port, int brokerId) { 209 | Properties properties = new Properties(); 210 | properties.put("port", port + ""); 211 | properties.put("broker.id", brokerId + ""); 212 | properties.put("log.dir", logDir); 213 | properties.put("zookeeper.connect", this.zkConnectString); 214 | return properties; 215 | } 216 | 217 | 218 | } 219 | -------------------------------------------------------------------------------- /esper+storm+kafka/src/test/java/AbstractStormWithKafkaTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: cbedford 3 | * Date: 11/1/13 4 | * Time: 5:00 PM 5 | */ 6 | 7 | 8 | import backtype.storm.Config; 9 | import backtype.storm.LocalCluster; 10 | import backtype.storm.generated.StormTopology; 11 | import com.google.common.io.Files; 12 | import kafka.admin.CreateTopicCommand; 13 | import kafka.server.KafkaConfig; 14 | import kafka.server.KafkaServer; 15 | import kafka.utils.MockTime; 16 | import org.testng.annotations.AfterClass; 17 | import org.testng.annotations.BeforeClass; 18 | 19 | import java.io.File; 20 | import java.util.Properties; 21 | import java.util.Timer; 22 | import java.util.concurrent.CountDownLatch; 23 | 24 | 25 | /** 26 | * Simplifies testing of Storm components that consume or produce data items from or to Kafka. 
27 | * Operates via a 'template method' series of steps, wherein the BeforeClass method sets up a 28 | * Storm Local cluster, then waits for the zookeeper instance started by that cluster to 'boot up', 29 | * then starts an-process Kafka server using that zookeeper, and then creates a topic whose 30 | * name is derived from the name of the base class test. 31 | *
32 | * Subclasses only need to implement the abstract createTopology() method (and perhaps 33 | * override 'verifyResults())' which is currently kind of hard coded to our first two subclasses of 34 | * this base class. 35 | */ 36 | public abstract class AbstractStormWithKafkaTest { 37 | public static String[] sentences = new String[]{ 38 | "one dog9 - saw the fox over the moon", 39 | "two cats9 - saw the fox over the moon", 40 | "four bears9 - saw the fox over the moon", 41 | "five goats9 - saw the fox over the moon", 42 | "SHUTDOWN", 43 | }; 44 | protected final String BROKER_CONNECT_STRING = "localhost:9092"; // kakfa broker server/port info 45 | private final String topicName = this.getClass().getSimpleName() + "_topic_" + getRandomInteger(1000); 46 | protected final String topologyName = this.getClass().getSimpleName() + "-topology" + getRandomInteger(1000); 47 | 48 | protected LocalCluster cluster = null; 49 | 50 | private final File kafkaWorkingDir = Files.createTempDir(); 51 | private final CountDownLatch kafkaTopicCreatedLatch = new CountDownLatch(1); 52 | private KafkaServer kafkaServer = null; 53 | private Timer timer; 54 | private Thread kafkaServerThread = null; 55 | 56 | @BeforeClass(alwaysRun = true) 57 | protected void setUp() { 58 | timer = ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(getMaxAllowedToRunMillisecs()); 59 | ServerAndThreadCoordinationUtils.removePauseSentinelFile(); 60 | cluster = new LocalCluster(); 61 | ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * KafkaOutputBoltTest.SECOND); // Wait for zookeeper to come up 62 | 63 | /* 64 | * Below we start up kafka and create topic in a separate thread. If we don't do this then we 65 | * get very bizarre behavior, such as tuples never being emitted from our spouts and bolts 66 | * as expected. Haven't figure out why this is needed... But doing it 'cause that's what makes 67 | * things work. 
68 | */ 69 | kafkaServerThread = new Thread( 70 | new Runnable() { 71 | @Override 72 | public void run() { 73 | startKafkaServer(); 74 | createTopic(getTopicName()); 75 | if (getSecondTopicName() != null) { 76 | createTopic(getSecondTopicName()); 77 | } 78 | ServerAndThreadCoordinationUtils.countDown(kafkaTopicCreatedLatch); 79 | } 80 | }, 81 | "kafkaServerThread" 82 | ); 83 | kafkaServerThread.start(); 84 | ServerAndThreadCoordinationUtils.await(kafkaTopicCreatedLatch); 85 | } 86 | 87 | 88 | public String getSecondTopicName() { 89 | return null; 90 | } 91 | 92 | 93 | abstract protected int getMaxAllowedToRunMillisecs(); 94 | 95 | @AfterClass(alwaysRun = true) 96 | protected void tearDown() { 97 | try { 98 | kafkaServerThread.join(); 99 | } catch (InterruptedException e) { 100 | e.printStackTrace(); 101 | } 102 | 103 | cluster.shutdown(); 104 | kafkaServer.shutdown(); 105 | timer.cancel(); 106 | } 107 | 108 | protected void createTopic(String topicName) { 109 | String[] arguments = new String[6]; 110 | arguments[0] = "--zookeeper"; 111 | arguments[1] = "localhost:2000"; 112 | arguments[2] = "--partition"; 113 | arguments[3] = "1"; 114 | arguments[4] = "--topic"; 115 | arguments[5] = topicName; 116 | 117 | CreateTopicCommand.main(arguments); 118 | } 119 | 120 | protected void startKafkaServer() { 121 | Properties props = createProperties(kafkaWorkingDir.getAbsolutePath(), 9092, 1); 122 | KafkaConfig kafkaConfig = new KafkaConfig(props); 123 | 124 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime()); 125 | kafkaServer.startup(); 126 | } 127 | 128 | protected String getZkConnect() { // Uses zookeeper created by LocalCluster 129 | 130 | return "localhost:2000"; 131 | } 132 | 133 | protected int getRandomInteger(int max) { 134 | return (int) Math.floor((Math.random() * max)); 135 | } 136 | 137 | private Properties createProperties(String logDir, int port, int brokerId) { 138 | Properties properties = new Properties(); 139 | properties.put("port", port + ""); 140 | properties.put("broker.id", brokerId + ""); 141 | properties.put("log.dir", logDir); 142 | properties.put("zookeeper.connect", getZkConnect()); 143 | return properties; 144 | } 145 | 146 | 147 | protected abstract StormTopology createTopology(); 148 | 149 | 150 | /** 151 | * @return a Config object with time outs set very high so that the storm to zookeeper 152 | * session will be kept alive, even as we are rooting around in a debugger. 
153 | */ 154 | public static Config getDebugConfigForStormTopology() { 155 | Config config = new Config(); 156 | config.setDebug(true); 157 | config.put(Config.STORM_ZOOKEEPER_CONNECTION_TIMEOUT, 900 * 1000); 158 | config.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 900 * 1000); 159 | return config; 160 | } 161 | 162 | public void verifyResults(String topic, int expectedCount) { 163 | if (topic == null) { 164 | topic = this.getTopicName(); 165 | } 166 | if (expectedCount == -1) { 167 | expectedCount = sentences.length; 168 | } 169 | 170 | int foundCount = 0; 171 | KafkaMessageConsumer msgConsumer = null; 172 | try { 173 | msgConsumer = new KafkaMessageConsumer(getZkConnect(), topic); 174 | msgConsumer.consumeMessages(); 175 | 176 | foundCount = 0; 177 | for (String msg : msgConsumer.getMessagesReceived()) { 178 | System.out.println("message: " + msg); 179 | if (msg.contains("cat") || 180 | msg.contains("dog") || 181 | msg.contains("bear") || 182 | msg.contains("goat") || 183 | msg.contains("SHUTDOWN")) { 184 | foundCount++; 185 | } 186 | } 187 | } catch (Exception e) { 188 | e.printStackTrace(); 189 | } 190 | 191 | if (foundCount != expectedCount) { 192 | if (msgConsumer != null) { 193 | System.out.println("Did not receive expected messages. Got: " + 194 | msgConsumer.getMessagesReceived()); 195 | } 196 | 197 | throw new RuntimeException(">>>>>>>>>>>>>>>>>>>> Did not receive expected messages"); 198 | } 199 | } 200 | 201 | protected void submitTopology() { 202 | 203 | final Config conf = getDebugConfigForStormTopology(); 204 | 205 | cluster.submitTopology(topologyName, conf, createTopology()); 206 | } 207 | 208 | public String getTopicName() { 209 | return topicName; 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /storm+kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.buildlackey 6 | kafka-spout-example 7 | kafka-spout-example 8 | 1.0 9 | jar 10 | Simple Kafka Spout Example 11 | 12 | 13 | leadLackey 14 | Chris Bedford 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | com.jayway.awaitility 24 | awaitility 25 | 1.3.5 26 | 27 | 28 | 29 | 30 | storm 31 | storm 32 | 0.9.0-rc2 33 | 34 | 35 | storm 36 | storm-core 37 | 0.9.0-rc2 38 | 39 | 40 | 41 | 42 | 43 | 67 | 68 | 69 | org.scala-lang 70 | scala-library 71 | 2.9.3 72 | 73 | 74 | 75 | 76 | 77 | org.apache.kafka 78 | kafka_2.9.2 79 | 0.8.0-beta1 80 | 81 | 82 | com.sun.jmx 83 | jmxri 84 | 85 | 86 | 87 | com.sun.jdmk 88 | jmxtools 89 | 90 | 91 | 92 | javax.jms 93 | jms 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 106 | 107 | com.yammer.metrics 108 | metrics-core 109 | 2.2.0 110 | 111 | 112 | com.101tec 113 | zkclient 114 | 0.3 115 | 116 | 117 | net.sf.jopt-simple 118 | jopt-simple 119 | 4.5 120 | 121 | 122 | 123 | 124 | 125 | com.netflix.curator 126 | curator-test 127 | 1.2.5 128 | 129 | 130 | 131 | org.slf4j 132 | slf4j-log4j12 133 | 134 | 135 | log4j 136 | log4j 137 | 138 | 139 | 140 | 141 | 142 | org.apache.zookeeper 143 | zookeeper 144 | 3.3.3 145 | 146 | 147 | com.sun.jmx 148 | jmxri 149 | 150 | 151 | 152 | com.sun.jdmk 153 | jmxtools 154 | 155 | 156 | 157 | javax.jms 158 | jms 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | org.testng 167 | testng 168 | 6.1.1 169 | test 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 2.2.1 183 | 184 | 185 | 186 | 187 | 188 | org.apache.maven.plugins 189 | maven-enforcer-plugin 190 | 1.1.1 191 | 192 | 193 | enforce-versions 194 | 195 | enforce 196 | 197 | 
198 | 199 | 200 | 2.2.1 201 | 202 | 203 | 1.6 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | org.apache.maven.plugins 213 | maven-compiler-plugin 214 | 3.1 215 | 216 | 1.6 217 | 1.6 218 | 219 | 220 | 221 | 222 | org.apache.maven.plugins 223 | maven-jar-plugin 224 | 2.4 225 | 226 | 227 | org.apache.maven.plugins 228 | maven-source-plugin 229 | 2.2 230 | 231 | true 232 | 233 | 234 | 235 | create-source-jar 236 | 237 | jar-no-fork 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | clojars 247 | http://clojars.org/repo/ 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /kafka/README.md: -------------------------------------------------------------------------------- 1 | # Kafka Produce/Consumer Example 2 | 3 | A Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers 4 | 5 | 6 | ## Description 7 | 8 | This example illustrates how to: 9 | 10 | * unit test message passing between Kafka producers and consumers 11 | using basic String serialization. 12 | 13 | * use of Netflix's curator API to instantiate an in-process zookeeper 14 | server, together with an in-memory instance of the 15 | kafka.server.KafkaServer class 16 | 17 | * ensure that all threads launched by Kafka and zookeeper are cleanly 18 | shutdown (this seem to be working pretty well so far.) 19 | 20 | 21 | 22 | By keeping all test fixtures in memory (rather than depending on out-of-process servers 23 | being 'somehow' set up before the test) we make it very easy to get the basics of 24 | Kafka working in the environments of other developers and/or build systems. 25 | 26 | 27 | The main problem with the initial cut of this test program is that I had to use some very strange 28 | dependencies in my maven pom.xml in order to be able to get everything working through a public 29 | repo. (See 'Current Issues', below.) 30 | 31 | 32 | 33 | 34 | ## Building and Running 35 | 36 | After downloading the project, cd to the directory in which this README is located, then issue the 2 commands: 37 | 38 | mvn clean install 39 | 40 | mvn exec:java -Dexec.mainClass=TestKafkaProducer 41 | 42 | If you see 'SUCCESS' printed out towards the very end, then you know everything is working. 43 | 44 | 45 | 46 | ## Implementation Details 47 | 48 | The test program pumps a small set of random messages from a producer to a consumer, and 49 | asserts that the messages received are identical with messages sent (lines 22-26, below.) 50 | 51 | 52 | The main method creates an instance of the Netflix's curator API TestingServer class 53 | with default parameters which cause it to select a random unused port, as well as a 54 | random temp directory for the zookeeper files (line 6). On line 12 we interogate 55 | zookeeperTestServer for its port to construct the zookeeper connect string 56 | ("zk.connect" property) used by both the Producer (created in initProducer) 57 | and the consumer (whose connect properties are created in createConsumerConfig, at line 70.) 
58 | 59 | 60 | 61 | 62 | Listing 1, Main Routine 63 | 64 | 1 public static void main(String[] args) { 65 | 2 TestKafkaProducer tkp = null; 66 | 3 67 | 4 boolean success = false; 68 | 5 69 | 6 try (TestingServer zookeeperTestServer = new TestingServer()) { 70 | 7 71 | 8 final String theTopic = "someTopic-" + new Random().nextInt(); 72 | 9 73 | 10 tkp = new TestKafkaProducer( 74 | 11 theTopic, 75 | 12 "localhost:" + zookeeperTestServer.getPort(), 76 | 13 10); 77 | 14 78 | 15 tkp.sendMessages(); 79 | 16 80 | 17 tkp.consumeMessages(); 81 | 18 tkp.shutdownConsumers(); 82 | 19 tkp.kafkaMessageReceiverThread.join(); 83 | 20 tkp.shutdown(); 84 | 21 85 | 22 String got = StringUtils.join(tkp.messagesReceived, "+"); 86 | 23 String expected = StringUtils.join(tkp.messages, "+"); 87 | 24 if (got.equals(expected)) { 88 | 25 success = true; 89 | 26 } 90 | 27 } catch (Exception e) { 91 | 28 e.printStackTrace(); 92 | 29 } 93 | 30 if (! success) { 94 | 31 throw new RuntimeException("oh rats... we failed"); 95 | 32 } 96 | 33 System.out.println("SUCCESS - WE ARE HAPPY !..."); 97 | 34 } 98 | 99 | .... 100 | 101 | 38 private void consumeMessages() { 102 | 39 final ConsumerConnector consumer = 103 | 40 kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); 104 | 41 final Map topicCountMap = ImmutableMap.of(topic, 1); 105 | 42 final Map>> consumerMap; 106 | 43 consumerMap = consumer.createMessageStreams(topicCountMap, new StringDecoder()); 107 | 44 108 | 45 final KafkaStream stream = consumerMap.get(topic).get(0); 109 | 46 final ConsumerIterator iterator = stream.iterator(); 110 | 47 111 | 48 kafkaMessageReceiverThread = new Thread( 112 | 49 new Runnable() { 113 | 50 @Override 114 | 51 public void run() { 115 | 52 while (iterator.hasNext()) { 116 | 53 String msg = iterator.next().message(); 117 | 54 msg = msg == null ? "" : msg; 118 | 55 System.out.println("got message" + msg); 119 | 56 if (msg.equals("SHUTDOWN")) { 120 | 57 consumer.shutdown(); 121 | 58 return; 122 | 59 } 123 | 60 messagesReceived.add(msg); 124 | 61 } 125 | 62 } 126 | 63 }, 127 | 64 "kafkaMessageReceiverThread" 128 | 65 ); 129 | 66 kafkaMessageReceiverThread.start(); 130 | 67 } 131 | 68 132 | 69 133 | 70 private ConsumerConfig createConsumerConfig() { 134 | 71 Properties props = new Properties(); 135 | 72 props.put("zk.connect", this.zkConnectString); 136 | 73 props.put("groupid", RANDOM_GROUP_ID); 137 | 74 props.put("zk.sessiontimeout.ms", "400"); 138 | 75 props.put("zk.synctime.ms", "200"); 139 | 76 props.put("autocommit.interval.ms", "1000"); 140 | 77 props.put("serializer.class", "kafka.serializer.StringEncoder"); 141 | 78 142 | 79 return new ConsumerConfig(props); 143 | 80 } 144 | 81 145 | 146 | 147 | The TestKafkaProducer constructor (line 83) sets up the producer in initProducer (line 117), 148 | and an array of random strings to send to the consumer (stored in the 'messages' 149 | member variable, at line 95) These messages are sent via sendMessages() at line 15 150 | (see Listing 1, above.) 
151 | 152 | 153 | Listing 2, TestKafkaProducer Constructor 154 | 155 | 156 | 83 TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException { 157 | 84 final Random generator = new Random(); 158 | 85 159 | 86 if (numRandomMessages <= 0) { 160 | 87 throw new RuntimeException("no messages defined for test"); 161 | 88 } 162 | 89 163 | 90 messages = new ArrayList(); 164 | 91 for (int i = 0; i < numRandomMessages; i++) { 165 | 92 int num1 = Math.abs(generator.nextInt()); 166 | 93 int num2 = Math.abs(generator.nextInt()); 167 | 94 String messageToSend = num1 + ":-(a)-" + num2; 168 | 95 messages.add(messageToSend); 169 | 96 } 170 | 97 171 | 98 172 | 99 this.topic = topic; 173 | 100 174 | 101 this.zkConnectString = zkConnectString; 175 | 102 initProducer(zkConnectString); 176 | 103 } 177 | 104 178 | 105 179 | 106 public void sendMessages() throws IOException { 180 | 107 for (String msg : messages) { 181 | 108 sendMessage(msg); 182 | 109 } 183 | 110 } 184 | 111 185 | 112 private void sendMessage(String msg) { 186 | 113 ProducerData data = new ProducerData(topic, msg); 187 | 114 producer.send(data); 188 | 115 } 189 | 116 190 | 117 private void initProducer(String zkConnectString) throws IOException { 191 | 118 kafkaServer = startKafkaServer(); 192 | 119 193 | 120 194 | 121 Properties props = new Properties(); 195 | 122 props.put("zk.connect", zkConnectString); 196 | 123 props.put("serializer.class", "kafka.serializer.StringEncoder"); 197 | 124 props.put("producer.type", "async"); 198 | 125 props.put("batch.size", "1"); 199 | 126 ProducerConfig config = new ProducerConfig(props); 200 | 127 201 | 128 producer = new Producer(config); 202 | 129 } 203 | 204 | 205 | 206 | Note that the sequence of events we follow after sending the messages is to launch a thread that 207 | consumes the messages (in consumeMessages at line 48.) We then get the consumer to shutdown cleanly 208 | by sending it a 'poison pill' 209 | ( see: http://books.google.com/books?id=EK43StEVfJIC&pg=PT172&lpg=PT172&dq=shut+down+poison+pill+queue&source=bl&ots=un-zA8wMgs&sig=EWSRAdzaFYlCBGc4NoGh8-TunIw&hl=en&sa=X&ei=qelmUsCeF6muyQGW-4DgAg&ved=0CHQQ6AEwCA#v=onepage&q=shut%20down%20poison%20pill%20queue&f=false ) 210 | 211 | This ensures that the consumer gets a chance to processes all pending messages and then call 'consumer.shutdown()' to cleanly shut down. 212 | We make sure that the consumer has completed its shut down by joining its thread (line 19), and only then do we shut down the producer 213 | (line 20.) 214 | 215 | 216 | 217 | ## Current Issues 218 | 219 | I have been struggling to find a Maven pom.xml recipe that will allow me to pull in an official version of 220 | Kafka from a public Maven repository. Kafka is a very recent project so many of the currently available on-line 221 | examples (as of this writing -- October of 2013) don't seem to build correctly out of the box (at least for me.) By contributing this project at least the 'run out of the box'requirement should be met. 222 | 223 | Many examples depend on using maven install-file to get a Kafka jar that you build yourself from sources into your local 224 | repo ($HOME/.m2/repository). A recent stack exchange article 225 | (see: http://stackoverflow.com/questions/17037209/where-can-i-find-maven-repository-for-kafka) 226 | suggests an official Kafka .jar is available, but I haven't figured out the Maven incantations to have 227 | my build download this .jar. 
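(For what it's worth: the newer modules in this repository -- kafka-0.8.x, storm+kafka and esper+storm+kafka -- do resolve Kafka from a public repository using the org.apache.kafka coordinates sketched below. That artifact is from the 0.8 line rather than the 0.7.x release this example targets, so treat it as a pointer rather than a drop-in fix; the exclusions work around transitive jmxri/jmxtools/jms dependencies that are not available from public repos.)

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.9.2</artifactId>
        <version>0.8.0-beta1</version>
        <exclusions>
            <exclusion>
                <groupId>com.sun.jmx</groupId>
                <artifactId>jmxri</artifactId>
            </exclusion>
            <exclusion>
                <groupId>com.sun.jdmk</groupId>
                <artifactId>jmxtools</artifactId>
            </exclusion>
            <exclusion>
                <groupId>javax.jms</groupId>
                <artifactId>jms</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
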
228 | 229 | If someone could provide me with a patch for 'the right way' to do this with Maven I will update my project 230 | accordingly.... Hopefully it will serve as a useful resource for other beginning Kafka developers. 231 | 232 | 233 | For now, I have hacked my dependencies so that the version of Kafka I use is pulled from a work-in-progress 234 | version of a storm-kafka integration project. Well... it works for now, but I'm concerned the 'wip' versions 235 | below will be deprecated. Then this project will loose its dependencies and fail to build properly. 236 | Also, I really shouldn't be introducing storm for this simple Kafka example at this point in any case. 237 | 238 | 239 | 240 | 241 | storm 242 | storm 243 | 0.9.0-wip17 244 | 245 | 246 | storm 247 | storm-core 248 | 0.9.0-wip17 249 | 250 | 251 | storm 252 | storm-kafka 253 | 0.9.0-wip16a-scala292 254 | 255 | 256 | -------------------------------------------------------------------------------- /esper+storm+kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.buildlackey 6 | esper-and-kafka-spout-example 7 | kafka-input-spout-with-kafka-output-bolt-and-esper-bolt-example 8 | 1.0 9 | jar 10 | Example Illustrating a Kafka Consumer Spout, a Kafka Producer Bolt, and an Esper Streaming Query Bolt 11 | 12 | 13 | 14 | leadLackey 15 | Chris Bedford 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | com.restfb 24 | restfb 25 | 1.6.12 26 | 27 | 28 | 29 | org.twitter4j 30 | twitter4j-core 31 | [3.0,) 32 | 33 | 34 | 35 | com.aliasi 36 | lingpipe 37 | 4.0.1 38 | 39 | 40 | 41 | org.twitter4j 42 | twitter4j-stream 43 | [3.0,) 44 | 45 | 46 | 47 | commons-collections 48 | commons-collections 49 | 3.2.1 50 | 51 | 52 | 53 | com.google.code.gson 54 | gson 55 | 1.7.1 56 | 57 | 58 | 59 | 60 | storm 61 | storm 62 | 0.9.0-rc2 63 | provided 64 | 65 | 66 | storm 67 | storm-core 68 | 0.9.0-rc2 69 | provided 70 | 71 | 72 | 73 | 74 | 75 | 82 | 83 | org.clojars.brenden 84 | storm-kafka-0.8-plus 85 | 0.1.3-SNAPSHOT 86 | 87 | 88 | org.apache.kafka 89 | kafka_2.10 90 | 91 | 92 | 93 | org.scala-lang 94 | scala-library 95 | 96 | 97 | 98 | 99 | 100 | 101 | org.scala-lang 102 | scala-library 103 | 2.9.3 104 | 105 | 106 | 107 | 108 | org.apache.kafka 109 | kafka_2.9.2 110 | 0.8.0-beta1 111 | 112 | 113 | com.sun.jmx 114 | jmxri 115 | 116 | 117 | 118 | com.sun.jdmk 119 | jmxtools 120 | 121 | 122 | 123 | javax.jms 124 | jms 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 137 | 138 | com.yammer.metrics 139 | metrics-core 140 | 2.2.0 141 | 142 | 143 | 144 | slf4j 145 | slf4j-api 146 | 147 | 148 | 149 | 150 | 151 | com.101tec 152 | zkclient 153 | 0.3 154 | 155 | 156 | log4j 157 | log4j 158 | 159 | 160 | 161 | 162 | 163 | net.sf.jopt-simple 164 | jopt-simple 165 | 4.5 166 | 167 | 168 | 172 | 173 | org.apache.zookeeper 174 | zookeeper 175 | 3.3.3 176 | 177 | 178 | org.slf4j 179 | slf4j-log4j12 180 | 181 | 182 | log4j 183 | log4j 184 | 185 | 186 | log4j 187 | log4j 188 | 189 | 190 | 191 | com.sun.jmx 192 | jmxri 193 | 194 | 195 | 196 | com.sun.jdmk 197 | jmxtools 198 | 199 | 200 | 201 | javax.jms 202 | jms 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | org.tomdz.storm 212 | storm-esper 213 | 0.8.1-SNAPSHOT 214 | 215 | 216 | 217 | 218 | 219 | 220 | com.netflix.curator 221 | curator-test 222 | 1.2.5 223 | 224 | 225 | 226 | org.slf4j 227 | slf4j-log4j12 228 | 229 | 230 | log4j 231 | log4j 232 | 233 | 234 | 235 | 236 | 237 | 238 | org.testng 239 | testng 240 | 6.1.1 241 | test 242 | 243 | 244 | 245 | 
org.easymock 246 | easymock 247 | 3.0 248 | test 249 | 250 | 251 | 252 | 253 | 254 | 2.2.1 255 | 256 | 257 | 258 | 259 | 260 | org.apache.maven.plugins 261 | maven-enforcer-plugin 262 | 1.1.1 263 | 264 | 265 | enforce-versions 266 | 267 | enforce 268 | 269 | 270 | 271 | 272 | 2.2.1 273 | 274 | 275 | 1.7 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | org.apache.maven.plugins 285 | maven-compiler-plugin 286 | 3.1 287 | 288 | 1.7 289 | 1.7 290 | 291 | 292 | 293 | 294 | org.apache.maven.plugins 295 | maven-jar-plugin 296 | 2.4 297 | 298 | 299 | org.apache.maven.plugins 300 | maven-source-plugin 301 | 2.2 302 | 303 | true 304 | 305 | 306 | 307 | create-source-jar 308 | 309 | jar-no-fork 310 | 311 | 312 | 313 | 314 | 315 | 321 | 322 | maven-assembly-plugin 323 | 324 | 325 | src/main/assembly/dep.xml 326 | 327 | 328 | 329 | EsperFilteredTwitterFeedTopology 330 | 331 | 332 | 333 | 334 | 335 | make-assembly 336 | package 337 | 338 | single 339 | 340 | 341 | 342 | 343 | 344 | 345 | 349 | 350 | org.codehaus.gmaven 351 | gmaven-plugin 352 | 353 | 354 | package 355 | 356 | execute 357 | 358 | 359 | 360 | File targetDir = new File("${project.basedir.path}/target".toString()) 361 | println "dir is ${targetDir.path}" 362 | String jarBaseName = "${project.artifactId}-${project.version}" 363 | File jarWithUnwantedStuff = new File(targetDir, "${jarBaseName}-jar.jar".toString()) 364 | 365 | def explodedJarDir = new File(targetDir, "explodedJar".toString()) 366 | def ant = new AntBuilder() // create an antbuilder 367 | ant.unzip(src: "${jarWithUnwantedStuff.path}", 368 | dest: explodedJarDir.path, 369 | overwrite: "false") 370 | File finalJar = new File(targetDir, "${jarBaseName}-deployable.jar") 371 | unwantedClassesDir = new File(explodedJarDir, "/org/slf4j/impl".toString()) 372 | unwantedClassesDir.deleteDir() 373 | ant.zip(basedir: explodedJarDir.path, destFile: finalJar.path) 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | clojars 385 | http://clojars.org/repo/ 386 | 387 | 388 | 389 | 390 | -------------------------------------------------------------------------------- /esper+storm+kafka/zookeeper.out: -------------------------------------------------------------------------------- 1 | 2013-10-27 15:24:21,823 [myid:] - INFO [main:QuorumPeerConfig@101] - Reading configuration from: /home/chris/esper/zookeeper/bin/../conf/zoo.cfg 2 | 2013-10-27 15:24:21,845 [myid:] - INFO [main:DatadirCleanupManager@78] - autopurge.snapRetainCount set to 3 3 | 2013-10-27 15:24:21,852 [myid:] - INFO [main:DatadirCleanupManager@79] - autopurge.purgeInterval set to 0 4 | 2013-10-27 15:24:21,855 [myid:] - INFO [main:DatadirCleanupManager@101] - Purge task is not scheduled. 
5 | 2013-10-27 15:24:21,858 [myid:] - WARN [main:QuorumPeerMain@113] - Either no config or no quorum defined in config, running in standalone mode 6 | 2013-10-27 15:24:21,927 [myid:] - INFO [main:QuorumPeerConfig@101] - Reading configuration from: /home/chris/esper/zookeeper/bin/../conf/zoo.cfg 7 | 2013-10-27 15:24:21,933 [myid:] - INFO [main:ZooKeeperServerMain@95] - Starting server 8 | 2013-10-27 15:24:21,978 [myid:] - INFO [main:Environment@100] - Server environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT 9 | 2013-10-27 15:24:21,980 [myid:] - INFO [main:Environment@100] - Server environment:host.name=ubuntu 10 | 2013-10-27 15:24:21,983 [myid:] - INFO [main:Environment@100] - Server environment:java.version=1.7.0_25 11 | 2013-10-27 15:24:21,984 [myid:] - INFO [main:Environment@100] - Server environment:java.vendor=Oracle Corporation 12 | 2013-10-27 15:24:21,986 [myid:] - INFO [main:Environment@100] - Server environment:java.home=/home/chris/Dropbox2/3rdparty/java/jdk1.7.0_25/jre 13 | 2013-10-27 15:24:21,987 [myid:] - INFO [main:Environment@100] - Server environment:java.class.path=/home/chris/esper/zookeeper/bin/../build/classes:/home/chris/esper/zookeeper/bin/../build/lib/*.jar:/home/chris/esper/zookeeper/bin/../lib/slf4j-log4j12-1.6.1.jar:/home/chris/esper/zookeeper/bin/../lib/slf4j-api-1.6.1.jar:/home/chris/esper/zookeeper/bin/../lib/netty-3.2.2.Final.jar:/home/chris/esper/zookeeper/bin/../lib/log4j-1.2.15.jar:/home/chris/esper/zookeeper/bin/../lib/jline-0.9.94.jar:/home/chris/esper/zookeeper/bin/../zookeeper-3.4.5.jar:/home/chris/esper/zookeeper/bin/../src/java/lib/*.jar:/home/chris/esper/zookeeper/bin/../conf: 14 | 2013-10-27 15:24:21,989 [myid:] - INFO [main:Environment@100] - Server environment:java.library.path=/usr/local/lib::/usr/java/packages/lib/i386:/lib:/usr/lib 15 | 2013-10-27 15:24:21,990 [myid:] - INFO [main:Environment@100] - Server environment:java.io.tmpdir=/tmp 16 | 2013-10-27 15:24:21,993 [myid:] - INFO [main:Environment@100] - Server environment:java.compiler= 17 | 2013-10-27 15:24:21,994 [myid:] - INFO [main:Environment@100] - Server environment:os.name=Linux 18 | 2013-10-27 15:24:21,995 [myid:] - INFO [main:Environment@100] - Server environment:os.arch=i386 19 | 2013-10-27 15:24:21,997 [myid:] - INFO [main:Environment@100] - Server environment:os.version=3.8.0-19-generic 20 | 2013-10-27 15:24:22,000 [myid:] - INFO [main:Environment@100] - Server environment:user.name=chris 21 | 2013-10-27 15:24:22,012 [myid:] - INFO [main:Environment@100] - Server environment:user.home=/home/chris 22 | 2013-10-27 15:24:22,014 [myid:] - INFO [main:Environment@100] - Server environment:user.dir=/home/chris/esper/cep/storm+kafka 23 | 2013-10-27 15:24:22,039 [myid:] - INFO [main:ZooKeeperServer@726] - tickTime set to 2000 24 | 2013-10-27 15:24:22,041 [myid:] - INFO [main:ZooKeeperServer@735] - minSessionTimeout set to -1 25 | 2013-10-27 15:24:22,042 [myid:] - INFO [main:ZooKeeperServer@744] - maxSessionTimeout set to -1 26 | 2013-10-27 15:24:22,107 [myid:] - INFO [main:NIOServerCnxnFactory@94] - binding to port 0.0.0.0/0.0.0.0:2181 27 | 2013-10-27 15:24:22,188 [myid:] - INFO [main:FileSnap@83] - Reading snapshot /tmp/data/version-2/snapshot.c 28 | 2013-10-27 15:24:30,338 [myid:] - INFO [main:FileTxnSnapLog@240] - Snapshotting: 0x5382 to /tmp/data/version-2/snapshot.5382 29 | 2013-10-27 15:24:52,002 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340016, timeout of 20000ms exceeded 30 | 2013-10-27 15:24:52,004 [myid:] - INFO 
[SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340013, timeout of 20000ms exceeded 31 | 2013-10-27 15:24:52,004 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d34001a, timeout of 20000ms exceeded 32 | 2013-10-27 15:24:52,005 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340014, timeout of 20000ms exceeded 33 | 2013-10-27 15:24:52,006 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340016 34 | 2013-10-27 15:24:52,007 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340013 35 | 2013-10-27 15:24:52,007 [myid:] - INFO [SyncThread:0:FileTxnLog@199] - Creating new log file: log.5383 36 | 2013-10-27 15:24:52,007 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d34001a 37 | 2013-10-27 15:24:52,011 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340014 38 | 2013-10-27 15:29:09,756 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50662 39 | 2013-10-27 15:29:09,778 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50663 40 | 2013-10-27 15:29:09,784 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50662 41 | 2013-10-27 15:29:09,800 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50663 42 | 2013-10-27 15:29:09,807 [myid:] - INFO [Thread-1:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50662 (no session established for client) 43 | 2013-10-27 15:29:09,817 [myid:] - INFO [Thread-2:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50663 (no session established for client) 44 | 2013-10-27 15:29:11,536 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50664 45 | 2013-10-27 15:29:11,538 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50664 46 | 2013-10-27 15:29:11,541 [myid:] - INFO [Thread-3:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50664 (no session established for client) 47 | 2013-10-27 15:29:14,035 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50665 48 | 2013-10-27 15:29:14,037 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50665 49 | 2013-10-27 15:29:14,041 [myid:] - INFO [Thread-4:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50665 (no session established for client) 50 | 2013-10-27 15:29:15,213 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50666 51 | 2013-10-27 15:29:15,242 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:ZooKeeperServer@839] - Client attempting to establish new session at /127.0.0.1:50666 52 | 2013-10-27 15:29:15,264 [myid:] - INFO [SyncThread:0:ZooKeeperServer@595] - Established session 
0x141fc04ba8d0000 with negotiated timeout 40000 for client /127.0.0.1:50666 53 | 2013-10-27 15:29:16,537 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50667 54 | 2013-10-27 15:29:16,538 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50667 55 | 2013-10-27 15:29:16,542 [myid:] - INFO [Thread-5:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50667 (no session established for client) 56 | 2013-10-27 15:29:19,037 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50668 57 | 2013-10-27 15:29:19,038 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50668 58 | 2013-10-27 15:29:19,042 [myid:] - INFO [Thread-6:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50668 (no session established for client) 59 | 2013-10-27 15:29:21,539 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50669 60 | 2013-10-27 15:29:21,542 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50669 61 | 2013-10-27 15:29:21,544 [myid:] - INFO [Thread-7:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50669 (no session established for client) 62 | 2013-10-27 15:29:24,039 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50670 63 | 2013-10-27 15:29:24,041 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50670 64 | 2013-10-27 15:29:24,045 [myid:] - INFO [Thread-8:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50670 (no session established for client) 65 | 2013-10-27 15:29:26,541 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50671 66 | 2013-10-27 15:29:26,542 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50671 67 | 2013-10-27 15:29:26,546 [myid:] - INFO [Thread-9:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50671 (no session established for client) 68 | 2013-10-27 15:29:29,042 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50672 69 | 2013-10-27 15:29:29,043 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50672 70 | 2013-10-27 15:29:29,052 [myid:] - INFO [Thread-10:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50672 (no session established for client) 71 | 2013-10-27 15:29:31,543 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50673 72 | 2013-10-27 15:29:31,544 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50673 73 | 2013-10-27 15:29:31,548 [myid:] - INFO [Thread-11:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50673 (no session established for client) 74 | 2013-10-27 15:29:34,044 [myid:] - INFO 
[NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50674 75 | 2013-10-27 15:29:34,045 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50674 76 | 2013-10-27 15:29:34,049 [myid:] - INFO [Thread-12:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50674 (no session established for client) 77 | 2013-10-27 15:29:36,546 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50675 78 | 2013-10-27 15:29:36,547 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50675 79 | 2013-10-27 15:29:36,550 [myid:] - INFO [Thread-13:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50675 (no session established for client) 80 | 2013-10-27 15:29:39,046 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50676 81 | 2013-10-27 15:29:39,048 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50676 82 | 2013-10-27 15:29:39,053 [myid:] - INFO [Thread-14:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50676 (no session established for client) 83 | 2013-10-27 15:29:41,548 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50677 84 | 2013-10-27 15:29:41,550 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50677 85 | 2013-10-27 15:29:41,552 [myid:] - INFO [Thread-15:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50677 (no session established for client) 86 | 2013-10-27 15:29:44,053 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50678 87 | 2013-10-27 15:29:44,054 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50678 88 | 2013-10-27 15:29:44,057 [myid:] - INFO [Thread-16:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50678 (no session established for client) 89 | 2013-10-27 15:29:46,552 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50680 90 | 2013-10-27 15:29:46,553 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50680 91 | 2013-10-27 15:29:46,555 [myid:] - INFO [Thread-17:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50680 (no session established for client) 92 | 2013-10-27 15:29:49,051 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50681 93 | 2013-10-27 15:29:49,053 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50681 94 | 2013-10-27 15:29:49,055 [myid:] - INFO [Thread-18:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50681 (no session established for client) 95 | 2013-10-27 15:29:51,554 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50682 96 | 2013-10-27 15:29:51,556 [myid:] - INFO 
[NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50682 97 | 2013-10-27 15:29:51,558 [myid:] - INFO [Thread-19:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50682 (no session established for client) 98 | 2013-10-27 15:29:54,054 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50683 99 | 2013-10-27 15:29:54,056 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50683 100 | 2013-10-27 15:29:54,058 [myid:] - INFO [Thread-20:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50683 (no session established for client) 101 | 2013-10-27 15:29:56,556 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50684 102 | 2013-10-27 15:29:56,558 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50684 103 | 2013-10-27 15:29:56,560 [myid:] - INFO [Thread-21:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50684 (no session established for client) 104 | 2013-10-27 15:29:59,057 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50685 105 | 2013-10-27 15:29:59,059 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50685 106 | 2013-10-27 15:29:59,060 [myid:] - INFO [Thread-22:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50685 (no session established for client) 107 | 2013-10-27 15:30:01,807 [myid:] - WARN [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@349] - caught end of stream exception 108 | EndOfStreamException: Unable to read additional data from client sessionid 0x141fc04ba8d0000, likely client has closed socket 109 | at org.apache.zookeeper.server.NIOServerCnxn.doIO(NIOServerCnxn.java:220) 110 | at org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:208) 111 | at java.lang.Thread.run(Thread.java:724) 112 | 2013-10-27 15:30:01,811 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50666 which had sessionid 0x141fc04ba8d0000 113 | -------------------------------------------------------------------------------- /storm+kafka/README.md: -------------------------------------------------------------------------------- 1 | # Kafka Spout Example 2 | 3 | 4 | Kafka Spout Integration Test With Local Storm Cluster, and In-Memory Kafka, and Zookeeper Instances 5 | 6 | 7 | ## Description 8 | 9 | This example illustrates how to: 10 | 11 | * push messages into Kafka and retrieves those messages with a Kafka/Storm sput. 12 | 13 | * set up your Kafka spout so that it reads all messages in its configured topic from the very first message 14 | in that topic (the default behavior), or so that it reads only the messages that are emitted to the topic after 15 | the spout has been initialized (to get the latter behavior, specify the --fromCurrent option as shown below.) 16 | 17 | * use the in process Zookeeper server that Storm's LocalCluster seems to 'hardwire' 18 | at port 2000 by default. 
19 | 20 | NOTE: there does not seem to be anyway to override LocalCluster's 21 | behavior of instantiating its own zookeeper instance by passing in our own Zookeeper 22 | instance and telling the LocalCluster about that instance via the Map argument 23 | passed to LocalCluster(Map)... Oh well.. this example shows a work-around for that. 24 | 25 | 26 | By keeping all test fixtures in memory (rather than depending on out-of-process servers 27 | being 'somehow' set up before the test) we make it very easy to get the basics of 28 | Kafka Storm integration working in the environments of other developers and/or build systems. 29 | 30 | 31 | 32 | ## Building and Running 33 | 34 | After downloading the project, cd to the directory in which this README is located, then issue the 2 commands below 35 | (note that the second command has two variants): 36 | 37 | mvn clean compile 38 | 39 | mvn exec:java -Dexec.mainClass=TestTopology 40 | .. or ... 41 | mvn exec:java -Dexec.mainClass=TestTopology -Dexec.args="--fromCurrent" 42 | 43 | 44 | 45 | If you see 'SUCCESSFUL COMPLETION' printed out towards the very end, then you know everything is working. 46 | 47 | 48 | 49 | ## Implementation Details 50 | 51 | The test program pumps a small set of random messages from a Kafka producer thread (started 52 | at line 77 of Listing 1) to a Kafka Spout consumer, and then asserts that the messages received are identical 53 | with messages sent (see the verifyResults method of Listing1, starting at line 122.) 54 | 55 | 56 | The main method creates an instance of the TestTopology class whose constructor instantiates an 57 | instance of a Storm LocalCluster. We use the Zookeeper server in that LocalCluster instance 58 | since there doesn't seem to be anyway to instantiate our own Zookeeper and pass that into the 59 | LocalCluster (as mentioned above.). Next, we wait for that Zookeeper instance to come up completely 60 | (line 69 of listing 1.) We then start our Kafka server using the Zookeeper instance created by LocalCluster. 61 | This is done by hard coding the default value for the Storm LocalCluster's self launched zookeeper 62 | server to its preferred host/port value (localhost:2000). See lines 74 and 103 of Listing 2. 63 | 64 | The Kafka producer thread kicked off at line 77 of listing 1 emits a batch of 4 messages 65 | BEFORE our topology is even initialized (line 40 of listing 2). After emitting that first batch 66 | the producer thread unleashes the countdown latch 'producerFinishedInitialBatchLatch'. 67 | This lets the main thread proceed from its wait at line 78 of listing 1. The main thread 68 | then sets up our test topology, which includes a Kafka spout configured to connect to the Zookeeper 69 | instance at port 2000. This the same zookeeper instance that we use when we configure 70 | the Kafka server, so it seems the Kafka spout discovers the 71 | Kafka broker it needs to connect with via Zookeeper. Our topology wires the 72 | Kafka spout to our VerboseCollectorBolt instance whose only job is to dump each tuple it receives 73 | to the console, and collect up each sentence it is transmitted. In verifyResults (line 102 of 74 | listing 1) we check to make sure that what the VerboseCollectorBolt has recorded actually matches what 75 | we know we have sent. 
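The source of ServerAndThreadCoordinationUtils.waitForServerUp (called at line 69 of Listing 1) is not reproduced here. A common way to implement such a readiness check is to poll ZooKeeper's 'ruok' four-letter command until the server answers 'imok', which is consistent with the repeated "Processing ruok command" entries seen in the zookeeper.out file elsewhere in this repository. The sketch below illustrates that pattern under the assumption that the project's utility behaves similarly; the class name and details are illustrative only, not the actual ServerAndThreadCoordinationUtils source.

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.Socket;

    public class ZkReadinessProbe {   // hypothetical helper, for illustration only

        // Polls ZooKeeper's "ruok" four-letter command until the server answers
        // "imok" or the timeout expires. Returns true if the server came up in time.
        public static boolean waitForServerUp(String host, int port, long timeoutMillis) {
            long deadline = System.currentTimeMillis() + timeoutMillis;
            while (System.currentTimeMillis() < deadline) {
                Socket sock = null;
                try {
                    sock = new Socket(host, port);
                    sock.getOutputStream().write("ruok".getBytes());
                    sock.getOutputStream().flush();
                    BufferedReader reader =
                            new BufferedReader(new InputStreamReader(sock.getInputStream()));
                    if ("imok".equals(reader.readLine())) {
                        return true;   // server answered the health probe
                    }
                } catch (IOException e) {
                    // server is not accepting connections yet -- fall through and retry
                } finally {
                    if (sock != null) {
                        try { sock.close(); } catch (IOException ignored) { }
                    }
                }
                try {
                    Thread.sleep(250);   // brief pause before the next probe
                } catch (InterruptedException e) {
                    return false;
                }
            }
            return false;
        }
    }
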
76 | 77 | Note that after we setup our topology (line 80 of Listing 1), we give it a few seconds to launch, then 78 | we unleash the topologyStartedLatch which causes the KafkaProducer thread to proceed from its wait 79 | point at line 43 of Listing 2 and emit the second batch of messages. 80 | 81 | 82 | Listing 1, TestTopology.java 83 | 84 | 1 /* 85 | 2 * Author: cbedford 86 | 3 * Date: 10/22/13 87 | 4 * Time: 8:50 PM 88 | 5 */ 89 | 6 90 | 7 91 | 8 import backtype.storm.Config; 92 | 9 import backtype.storm.LocalCluster; 93 | 10 import backtype.storm.spout.SchemeAsMultiScheme; 94 | 11 import backtype.storm.topology.TopologyBuilder; 95 | 12 import storm.kafka.*; 96 | 13 97 | 14 import java.util.ArrayList; 98 | 15 import java.util.List; 99 | 16 import java.util.Random; 100 | 17 import java.util.concurrent.CountDownLatch; 101 | 18 102 | 19 public class TestTopology { 103 | 20 104 | 21 105 | 22 final static int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 90 /* seconds */; 106 | 23 107 | 24 CountDownLatch topologyStartedLatch = new CountDownLatch(1); 108 | 25 109 | 26 private static int STORM_KAFKA_FROM_READ_FROM_START = -2; 110 | 27 private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1; 111 | 28 private static int readFromMode = STORM_KAFKA_FROM_READ_FROM_START; 112 | 29 private int expectedNumMessages = 8; 113 | 30 114 | 31 private static final int SECOND = 1000; 115 | 32 private static List messagesReceived = new ArrayList(); 116 | 33 117 | 34 private LocalCluster cluster = new LocalCluster(); 118 | 35 119 | 36 private static final String TOPIC_NAME = "big-topix-" + new Random().nextInt(); 120 | 37 volatile static boolean finishedCollecting = false; 121 | 38 122 | 39 private static String[] sentences = new String[]{ 123 | 40 "one dog9 - saw the fox over the moon", 124 | 41 "two cats9 - saw the fox over the moon", 125 | 42 "four bears9 - saw the fox over the moon", 126 | 43 "five goats9 - saw the fox over the moon", 127 | 44 }; 128 | 45 129 | 46 private KafkaProducer kafkaProducer = new KafkaProducer(sentences, TOPIC_NAME, topologyStartedLatch); 130 | 47 131 | 48 132 | 49 public static void recordRecievedMessage(String msg) { 133 | 50 synchronized (TestTopology.class) { // ensure visibility of list updates between threads 134 | 51 messagesReceived.add(msg); 135 | 52 } 136 | 53 } 137 | 54 138 | 55 139 | 56 public static void main(String[] args) { 140 | 57 TestTopology testTopology = new TestTopology(); 141 | 58 142 | 59 if (args.length == 1 && args[0].equals("--fromCurrent")) { 143 | 60 readFromMode = STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET; 144 | 61 testTopology.expectedNumMessages = 4; 145 | 62 } 146 | 63 147 | 64 testTopology.runTest(); 148 | 65 } 149 | 66 150 | 67 private void runTest() { 151 | 68 ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(MAX_ALLOWED_TO_RUN_MILLISECS); 152 | 69 ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * SECOND); // Wait for zookeeper to come up 153 | 70 154 | 71 kafkaProducer.startKafkaServer(); 155 | 72 kafkaProducer.createTopic(TOPIC_NAME); 156 | 73 157 | 74 try { 158 | 75 159 | 76 160 | 77 kafkaProducer.startProducer(); 161 | 78 ServerAndThreadCoordinationUtils.await(kafkaProducer.producerFinishedInitialBatchLatch); 162 | 79 163 | 80 setupKafkaSpoutAndSubmitTopology(); 164 | 81 try { 165 | 82 Thread.sleep(5000); // Would be nice to have a call back inform us when ready 166 | 83 } catch (InterruptedException e) { 167 | 84 e.printStackTrace(); 168 | 85 } 169 | 86 
ServerAndThreadCoordinationUtils.countDown(topologyStartedLatch); 170 | 87 171 | 88 awaitResults(); 172 | 89 } catch (InterruptedException e) { 173 | 90 e.printStackTrace(); 174 | 91 } 175 | 92 176 | 93 verifyResults(); 177 | 94 shutdown(); 178 | 95 System.out.println("SUCCESSFUL COMPLETION"); 179 | 96 System.exit(0); 180 | 97 } 181 | 98 182 | 99 183 | 100 184 | 101 private void awaitResults() { 185 | 102 while (!finishedCollecting) { 186 | 103 try { 187 | 104 Thread.sleep(500); 188 | 105 } catch (InterruptedException e) { 189 | 106 e.printStackTrace(); 190 | 107 } 191 | 108 } 192 | 109 193 | 110 // Sleep another couple of seconds in case any more messages than expected come into the bolt. 194 | 111 // In this case the bolt should throw a fatal error 195 | 112 try { 196 | 113 Thread.sleep(2000); 197 | 114 } catch (InterruptedException e) { 198 | 115 e.printStackTrace(); 199 | 116 } 200 | 117 201 | 118 202 | 119 System.out.println("after await"); 203 | 120 } 204 | 121 205 | 122 private void verifyResults() { 206 | 123 synchronized (TestTopology.class) { // ensure visibility of list updates between threads 207 | 124 int count = 0; 208 | 125 for (String msg : messagesReceived) { 209 | 126 if (msg.contains("cat") || msg.contains("dog") || msg.contains("bear") || msg.contains("goat")) { 210 | 127 count++; 211 | 128 } 212 | 129 } 213 | 130 if (count != expectedNumMessages) { 214 | 131 System.out.println(">>>>>>>>>>>>>>>>>>>>FAILURE - Did not receive expected messages"); 215 | 132 System.exit(-1); 216 | 133 } 217 | 134 218 | 135 } 219 | 136 } 220 | 137 221 | 138 private void setupKafkaSpoutAndSubmitTopology() throws InterruptedException { 222 | 139 BrokerHosts brokerHosts = new ZkHosts("localhost:2000"); 223 | 140 224 | 141 SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, TOPIC_NAME, "", "storm"); 225 | 142 kafkaConfig.forceStartOffsetTime(readFromMode /* either earliest or current offset */); 226 | 143 kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); 227 | 144 228 | 145 229 | 146 TopologyBuilder builder = new TopologyBuilder(); 230 | 147 builder.setSpout("words", new KafkaSpout(kafkaConfig), 1); 231 | 148 VerboseCollectorBolt bolt = new VerboseCollectorBolt(expectedNumMessages); 232 | 149 builder.setBolt("print", bolt).shuffleGrouping("words"); 233 | 150 234 | 151 235 | 152 Config config = new Config(); 236 | 153 237 | 154 cluster.submitTopology("kafka-test", config, builder.createTopology()); 238 | 155 } 239 | 156 240 | 157 private void shutdown() { 241 | 158 cluster.shutdown(); 242 | 159 kafkaProducer.shutdown(); 243 | 160 } 244 | 161 245 | 162 246 | 163 247 | 164 } 248 | 165 249 | 250 | 251 | 252 | 253 | Listing 2, KafkaProducer.java 254 | 255 | 1 import java.util.concurrent.CountDownLatch; 256 | 2 import com.google.common.io.Files; 257 | 3 import kafka.admin.CreateTopicCommand; 258 | 4 import kafka.javaapi.producer.Producer; 259 | 5 import kafka.producer.KeyedMessage; 260 | 6 import kafka.producer.ProducerConfig; 261 | 7 import kafka.server.KafkaConfig; 262 | 8 import kafka.server.KafkaServer; 263 | 9 import kafka.utils.MockTime; 264 | 10 265 | 11 import java.io.File; 266 | 12 import java.util.Properties; 267 | 13 268 | 14 269 | 15 public class KafkaProducer { 270 | 16 271 | 17 private KafkaServer kafkaServer = null; 272 | 18 private final String topicName; 273 | 19 274 | 20 275 | 21 CountDownLatch topologyStartedLatch; 276 | 22 public CountDownLatch producerFinishedInitialBatchLatch = new CountDownLatch(1); 277 | 23 278 | 24 279 | 25 Producer producer; 280 | 26 281 | 
27 private String[] sentences; 282 | 28 283 | 29 KafkaProducer(String[] sentences, String topicName, CountDownLatch topologyStartedLatch) { 284 | 30 this.sentences = sentences; 285 | 31 this.topicName = topicName; 286 | 32 this.topologyStartedLatch = topologyStartedLatch; 287 | 33 } 288 | 34 289 | 35 public Thread startProducer() { 290 | 36 Thread sender = new Thread( 291 | 37 new Runnable() { 292 | 38 @Override 293 | 39 public void run() { 294 | 40 emitBatch(); 295 | 41 ServerAndThreadCoordinationUtils. 296 | 42 countDown(producerFinishedInitialBatchLatch); 297 | 43 ServerAndThreadCoordinationUtils. 298 | 44 await(topologyStartedLatch); 299 | 45 emitBatch(); // emit second batch after we know topology is up 300 | 46 } 301 | 47 }, 302 | 48 "producerThread" 303 | 49 ); 304 | 50 sender.start(); 305 | 51 return sender; 306 | 52 } 307 | 53 308 | 54 private void emitBatch() { 309 | 55 Properties props = new Properties(); 310 | 56 props.put("metadata.broker.list", "localhost:9092"); 311 | 57 props.put("serializer.class", "kafka.serializer.StringEncoder"); 312 | 58 props.put("request.required.acks", "1"); 313 | 59 ProducerConfig config = new ProducerConfig(props); 314 | 60 Producer producer = new Producer(config); 315 | 61 316 | 62 for (String sentence : sentences) { 317 | 63 KeyedMessage data = 318 | 64 new KeyedMessage(topicName, sentence); 319 | 65 producer.send(data); 320 | 66 } 321 | 67 producer.close(); 322 | 68 323 | 69 } 324 | 70 325 | 71 public void createTopic(String topicName) { 326 | 72 String[] arguments = new String[8]; 327 | 73 arguments[0] = "--zookeeper"; 328 | 74 arguments[1] = "localhost:2000"; 329 | 75 arguments[2] = "--replica"; 330 | 76 arguments[3] = "1"; 331 | 77 arguments[4] = "--partition"; 332 | 78 arguments[5] = "1"; 333 | 79 arguments[6] = "--topic"; 334 | 80 arguments[7] = topicName; 335 | 81 336 | 82 CreateTopicCommand.main(arguments); 337 | 83 } 338 | 84 339 | 85 public void startKafkaServer() { 340 | 86 File tmpDir = Files.createTempDir(); 341 | 87 Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1); 342 | 88 KafkaConfig kafkaConfig = new KafkaConfig(props); 343 | 89 344 | 90 kafkaServer = new KafkaServer(kafkaConfig, new MockTime()); 345 | 91 kafkaServer.startup(); 346 | 92 } 347 | 93 348 | 94 public void shutdown() { 349 | 95 kafkaServer.shutdown(); 350 | 96 } 351 | 97 352 | 98 private Properties createProperties(String logDir, int port, int brokerId) { 353 | 99 Properties properties = new Properties(); 354 | 100 properties.put("port", port + ""); 355 | 101 properties.put("broker.id", brokerId + ""); 356 | 102 properties.put("log.dir", logDir); 357 | 103 properties.put("zookeeper.connect", "localhost:2000"); // Uses zookeeper created by LocalCluster 358 | 104 return properties; 359 | 105 } 360 | 106 361 | 107 } 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | ## Current Issues 372 | 373 | I am not confident that the Maven dependencies that I have put together are optimal, but they seem to work for now. 374 | 375 | ## Acknowledgements 376 | 377 | Got a good start from this github repo: https://github.com/wurstmeister 378 | 379 | 380 | 381 | --------------------------------------------------------------------------------