├── esper+storm+kafka
│   ├── doc
│   │   └── dataView.png
│   ├── src
│   │   ├── main
│   │   │   ├── java
│   │   │   │   ├── IFeedItemProvider.java
│   │   │   │   ├── JsonHelper.java
│   │   │   │   ├── RandomSentenceSpout.java
│   │   │   │   ├── WordCountTopology.java
│   │   │   │   ├── SentimentClassifier.java
│   │   │   │   ├── EmitSpecialWordGivenProbabilitySpout.java
│   │   │   │   ├── TopologyInitializer.java
│   │   │   │   ├── TwitterFeedItemProvider.java
│   │   │   │   ├── RandomSentenceGenerator.java
│   │   │   │   ├── ExternalFeedToKafkaAdapterSpout.java
│   │   │   │   ├── KafkaOutputBolt.java
│   │   │   │   ├── EsperFilteredTwitterFeedTopology.java
│   │   │   │   └── ServerAndThreadCoordinationUtils.java
│   │   │   └── assembly
│   │   │       └── dep.xml
│   │   └── test
│   │       └── java
│   │           ├── TestFeedItemProvider.java
│   │           ├── VerboseCollectorBolt.java
│   │           ├── SentenceSpout.java
│   │           ├── KafkaOutputBoltTest.java
│   │           ├── ExternalFeedToKafkaAdapterSpoutTest.java
│   │           ├── StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.java
│   │           ├── KafkaMessageConsumer.java
│   │           ├── VerifyItemsFromFeedAreSentToMockKafkaProducer.java
│   │           ├── ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.java
│   │           ├── FacebookFeedItemProvider.java
│   │           └── AbstractStormWithKafkaTest.java
│   ├── README.md
│   ├── pom.xml
│   └── zookeeper.out
├── .gitignore
├── kafka-0.8.x
│   ├── README.md
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── TestKafkaProducer.java
├── README.md
├── storm+kafka
│   ├── src
│   │   └── main
│   │       └── java
│   │           ├── VerboseCollectorBolt.java
│   │           ├── RandomSentenceGenerator.java
│   │           ├── KafkaProducer.java
│   │           ├── ServerAndThreadCoordinationUtils.java
│   │           └── TestTopology.java
│   ├── pom.xml
│   └── README.md
└── kafka
    ├── pom.xml
    ├── src
    │   └── main
    │       └── java
    │           └── TestKafkaProducer.java
    └── README.md
/esper+storm+kafka/doc/dataView.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buildlackey/cep/HEAD/esper+storm+kafka/doc/dataView.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Eclipse Junk
2 | .classpath
3 | .project
4 | .settings/
5 |
6 | # Intellij Junk
7 | .idea/
8 | *.iml
9 | *.iws
10 |
11 | # Maven Junk
12 | log/
13 | target/
14 |
15 |
16 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/IFeedItemProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/1/13
4 | * Time: 9:58 PM
5 | */
6 |
7 | import java.io.Serializable;
8 |
9 | public interface IFeedItemProvider extends Serializable {
10 | Runnable getRunnableTask();
11 | Object getNextItemIfAvailable();
12 | }
13 |
--------------------------------------------------------------------------------
/kafka-0.8.x/README.md:
--------------------------------------------------------------------------------
1 | # Kafka Spout Example
2 |
3 |
4 | A Simple Kafka Producer/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers - Kafka 0.8.*
5 |
6 |
7 | ## Description
8 |
9 | This example updates the code in ../kafka (which worked against 0.7.*) to the latest version at the
10 | time of this writing.
11 |
12 | Please see comments in the earlier version, here:
13 | https://github.com/buildlackey/cep/blob/master/kafka/README.md
14 |
15 |
16 | The previous document basically applies to this version, although some source code level
17 | statements have been tweaked to conform to the newer API.
18 |
19 |
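TestKafkaProducer.java itself is not reproduced in this document, but the essential 0.8-style
producer calls look roughly like the snippet below (the broker address and topic name are
placeholders; the property names mirror those used in storm+kafka/src/main/java/KafkaProducer.java):

    import java.util.Properties;

    import kafka.javaapi.producer.Producer;
    import kafka.producer.KeyedMessage;
    import kafka.producer.ProducerConfig;

    public class MinimalKafka08Producer {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("metadata.broker.list", "localhost:9092");              // broker started by the test fixture
            props.put("serializer.class", "kafka.serializer.StringEncoder");  // send plain strings
            props.put("request.required.acks", "1");                          // wait for the leader's ack

            Producer<String, String> producer =
                new Producer<String, String>(new ProducerConfig(props));
            producer.send(new KeyedMessage<String, String>("someTopic", "hello from kafka 0.8"));
            producer.close();
        }
    }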
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/assembly/dep.xml:
--------------------------------------------------------------------------------
1 | <assembly>
2 |     <id>jar</id>
3 |     <formats>
4 |         <format>jar</format>
5 |     </formats>
6 |     <includeBaseDirectory>false</includeBaseDirectory>
7 |
8 |     <dependencySets>
9 |         <dependencySet>
10 |             <outputDirectory>/</outputDirectory>
11 |             <useProjectArtifact>true</useProjectArtifact>
12 |
13 |             <unpack>true</unpack>
14 |             <scope>runtime</scope>
15 |
16 |             <excludes>
17 |                 <exclude>storm:storm</exclude>
18 |                 <exclude>ch.qos.logback:logback-classic</exclude>
19 |             </excludes>
20 |         </dependencySet>
21 |     </dependencySets>
22 |
23 | </assembly>
24 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/JsonHelper.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.tuple.Fields;
2 | import backtype.storm.tuple.Tuple;
3 | import com.google.gson.Gson;
4 |
5 | import java.io.Serializable;
6 | import java.util.HashMap;
7 | import java.util.List;
8 | import java.util.Map;
9 |
10 | public class JsonHelper implements Serializable {
11 |
12 | public static String toJson(Tuple input) {
13 | Fields fields = input.getFields();
14 | List<String> fieldNames = fields.toList();
15 |
16 | Map<String, Object> tupleAsMap = new HashMap<String, Object>();
17 | for (String fieldName : fieldNames) {
18 | tupleAsMap.put(fieldName, input.getValueByField(fieldName));
19 | }
20 |
21 | String json = new Gson().toJson(tupleAsMap);
22 | System.out.println("====++++++++++++++++++++++++++::> tuple as Json:" + json);
23 | return json;
24 | }
25 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | This project includes:
4 |
5 | * unit tests and sample programs that illustrate how to
6 |   develop complex event processing (CEP) applications on top of Storm, Kafka
7 |   and Esper.
8 |
9 | * a Wiki containing notes on best practices and guidelines for using
10 |   the above frameworks for CEP development.
11 |
12 |
13 |
14 | Subdirectories:
15 |
16 |
17 |
18 |
19 | kafka
20 |
21 | includes
22 |
23 | * A Simple Kafka Producer/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers
24 |
25 | (for Kafka 0.7.x)
26 |
27 |
28 | kafka-0.8.x
29 |
30 | includes
31 |
32 | * A Simple Kafka Producer/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers
33 |
34 | (for Kafka 0.8.x)
35 |
36 | storm+kafka
37 |
38 | includes
39 |
40 | * An Integration test that pushes messages into Kafka and retrieves those messages with a Kafka/Storm spout.
41 |
42 |
43 |
44 | esper+storm+kafka
45 |
46 | includes
47 |
48 |
49 | * An integration test that pushes messages into Kafka, pulls them back (via a KafkaSpout), filters them using Esper, and dumps them back out to a second Kafka topic (via KafkaOutputBolt).
50 |
51 |
52 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/TestFeedItemProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/4/13
4 | * Time: 6:01 PM
5 | */
6 |
7 |
8 | import java.util.concurrent.ConcurrentLinkedQueue;
9 |
10 | class TestFeedItemProvider implements IFeedItemProvider {
11 | ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue();
12 |
13 | private String[] sentences = ExternalFeedToKafkaAdapterSpoutTest.sentences; // default
14 |
15 | TestFeedItemProvider() {}
16 |
17 | TestFeedItemProvider(String[] sentences) {
18 | this.sentences = sentences;
19 | }
20 |
21 | @Override
22 | public Runnable getRunnableTask() {
23 | return new Runnable() {
24 | @Override
25 | public void run() {
26 | for (String sentence : sentences) {
27 | itemQueue.offer(sentence);
28 | }
29 | try {
30 | Thread.sleep(1000 * 100);
31 | } catch (InterruptedException e) {
32 | e.printStackTrace(); // do something more meaningful here?
33 | }
34 | }
35 | };
36 | }
37 |
38 | @Override
39 | public Object getNextItemIfAvailable() {
40 | return itemQueue.poll();
41 | }
42 | }
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/RandomSentenceSpout.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.spout.SpoutOutputCollector;
2 | import backtype.storm.task.TopologyContext;
3 | import backtype.storm.topology.OutputFieldsDeclarer;
4 | import backtype.storm.topology.base.BaseRichSpout;
5 | import backtype.storm.tuple.Fields;
6 | import backtype.storm.tuple.Values;
7 | import backtype.storm.utils.Utils;
8 |
9 | import java.util.Map;
10 | import java.util.Random;
11 |
12 | public class RandomSentenceSpout extends BaseRichSpout {
13 | SpoutOutputCollector _collector;
14 | Random _rand;
15 |
16 |
17 | @Override
18 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
19 | _collector = collector;
20 | _rand = new Random();
21 | }
22 |
23 | @Override
24 | public void nextTuple() {
25 | Utils.sleep(100);
26 | String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away",
27 | "four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" };
28 | String sentence = sentences[_rand.nextInt(sentences.length)];
29 | System.out.println("EMITTING+++++++++++++++++++++++++++++++>"+sentence);
30 | _collector.emit(new Values(sentence));
31 | }
32 |
33 | @Override
34 | public void ack(Object id) {
35 | }
36 |
37 | @Override
38 | public void fail(Object id) {
39 | }
40 |
41 | @Override
42 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
43 | declarer.declare(new Fields("word"));
44 | }
45 |
46 | }
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/WordCountTopology.java:
--------------------------------------------------------------------------------
1 |
2 | import backtype.storm.Config;
3 | import backtype.storm.LocalCluster;
4 | import backtype.storm.StormSubmitter;
5 | import backtype.storm.task.ShellBolt;
6 | import backtype.storm.topology.BasicOutputCollector;
7 | import backtype.storm.topology.IRichBolt;
8 | import backtype.storm.topology.OutputFieldsDeclarer;
9 | import backtype.storm.topology.TopologyBuilder;
10 | import backtype.storm.topology.base.BaseBasicBolt;
11 | import backtype.storm.tuple.Fields;
12 | import backtype.storm.tuple.Tuple;
13 | import backtype.storm.tuple.Values;
14 |
15 | import java.util.HashMap;
16 | import java.util.Map;
17 |
18 | /**
19 |  * Minimal example: despite the class name, this topology just runs a RandomSentenceSpout (on a LocalCluster, or via StormSubmitter when args are given); no word-count bolts are wired in.
20 | */
21 | public class WordCountTopology {
22 |
23 |
24 | public static void main(String[] args) throws Exception {
25 |
26 | TopologyBuilder builder = new TopologyBuilder();
27 |
28 | builder.setSpout("spout", new RandomSentenceSpout(), 5);
29 |
30 |
31 | Config conf = new Config();
32 | conf.setDebug(true);
33 |
34 |
35 | if (args != null && args.length > 0) {
36 | conf.setNumWorkers(2);
37 |
38 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
39 | }
40 | else {
41 | conf.setMaxTaskParallelism(3);
42 |
43 | LocalCluster cluster = new LocalCluster();
44 | cluster.submitTopology("word-count", conf, builder.createTopology());
45 |
46 | Thread.sleep(20000);
47 |
48 | cluster.shutdown();
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/VerboseCollectorBolt.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.topology.BasicOutputCollector;
2 | import backtype.storm.topology.OutputFieldsDeclarer;
3 | import backtype.storm.topology.base.BaseBasicBolt;
4 | import backtype.storm.tuple.Tuple;
5 |
6 | import java.util.concurrent.CountDownLatch;
7 |
8 | public class VerboseCollectorBolt extends BaseBasicBolt {
9 |
10 |
11 | private int expectedNumMessages;
12 | private int countReceivedMessages = 0;
13 |
14 | VerboseCollectorBolt(int expectedNumMessages) {
15 | this.expectedNumMessages = expectedNumMessages;
16 | }
17 |
18 |
19 |
20 | public void prepare(java.util.Map stormConf, backtype.storm.task.TopologyContext context) {
21 |
22 | }
23 |
24 |
25 | @Override
26 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
27 | }
28 |
29 | @Override
30 | public void execute(Tuple tuple, BasicOutputCollector collector) {
31 | final String msg = tuple.toString();
32 |
33 | countReceivedMessages++;
34 | String info = " recvd: " + countReceivedMessages + " expected: " + expectedNumMessages;
35 | System.out.println(info + " >>>>>>>>>>>>>" + msg);
36 |
37 | if (countReceivedMessages == expectedNumMessages) {
38 | System.out.println(" +++++++++++++++++++++ MARKING");
39 | StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.finishedCollecting = true;
40 | }
41 |
42 | if (countReceivedMessages > expectedNumMessages) {
43 | System.out.print("Fatal error: too many messages received");
44 | System.exit(-1);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/storm+kafka/src/main/java/VerboseCollectorBolt.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.topology.BasicOutputCollector;
2 | import backtype.storm.topology.OutputFieldsDeclarer;
3 | import backtype.storm.topology.base.BaseBasicBolt;
4 | import backtype.storm.tuple.Tuple;
5 |
6 | import java.util.concurrent.CountDownLatch;
7 |
8 | public class VerboseCollectorBolt extends BaseBasicBolt {
9 |
10 |
11 | private int expectedNumMessages;
12 | private int countReceivedMessages = 0;
13 |
14 | VerboseCollectorBolt(int expectedNumMessages) {
15 | this.expectedNumMessages = expectedNumMessages;
16 | }
17 |
18 |
19 |
20 | public void prepare(java.util.Map stormConf, backtype.storm.task.TopologyContext context) {
21 |
22 | }
23 |
24 |
25 | @Override
26 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
27 | }
28 |
29 | @Override
30 | public void execute(Tuple tuple, BasicOutputCollector collector) {
31 | final String msg = tuple.toString();
32 |
33 | countReceivedMessages++;
34 | String info = " recvd: " + countReceivedMessages + " expected: " + expectedNumMessages;
35 | System.out.println(info + " >>>>>>>>>>>>>" + msg);
36 |
37 | TestTopology.recordRecievedMessage(msg);
38 | if (countReceivedMessages == expectedNumMessages) {
39 | System.out.println(" +++++++++++++++++++++ MARKING");
40 | TestTopology.finishedCollecting = true;
41 | }
42 |
43 | if (countReceivedMessages > expectedNumMessages) {
44 | System.out.print("Fatal error: too many messages received");
45 | System.exit(-1);
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/SentimentClassifier.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/10/13
4 | * Time: 7:57 PM
5 | */
6 |
7 |
8 | import com.aliasi.classify.ConditionalClassification;
9 | import com.aliasi.classify.LMClassifier;
10 | import com.aliasi.util.AbstractExternalizable;
11 |
12 |
13 | import java.io.BufferedReader;
14 | import java.io.File;
15 | import java.io.FileReader;
16 | import java.io.IOException;
17 |
18 | public class SentimentClassifier {
19 | String[] categories;
20 | LMClassifier clazz;
21 |
22 | public static void main(String[] args) throws IOException {
23 | SentimentClassifier classifier = new SentimentClassifier();
24 |
25 | File tweets = new File("/tmp/tweets");
26 | BufferedReader br = new BufferedReader(new FileReader(tweets));
27 | String line;
28 | while ((line = br.readLine()) != null) {
29 | String classification = classifier.classify(line);
30 | System.out.println(classification + ": | " + line);
31 | }
32 | br.close();
33 | }
34 |
35 | public SentimentClassifier() {
36 | try {
37 | File serializedClassifier = new File("/home/chris/esper/TwitterSentiment-master/classifier.obj");
38 | clazz = (LMClassifier) AbstractExternalizable.readObject(serializedClassifier);
39 | categories = clazz.categories();
40 | } catch (Exception e) {
41 | e.printStackTrace();
42 | }
43 | }
44 |
45 | public String classify(String text) {
46 | ConditionalClassification classification = clazz.classify(text);
47 | return classification.bestCategory();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/SentenceSpout.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/31/13
4 | * Time: 2:16 PM
5 | */
6 |
7 |
8 | import backtype.storm.spout.SpoutOutputCollector;
9 | import backtype.storm.task.TopologyContext;
10 | import backtype.storm.topology.OutputFieldsDeclarer;
11 | import backtype.storm.topology.base.BaseRichSpout;
12 | import backtype.storm.tuple.Fields;
13 | import backtype.storm.tuple.Values;
14 | import backtype.storm.utils.Utils;
15 |
16 | import java.util.Map;
17 |
18 | public class SentenceSpout extends BaseRichSpout {
19 | private transient SpoutOutputCollector collector;
20 |
21 | private static String[] sentences;
22 | private int sentenceIndex = 0;
23 |
24 | SentenceSpout(String[] sentences) {
25 | this.sentences = sentences;
26 | }
27 |
28 | @Override
29 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
30 | this.collector = collector;
31 | }
32 |
33 | @Override
34 | public void nextTuple() {
35 | try {
36 | Thread.sleep(10);
37 | } catch (InterruptedException e) {
38 | e.printStackTrace();
39 | }
40 | if (sentenceIndex < sentences.length) {
41 | String sentence = sentences[sentenceIndex];
42 | System.out.println("+++++++++++++++++ >>> output sentence: " + sentence);
43 | collector.emit(new Values(sentence));
44 | sentenceIndex++;
45 | }
46 | }
47 |
48 | @Override
49 | public void ack(Object id) {
50 | }
51 |
52 | @Override
53 | public void fail(Object id) {
54 | }
55 |
56 | @Override
57 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
58 | declarer.declare(new Fields("sentence"));
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/KafkaOutputBoltTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.generated.StormTopology;
9 | import backtype.storm.topology.IRichSpout;
10 | import backtype.storm.topology.TopologyBuilder;
11 | import org.testng.annotations.Test;
12 |
13 | import java.io.IOException;
14 |
15 |
16 | public class KafkaOutputBoltTest extends AbstractStormWithKafkaTest {
17 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 10 /* seconds */;
18 | protected static final int SECOND = 1000;
19 |
20 | private static String[] sentences = new String[]{
21 | "one dog9 - saw the fox over the moon",
22 | "two cats9 - saw the fox over the moon",
23 | "four bears9 - saw the fox over the moon",
24 | "five goats9 - saw the fox over the moon",
25 | "SHUTDOWN",
26 | };
27 |
28 |
29 | @Test
30 | public void runTestWithTopology() throws IOException {
31 | submitTopology();
32 | verifyResults(null, -1);
33 |
34 | }
35 |
36 |
37 | protected StormTopology createTopology() {
38 | TopologyBuilder builder = new TopologyBuilder();
39 | IRichSpout spout = new SentenceSpout(sentences);
40 | KafkaOutputBolt kafkaOutputBolt =
41 | new KafkaOutputBolt(BROKER_CONNECT_STRING, getTopicName(), null, false);
42 |
43 | builder.setSpout("sentenceSpout", spout);
44 | builder.setBolt("kafkaOutputBolt", kafkaOutputBolt, 1)
45 | .shuffleGrouping("sentenceSpout");
46 |
47 | return builder.createTopology();
48 | }
49 |
50 |
51 | protected int getMaxAllowedToRunMillisecs() {
52 | return KafkaOutputBoltTest.MAX_ALLOWED_TO_RUN_MILLISECS;
53 | }
54 | }
55 |
56 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/ExternalFeedToKafkaAdapterSpoutTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.generated.StormTopology;
9 | import backtype.storm.topology.IRichSpout;
10 | import backtype.storm.topology.TopologyBuilder;
11 | import org.testng.annotations.Test;
12 |
13 | import java.io.IOException;
14 |
15 | /**
16 | * In this test messages from an external feed (a hard coded array of strings) are dumped into a
17 | * Kafka topic by an instance of ExternalFeedToKafkaAdapterSpout. We then use an instance of
18 | * KafkaMessageConsumer to pull those messages off the topic, and verify that what we
19 | * got is equal to what we expect.
20 | */
21 | @Test
22 | public class ExternalFeedToKafkaAdapterSpoutTest extends AbstractStormWithKafkaTest {
23 |
24 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 30 /* seconds */;
25 | protected static final int SECOND = 1000;
26 |
27 |
28 | @Test
29 | public void runTestWithTopology() throws IOException {
30 | submitTopology();
31 | try {
32 | Thread.sleep(1000 * 5);
33 | } catch (InterruptedException e) {
34 | e.printStackTrace(); // do something more meaningful here?
35 | }
36 | verifyResults(null, -1);
37 |
38 | }
39 |
40 | @Override
41 | protected StormTopology createTopology() {
42 | TopologyBuilder builder = new TopologyBuilder();
43 | IRichSpout feedSpout =
44 | new ExternalFeedToKafkaAdapterSpout(
45 | new TestFeedItemProvider(),
46 | BROKER_CONNECT_STRING,
47 | getTopicName(), null);
48 | builder.setSpout("externalFeedSpout", feedSpout);
49 |
50 |
51 | return builder.createTopology();
52 | }
53 |
54 |
55 | protected int getMaxAllowedToRunMillisecs() {
56 | return ExternalFeedToKafkaAdapterSpoutTest.MAX_ALLOWED_TO_RUN_MILLISECS;
57 | }
58 | }
59 |
60 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/EmitSpecialWordGivenProbabilitySpout.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.spout.SpoutOutputCollector;
2 | import backtype.storm.task.TopologyContext;
3 | import backtype.storm.topology.OutputFieldsDeclarer;
4 | import backtype.storm.topology.base.BaseRichSpout;
5 | import backtype.storm.tuple.Fields;
6 |
7 | import java.util.Map;
8 |
9 | import static backtype.storm.utils.Utils.tuple;
10 |
11 | public class EmitSpecialWordGivenProbabilitySpout extends BaseRichSpout
12 | {
13 | private static final long serialVersionUID = 1L;
14 |
15 | private final String specialWord;
16 | private final int sleepMillisecsAfterEmission;
17 | private final double defaultWordEmissionProbability;
18 | private transient SpoutOutputCollector collector;
19 |
20 | public EmitSpecialWordGivenProbabilitySpout(String specialWord, double emissionProbability, int timesPerSec) {
21 | if (emissionProbability < 0 || emissionProbability > 1.0 ) {
22 | throw new IllegalArgumentException("Probability must be between 0 and 1.0");
23 | }
24 | this.specialWord = specialWord;
25 | this.defaultWordEmissionProbability = 1.0 - emissionProbability;
26 | this.sleepMillisecsAfterEmission = 1000 / timesPerSec;
27 | }
28 |
29 | @Override
30 | public void declareOutputFields(OutputFieldsDeclarer declarer)
31 | {
32 | declarer.declare(new Fields("word"));
33 |
34 | }
35 |
36 | @Override
37 | public void nextTuple()
38 | {
39 | String wordToEmit = "default";
40 | if (Math.random() > defaultWordEmissionProbability) {
41 | wordToEmit = specialWord + Math.floor ((Math.random() * 100));
42 | }
43 | collector.emit( tuple(wordToEmit));
44 | System.out.println("+++++emitted: " + wordToEmit);
45 |
46 | try {
47 | Thread.sleep(sleepMillisecsAfterEmission);
48 | } catch (InterruptedException e) {
49 | System.out.println("Fatal error");
50 | System.exit(-1);
51 | }
52 | }
53 |
54 | @Override
55 | public void open(@SuppressWarnings("rawtypes") Map conf,
56 | TopologyContext context,
57 | SpoutOutputCollector collector)
58 | {
59 | this.collector = collector;
60 | }
61 |
62 | @Override
63 | public void close() {}
64 |
65 | @Override
66 | public void ack(Object msgId) {}
67 |
68 | @Override
69 | public void fail(Object msgId) {}
70 | }
71 |
--------------------------------------------------------------------------------
/esper+storm+kafka/README.md:
--------------------------------------------------------------------------------
1 | # Kafka Spout Example
2 |
3 |
4 |
5 | Example Illustrating a Kafka Consumer Spout, a Kafka Producer Bolt, and an Esper Streaming Query Bolt
6 |
7 | ## Description
8 |
9 |
10 | The test class 'ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest' illustrates how to wire up Kafka, Storm and Esper. In this test, ExternalFeedToKafkaAdapterSpout pushes messages into a topic. These messages are then routed into an EsperBolt which uses the Esper query language to do some simple filtering. We then route the filtered messages to a KafkaOutputBolt which dumps them onto a second topic. Finally, we use an instance of KafkaMessageConsumer to pull those messages off the second topic, and we verify that what we got is equal to what we expect.
11 |
12 | We use Thomas Dudziak's storm-esper library to bind an Esper query processing engine instance to a Storm bolt (more info on that library is available here: http://tomdzk.wordpress.com/2011/09/28/storm-esper).
13 |
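For reference, the Esper wiring used by this project's TopologyInitializer boils down to the
sketch below (the component id "kafkaSpout", the tuple field "str", the event type "OneWordMsg"
and the query itself are the values used in that class; the snippet is illustrative only):

    import org.tomdz.storm.esper.EsperBolt;

    public class EsperFilterBoltExample {
        // Mirrors TopologyInitializer.createEsperBolt(): batch tuples arriving from the
        // "kafkaSpout" component two at a time, and emit only those whose "str" field
        // contains "at".
        public static EsperBolt buildFilterBolt() {
            String esperQuery =
                "select str as found from OneWordMsg.win:length_batch(2) where str like '%at%'";
            return new EsperBolt.Builder()
                    .inputs().aliasComponent("kafkaSpout")
                             .withFields("str").ofType(String.class).toEventType("OneWordMsg")
                    .outputs().onDefaultStream().emit("found")
                    .statements().add(esperQuery)
                    .build();
        }
    }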
14 |
15 |
16 | A list of the main components involved in this example follows:
17 |
18 | KafkaOutputBolt
19 |
20 | A first-pass implementation of a generic Kafka output bolt: it takes whatever tuple it
21 | receives, JSON-ifies it, and dumps it on the Kafka topic that is configured in the
22 | constructor (a minimal sketch follows this list of components).
23 |
24 | ExternalFeedToKafkaAdapterSpout
25 |
26 | Accepts an IFeedItemProvider instance (running on a separate thread spawned by this
27 | adapter spout) that is responsible for acquiring data from an external source, which
28 | is then transferred to the adapter spout to be deposited on a Kafka topic (the name
29 | of which is set as an argument to the adapter spout constructor.)
30 |
31 |
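KafkaOutputBolt's full source lives under src/main/java; as a rough sketch of the behaviour
described above (JSON-ify the incoming tuple and hand it to a Kafka producer), a minimal bolt
could look like the code below. The producer properties mirror the ones used elsewhere in this
repo, and the sketch omits the serializer-class and 'raw mode' constructor arguments that the
real class accepts (see its usage in the tests):

    import java.util.Map;
    import java.util.Properties;

    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.BasicOutputCollector;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseBasicBolt;
    import backtype.storm.tuple.Tuple;
    import kafka.javaapi.producer.Producer;
    import kafka.producer.KeyedMessage;
    import kafka.producer.ProducerConfig;

    public class SimpleKafkaOutputBolt extends BaseBasicBolt {
        private final String brokerConnectString;
        private final String topicName;
        private transient Producer<String, String> producer;

        public SimpleKafkaOutputBolt(String brokerConnectString, String topicName) {
            this.brokerConnectString = brokerConnectString;
            this.topicName = topicName;
        }

        @Override
        public void prepare(Map stormConf, TopologyContext context) {
            Properties props = new Properties();
            props.put("metadata.broker.list", brokerConnectString);
            props.put("serializer.class", "kafka.serializer.StringEncoder");
            props.put("request.required.acks", "1");
            producer = new Producer<String, String>(new ProducerConfig(props));
        }

        @Override
        public void execute(Tuple tuple, BasicOutputCollector collector) {
            // JSON-ify the whole tuple using this repo's JsonHelper, then publish it
            String json = JsonHelper.toJson(tuple);
            producer.send(new KeyedMessage<String, String>(topicName, json));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // terminal bolt: nothing to declare
        }
    }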
32 |
33 | Testing Support:
34 |
35 | AbstractStormWithKafkaTest
36 |
37 | Simplifies testing of Storm components that consume or produce data items from or to Kafka.
38 | Operates via a 'template method' series of steps, wherein the BeforeClass method sets up a
39 | Storm Local cluster, then waits for the zookeeper instance started by that cluster to 'boot up',
40 | then starts an in-process Kafka server using that zookeeper, and then creates a topic whose
41 | name is derived from the name of the concrete test class.
42 |
43 | Subclasses only need to implement the abstract createTopology() method (and perhaps
44 | override verifyResults(), which is currently somewhat hard coded to the first two
45 | subclasses of this base class); a trimmed example follows.
46 |
47 |
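For example, KafkaOutputBoltTest (included in this repo) needs little more than a
createTopology() implementation; here it is trimmed for illustration (imports, the test
sentences array, and getMaxAllowedToRunMillisecs() are omitted):

    public class KafkaOutputBoltTest extends AbstractStormWithKafkaTest {

        @Test
        public void runTestWithTopology() throws IOException {
            submitTopology();          // base class has already started the local cluster and embedded Kafka
            verifyResults(null, -1);   // base class pulls messages back off the topic and compares
        }

        protected StormTopology createTopology() {
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout("sentenceSpout", new SentenceSpout(sentences));
            builder.setBolt("kafkaOutputBolt",
                    new KafkaOutputBolt(BROKER_CONNECT_STRING, getTopicName(), null, false), 1)
                .shuffleGrouping("sentenceSpout");
            return builder.createTopology();
        }
    }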
48 |
49 |
50 |
51 |
52 | ## Building and Running
53 |
54 | After downloading the project, cd to the directory in which this README is located, then issue the command below:
55 |
56 | mvn clean compile test
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/TopologyInitializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/12/13
4 | * Time: 4:58 PM
5 | */
6 |
7 |
8 | import backtype.storm.generated.StormTopology;
9 | import backtype.storm.spout.SchemeAsMultiScheme;
10 | import backtype.storm.topology.IRichSpout;
11 | import backtype.storm.topology.TopologyBuilder;
12 | import org.tomdz.storm.esper.EsperBolt;
13 | import storm.kafka.*;
14 |
15 | public class TopologyInitializer {
16 | public static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1;
17 | public static int STORM_KAFKA_FROM_READ_FROM_START = -2;
18 |
19 | public static StormTopology createTopology(String zookeeperConnectString,
20 | String kafkaBrokerConnectString,
21 | String inputTopic,
22 | String outputTopic,
23 | IFeedItemProvider feedItemProvider,
24 | boolean kafkaOutputBoltRawMode) {
25 | TopologyBuilder builder = new TopologyBuilder();
26 | IRichSpout feedSpout =
27 | new ExternalFeedToKafkaAdapterSpout(
28 | feedItemProvider,
29 | kafkaBrokerConnectString,
30 | inputTopic, null);
31 | EsperBolt esperBolt = createEsperBolt();
32 | KafkaOutputBolt kafkaOutputBolt =
33 | new KafkaOutputBolt(kafkaBrokerConnectString, outputTopic, null, kafkaOutputBoltRawMode);
34 |
35 | builder.setSpout("externalFeedSpout", feedSpout); // these spouts are bound together by shared topic
36 | builder.setSpout("kafkaSpout", createKafkaSpout(zookeeperConnectString, inputTopic));
37 |
38 | builder.setBolt("esperBolt", esperBolt, 1)
39 | .shuffleGrouping("kafkaSpout");
40 | builder.setBolt("kafkaOutputBolt", kafkaOutputBolt, 1)
41 | .shuffleGrouping("esperBolt");
42 | return builder.createTopology();
43 | }
44 |
45 | public static EsperBolt createEsperBolt() {
46 | String esperQuery=
47 | "select str as found from OneWordMsg.win:length_batch(2) where str like '%at%'";
48 | EsperBolt esperBolt = new EsperBolt.Builder()
49 | .inputs().aliasComponent("kafkaSpout").
50 | withFields("str").ofType(String.class).toEventType("OneWordMsg")
51 | .outputs().onDefaultStream().emit("found")
52 | .statements().add(esperQuery)
53 | .build();
54 | return esperBolt;
55 | }
56 |
57 | public static KafkaSpout createKafkaSpout(String zkConnect, String topicName) {
58 | BrokerHosts brokerHosts = new ZkHosts(zkConnect);
59 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, topicName, "", "storm");
60 | kafkaConfig.forceStartOffsetTime(STORM_KAFKA_FROM_READ_FROM_START);
61 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
62 | return new KafkaSpout(kafkaConfig);
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.generated.StormTopology;
9 | import backtype.storm.spout.SchemeAsMultiScheme;
10 | import backtype.storm.topology.IRichSpout;
11 | import backtype.storm.topology.TopologyBuilder;
12 | import org.testng.annotations.Test;
13 | import storm.kafka.*;
14 |
15 | import java.io.IOException;
16 |
17 |
18 | /**
19 | * This test builds on ExternalFeedToKafkaAdapterSpoutTest. The external feed messages are dumped
20 | * into a Kafka topic by ExternalFeedToKafkaAdapterSpout as in the first test. We add the second step
21 | * of pulling the messages from the topic by a KafkaSpout and making sure those messages are what
22 | * we expect. To clarify: ExternalFeedToKafkaAdapterSpout pushes messages into a topic, and KafkaSpout
23 | * pulls messages out of a topic.
24 | */
25 | @Test
26 | public class StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest extends AbstractStormWithKafkaTest {
27 | protected static volatile boolean finishedCollecting = false;
28 |
29 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 20 /* seconds */;
30 | protected static final int SECOND = 1000;
31 |
32 | private static int STORM_KAFKA_FROM_READ_FROM_START = -2;
33 | private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1;
34 |
35 |
36 | @Test
37 | public void runTestWithTopology() throws IOException {
38 | System.out.println("topic: " + getTopicName());
39 | submitTopology();
40 | waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt();
41 | verifyResults(null, -1);
42 |
43 | }
44 |
45 | private void waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt() {
46 | while (!finishedCollecting) {
47 | try {
48 | Thread.sleep(500);
49 | } catch (InterruptedException e) {
50 | e.printStackTrace();
51 | }
52 | }
53 | System.out.println("DONE");
54 | }
55 |
56 | @Override
57 | protected StormTopology createTopology() {
58 | TopologyBuilder builder = new TopologyBuilder();
59 | IRichSpout feedSpout =
60 | new ExternalFeedToKafkaAdapterSpout(
61 | new TestFeedItemProvider(),
62 | BROKER_CONNECT_STRING,
63 | getTopicName(), null);
64 | builder.setSpout("externalFeedSpout", feedSpout);
65 | builder.setSpout("kafkaSpout", createKafkaSpout());
66 | VerboseCollectorBolt bolt = new VerboseCollectorBolt(5);
67 | builder.setBolt("collector", bolt).shuffleGrouping("kafkaSpout");
68 |
69 | return builder.createTopology();
70 | }
71 |
72 |
73 | private KafkaSpout createKafkaSpout() {
74 | BrokerHosts brokerHosts = new ZkHosts(getZkConnect());
75 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, getTopicName(), "", "storm");
76 | kafkaConfig.forceStartOffsetTime(STORM_KAFKA_FROM_READ_FROM_START);
77 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
78 | return new KafkaSpout(kafkaConfig);
79 | }
80 |
81 |
82 | protected int getMaxAllowedToRunMillisecs() {
83 | return StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest.MAX_ALLOWED_TO_RUN_MILLISECS;
84 | }
85 | }
86 |
87 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/KafkaMessageConsumer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/31/13
4 | * Time: 2:37 PM
5 | */
6 |
7 |
8 | import com.google.common.collect.ImmutableMap;
9 | import kafka.consumer.ConsumerConfig;
10 | import kafka.consumer.ConsumerIterator;
11 | import kafka.consumer.KafkaStream;
12 | import kafka.javaapi.consumer.ConsumerConnector;
13 | import kafka.serializer.StringDecoder;
14 | import kafka.utils.VerifiableProperties;
15 |
16 | import java.util.ArrayList;
17 | import java.util.List;
18 | import java.util.Map;
19 | import java.util.Properties;
20 |
21 |
22 | /**
23 | * Uses Kafka high level consumer API to read from the topic passed in as a constructor argument and
24 | * accumulates all messages read in so that after the test the received messages can be obtained by a
25 | * call to getMessagesReceived(). This enables test driver code to verify that sent messages actually
26 | * equal received messages.
27 | */
28 | public class KafkaMessageConsumer {
29 | private final String zkConnect;
30 |
31 | private List<String> messagesReceived = new ArrayList<String>();
32 | private final String topic;
33 | private final String groupId = "KafkaMessageConsumer." + Math.random();
34 |
35 | public KafkaMessageConsumer(String zkConnect, String topic) {
36 | this.zkConnect = zkConnect;
37 | this.topic = topic;
38 | }
39 |
40 | public List<String> consumeMessages() {
41 |
42 | System.out.println("topic in kafka consumer: " + topic);
43 | try {
44 | final ConsumerConnector consumer =
45 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
46 | final Map<String, Integer> topicCountMap = ImmutableMap.of(topic, 1);
47 | final Map<String, List<KafkaStream<String, String>>> consumerMap;
48 |
49 | StringDecoder decoder = new StringDecoder(new VerifiableProperties());
50 | consumerMap = consumer.createMessageStreams(topicCountMap, decoder, decoder);
51 |
52 | final KafkaStream<String, String> stream = consumerMap.get(topic).get(0);
53 | final ConsumerIterator<String, String> iterator = stream.iterator();
54 | while (iterator.hasNext()) {
55 | String msg = iterator.next().message();
56 | msg = ( msg == null ? "" : msg );
57 | System.out.println("got message" + msg);
58 | messagesReceived.add(msg);
59 | if (msg.contains("SHUTDOWN")) {
60 | consumer.shutdown();
61 | return messagesReceived;
62 | }
63 | }
64 | } catch (Exception e) {
65 | e.printStackTrace();
66 | }
67 | return messagesReceived;
68 | }
69 |
70 | public List<String> getMessagesReceived() {
71 | return messagesReceived;
72 | }
73 |
74 |
75 | private ConsumerConfig createConsumerConfig() {
76 | Properties props = new Properties();
77 | props.put("zookeeper.connect", zkConnect);
78 | props.put("group.id", groupId);
79 | props.put("zk.sessiontimeout.ms", "400");
80 | props.put("fetch.min.bytes", "1");
81 | props.put("auto.offset.reset", "smallest");
82 | props.put("zk.synctime.ms", "200");
83 | props.put("autocommit.interval.ms", "1000");
84 | props.put("serializer.class", "kafka.serializer.StringEncoder");
85 |
86 | return new ConsumerConfig(props);
87 | }
88 |
89 | }
90 |
91 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/TwitterFeedItemProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/10/13
4 | * Time: 1:19 PM
5 | */
6 |
7 |
8 | import twitter4j.*;
9 | import twitter4j.conf.Configuration;
10 | import twitter4j.conf.ConfigurationBuilder;
11 |
12 | import java.io.IOException;
13 | import java.util.List;
14 | import java.util.concurrent.ConcurrentLinkedQueue;
15 |
16 |
17 | public class TwitterFeedItemProvider implements IFeedItemProvider {
18 | private final ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue();
19 |
20 | private final String oAuthConsumerKey;
21 | private final String oAuthConsumerSecret;
22 | private final String oAuthAccessToken;
23 | private final String oAuthAccessTokenSecret;
24 | private final String[] searchTerms;
25 |
26 |
27 | public class TwitterListener implements StatusListener {
28 | @Override
29 | public void onStatus(Status status) {
30 | String text = status.getText();
31 | if (status.isRetweet()) {
32 | text = status.getRetweetedStatus().getText();
33 | }
34 | itemQueue.offer(text);
35 | }
36 |
37 | @Override
38 | public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
39 | }
40 |
41 | @Override
42 | public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
43 | }
44 |
45 | @Override
46 | public void onScrubGeo(long userId, long upToStatusId) {
47 | }
48 |
49 | @Override
50 | public void onStallWarning(StallWarning warning) {
51 | }
52 |
53 | @Override
54 | public void onException(Exception ex) {
55 | ex.printStackTrace();
56 | }
57 | }
58 |
59 | /*
60 |
61 | TwitterFeedItemProvider(List searchTermsList) {
62 | this.searchTerms = searchTermsList.toArray(new String[searchTermsList.size()]);
63 | }
64 | */
65 |
66 | TwitterFeedItemProvider(final String oAuthConsumerKey,
67 | final String oAuthConsumerSecret,
68 | final String oAuthAccessToken,
69 | final String oAuthAccessTokenSecret,
70 | String... terms) {
71 | this.oAuthConsumerKey = oAuthConsumerKey;
72 | this.oAuthConsumerSecret = oAuthConsumerSecret;
73 | this.oAuthAccessToken = oAuthAccessToken;
74 | this.oAuthAccessTokenSecret = oAuthAccessTokenSecret;
75 |
76 | this.searchTerms = terms;
77 | }
78 |
79 | @Override
80 | public Runnable getRunnableTask() {
81 | return new Runnable() {
82 | @Override
83 | public void run() {
84 | TwitterStream twitterStream = getTwitterStream();
85 | twitterStream.addListener(new TwitterListener());
86 | long[] followArray = new long[0];
87 | twitterStream.filter(new FilterQuery(0, followArray, searchTerms));
88 | }
89 | };
90 | }
91 |
92 | private TwitterStream getTwitterStream() {
93 | TwitterStream twitterStream;
94 | ConfigurationBuilder builder = new ConfigurationBuilder();
95 | builder.setOAuthConsumerKey(oAuthConsumerKey);
96 | builder.setOAuthConsumerSecret(oAuthConsumerSecret);
97 | builder.setOAuthAccessToken(oAuthAccessToken);
98 | builder.setOAuthAccessTokenSecret(oAuthAccessTokenSecret);
99 |
100 | Configuration conf = builder.build();
101 |
102 | twitterStream = new TwitterStreamFactory(conf).getInstance();
103 | return twitterStream;
104 | }
105 |
106 | @Override
107 | public Object getNextItemIfAvailable() {
108 | return itemQueue.poll();
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/VerifyItemsFromFeedAreSentToMockKafkaProducer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/2/13
4 | * Time: 5:58 PM
5 | */
6 |
7 |
8 | import kafka.javaapi.producer.Producer;
9 | import kafka.producer.KeyedMessage;
10 | import kafka.producer.ProducerConfig;
11 | import org.easymock.Capture;
12 | import org.easymock.EasyMock;
13 | import org.testng.annotations.*;
14 | import org.testng.TestNG;
15 | import org.testng.TestListenerAdapter;
16 |
17 | import static org.easymock.EasyMock.*;
18 |
19 | import org.slf4j.*;
20 |
21 | import java.util.List;
22 | import java.util.Properties;
23 | import java.util.concurrent.ConcurrentLinkedQueue;
24 |
25 | public class VerifyItemsFromFeedAreSentToMockKafkaProducer {
26 | private static String TOPIC = "someTopic";
27 | private static String MESSAGE = "i-am-a-message";
28 |
29 | private static class TestItemProvider implements IFeedItemProvider {
30 | ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue();
31 |
32 | @Override
33 | public Runnable getRunnableTask() {
34 | return new Runnable() {
35 | @Override
36 | public void run() {
37 | try {
38 | Thread.sleep(10);
39 | } catch (InterruptedException e) {
40 | e.printStackTrace();
41 | }
42 |
43 | itemQueue.offer(MESSAGE);
44 | }
45 | };
46 | }
47 |
48 | @Override
49 | public Object getNextItemIfAvailable() {
50 | return itemQueue.poll();
51 | }
52 | }
53 |
54 |
55 | @Test(enabled = true)
56 | public void testEqualsOfKeyedMessage() {
57 | KeyedMessage<String, String>
58 | data1 =
59 | new KeyedMessage<String, String>("foo", "bar");
60 | KeyedMessage<String, String>
61 | data2 =
62 | new KeyedMessage<String, String>(
63 | new String("foo".getBytes()), new String("bar".getBytes()));
64 |
65 | assert data1.equals(data2);
66 | }
67 |
68 |
69 | @Test(enabled = true)
70 | public void testItemsProducedByFeedProviderAreSentToKafka() {
71 | Capture<KeyedMessage<String, String>> capturedArgument =
72 | new Capture<KeyedMessage<String, String>>();
73 |
74 |
75 | @SuppressWarnings("unchecked")
76 | Producer producer = createMock(Producer.class);
77 | producer.send(capture(capturedArgument));
78 | expectLastCall();
79 |
80 | ExternalFeedToKafkaAdapterSpout spout =
81 | EasyMock.createMockBuilder(ExternalFeedToKafkaAdapterSpout.class).
82 | addMockedMethod("setupProducer").createMock();
83 | expect(spout.setupProducer()).andReturn(producer);
84 |
85 |
86 | replay(producer);
87 | replay(spout);
88 |
89 |
90 |
91 | verifyNextTupleReceivesItemFromProviderAndSendsToKafkaProducer(spout);
92 |
93 | verify(producer);
94 | verify(spout);
95 |
96 | KeyedMessage<String, String> got = capturedArgument.getValue();
97 | assert got.message().contains(MESSAGE);
98 |
99 | }
100 |
101 | private void verifyNextTupleReceivesItemFromProviderAndSendsToKafkaProducer(
102 | ExternalFeedToKafkaAdapterSpout spout)
103 | {
104 | spout.setFeedProvider(new TestItemProvider());
105 | spout.setTopicName(TOPIC);
106 | spout.open(null, null, null);
107 |
108 | for (int i = 0; i < 10; i++) {
109 | try {
110 | Thread.sleep(10);
111 | } catch (InterruptedException e) {
112 | e.printStackTrace(); // do something more meaningful here?
113 | }
114 | spout.nextTuple();
115 | }
116 | }
117 |
118 | }
119 |
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/storm+kafka/src/main/java/RandomSentenceGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/27/13
4 | * Time: 11:25 PM
5 | */
6 |
7 | import java.io.BufferedReader;
8 | import java.io.IOException;
9 | import java.io.InputStreamReader;
10 | import java.util.Hashtable;
11 | import java.util.Random;
12 | import java.util.Vector;
13 |
14 | /* not currently used */
15 |
16 | public class RandomSentenceGenerator {
17 |
18 | // Markov chain table: maps a word to the list of words that may follow it
19 | public static Hashtable<String, Vector<String>> markovChain = new Hashtable<String, Vector<String>>();
20 | static Random rnd = new Random();
21 |
22 | private static String[] sentences = new String[]{
23 | "one king took the fox over the car.",
24 | "two queens bent the fox under the bed.",
25 | "four bears mined the pig into the house.",
26 | "Joe goats rolled the boat over the lodge.",
27 | };
28 |
29 |
30 | RandomSentenceGenerator() {
31 | // Create the first two entries (k:_start, k:_end)
32 | markovChain.put("_start", new Vector());
33 | markovChain.put("_end", new Vector());
34 | }
35 |
36 | String next() {
37 | int index = Math.abs(rnd.nextInt() % sentences.length);
38 | addWords(sentences[index]);
39 | return generateSentence();
40 | }
41 |
42 |
43 | /*
44 | * Main method (simple demo driver)
45 | */
46 | public static void main(String[] args) throws IOException {
47 | RandomSentenceGenerator generator = new RandomSentenceGenerator();
48 |
49 | while(true) {
50 | System.out.println("sentence: " + generator.next());
51 | }
52 | }
53 |
54 | /*
55 | * Add words
56 | */
57 | public static void addWords(String phrase) {
58 | // put each word into an array
59 | String[] words = phrase.split(" ");
60 |
61 | // Loop through each word, check if it's already added
62 | // if its added, then get the suffix vector and add the word
63 | // if it hasn't been added then add the word to the list
64 | // if its the first or last word then select the _start / _end key
65 |
66 | for (int i = 0; i < words.length; i++) {
67 |
68 | if (i == 0) {
69 | // first word of the phrase: record it as a possible sentence starter
70 | Vector<String> startWords = markovChain.get("_start");
71 | startWords.add(words[i]);
72 |
73 | Vector suffix = markovChain.get(words[i]);
74 | if (suffix == null) {
75 | suffix = new Vector();
76 | suffix.add(words[i+1]);
77 | markovChain.put(words[i], suffix);
78 | }
79 |
80 | } else if (i == words.length-1) {
81 | Vector endWords = markovChain.get("_end");
82 | endWords.add(words[i]);
83 |
84 | } else {
85 | Vector suffix = markovChain.get(words[i]);
86 | if (suffix == null) {
87 | suffix = new Vector();
88 | suffix.add(words[i+1]);
89 | markovChain.put(words[i], suffix);
90 | } else {
91 | suffix.add(words[i+1]);
92 | markovChain.put(words[i], suffix);
93 | }
94 | }
95 | }
96 | }
97 |
98 |
99 | /*
100 | * Generate a markov phrase
101 | */
102 | public static String generateSentence() {
103 |
104 | // Vector to hold the phrase
105 | Vector newPhrase = new Vector();
106 |
107 | // String for the next word
108 | String nextWord = "";
109 |
110 | // Select the first word
111 | Vector<String> startWords = markovChain.get("_start");
112 | int startWordsLen = startWords.size();
113 | nextWord = startWords.get(rnd.nextInt(startWordsLen));
114 | newPhrase.add(nextWord);
115 |
116 | // Keep looping through the words until we've reached the end
117 | while (nextWord.charAt(nextWord.length()-1) != '.') {
118 | Vector<String> wordSelection = markovChain.get(nextWord);
119 | int wordSelectionLen = wordSelection.size();
120 | nextWord = wordSelection.get(rnd.nextInt(wordSelectionLen));
121 | newPhrase.add(nextWord);
122 | }
123 |
124 | String retval = newPhrase.toString().replaceAll(",", "");
125 | return retval;
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/RandomSentenceGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/27/13
4 | * Time: 11:25 PM
5 | */
6 |
7 | import java.io.BufferedReader;
8 | import java.io.IOException;
9 | import java.io.InputStreamReader;
10 | import java.util.Hashtable;
11 | import java.util.Random;
12 | import java.util.Vector;
13 |
14 | /* not currently used */
15 |
16 | public class RandomSentenceGenerator {
17 |
18 | // Markov chain table: maps a word to the list of words that may follow it
19 | public static Hashtable<String, Vector<String>> markovChain = new Hashtable<String, Vector<String>>();
20 | static Random rnd = new Random();
21 |
22 | private static String[] sentences = new String[]{
23 | "one king took the fox over the car.",
24 | "two queens bent the fox under the bed.",
25 | "four bears mined the pig into the house.",
26 | "Joe goats rolled the boat over the lodge.",
27 | };
28 |
29 |
30 | RandomSentenceGenerator() {
31 | // Create the first two entries (k:_start, k:_end)
32 | markovChain.put("_start", new Vector());
33 | markovChain.put("_end", new Vector());
34 | }
35 |
36 | String next() {
37 | int index = Math.abs(rnd.nextInt() % sentences.length);
38 | addWords(sentences[index]);
39 | return generateSentence();
40 | }
41 |
42 |
43 | /*
44 | * Main method (simple demo driver)
45 | */
46 | public static void main(String[] args) throws IOException {
47 | RandomSentenceGenerator generator = new RandomSentenceGenerator();
48 |
49 | while(true) {
50 | System.out.println("sentence: " + generator.next());
51 | }
52 | }
53 |
54 | /*
55 | * Add words
56 | */
57 | public static void addWords(String phrase) {
58 | // put each word into an array
59 | String[] words = phrase.split(" ");
60 |
61 | // Loop through each word, check if it's already added
62 | // if its added, then get the suffix vector and add the word
63 | // if it hasn't been added then add the word to the list
64 | // if its the first or last word then select the _start / _end key
65 |
66 | for (int i = 0; i < words.length; i++) {
67 |
68 | if (i == 0) {
69 | // first word of the phrase: record it as a possible sentence starter
70 | Vector<String> startWords = markovChain.get("_start");
71 | startWords.add(words[i]);
72 |
73 | Vector suffix = markovChain.get(words[i]);
74 | if (suffix == null) {
75 | suffix = new Vector();
76 | suffix.add(words[i+1]);
77 | markovChain.put(words[i], suffix);
78 | }
79 |
80 | } else if (i == words.length-1) {
81 | Vector endWords = markovChain.get("_end");
82 | endWords.add(words[i]);
83 |
84 | } else {
85 | Vector suffix = markovChain.get(words[i]);
86 | if (suffix == null) {
87 | suffix = new Vector();
88 | suffix.add(words[i+1]);
89 | markovChain.put(words[i], suffix);
90 | } else {
91 | suffix.add(words[i+1]);
92 | markovChain.put(words[i], suffix);
93 | }
94 | }
95 | }
96 | }
97 |
98 |
99 | /*
100 | * Generate a markov phrase
101 | */
102 | public static String generateSentence() {
103 |
104 | // Vector to hold the phrase
105 | Vector newPhrase = new Vector();
106 |
107 | // String for the next word
108 | String nextWord = "";
109 |
110 | // Select the first word
111 | Vector<String> startWords = markovChain.get("_start");
112 | int startWordsLen = startWords.size();
113 | nextWord = startWords.get(rnd.nextInt(startWordsLen));
114 | newPhrase.add(nextWord);
115 |
116 | // Keep looping through the words until we've reached the end
117 | while (nextWord.charAt(nextWord.length()-1) != '.') {
118 | Vector<String> wordSelection = markovChain.get(nextWord);
119 | int wordSelectionLen = wordSelection.size();
120 | nextWord = wordSelection.get(rnd.nextInt(wordSelectionLen));
121 | newPhrase.add(nextWord);
122 | }
123 |
124 | String retval = newPhrase.toString().replaceAll(",", "");
125 | return retval;
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/storm+kafka/src/main/java/KafkaProducer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/28/13
4 | * Time: 6:07 PM
5 | */
6 |
7 |
8 | import java.util.concurrent.CountDownLatch;
9 | /*
10 | * Author: cbedford
11 | * Date: 10/22/13
12 | * Time: 8:50 PM
13 | */
14 |
15 |
16 | import com.google.common.io.Files;
17 | import kafka.admin.CreateTopicCommand;
18 | import kafka.javaapi.producer.Producer;
19 | import kafka.producer.KeyedMessage;
20 | import kafka.producer.ProducerConfig;
21 | import kafka.server.KafkaConfig;
22 | import kafka.server.KafkaServer;
23 | import kafka.utils.MockTime;
24 |
25 | import java.io.File;
26 | import java.util.Properties;
27 |
28 |
29 | public class KafkaProducer {
30 |
31 | private KafkaServer kafkaServer = null;
32 | private final String topicName;
33 |
34 |
35 | CountDownLatch topologyStartedLatch;
36 | public CountDownLatch producerFinishedInitialBatchLatch = new CountDownLatch(1);
37 |
38 |
39 | Producer producer;
40 |
41 | private String[] sentences;
42 |
43 | KafkaProducer(String[] sentences, String topicName, CountDownLatch topologyStartedLatch) {
44 | this.sentences = sentences;
45 | this.topicName = topicName;
46 | this.topologyStartedLatch = topologyStartedLatch;
47 | }
48 |
49 | public Thread startProducer() {
50 | Thread sender = new Thread(
51 | new Runnable() {
52 | @Override
53 | public void run() {
54 | emitBatch();
55 | ServerAndThreadCoordinationUtils.
56 | countDown(producerFinishedInitialBatchLatch);
57 | ServerAndThreadCoordinationUtils.
58 | await(topologyStartedLatch);
59 | emitBatch(); // emit second batch after we know topology is up
60 | }
61 | },
62 | "producerThread"
63 | );
64 | sender.start();
65 | return sender;
66 | }
67 |
68 | private void emitBatch() {
69 | Properties props = new Properties();
70 | props.put("metadata.broker.list", "localhost:9092");
71 | props.put("serializer.class", "kafka.serializer.StringEncoder");
72 | props.put("request.required.acks", "1");
73 | ProducerConfig config = new ProducerConfig(props);
74 | Producer producer = new Producer(config);
75 |
76 | for (String sentence : sentences) {
77 | KeyedMessage data =
78 | new KeyedMessage(topicName, sentence);
79 | producer.send(data);
80 | }
81 | producer.close();
82 |
83 | }
84 |
85 | public void createTopic(String topicName) {
86 | String[] arguments = new String[8];
87 | arguments[0] = "--zookeeper";
88 | arguments[1] = "localhost:2000";
89 | arguments[2] = "--replica";
90 | arguments[3] = "1";
91 | arguments[4] = "--partition";
92 | arguments[5] = "1";
93 | arguments[6] = "--topic";
94 | arguments[7] = topicName;
95 |
96 | CreateTopicCommand.main(arguments);
97 | }
98 |
99 | public void startKafkaServer() {
100 | File tmpDir = Files.createTempDir();
101 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1);
102 | KafkaConfig kafkaConfig = new KafkaConfig(props);
103 |
104 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime());
105 | kafkaServer.startup();
106 | }
107 |
108 | public void shutdown() {
109 | kafkaServer.shutdown();
110 | }
111 |
112 | private Properties createProperties(String logDir, int port, int brokerId) {
113 | Properties properties = new Properties();
114 | properties.put("port", port + "");
115 | properties.put("broker.id", brokerId + "");
116 | properties.put("log.dir", logDir);
117 | properties.put("zookeeper.connect", "localhost:2000"); // Uses zookeeper created by LocalCluster
118 | return properties;
119 | }
120 |
121 | }
122 |
--------------------------------------------------------------------------------
/storm+kafka/src/main/java/ServerAndThreadCoordinationUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/28/13
4 | * Time: 2:20 PM
5 | */
6 |
7 |
8 | import java.io.*;
9 | import java.net.Socket;
10 | import java.util.Date;
11 | import java.util.Timer;
12 | import java.util.TimerTask;
13 | import java.util.concurrent.CountDownLatch;
14 | import java.util.concurrent.TimeUnit;
15 |
16 | public class ServerAndThreadCoordinationUtils {
17 |
18 | public static void setMaxTimeToRunTimer(int millisecs) {
19 | Date timeLimit =
20 | new Date(new Date().getTime() + millisecs);
21 | Timer timer = new Timer();
22 |
23 | timer.schedule(new TimerTask() {
24 |
25 | @Override
26 | public void run() {
27 | System.out.println("aborting test ! Took too long");
28 | System.exit(-1);
29 | }
30 | }, timeLimit);
31 | }
32 |
33 | public static void pauseUntil() {
34 | boolean fileExists = false;
35 | while (!fileExists) {
36 | File pauseFile = new File("/tmp/go");
37 | if (!pauseFile.exists()) {
38 | try {
39 | Thread.sleep(500);
40 | } catch (InterruptedException e) {
41 | e.printStackTrace();
42 | }
43 | } else {
44 | fileExists = true;
45 | }
46 | }
47 |
48 | }
49 |
50 |
51 | public static String send4LetterWord(String host, int port, String cmd)
52 | throws IOException {
53 | System.out.println("connecting to " + host + " " + port);
54 | Socket sock = new Socket(host, port);
55 | BufferedReader reader = null;
56 | try {
57 | OutputStream outstream = sock.getOutputStream();
58 | outstream.write(cmd.getBytes());
59 | outstream.flush();
60 | // this replicates NC - close the output stream before reading
61 | sock.shutdownOutput();
62 |
63 | reader =
64 | new BufferedReader(
65 | new InputStreamReader(sock.getInputStream()));
66 | StringBuilder sb = new StringBuilder();
67 | String line;
68 | while ((line = reader.readLine()) != null) {
69 | sb.append(line + "\n");
70 | }
71 | return sb.toString();
72 | } finally {
73 | sock.close();
74 | if (reader != null) {
75 | reader.close();
76 | }
77 | }
78 | }
79 |
80 | public static boolean waitForServerUp(String host, int port, long timeout) {
81 | long start = System.currentTimeMillis();
82 | while (true) {
83 | try {
84 | // if there are multiple hostports, just take the first one
85 | String result = send4LetterWord(host, port, "stat");
86 | System.out.println("result of send: " + result);
87 | if (result.startsWith("Zookeeper version:")) {
88 | return true;
89 | }
90 | } catch (IOException e) {
91 | // ignore as this is expected
92 | System.out.println("server " + host + ":" + port + " not up " + e);
93 | }
94 |
95 | if (System.currentTimeMillis() > start + timeout) {
96 | break;
97 | }
98 | try {
99 | Thread.sleep(250);
100 | } catch (InterruptedException e) {
101 | // ignore
102 | }
103 | }
104 | return false;
105 | }
106 |
107 | public static void await(CountDownLatch latch) {
108 | try {
109 | latch.await();
110 | } catch (InterruptedException e) {
111 | e.printStackTrace();
112 | System.out.println("FATAL ERROR");
113 | System.exit(-1);
114 | }
115 | }
116 |
117 |
118 | public static void countDown(CountDownLatch latch) {
119 | try {
120 | latch.countDown();
121 | } catch (Exception e) {
122 | e.printStackTrace();
123 | System.out.println("FATAL ERROR");
124 | System.exit(-1);
125 | }
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/ExternalFeedToKafkaAdapterSpout.java:
--------------------------------------------------------------------------------
1 | import backtype.storm.spout.SpoutOutputCollector;
2 | import backtype.storm.task.TopologyContext;
3 | import backtype.storm.topology.OutputFieldsDeclarer;
4 | import backtype.storm.topology.base.BaseRichSpout;
5 | import backtype.storm.tuple.Fields;
6 | import com.google.common.collect.ImmutableMap;
7 | import com.google.gson.Gson;
8 | import kafka.javaapi.producer.Producer;
9 | import kafka.producer.KeyedMessage;
10 | import kafka.producer.ProducerConfig;
11 |
12 | import java.util.Map;
13 | import java.util.Properties;
14 |
15 | /**
16 |  * A spout that pulls items from an external feed (via an IFeedItemProvider running on its own thread) and publishes each item to a Kafka topic as a JSON-encoded record.
17 | */
18 | public class ExternalFeedToKafkaAdapterSpout extends BaseRichSpout {
19 | private static final long serialVersionUID = 1L;
20 | public static final String RECORD = "record";
21 |
22 |
23 | private String brokerConnectString;
24 |
25 | private String topicName;
26 | private String serializerClass;
27 |
28 | private transient SpoutOutputCollector collector;
29 | private transient TopologyContext context;
30 | private transient Producer producer;
31 |
32 | private IFeedItemProvider feedProvider;
33 |
34 | public ExternalFeedToKafkaAdapterSpout(IFeedItemProvider feedProvider,
35 | String brokerConnectString,
36 | String topicName,
37 | String serializerClass) {
38 | this.feedProvider = feedProvider;
39 | this.brokerConnectString = brokerConnectString;
40 | this.topicName = topicName;
41 | if (serializerClass == null) {
42 | serializerClass = "kafka.serializer.StringEncoder";
43 | }
44 | this.serializerClass = serializerClass;
45 | }
46 |
47 |
48 | public void setFeedProvider(IFeedItemProvider feedProvider) { // mainly for testing
49 | this.feedProvider = feedProvider;
50 | }
51 |
52 | public void setTopicName(String topicName) { // mainly for testing
53 | this.topicName = topicName;
54 | }
55 |
56 |
57 | @Override
58 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
59 | declarer.declare(new Fields(RECORD));
60 | }
61 |
62 |
63 | @Override
64 | public void open(@SuppressWarnings("rawtypes") Map conf,
65 | TopologyContext context,
66 | SpoutOutputCollector collector) {
67 | this.collector = collector;
68 | this.context = context;
69 |
70 | producer = setupProducer();
71 |
72 | Thread feedProviderThread =
73 | new Thread(feedProvider.getRunnableTask(), "feedProviderThread");
74 | feedProviderThread.start();
75 | }
76 |
77 |
78 | @Override
79 | public void nextTuple() {
80 | try {
81 | Thread.sleep(10);
82 | } catch (InterruptedException e) {
83 | e.printStackTrace();
84 | }
85 |
86 | Object feedItem = feedProvider.getNextItemIfAvailable();
87 |
88 | if (feedItem != null) {
89 | System.out.println(">>>->>feed item is: " + feedItem);
90 | final Map itemAsMap = ImmutableMap.of(RECORD, feedItem);
91 | try {
92 | String itemAsJson = new Gson().toJson(itemAsMap);
93 | KeyedMessage data =
94 | new KeyedMessage(topicName, itemAsJson);
95 | producer.send(data);
96 | } catch (Exception e) {
97 | throw new RuntimeException("Failed to JSON-encode or send feed item: " + feedItem, e);
98 | }
99 |
100 | } else {
101 | try {
102 | Thread.sleep(10);
103 | } catch (InterruptedException e) {
104 | e.printStackTrace(); // do something more meaningful here?
105 | }
106 | }
107 |
108 | }
109 |
110 | // should be private, but have not gotten PowerMock unit testing to work yet.
111 | protected Producer setupProducer() {
112 | Properties props = new Properties();
113 | props.put("metadata.broker.list", brokerConnectString);
114 | props.put("serializer.class", serializerClass);
115 | props.put("serializer.class", "kafka.serializer.StringEncoder");
116 | props.put("producer.type", "sync");
117 | props.put("batch.size", "1");
118 |
119 | ProducerConfig config = new ProducerConfig(props);
120 | return new Producer(config);
121 | }
122 | }
123 |
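
Note: a minimal wiring sketch for the spout above (not part of the repo). The broker address, topic name, component id, and sample sentences are made-up placeholders, and TestFeedItemProvider from the test tree stands in for a real feed provider. Passing null for the serializer class falls back to kafka.serializer.StringEncoder, per the constructor above.

import backtype.storm.topology.TopologyBuilder;

public class AdapterSpoutWiringSketch {
    public static void main(String[] args) {
        // feed provider from this repo's test tree; the sentences are placeholders
        IFeedItemProvider provider = new TestFeedItemProvider(new String[]{"cat", "pig"});
        // hypothetical broker address and topic name; null selects the default String encoder
        ExternalFeedToKafkaAdapterSpout spout =
                new ExternalFeedToKafkaAdapterSpout(provider, "localhost:9092", "someTopic", null);
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("externalFeedSpout", spout, 1);
        // remaining components (e.g. a storm-kafka KafkaSpout reading "someTopic") would be wired here
    }
}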
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/KafkaOutputBolt.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/30/13
4 | * Time: 9:39 PM
5 | */
6 |
7 |
8 | import backtype.storm.task.OutputCollector;
9 | import backtype.storm.task.TopologyContext;
10 | import backtype.storm.topology.OutputFieldsDeclarer;
11 | import backtype.storm.topology.base.BaseRichBolt;
12 | import backtype.storm.tuple.Fields;
13 | import backtype.storm.tuple.Tuple;
14 | import com.google.gson.Gson;
15 | import kafka.javaapi.producer.Producer;
16 | import kafka.producer.KeyedMessage;
17 | import kafka.producer.ProducerConfig;
18 |
19 | import java.io.IOException;
20 | import java.util.HashMap;
21 | import java.util.List;
22 | import java.util.Map;
23 | import java.util.Properties;
24 |
25 | /**
26 | * A first pass implementation of a generic Kafka Output Bolt that takes whatever tuple it
27 | receives, JSON-ifies it, and dumps it on the Kafka topic that is configured in the
28 | constructor. By default the JSON-ification algorithm works such that the JSON object's
29 | attribute names are the field names of the tuples (currently only 1-tuples are supported).
30 | In other words, the JSON-ified value is constructed as a map with key names derived from
31 | * tuple field names and corresponding values set as the JSON-ified tuple object.
32 | *
33 | * However, if the KafkaOutputBolt constructor is called with rawMode=true, then for a 1-tuple
34 | * we will assume the tuple value is a valid JSON string. TODO - we will eventually support
35 | * tuples of length 2 and greater, at which point raw mode will boil down to putting the 'raw'
36 | * valid JSON strings given by the i-th element of each tuple into an array.
37 | */
38 | public class KafkaOutputBolt extends BaseRichBolt {
39 | private static final long serialVersionUID = 1L;
40 | private final boolean rawMode;
41 |
42 | private String brokerConnectString;
43 | private String topicName;
44 | private String serializerClass;
45 |
46 | private transient Producer producer;
47 | private transient OutputCollector collector;
48 | private transient TopologyContext context;
49 |
50 | public KafkaOutputBolt(String brokerConnectString,
51 | String topicName,
52 | String serializerClass,
53 | boolean rawMode) {
54 | if (serializerClass == null) {
55 | serializerClass = "kafka.serializer.StringEncoder";
56 | }
57 | this.brokerConnectString = brokerConnectString;
58 | this.serializerClass = serializerClass;
59 | this.topicName = topicName;
60 | this.rawMode = rawMode;
61 | }
62 |
63 | @Override
64 | public void prepare(Map stormConf,
65 | TopologyContext context,
66 | OutputCollector collector) {
67 | Properties props = new Properties();
68 | props.put("metadata.broker.list", brokerConnectString);
69 | props.put("serializer.class", serializerClass);
70 | props.put("producer.type", "sync");
71 | props.put("batch.size", "1");
72 |
73 | ProducerConfig config = new ProducerConfig(props);
74 | producer = new Producer(config);
75 |
76 | this.context = context;
77 | this.collector = collector;
78 | }
79 |
80 | @Override
81 | public void execute(Tuple input) {
82 | String tupleAsJson = null;
83 | try {
84 | if (rawMode) {
85 | tupleAsJson = input.getString(0);
86 |
87 | } else {
88 | tupleAsJson = JsonHelper.toJson(input);
89 | }
90 | KeyedMessage data =
91 | new KeyedMessage(topicName, tupleAsJson);
92 | producer.send(data);
93 | collector.ack(input);
94 | } catch (Exception e) {
95 | collector.fail(input);
96 | }
97 | }
98 |
99 | @Override
100 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
101 |
102 | }
103 |
104 | public static Producer initProducer() throws IOException {
105 | Properties props = new Properties();
106 | props.put("metadata.broker.list", "localhost:9092");
107 | props.put("serializer.class", "kafka.serializer.StringEncoder");
108 | props.put("producer.type", "async");
109 | props.put("batch.size", "1");
110 | ProducerConfig config = new ProducerConfig(props);
111 |
112 | return new Producer(config);
113 | }
114 | }
115 |
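
Note: to make the non-raw JSON-ification described above concrete, here is a rough sketch of the mapping using Gson. This assumes JsonHelper.toJson behaves as the javadoc describes; the field name "record" and the value are hypothetical.

import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;

public class JsonificationSketch {
    public static void main(String[] args) {
        // a 1-tuple with field name "record" and value "cat" becomes a JSON object keyed by the field name
        String json = new Gson().toJson(ImmutableMap.of("record", "cat"));
        System.out.println(json);   // prints {"record":"cat"}
    }
}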
--------------------------------------------------------------------------------
/kafka/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | com.buildlackey
6 | kafka-producer-to-consumer-example
7 | kafka-producer-to-consumer-example
8 | 1.0
9 | jar
10 | Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers
11 |
12 |
13 |
14 | leadLackey
15 | Chris Bedford
16 |
17 |
18 |
19 |
20 |
21 |
22 | storm
23 | storm
24 | 0.9.0-wip17
25 |
26 |
27 | storm
28 | storm-core
29 | 0.9.0-wip17
30 |
31 |
32 | storm
33 | storm-kafka
34 | 0.9.0-wip16a-scala292
35 |
36 |
37 | com.netflix.curator
38 | curator-test
39 | 1.2.5
40 |
41 |
42 |
43 | org.slf4j
44 | slf4j-log4j12
45 |
46 |
47 | log4j
48 | log4j
49 |
50 |
51 |
52 |
53 |
54 |
55 | 2.2.1
56 |
57 |
58 |
59 |
60 |
61 | org.apache.maven.plugins
62 | maven-enforcer-plugin
63 | 1.1.1
64 |
65 |
66 | enforce-versions
67 |
68 | enforce
69 |
70 |
71 |
72 |
73 | 2.2.1
74 |
75 |
76 | 1.7
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | org.apache.maven.plugins
86 | maven-compiler-plugin
87 | 3.1
88 |
89 | 1.7
90 | 1.7
91 |
92 |
93 |
94 |
95 | org.apache.maven.plugins
96 | maven-jar-plugin
97 | 2.4
98 |
99 |
100 | org.apache.maven.plugins
101 | maven-source-plugin
102 | 2.2
103 |
104 | true
105 |
106 |
107 |
108 | create-source-jar
109 |
110 | jar-no-fork
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/EsperFilteredTwitterFeedTopology.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.Config;
9 | import backtype.storm.StormSubmitter;
10 | import backtype.storm.generated.AlreadyAliveException;
11 | import backtype.storm.generated.InvalidTopologyException;
12 | import backtype.storm.generated.StormTopology;
13 |
14 | import java.io.IOException;
15 |
16 |
17 | /**
18 | * This example pulls tweets from Twitter and runs them through a filter written in the Esper query language (EQL). Our
19 | * ExternalFeedToKafkaAdapterSpout pushes messages into a topic. These messages are then routed into an EsperBolt which
20 | * uses EQL to do some simple filtering. We then route the filtered messages to a KafkaOutputBolt which
21 | * dumps the filtered messages on a second topic.
22 | */
23 | public class EsperFilteredTwitterFeedTopology {
24 |
25 | private final String outputTopic = this.getClass().getSimpleName() + "_output";
26 | private final String firstTopic = this.getClass().getSimpleName() + "_input";
27 |
28 | private final String oAuthConsumerKey;
29 | private final String oAuthConsumerSecret;
30 | private final String oAuthAccessToken;
31 | private final String oAuthAccessTokenSecret;
32 | private final String brokerConnectString; // kafka broker server/port info
33 | private final String searchTerm; // twitter feed filter search term
34 |
35 |
36 | public EsperFilteredTwitterFeedTopology(
37 | final String oAuthConsumerKey,
38 | final String oAuthConsumerSecret,
39 | final String oAuthAccessToken,
40 | final String oAuthAccessTokenSecret,
41 | final String brokerConnectString,
42 | final String searchTerm) {
43 | this.oAuthConsumerKey = oAuthConsumerKey;
44 | this.oAuthConsumerSecret = oAuthConsumerSecret;
45 | this.oAuthAccessToken = oAuthAccessToken;
46 | this.oAuthAccessTokenSecret = oAuthAccessTokenSecret;
47 | this.brokerConnectString = brokerConnectString;
48 | this.searchTerm = searchTerm;
49 | }
50 |
51 | public static void main(String[] args) throws InvalidTopologyException, AlreadyAliveException, IOException {
52 | if (args.length != 6) {
53 | throw new RuntimeException("USAGE: "
54 | + " "
55 | + " "
56 | + " "
57 | + ""
58 | + ""
59 | + " "
60 | );
61 | }
62 |
63 | final String oAuthConsumerKey = args[0];
64 | final String oAuthConsumerSecret = args[1];
65 | final String oAuthAccessToken = args[2];
66 | final String oAuthAccessTokenSecret = args[3];
67 | final String brokerConnectString = args[4];
68 | final String searchTerm = args[5];
69 |
70 |
71 | EsperFilteredTwitterFeedTopology topology = new EsperFilteredTwitterFeedTopology(
72 | oAuthConsumerKey,
73 | oAuthConsumerSecret,
74 | oAuthAccessToken,
75 | oAuthAccessTokenSecret,
76 | brokerConnectString,
77 | searchTerm
78 | );
79 | topology.submitTopology();
80 |
81 | }
82 |
83 | public String getTopicName() { // input topic
84 | return firstTopic;
85 | }
86 |
87 | public String getSecondTopicName() { // output topic
88 | return outputTopic;
89 | }
90 |
91 | protected String getZkConnect() { // Uses zookeeper created by LocalCluster
92 | return "localhost:2181";
93 | }
94 |
95 |
96 | public void submitTopology() throws IOException, AlreadyAliveException, InvalidTopologyException {
97 | System.out.println("topic: " + getTopicName() + "second topic:" + getSecondTopicName());
98 | final Config conf = getDebugConfigForStormTopology();
99 | conf.setNumWorkers(2);
100 | StormSubmitter.submitTopology(this.getClass().getSimpleName(), conf, createTopology());
101 | }
102 |
103 | protected StormTopology createTopology() {
104 | TwitterFeedItemProvider feedItemProvider = new TwitterFeedItemProvider(
105 | oAuthConsumerKey,
106 | oAuthConsumerSecret,
107 | oAuthAccessToken,
108 | oAuthAccessTokenSecret,
109 | searchTerm);
110 | return TopologyInitializer.
111 | createTopology(
112 | getZkConnect(),
113 | brokerConnectString,
114 | getTopicName(),
115 | getSecondTopicName(),
116 | feedItemProvider,
117 | true);
118 | }
119 |
120 | public static Config getDebugConfigForStormTopology() {
121 | Config config = new Config();
122 | config.setDebug(true);
123 | config.put(Config.STORM_ZOOKEEPER_CONNECTION_TIMEOUT, 900 * 1000);
124 | config.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 900 * 1000);
125 | return config;
126 | }
127 | }
128 |
129 |
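
Note: a sketch of constructing and submitting this topology from code rather than via the command-line main() above. All six constructor arguments below are placeholders for real Twitter OAuth credentials, a Kafka broker address, and a search term.

public class TwitterTopologySubmitSketch {
    public static void main(String[] args) throws Exception {
        EsperFilteredTwitterFeedTopology topology = new EsperFilteredTwitterFeedTopology(
                "myConsumerKey",          // Twitter OAuth consumer key (placeholder)
                "myConsumerSecret",       // Twitter OAuth consumer secret (placeholder)
                "myAccessToken",          // Twitter OAuth access token (placeholder)
                "myAccessTokenSecret",    // Twitter OAuth access token secret (placeholder)
                "localhost:9092",         // Kafka broker connect string (placeholder)
                "storm");                 // search term used to filter the tweet stream
        topology.submitTopology();        // submits to a running cluster via StormSubmitter
    }
}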
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.generated.StormTopology;
9 | import org.testng.annotations.BeforeClass;
10 | import org.testng.annotations.Test;
11 |
12 | import java.io.File;
13 | import java.io.IOException;
14 |
15 |
16 | /**
17 | * This test builds on StormKafkaSpoutGetsInputViaAdaptedExternalFeedTest. Our ExternalFeedToKafkaAdapterSpout
18 | * pushes messages into a topic. These messages are then routed into an EsperBolt which uses the Esper query
19 | * language to do some simple filtering. We then route the filtered messages to a KafkaOutputBolt which
20 | * dumps the filtered messages on a second topic. We use an instance of Kafka MessageConsumer to pull those
21 | * messages off the second topic, and we verify that what we got is equal to what we expect.
22 | */
23 | public class ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest extends AbstractStormWithKafkaTest {
24 | public static final int EXPECTED_COUNT = 6;
25 | protected static volatile boolean finishedCollecting = false;
26 |
27 | protected static final int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 25 /* seconds */;
28 | protected static final int SECOND = 1000;
29 |
30 | private final String secondTopic = this.getClass().getSimpleName() + "topic" + getRandomInteger(1000);
31 | private volatile boolean testPassed = true; // assume the best
32 |
33 | @BeforeClass
34 | protected void deleteFiles() {
35 | deleteSentinelFile("/tmp/before.storm");
36 | deleteSentinelFile("/tmp/after.storm");
37 | }
38 |
39 | private void deleteSentinelFile(String pathname) {
40 | File sentinel = new File(pathname);
41 | sentinel.delete();
42 | if (sentinel.exists()) {
43 | throw new RuntimeException("Could not delete sentinel file");
44 | }
45 | }
46 |
47 | @Test
48 | public void runTestWithTopology() throws IOException {
49 | System.out.println("topic: " + getTopicName() + "second topic:" + getSecondTopicName());
50 | //ServerAndThreadCoordinationUtils.pauseUntil("/tmp/before.storm");
51 | submitTopology(); // The last bolt in this topology will write to second topic
52 | //ServerAndThreadCoordinationUtils.pauseUntil("/tmp/after.storm");
53 | Thread verifyThread = setupVerifyThreadToListenOnSecondTopic();
54 | try {
55 | verifyThread.join();
56 | } catch (InterruptedException e) {
57 | e.printStackTrace();
58 | }
59 | if (!testPassed) {
60 | throw new RuntimeException("Test did not pass. Got messages: ");
61 | }
62 | }
63 |
64 | @Override
65 | public String getSecondTopicName() {
66 | return secondTopic;
67 | }
68 |
69 | @Override
70 | protected StormTopology createTopology() {
71 | return TopologyInitializer.
72 | createTopology(
73 | getZkConnect(),
74 | BROKER_CONNECT_STRING,
75 | getTopicName(),
76 | getSecondTopicName(),
77 | new TestFeedItemProvider(getTestSentences()), false);
78 | }
79 |
80 | protected int getMaxAllowedToRunMillisecs() {
81 | return ExternalFeedRoutedToEsperAndThenToKakfaOutputBoltTest.MAX_ALLOWED_TO_RUN_MILLISECS;
82 | }
83 |
84 | private void waitForResultsFromStormKafkaSpoutToAppearInCollectorBolt() {
85 | while (!finishedCollecting) {
86 | try {
87 | Thread.sleep(500);
88 | } catch (InterruptedException e) {
89 | e.printStackTrace();
90 | }
91 | }
92 | System.out.println("DONE");
93 | }
94 |
95 |
96 | // EXPECTED_COUNT - consumer will see 6 occurrences of cat out of 6 batches of 2
97 | // The shutdown will trigger when we see the first 'cat - SHUTDOWN'. That's why the
98 | // consumer does not see 7 cats.
99 | private String[] getTestSentences() {
100 | return new String[]{
101 | "cat",
102 | "pig",
103 |
104 | "pig",
105 | "pig",
106 |
107 | "pig",
108 | "cat",
109 |
110 | "cat",
111 | "pig",
112 |
113 | "cat",
114 | "cat",
115 |
116 | "cat - SHUTDOWN",
117 | "cat - SHUTDOWN",
118 | };
119 |
120 | }
121 |
122 | private Thread setupVerifyThreadToListenOnSecondTopic() {
123 | Thread.UncaughtExceptionHandler uncaughtHandler = new Thread.UncaughtExceptionHandler() {
124 | @Override
125 | public void uncaughtException(Thread th, Throwable ex) {
126 | testPassed = false;
127 | }
128 | };
129 | Thread verifyThread = new Thread(
130 | new Runnable() {
131 | @Override
132 | public void run() {
133 | verifyResults(getSecondTopicName(), EXPECTED_COUNT);
134 | }
135 | },
136 | "verifyThread"
137 | );
138 | verifyThread.setUncaughtExceptionHandler(uncaughtHandler);
139 | verifyThread.start();
140 | return verifyThread;
141 | }
142 | }
143 |
144 |
--------------------------------------------------------------------------------
/storm+kafka/src/main/java/TestTopology.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/22/13
4 | * Time: 8:50 PM
5 | */
6 |
7 |
8 | import backtype.storm.Config;
9 | import backtype.storm.LocalCluster;
10 | import backtype.storm.spout.SchemeAsMultiScheme;
11 | import backtype.storm.topology.TopologyBuilder;
12 | import storm.kafka.*;
13 |
14 | import java.util.ArrayList;
15 | import java.util.List;
16 | import java.util.Random;
17 | import java.util.concurrent.CountDownLatch;
18 |
19 | public class TestTopology {
20 |
21 |
22 | final static int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 90 /* seconds */;
23 |
24 | CountDownLatch topologyStartedLatch = new CountDownLatch(1);
25 |
26 | private static int STORM_KAFKA_FROM_READ_FROM_START = -2;
27 | private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1;
28 | private static int readFromMode = STORM_KAFKA_FROM_READ_FROM_START;
29 | private int expectedNumMessages = 8;
30 |
31 | private static final int SECOND = 1000;
32 | private static List messagesReceived = new ArrayList();
33 |
34 | private LocalCluster cluster = new LocalCluster();
35 |
36 | private static final String TOPIC_NAME = "big-topix-" + new Random().nextInt();
37 | volatile static boolean finishedCollecting = false;
38 |
39 | private static String[] sentences = new String[]{
40 | "one dog9 - saw the fox over the moon",
41 | "two cats9 - saw the fox over the moon",
42 | "four bears9 - saw the fox over the moon",
43 | "five goats9 - saw the fox over the moon",
44 | };
45 |
46 | private KafkaProducer kafkaProducer = new KafkaProducer(sentences, TOPIC_NAME, topologyStartedLatch);
47 |
48 |
49 | public static void recordRecievedMessage(String msg) {
50 | synchronized (TestTopology.class) { // ensure visibility of list updates between threads
51 | messagesReceived.add(msg);
52 | }
53 | }
54 |
55 |
56 | public static void main(String[] args) {
57 | TestTopology testTopology = new TestTopology();
58 |
59 | if (args.length == 1 && args[0].equals("--fromCurrent")) {
60 | readFromMode = STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET;
61 | testTopology.expectedNumMessages = 4;
62 | }
63 |
64 | testTopology.runTest();
65 | }
66 |
67 | private void runTest() {
68 | ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(MAX_ALLOWED_TO_RUN_MILLISECS);
69 | ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * SECOND); // Wait for zookeeper to come up
70 |
71 | kafkaProducer.startKafkaServer();
72 | kafkaProducer.createTopic(TOPIC_NAME);
73 |
74 | try {
75 |
76 |
77 | kafkaProducer.startProducer();
78 | ServerAndThreadCoordinationUtils.await(kafkaProducer.producerFinishedInitialBatchLatch);
79 |
80 | setupKafkaSpoutAndSubmitTopology();
81 | try {
82 | Thread.sleep(5000); // Would be nice to have a call back inform us when ready
83 | } catch (InterruptedException e) {
84 | e.printStackTrace();
85 | }
86 | ServerAndThreadCoordinationUtils.countDown(topologyStartedLatch);
87 |
88 | awaitResults();
89 | } catch (InterruptedException e) {
90 | e.printStackTrace();
91 | }
92 |
93 | verifyResults();
94 | shutdown();
95 | System.out.println("SUCCESSFUL COMPLETION");
96 | System.exit(0);
97 | }
98 |
99 |
100 |
101 | private void awaitResults() {
102 | while (!finishedCollecting) {
103 | try {
104 | Thread.sleep(500);
105 | } catch (InterruptedException e) {
106 | e.printStackTrace();
107 | }
108 | }
109 |
110 | // Sleep another couple of seconds in case any more messages than expected come into the bolt.
111 | // In this case the bolt should throw a fatal error
112 | try {
113 | Thread.sleep(2000);
114 | } catch (InterruptedException e) {
115 | e.printStackTrace();
116 | }
117 |
118 |
119 | System.out.println("after await");
120 | }
121 |
122 | private void verifyResults() {
123 | synchronized (TestTopology.class) { // ensure visibility of list updates between threads
124 | int count = 0;
125 | for (String msg : messagesReceived) {
126 | if (msg.contains("cat") || msg.contains("dog") || msg.contains("bear") || msg.contains("goat")) {
127 | count++;
128 | }
129 | }
130 | if (count != expectedNumMessages) {
131 | System.out.println(">>>>>>>>>>>>>>>>>>>>FAILURE - Did not receive expected messages");
132 | System.exit(-1);
133 | }
134 |
135 | }
136 | }
137 |
138 | private void setupKafkaSpoutAndSubmitTopology() throws InterruptedException {
139 | BrokerHosts brokerHosts = new ZkHosts("localhost:2000");
140 |
141 | SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, TOPIC_NAME, "", "storm");
142 | kafkaConfig.forceStartOffsetTime(readFromMode /* either earliest or current offset */);
143 | kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
144 |
145 |
146 | TopologyBuilder builder = new TopologyBuilder();
147 | builder.setSpout("words", new KafkaSpout(kafkaConfig), 1);
148 | VerboseCollectorBolt bolt = new VerboseCollectorBolt(expectedNumMessages);
149 | builder.setBolt("print", bolt).shuffleGrouping("words");
150 |
151 |
152 | Config config = new Config();
153 |
154 | cluster.submitTopology("kafka-test", config, builder.createTopology());
155 | }
156 |
157 | private void shutdown() {
158 | cluster.shutdown();
159 | kafkaProducer.shutdown();
160 | }
161 |
162 |
163 |
164 | }
165 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/FacebookFeedItemProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/4/13
4 | * Time: 6:01 PM
5 | */
6 |
7 |
8 | import com.restfb.Connection;
9 | import com.restfb.DefaultFacebookClient;
10 | import com.restfb.FacebookClient;
11 | import com.restfb.Parameter;
12 | import com.restfb.types.Post;
13 |
14 | import java.text.ParseException;
15 | import java.text.SimpleDateFormat;
16 | import java.util.Date;
17 | import java.util.Iterator;
18 | import java.util.List;
19 | import java.util.concurrent.ConcurrentLinkedQueue;
20 | import org.apache.commons.collections.buffer.CircularFifoBuffer;
21 |
22 |
23 | public class FacebookFeedItemProvider implements IFeedItemProvider {
24 | public static final SimpleDateFormat GMT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
25 |
26 | private static final int TIME_OVERLAP = 1000 * 60; // one minute
27 | private static final int NUM_REMEMBERED_PREVIOUSLY_SEEN_ITEM_IDS = 1000;
28 |
29 | private final String queryString;
30 | private final FacebookClient facebookClient;
31 | private final ConcurrentLinkedQueue itemQueue = new ConcurrentLinkedQueue();
32 |
33 | private final CircularFifoBuffer prevSeenItemIds = new CircularFifoBuffer(NUM_REMEMBERED_PREVIOUSLY_SEEN_ITEM_IDS);
34 |
35 |
36 | private volatile Date lastQueryTime = new Date();
37 | //private volatile Date lastQueryTime = parseDate("2013-11-08T19:33:20-0800");
38 |
39 |
40 | public FacebookFeedItemProvider(String authToken, String queryString) {
41 | facebookClient = new DefaultFacebookClient(authToken);
42 | this.queryString = queryString;
43 | }
44 |
45 | public static void main(String[] args) {
46 | Date startDate = parseDate("2013-11-08T19:33:20-0800");
47 |
48 | FacebookFeedItemProvider provider = new FacebookFeedItemProvider(args[0], "Rizal");
49 | Thread thread = new Thread(provider.getRunnableTask(), "facebookFeedItemProviderThread");
50 | thread.start();
51 |
52 | //System.out.println("Getting from queue");
53 |
54 | while (true) {
55 | try {
56 | Thread.sleep(5000);
57 | } catch (InterruptedException e) {
58 | e.printStackTrace();
59 | }
60 | String item = provider.itemQueue.poll();
61 | if (item != null) {
62 | System.out.println("+++++++++++++ >>>: " + item);
63 | } else {
64 | //System.out.println("+++++++++++++ no queue item");
65 | }
66 | }
67 | }
68 |
69 | private static Date parseDate(String dateString) {
70 | Date startDate = null;
71 | try {
72 | startDate = GMT_DATE_FORMAT.parse(dateString);
73 | System.out.println("result of parse is " + getFormattedDate(startDate));
74 | } catch (ParseException e) {
75 | e.printStackTrace();
76 | }
77 | return startDate;
78 | }
79 |
80 |
81 | @Override
82 | public Runnable getRunnableTask() {
83 | return new Runnable() {
84 | @Override
85 | public void run() {
86 | while (true) {
87 | // We set updatedLastQueryTime to some time before the time we start our search so we don't
88 | // miss any items posted while the search is being done. This means we can
89 | // double process some items. To avoid this we maintain a bounded queue of previously seen
90 | // item ids. If the number previously seen is more than the buffer bound we might double process,
91 | // but for our demo we won't worry about this.
92 | //
93 | //System.out.println("starting query from: " + getFormattedDate(lastQueryTime));
94 | Date updatedLastQueryTime = new Date( new Date().getTime() - TIME_OVERLAP );
95 | //Date updatedLastQueryTime = new Date();
96 | Connection postStream = getPostStream();
97 | List postList = postStream.getData();
98 | if (postList.size() > 0) {
99 | for (Post p : postList) {
100 | //System.out.println("Post at : " + getFormattedDate(p.getCreatedTime()) + "\n" + p.getMessage() + " id = " + p.getId());
101 | enqueueItemIfNotPreviouslySeen(p);
102 | }
103 | }
104 | lastQueryTime = updatedLastQueryTime;
105 |
106 | try {
107 | Thread.sleep(5000);
108 | } catch (InterruptedException e) {
109 | e.printStackTrace();
110 | }
111 | }
112 | }
113 |
114 | private void enqueueItemIfNotPreviouslySeen(Post p) {
115 | String thisPostId = p.getId();
116 | boolean sawBefore = false;
117 |
118 | Iterator iter = prevSeenItemIds.iterator();
119 | while (iter.hasNext()) {
120 | String seenId = (String) iter.next();
121 | if (thisPostId.equals(seenId)) {
122 | sawBefore = true;
123 | break;
124 | }
125 | }
126 |
127 | if (! sawBefore) {
128 | prevSeenItemIds.add(thisPostId);
129 | itemQueue.offer(p.getMessage());
130 | } // on the other hand, if we saw it before then we do nothing... just ignore it
131 | }
132 |
133 | };
134 | }
135 |
136 |
137 | private Connection getPostStream() {
138 | return facebookClient.fetchConnection(
139 | "search",
140 | Post.class,
141 | Parameter.with("q", queryString),
142 | Parameter.with("since", lastQueryTime),
143 | Parameter.with("type", "post"));
144 | }
145 |
146 |
147 | @Override
148 | public Object getNextItemIfAvailable() {
149 | return itemQueue.poll();
150 | }
151 |
152 | private static String getFormattedDate(Date date) {
153 | String str;
154 | SimpleDateFormat sdf = GMT_DATE_FORMAT;
155 | return sdf.format(date);
156 | }
157 |
158 | }
--------------------------------------------------------------------------------
/esper+storm+kafka/src/main/java/ServerAndThreadCoordinationUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/28/13
4 | * Time: 2:20 PM
5 | */
6 |
7 |
8 | import java.io.*;
9 | import java.net.Socket;
10 | import java.util.Date;
11 | import java.util.Random;
12 | import java.util.Timer;
13 | import java.util.TimerTask;
14 | import java.util.concurrent.CountDownLatch;
15 |
16 | public class ServerAndThreadCoordinationUtils {
17 |
18 | public static void main(String[] args) {
19 | System.out.println("START");
20 |
21 | Timer timer = ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(1000 *20);
22 | Thread thread = new Thread(
23 | new Runnable() {
24 | @Override
25 | public void run() {
26 | while (true) {
27 | System.out.println("JUNK-" + new Random().nextInt());
28 | }
29 | }
30 | },
31 | "threadBoy"
32 | );
33 | thread.start();
34 | }
35 |
36 | public static final String SENTINEL_FILE_PATH = "/tmp/go";
37 |
38 | /**
39 | * Sets up a process termination task that will trigger if the given number of milliseconds
40 | * elapses and the test has not finished yet. We exit the JVM rather than just throwing an
41 | * exception because exceptions might be swallowed in the reams of output that could be produced
42 | * by Kafka and Storm servers that are running on threads that would not be stopped if we limited
43 | * ourselves to just throwing an exception.
44 | */
45 | public static Timer setMaxTimeToRunTimer(int millisecs) {
46 | Date timeLimit =
47 | new Date(new Date().getTime() + millisecs);
48 | Timer timer = new Timer();
49 |
50 | timer.schedule(new TimerTask() {
51 |
52 | @Override
53 | public void run() {
54 | for (int i = 0; i < 1000; i++) {
55 | System.out.println("aborting test ! Took too long");
56 | }
57 | System.out.flush();
58 | System.exit(-1);
59 | }
60 | }, timeLimit);
61 |
62 | return timer;
63 | }
64 |
65 |
66 | /**
67 | * Run in a tight sleep/wake loop until sentinel file (by default '/tmp/go') comes into
68 | * existence. We use this method in cases where we want to pause the flow of a test
69 | * but still be able to look around within zookeeper. If we were to merely pause in the
70 | * debugger then when we tried to connect to zookeeper to look around we would find the
71 | * server to be unresponsive (since the debugger pauses the whole process.) But if we use
72 | * the method below the zookeeper thread will still get some CPU cycles so we can connect to
73 | * it and examine its structure.
74 | */
75 | public static void pauseUntil(String path) {
76 | if (path == null) {
77 | path = SENTINEL_FILE_PATH;
78 | }
79 | boolean fileExists = false;
80 | while (!fileExists) {
81 | File pauseFile = new File(path);
82 | if (!pauseFile.exists()) {
83 | try {
84 | Thread.sleep(500);
85 | } catch (InterruptedException e) {
86 | e.printStackTrace();
87 | }
88 | } else {
89 | fileExists = true;
90 | }
91 | }
92 | }
93 |
94 | public static void removePauseSentinelFile() {
95 | File sentinel = new File(SENTINEL_FILE_PATH);
96 | //noinspection ResultOfMethodCallIgnored
97 | sentinel.delete();
98 | if (sentinel.exists()) {
99 | throw new RuntimeException("Could not delete sentinel file");
100 | }
101 |
102 | }
103 |
104 |
105 | public static String send4LetterWord(String host, int port, String cmd)
106 | throws IOException {
107 | System.out.println("connecting to " + host + " " + port);
108 | Socket sock = new Socket(host, port);
109 | BufferedReader reader = null;
110 | try {
111 | OutputStream outstream = sock.getOutputStream();
112 | outstream.write(cmd.getBytes());
113 | outstream.flush();
114 | // this replicates NC - close the output stream before reading
115 | sock.shutdownOutput();
116 |
117 | reader =
118 | new BufferedReader(
119 | new InputStreamReader(sock.getInputStream()));
120 | StringBuilder sb = new StringBuilder();
121 | String line;
122 | while ((line = reader.readLine()) != null) {
123 | sb.append(line + "\n");
124 | }
125 | return sb.toString();
126 | } finally {
127 | sock.close();
128 | if (reader != null) {
129 | reader.close();
130 | }
131 | }
132 | }
133 |
134 | public static boolean waitForServerUp(String host, int port, long timeout) {
135 | long start = System.currentTimeMillis();
136 | while (true) {
137 | try {
138 | // if there are multiple hostports, just take the first one
139 | String result = send4LetterWord(host, port, "stat");
140 | System.out.println("result of send: " + result);
141 | if (result.startsWith("Zookeeper version:")) {
142 | return true;
143 | }
144 | } catch (IOException e) {
145 | // ignore as this is expected
146 | System.out.println("server " + host + ":" + port + " not up " + e);
147 | }
148 |
149 | if (System.currentTimeMillis() > start + timeout) {
150 | break;
151 | }
152 | try {
153 | Thread.sleep(250);
154 | } catch (InterruptedException e) {
155 | // ignore
156 | }
157 | }
158 | return false;
159 | }
160 |
161 | public static void await(CountDownLatch latch) {
162 | try {
163 | latch.await();
164 | } catch (InterruptedException e) {
165 | e.printStackTrace();
166 | System.out.println("FATAL ERROR");
167 | System.exit(-1);
168 | }
169 | }
170 |
171 |
172 | public static void countDown(CountDownLatch latch) {
173 | try {
174 | latch.countDown();
175 | } catch (Exception e) {
176 | e.printStackTrace();
177 | System.out.println("FATAL ERROR");
178 | System.exit(-1);
179 | }
180 | }
181 |
182 | }
183 |
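
Note: the helpers above are typically combined like this: install the abort timer, wait for the LocalCluster's zookeeper, then use a latch to hand off between the thread that boots an in-process server and the main test thread. A small sketch; the port and timeout values are illustrative.

import java.util.Timer;
import java.util.concurrent.CountDownLatch;

public class CoordinationSketch {
    public static void main(String[] args) {
        Timer timer = ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(60 * 1000); // abort JVM after a minute
        ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * 1000);  // wait for zookeeper

        final CountDownLatch serverReady = new CountDownLatch(1);
        new Thread(new Runnable() {
            @Override
            public void run() {
                // ... start an in-process Kafka server and create topics here ...
                ServerAndThreadCoordinationUtils.countDown(serverReady);
            }
        }, "serverThread").start();

        ServerAndThreadCoordinationUtils.await(serverReady);  // block until the server thread signals readiness
        timer.cancel();                                        // finished in time; cancel the abort timer
    }
}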
--------------------------------------------------------------------------------
/kafka/src/main/java/TestKafkaProducer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/20/13
4 | * Time: 8:54 PM
5 | */
6 |
7 |
8 | import com.google.common.collect.ImmutableMap;
9 | import com.google.common.io.Files;
10 | import com.netflix.curator.test.TestingServer;
11 | import kafka.consumer.ConsumerConfig;
12 | import kafka.consumer.ConsumerIterator;
13 | import kafka.consumer.KafkaStream;
14 | import kafka.javaapi.consumer.ConsumerConnector;
15 | import kafka.javaapi.producer.Producer;
16 | import kafka.javaapi.producer.ProducerData;
17 | import kafka.producer.ProducerConfig;
18 | import kafka.serializer.StringDecoder;
19 | import kafka.server.KafkaConfig;
20 | import kafka.server.KafkaServer;
21 | import org.apache.commons.lang.StringUtils;
22 |
23 | import java.io.File;
24 | import java.io.IOException;
25 | import java.util.*;
26 |
27 |
28 | class TestKafkaProducer {
29 | private String topic = "";
30 | private String zkConnectString = "";
31 | private List messages = null;
32 | private List messagesReceived = new ArrayList();
33 | private Producer producer;
34 | private KafkaServer kafkaServer;
35 | private Thread kafkaMessageReceiverThread;
36 |
37 | private static final String RANDOM_GROUP_ID = "RANDOM-GROUP-ID";
38 |
39 | public static void main(String[] args) {
40 | TestKafkaProducer tkp = null;
41 |
42 | boolean success = false;
43 |
44 | try (TestingServer zookeeperTestServer = new TestingServer()) {
45 |
46 | final String theTopic = "someTopic-" + new Random().nextInt();
47 |
48 | tkp = new TestKafkaProducer(
49 | theTopic,
50 | "localhost:" + zookeeperTestServer.getPort(),
51 | 10);
52 |
53 | tkp.sendMessages();
54 |
55 | tkp.consumeMessages();
56 | tkp.shutdownConsumers();
57 | tkp.kafkaMessageReceiverThread.join();
58 | tkp.shutdown();
59 |
60 | String got = StringUtils.join(tkp.messagesReceived, "+");
61 | String expected = StringUtils.join(tkp.messages, "+");
62 | if (got.equals(expected)) {
63 | success = true;
64 | }
65 | } catch (Exception e) {
66 | e.printStackTrace();
67 | }
68 | if (! success) {
69 | throw new RuntimeException("oh rats... we failed");
70 | }
71 | System.out.println("SUCCESS -- WE ARE HAPPY !...");
72 | }
73 |
74 | private void consumeMessages() {
75 | final ConsumerConnector consumer =
76 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
77 | final Map topicCountMap = ImmutableMap.of(topic, 1);
78 | final Map>> consumerMap;
79 | consumerMap = consumer.createMessageStreams(topicCountMap, new StringDecoder());
80 |
81 | final KafkaStream stream = consumerMap.get(topic).get(0);
82 | final ConsumerIterator iterator = stream.iterator();
83 |
84 | kafkaMessageReceiverThread = new Thread(
85 | new Runnable() {
86 | @Override
87 | public void run() {
88 | while (iterator.hasNext()) {
89 | String msg = iterator.next().message();
90 | msg = msg == null ? "" : msg;
91 | System.out.println("got message" + msg);
92 | if (msg.equals("SHUTDOWN")) {
93 | consumer.shutdown();
94 | return;
95 | }
96 | messagesReceived.add(msg);
97 | }
98 | }
99 | },
100 | "kafkaMessageReceiverThread"
101 | );
102 | kafkaMessageReceiverThread.start();
103 |
104 | }
105 |
106 |
107 | private ConsumerConfig createConsumerConfig() {
108 | Properties props = new Properties();
109 | props.put("zk.connect", this.zkConnectString);
110 | props.put("groupid", RANDOM_GROUP_ID);
111 | props.put("zk.sessiontimeout.ms", "400");
112 | props.put("zk.synctime.ms", "200");
113 | props.put("autocommit.interval.ms", "1000");
114 | props.put("serializer.class", "kafka.serializer.StringEncoder");
115 |
116 | return new ConsumerConfig(props);
117 |
118 | }
119 |
120 | public void shutdownConsumers() {
121 | sendMessage("SHUTDOWN");
122 | }
123 |
124 |
125 | public void shutdown() {
126 | producer.close();
127 | kafkaServer.shutdown();
128 | }
129 |
130 |
131 | TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException {
132 | final Random generator = new Random();
133 |
134 | if (numRandomMessages <= 0) {
135 | throw new RuntimeException("no messages defined for test");
136 | }
137 |
138 | messages = new ArrayList();
139 | for (int i = 0; i < numRandomMessages; i++) {
140 | int num1 = Math.abs(generator.nextInt());
141 | int num2 = Math.abs(generator.nextInt());
142 | String messageToSend = num1 + ":-(a)-" + num2;
143 | messages.add(messageToSend);
144 | }
145 |
146 |
147 | this.topic = topic;
148 |
149 | this.zkConnectString = zkConnectString;
150 | initProducer(zkConnectString);
151 | }
152 |
153 |
154 | public void sendMessages() throws IOException {
155 | for (String msg : messages) {
156 | sendMessage(msg);
157 | }
158 | }
159 |
160 | private void sendMessage(String msg) {
161 | ProducerData data = new ProducerData(topic, msg);
162 | producer.send(data);
163 | }
164 |
165 | private void initProducer(String zkConnectString) throws IOException {
166 | kafkaServer = startKafkaServer();
167 |
168 |
169 | Properties props = new Properties();
170 | props.put("zk.connect", zkConnectString);
171 | props.put("serializer.class", "kafka.serializer.StringEncoder");
172 | props.put("producer.type", "async");
173 | props.put("batch.size", "1");
174 | ProducerConfig config = new ProducerConfig(props);
175 |
176 | producer = new Producer(config);
177 | }
178 |
179 | private KafkaServer startKafkaServer() {
180 | File tmpDir = Files.createTempDir();
181 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1);
182 | KafkaConfig kafkaConfig = new KafkaConfig(props);
183 |
184 | kafkaServer = new KafkaServer(kafkaConfig);
185 | kafkaServer.startup();
186 | return kafkaServer;
187 | }
188 |
189 |
190 | private Properties createProperties(String logDir, int port, int brokerId) {
191 | Properties properties = new Properties();
192 | properties.put("port", port + "");
193 | properties.put("brokerid", brokerId + "");
194 | properties.put("log.dir", logDir);
195 | properties.put("zk.connect", this.zkConnectString);
196 | return properties;
197 | }
198 |
199 | }
200 |
--------------------------------------------------------------------------------
/kafka-0.8.x/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | com.buildlackey
6 | kafka-producer-to-consumer-example
7 | kafka-producer-to-consumer-example
8 | 1.0
9 | jar
10 | Simple Kafka Produce/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers (Kafka 0.8.x)
11 |
12 |
13 | leadLackey
14 | Chris Bedford
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | org.scala-lang
24 | scala-library
25 | 2.9.3
26 |
27 |
28 |
29 |
30 |
31 | org.apache.kafka
32 | kafka_2.9.2
33 | 0.8.0-beta1
34 |
35 |
36 | com.sun.jmx
37 | jmxri
38 |
39 |
40 |
41 | com.sun.jdmk
42 | jmxtools
43 |
44 |
45 |
46 | javax.jms
47 | jms
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
60 |
61 | com.yammer.metrics
62 | metrics-core
63 | 2.2.0
64 |
65 |
66 | com.101tec
67 | zkclient
68 | 0.3
69 |
70 |
71 |
72 | log4j
73 | log4j
74 |
75 |
76 |
77 |
78 |
79 |
80 | net.sf.jopt-simple
81 | jopt-simple
82 | 4.5
83 |
84 |
85 |
86 |
87 |
88 | com.netflix.curator
89 | curator-test
90 | 1.2.5
91 |
92 |
93 |
94 | org.slf4j
95 | slf4j-log4j12
96 |
97 |
98 | log4j
99 | log4j
100 |
101 |
102 |
103 |
104 |
105 |
106 | org.apache.zookeeper
107 | zookeeper
108 | 3.4.1
109 |
110 |
111 | com.sun.jmx
112 | jmxri
113 |
114 |
115 |
116 | com.sun.jdmk
117 | jmxtools
118 |
119 |
120 |
121 | javax.jms
122 | jms
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 | org.tomdz.storm
132 | storm-esper
133 | 0.8.1-SNAPSHOT
134 |
135 |
136 |
137 | org.testng
138 | testng
139 | 6.1.1
140 | test
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 | 2.2.1
154 |
155 |
156 |
157 |
158 |
159 | org.apache.maven.plugins
160 | maven-enforcer-plugin
161 | 1.1.1
162 |
163 |
164 | enforce-versions
165 |
166 | enforce
167 |
168 |
169 |
170 |
171 | 2.2.1
172 |
173 |
174 | 1.7
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 | org.apache.maven.plugins
184 | maven-compiler-plugin
185 | 3.1
186 |
187 | 1.7
188 | 1.7
189 |
190 |
191 |
192 |
193 | org.apache.maven.plugins
194 | maven-jar-plugin
195 | 2.4
196 |
197 |
198 | org.apache.maven.plugins
199 | maven-source-plugin
200 | 2.2
201 |
202 | true
203 |
204 |
205 |
206 | create-source-jar
207 |
208 | jar-no-fork
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 | clojars
218 | http://clojars.org/repo/
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
--------------------------------------------------------------------------------
/kafka-0.8.x/src/main/java/TestKafkaProducer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 10/20/13
4 | * Time: 8:54 PM
5 | */
6 |
7 |
8 | import com.google.common.collect.ImmutableMap;
9 | import com.google.common.io.Files;
10 | import com.netflix.curator.test.TestingServer;
11 | import kafka.consumer.ConsumerConfig;
12 | import kafka.consumer.ConsumerIterator;
13 | import kafka.consumer.KafkaStream;
14 | import kafka.javaapi.consumer.ConsumerConnector;
15 | import kafka.javaapi.producer.Producer;
16 | import kafka.producer.KeyedMessage;
17 | import kafka.producer.ProducerConfig;
18 | import kafka.serializer.StringDecoder;
19 | import kafka.server.KafkaConfig;
20 | import kafka.server.KafkaServer;
21 | import kafka.utils.MockTime;
22 | import kafka.utils.VerifiableProperties;
23 | import org.apache.commons.lang.StringUtils;
24 |
25 | import java.io.File;
26 | import java.io.IOException;
27 | import java.util.*;
28 |
29 |
30 | class TestKafkaProducer {
31 | private String topic = "";
32 | private String zkConnectString = "";
33 | private List messages = null;
34 | private List messagesReceived = new ArrayList();
35 | private Producer producer;
36 | private KafkaServer kafkaServer;
37 | private Thread kafkaMessageReceiverThread;
38 |
39 | private static final String RANDOM_GROUP_ID = "RANDOM-GROUP-ID";
40 |
41 | public static void main(String[] args) {
42 | TestKafkaProducer tkp = null;
43 |
44 | boolean success = false;
45 |
46 | try (TestingServer zookeeperTestServer = new TestingServer()) {
47 |
48 | final String theTopic = "someTopic-" + new Random().nextInt();
49 |
50 | tkp = new TestKafkaProducer(
51 | theTopic,
52 | "localhost:" + zookeeperTestServer.getPort(),
53 | 4400);
54 |
55 | tkp.sendMessages();
56 | tkp.consumeMessages();
57 |
58 | try { // Give consumer some time...
59 | tkp.shutdownConsumers();
60 | Thread.sleep(1000);
61 | tkp.kafkaMessageReceiverThread.join();
62 | tkp.shutdown();
63 | } catch (Exception e) {
64 | System.out.println("Error in shut down. we will ignore it as long as our messages came through");
65 | e.printStackTrace();
66 | }
67 |
68 | String got = StringUtils.join(tkp.messagesReceived, "+");
69 | String expected = StringUtils.join(tkp.messages, "+");
70 | if (got.equals(expected)) {
71 | success = true;
72 | }
73 | } catch (Exception e) {
74 | e.printStackTrace();
75 | }
76 | if (! success) {
77 | throw new RuntimeException("oh rats... we failed");
78 | }
79 | System.out.println("SUCCESS -- WE ARE HAPPY !...");
80 | }
81 |
82 | private void consumeMessages() {
83 | final ConsumerConnector consumer =
84 | kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
85 | final Map topicCountMap =
86 | ImmutableMap.of(topic, 1);
87 | final StringDecoder decoder =
88 | new StringDecoder(new VerifiableProperties());
89 | final Map>> consumerMap =
90 | consumer.createMessageStreams(topicCountMap, decoder, decoder);
91 | final KafkaStream stream =
92 | consumerMap.get(topic).get(0);
93 | final ConsumerIterator iterator = stream.iterator();
94 |
95 | kafkaMessageReceiverThread = new Thread(
96 | new Runnable() {
97 | @Override
98 | public void run() {
99 | while (iterator.hasNext()) {
100 | String msg = iterator.next().message();
101 | msg = msg == null ? "" : msg;
102 | System.out.println("got message" + msg);
103 | if (msg.equals("SHUTDOWN")) {
104 | consumer.shutdown();
105 | return;
106 | }
107 | messagesReceived.add(msg);
108 | }
109 | }
110 | },
111 | "kafkaMessageReceiverThread"
112 | );
113 | kafkaMessageReceiverThread.start();
114 |
115 | }
116 |
117 |
118 | private ConsumerConfig createConsumerConfig() {
119 | Properties props = new Properties();
120 | props.put("zookeeper.connect", this.zkConnectString);
121 | props.put("group.id", RANDOM_GROUP_ID);
122 | props.put("zk.sessiontimeout.ms", "400");
123 | props.put("zk.synctime.ms", "200");
124 | props.put("autocommit.interval.ms", "1000");
125 | props.put("serializer.class", "kafka.serializer.StringEncoder");
126 |
127 | return new ConsumerConfig(props);
128 |
129 | }
130 |
131 | public void shutdownConsumers() {
132 | sendMessage("SHUTDOWN");
133 | }
134 |
135 |
136 | public void shutdown() {
137 | producer.close();
138 | try { // Give producer some time...
139 | Thread.sleep(1000);
140 | } catch (InterruptedException e) {
141 | e.printStackTrace();
142 | }
143 |
144 | kafkaServer.shutdown();
145 | kafkaServer.awaitShutdown();
146 | }
147 |
148 |
149 | TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException {
150 | final Random generator = new Random();
151 |
152 | if (numRandomMessages <= 0) {
153 | throw new RuntimeException("no messages defined for test");
154 | }
155 |
156 | messages = new ArrayList();
157 | for (int i = 0; i < numRandomMessages; i++) {
158 | int num1 = Math.abs(generator.nextInt());
159 | int num2 = Math.abs(generator.nextInt());
160 | String messageToSend = num1 + ":-(a)-" + num2;
161 | messages.add(messageToSend);
162 | }
163 |
164 |
165 | this.topic = topic;
166 |
167 | this.zkConnectString = zkConnectString;
168 | initProducer(zkConnectString);
169 | }
170 |
171 |
172 | public void sendMessages() throws IOException {
173 | for (String msg : messages) {
174 | sendMessage(msg);
175 | }
176 | }
177 |
178 | private void sendMessage(String msg) {
179 | KeyedMessage data = new KeyedMessage(topic, msg);
180 | producer.send(data);
181 | }
182 |
183 | private void initProducer(String zkConnectString) throws IOException {
184 | kafkaServer = startKafkaServer();
185 | Properties props = new Properties();
186 | props.put("metadata.broker.list", "localhost:9092");
187 | props.put("serializer.class", "kafka.serializer.StringEncoder");
188 | props.put("producer.type", "async");
189 | props.put("batch.size", "1");
190 | ProducerConfig config = new ProducerConfig(props);
191 |
192 | producer = new Producer(config);
193 | }
194 |
195 | private KafkaServer startKafkaServer() {
196 | File tmpDir = Files.createTempDir();
197 | Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1);
198 | KafkaConfig kafkaConfig = new KafkaConfig(props);
199 |
200 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime());
201 |
202 | kafkaServer.startup();
203 | return kafkaServer;
204 | }
205 |
206 |
207 |
208 | private Properties createProperties(String logDir, int port, int brokerId) {
209 | Properties properties = new Properties();
210 | properties.put("port", port + "");
211 | properties.put("broker.id", brokerId + "");
212 | properties.put("log.dir", logDir);
213 | properties.put("zookeeper.connect", this.zkConnectString);
214 | return properties;
215 | }
216 |
217 |
218 | }
219 |
--------------------------------------------------------------------------------
/esper+storm+kafka/src/test/java/AbstractStormWithKafkaTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Author: cbedford
3 | * Date: 11/1/13
4 | * Time: 5:00 PM
5 | */
6 |
7 |
8 | import backtype.storm.Config;
9 | import backtype.storm.LocalCluster;
10 | import backtype.storm.generated.StormTopology;
11 | import com.google.common.io.Files;
12 | import kafka.admin.CreateTopicCommand;
13 | import kafka.server.KafkaConfig;
14 | import kafka.server.KafkaServer;
15 | import kafka.utils.MockTime;
16 | import org.testng.annotations.AfterClass;
17 | import org.testng.annotations.BeforeClass;
18 |
19 | import java.io.File;
20 | import java.util.Properties;
21 | import java.util.Timer;
22 | import java.util.concurrent.CountDownLatch;
23 |
24 |
25 | /**
26 | * Simplifies testing of Storm components that consume or produce data items from or to Kafka.
27 | * Operates via a 'template method' series of steps, wherein the BeforeClass method sets up a
28 | * Storm Local cluster, then waits for the zookeeper instance started by that cluster to 'boot up',
29 | * then starts an-process Kafka server using that zookeeper, and then creates a topic whose
30 | * name is derived from the name of the base class test.
31 | *
32 | * Subclasses only need to implement the abstract createTopology() method (and perhaps
33 | * override 'verifyResults())' which is currently kind of hard coded to our first two subclasses of
34 | * this base class.
35 | */
36 | public abstract class AbstractStormWithKafkaTest {
37 | public static String[] sentences = new String[]{
38 | "one dog9 - saw the fox over the moon",
39 | "two cats9 - saw the fox over the moon",
40 | "four bears9 - saw the fox over the moon",
41 | "five goats9 - saw the fox over the moon",
42 | "SHUTDOWN",
43 | };
44 | protected final String BROKER_CONNECT_STRING = "localhost:9092"; // kafka broker server/port info
45 | private final String topicName = this.getClass().getSimpleName() + "_topic_" + getRandomInteger(1000);
46 | protected final String topologyName = this.getClass().getSimpleName() + "-topology" + getRandomInteger(1000);
47 |
48 | protected LocalCluster cluster = null;
49 |
50 | private final File kafkaWorkingDir = Files.createTempDir();
51 | private final CountDownLatch kafkaTopicCreatedLatch = new CountDownLatch(1);
52 | private KafkaServer kafkaServer = null;
53 | private Timer timer;
54 | private Thread kafkaServerThread = null;
55 |
56 | @BeforeClass(alwaysRun = true)
57 | protected void setUp() {
58 | timer = ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(getMaxAllowedToRunMillisecs());
59 | ServerAndThreadCoordinationUtils.removePauseSentinelFile();
60 | cluster = new LocalCluster();
61 | ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * KafkaOutputBoltTest.SECOND); // Wait for zookeeper to come up
62 |
63 | /*
64 | * Below we start up kafka and create topic in a separate thread. If we don't do this then we
65 | * get very bizarre behavior, such as tuples never being emitted from our spouts and bolts
66 | * as expected. Haven't figured out why this is needed, but doing it because that's what makes
67 | * things work.
68 | */
69 | kafkaServerThread = new Thread(
70 | new Runnable() {
71 | @Override
72 | public void run() {
73 | startKafkaServer();
74 | createTopic(getTopicName());
75 | if (getSecondTopicName() != null) {
76 | createTopic(getSecondTopicName());
77 | }
78 | ServerAndThreadCoordinationUtils.countDown(kafkaTopicCreatedLatch);
79 | }
80 | },
81 | "kafkaServerThread"
82 | );
83 | kafkaServerThread.start();
84 | ServerAndThreadCoordinationUtils.await(kafkaTopicCreatedLatch);
85 | }
86 |
87 |
88 | public String getSecondTopicName() {
89 | return null;
90 | }
91 |
92 |
93 | abstract protected int getMaxAllowedToRunMillisecs();
94 |
95 | @AfterClass(alwaysRun = true)
96 | protected void tearDown() {
97 | try {
98 | kafkaServerThread.join();
99 | } catch (InterruptedException e) {
100 | e.printStackTrace();
101 | }
102 |
103 | cluster.shutdown();
104 | kafkaServer.shutdown();
105 | timer.cancel();
106 | }
107 |
108 | protected void createTopic(String topicName) {
109 | String[] arguments = new String[6];
110 | arguments[0] = "--zookeeper";
111 | arguments[1] = "localhost:2000";
112 | arguments[2] = "--partition";
113 | arguments[3] = "1";
114 | arguments[4] = "--topic";
115 | arguments[5] = topicName;
116 |
117 | CreateTopicCommand.main(arguments);
118 | }
119 |
120 | protected void startKafkaServer() {
121 | Properties props = createProperties(kafkaWorkingDir.getAbsolutePath(), 9092, 1);
122 | KafkaConfig kafkaConfig = new KafkaConfig(props);
123 |
124 | kafkaServer = new KafkaServer(kafkaConfig, new MockTime());
125 | kafkaServer.startup();
126 | }
127 |
128 | protected String getZkConnect() { // Uses zookeeper created by LocalCluster
129 |
130 | return "localhost:2000";
131 | }
132 |
133 | protected int getRandomInteger(int max) {
134 | return (int) Math.floor((Math.random() * max));
135 | }
136 |
137 | private Properties createProperties(String logDir, int port, int brokerId) {
138 | Properties properties = new Properties();
139 | properties.put("port", port + "");
140 | properties.put("broker.id", brokerId + "");
141 | properties.put("log.dir", logDir);
142 | properties.put("zookeeper.connect", getZkConnect());
143 | return properties;
144 | }
145 |
146 |
147 | protected abstract StormTopology createTopology();
148 |
149 |
150 | /**
151 | * @return a Config object with time outs set very high so that the storm to zookeeper
152 | * session will be kept alive, even as we are rooting around in a debugger.
153 | */
154 | public static Config getDebugConfigForStormTopology() {
155 | Config config = new Config();
156 | config.setDebug(true);
157 | config.put(Config.STORM_ZOOKEEPER_CONNECTION_TIMEOUT, 900 * 1000);
158 | config.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 900 * 1000);
159 | return config;
160 | }
161 |
162 | public void verifyResults(String topic, int expectedCount) {
163 | if (topic == null) {
164 | topic = this.getTopicName();
165 | }
166 | if (expectedCount == -1) {
167 | expectedCount = sentences.length;
168 | }
169 |
170 | int foundCount = 0;
171 | KafkaMessageConsumer msgConsumer = null;
172 | try {
173 | msgConsumer = new KafkaMessageConsumer(getZkConnect(), topic);
174 | msgConsumer.consumeMessages();
175 |
176 | foundCount = 0;
177 | for (String msg : msgConsumer.getMessagesReceived()) {
178 | System.out.println("message: " + msg);
179 | if (msg.contains("cat") ||
180 | msg.contains("dog") ||
181 | msg.contains("bear") ||
182 | msg.contains("goat") ||
183 | msg.contains("SHUTDOWN")) {
184 | foundCount++;
185 | }
186 | }
187 | } catch (Exception e) {
188 | e.printStackTrace();
189 | }
190 |
191 | if (foundCount != expectedCount) {
192 | if (msgConsumer != null) {
193 | System.out.println("Did not receive expected messages. Got: " +
194 | msgConsumer.getMessagesReceived());
195 | }
196 |
197 | throw new RuntimeException(">>>>>>>>>>>>>>>>>>>> Did not receive expected messages");
198 | }
199 | }
200 |
201 | protected void submitTopology() {
202 |
203 | final Config conf = getDebugConfigForStormTopology();
204 |
205 | cluster.submitTopology(topologyName, conf, createTopology());
206 | }
207 |
208 | public String getTopicName() {
209 | return topicName;
210 | }
211 | }
212 |
--------------------------------------------------------------------------------
/storm+kafka/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | com.buildlackey
6 | kafka-spout-example
7 | kafka-spout-example
8 | 1.0
9 | jar
10 | Simple Kafka Spout Example
11 |
12 |
13 | leadLackey
14 | Chris Bedford
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | com.jayway.awaitility
24 | awaitility
25 | 1.3.5
26 |
27 |
28 |
29 |
30 | storm
31 | storm
32 | 0.9.0-rc2
33 |
34 |
35 | storm
36 | storm-core
37 | 0.9.0-rc2
38 |
39 |
40 |
41 |
42 |
43 |
67 |
68 |
69 | org.scala-lang
70 | scala-library
71 | 2.9.3
72 |
73 |
74 |
75 |
76 |
77 | org.apache.kafka
78 | kafka_2.9.2
79 | 0.8.0-beta1
80 |
81 |
82 | com.sun.jmx
83 | jmxri
84 |
85 |
86 |
87 | com.sun.jdmk
88 | jmxtools
89 |
90 |
91 |
92 | javax.jms
93 | jms
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
106 |
107 | com.yammer.metrics
108 | metrics-core
109 | 2.2.0
110 |
111 |
112 | com.101tec
113 | zkclient
114 | 0.3
115 |
116 |
117 | net.sf.jopt-simple
118 | jopt-simple
119 | 4.5
120 |
121 |
122 |
123 |
124 |
125 | com.netflix.curator
126 | curator-test
127 | 1.2.5
128 |
129 |
130 |
131 | org.slf4j
132 | slf4j-log4j12
133 |
134 |
135 | log4j
136 | log4j
137 |
138 |
139 |
140 |
141 |
142 | org.apache.zookeeper
143 | zookeeper
144 | 3.3.3
145 |
146 |
147 | com.sun.jmx
148 | jmxri
149 |
150 |
151 |
152 | com.sun.jdmk
153 | jmxtools
154 |
155 |
156 |
157 | javax.jms
158 | jms
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 | org.testng
167 | testng
168 | 6.1.1
169 | test
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 | 2.2.1
183 |
184 |
185 |
186 |
187 |
188 | org.apache.maven.plugins
189 | maven-enforcer-plugin
190 | 1.1.1
191 |
192 |
193 | enforce-versions
194 |
195 | enforce
196 |
197 |
198 |
199 |
200 | 2.2.1
201 |
202 |
203 | 1.6
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 | org.apache.maven.plugins
213 | maven-compiler-plugin
214 | 3.1
215 |
216 | 1.6
217 | 1.6
218 |
219 |
220 |
221 |
222 | org.apache.maven.plugins
223 | maven-jar-plugin
224 | 2.4
225 |
226 |
227 | org.apache.maven.plugins
228 | maven-source-plugin
229 | 2.2
230 |
231 | true
232 |
233 |
234 |
235 | create-source-jar
236 |
237 | jar-no-fork
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 | clojars
247 | http://clojars.org/repo/
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/kafka/README.md:
--------------------------------------------------------------------------------
1 | # Kafka Producer/Consumer Example
2 |
3 | A Simple Kafka Producer/Consumer Example With In-Memory Kafka and Zookeeper Test Fixture Servers
4 |
5 |
6 | ## Description
7 |
8 | This example illustrates how to:
9 |
10 | * unit test message passing between Kafka producers and consumers
11 | using basic String serialization.
12 |
13 | * use Netflix's curator API to instantiate an in-process zookeeper
14 | server, together with an in-memory instance of the
15 | kafka.server.KafkaServer class
16 |
17 | * ensure that all threads launched by Kafka and zookeeper are cleanly
18 | shut down (this seems to be working pretty well so far.)
19 |
20 |
21 |
22 | By keeping all test fixtures in memory (rather than depending on out-of-process servers
23 | being 'somehow' set up before the test) we make it very easy to get the basics of
24 | Kafka working in the environments of other developers and/or build systems.
25 |
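As a compressed sketch of what the in-memory fixture approach looks like (variable names here are for illustration only; Listing 1 below shows the real flow):

    // Sketch only: curator's TestingServer picks a random free port and a temp data dir for zookeeper.
    try (TestingServer zookeeperTestServer = new TestingServer()) {
        String zkConnect = "localhost:" + zookeeperTestServer.getPort();
        // start the in-memory kafka.server.KafkaServer, point the producer's and
        // consumer's "zk.connect" property at zkConnect, then run the test
    }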
26 |
27 | The main problem with the initial cut of this test program is that I had to use some very strange
28 | dependencies in my maven pom.xml in order to be able to get everything working through a public
29 | repo. (See 'Current Issues', below.)
30 |
31 |
32 |
33 |
34 | ## Building and Running
35 |
36 | After downloading the project, cd to the directory in which this README is located, then issue the 2 commands:
37 |
38 | mvn clean install
39 |
40 | mvn exec:java -Dexec.mainClass=TestKafkaProducer
41 |
42 | If you see 'SUCCESS' printed out towards the very end, then you know everything is working.
43 |
44 |
45 |
46 | ## Implementation Details
47 |
48 | The test program pumps a small set of random messages from a producer to a consumer, and
49 | asserts that the messages received are identical to the messages sent (lines 22-26, below.)
50 |
51 |
52 | The main method creates an instance of Netflix's curator TestingServer class
53 | with default parameters which cause it to select a random unused port, as well as a
54 | random temp directory for the zookeeper files (line 6). On line 12 we interrogate
55 | zookeeperTestServer for its port to construct the zookeeper connect string
56 | ("zk.connect" property) used by both the Producer (created in initProducer)
57 | and the consumer (whose connect properties are created in createConsumerConfig, at line 70.)
58 |
59 |
60 |
61 |
62 | Listing 1, Main Routine
63 |
64 | 1 public static void main(String[] args) {
65 | 2 TestKafkaProducer tkp = null;
66 | 3
67 | 4 boolean success = false;
68 | 5
69 | 6 try (TestingServer zookeeperTestServer = new TestingServer()) {
70 | 7
71 | 8 final String theTopic = "someTopic-" + new Random().nextInt();
72 | 9
73 | 10 tkp = new TestKafkaProducer(
74 | 11 theTopic,
75 | 12 "localhost:" + zookeeperTestServer.getPort(),
76 | 13 10);
77 | 14
78 | 15 tkp.sendMessages();
79 | 16
80 | 17 tkp.consumeMessages();
81 | 18 tkp.shutdownConsumers();
82 | 19 tkp.kafkaMessageReceiverThread.join();
83 | 20 tkp.shutdown();
84 | 21
85 | 22 String got = StringUtils.join(tkp.messagesReceived, "+");
86 | 23 String expected = StringUtils.join(tkp.messages, "+");
87 | 24 if (got.equals(expected)) {
88 | 25 success = true;
89 | 26 }
90 | 27 } catch (Exception e) {
91 | 28 e.printStackTrace();
92 | 29 }
93 | 30 if (! success) {
94 | 31 throw new RuntimeException("oh rats... we failed");
95 | 32 }
96 | 33 System.out.println("SUCCESS - WE ARE HAPPY !...");
97 | 34 }
98 |
99 | ....
100 |
101 | 38 private void consumeMessages() {
102 | 39 final ConsumerConnector consumer =
103 | 40 kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
104 | 41          final Map<String, Integer> topicCountMap = ImmutableMap.of(topic, 1);
105 | 42          final Map<String, List<KafkaStream<String>>> consumerMap;
106 | 43 consumerMap = consumer.createMessageStreams(topicCountMap, new StringDecoder());
107 | 44
108 | 45          final KafkaStream<String> stream = consumerMap.get(topic).get(0);
109 | 46          final ConsumerIterator<String> iterator = stream.iterator();
110 | 47
111 | 48 kafkaMessageReceiverThread = new Thread(
112 | 49 new Runnable() {
113 | 50 @Override
114 | 51 public void run() {
115 | 52 while (iterator.hasNext()) {
116 | 53 String msg = iterator.next().message();
117 | 54 msg = msg == null ? "" : msg;
118 | 55 System.out.println("got message" + msg);
119 | 56 if (msg.equals("SHUTDOWN")) {
120 | 57 consumer.shutdown();
121 | 58 return;
122 | 59 }
123 | 60 messagesReceived.add(msg);
124 | 61 }
125 | 62 }
126 | 63 },
127 | 64 "kafkaMessageReceiverThread"
128 | 65 );
129 | 66 kafkaMessageReceiverThread.start();
130 | 67 }
131 | 68
132 | 69
133 | 70 private ConsumerConfig createConsumerConfig() {
134 | 71 Properties props = new Properties();
135 | 72 props.put("zk.connect", this.zkConnectString);
136 | 73 props.put("groupid", RANDOM_GROUP_ID);
137 | 74 props.put("zk.sessiontimeout.ms", "400");
138 | 75 props.put("zk.synctime.ms", "200");
139 | 76 props.put("autocommit.interval.ms", "1000");
140 | 77 props.put("serializer.class", "kafka.serializer.StringEncoder");
141 | 78
142 | 79 return new ConsumerConfig(props);
143 | 80 }
144 | 81
145 |
146 |
147 | The TestKafkaProducer constructor (line 83) sets up the producer in initProducer (line 117),
148 | and builds a list of random strings to send to the consumer (stored in the 'messages'
149 | member variable, at line 95). These messages are sent via sendMessages() at line 15
150 | (see Listing 1, above.)
151 |
152 |
153 | Listing 2, TestKafkaProducer Constructor
154 |
155 |
156 | 83 TestKafkaProducer(String topic, String zkConnectString, int numRandomMessages) throws IOException {
157 | 84 final Random generator = new Random();
158 | 85
159 | 86 if (numRandomMessages <= 0) {
160 | 87 throw new RuntimeException("no messages defined for test");
161 | 88 }
162 | 89
163 | 90          messages = new ArrayList<String>();
164 | 91 for (int i = 0; i < numRandomMessages; i++) {
165 | 92 int num1 = Math.abs(generator.nextInt());
166 | 93 int num2 = Math.abs(generator.nextInt());
167 | 94 String messageToSend = num1 + ":-(a)-" + num2;
168 | 95 messages.add(messageToSend);
169 | 96 }
170 | 97
171 | 98
172 | 99 this.topic = topic;
173 | 100
174 | 101 this.zkConnectString = zkConnectString;
175 | 102 initProducer(zkConnectString);
176 | 103 }
177 | 104
178 | 105
179 | 106 public void sendMessages() throws IOException {
180 | 107 for (String msg : messages) {
181 | 108 sendMessage(msg);
182 | 109 }
183 | 110 }
184 | 111
185 | 112 private void sendMessage(String msg) {
186 | 113          ProducerData<String, String> data = new ProducerData<String, String>(topic, msg);
187 | 114 producer.send(data);
188 | 115 }
189 | 116
190 | 117 private void initProducer(String zkConnectString) throws IOException {
191 | 118 kafkaServer = startKafkaServer();
192 | 119
193 | 120
194 | 121 Properties props = new Properties();
195 | 122 props.put("zk.connect", zkConnectString);
196 | 123 props.put("serializer.class", "kafka.serializer.StringEncoder");
197 | 124 props.put("producer.type", "async");
198 | 125 props.put("batch.size", "1");
199 | 126 ProducerConfig config = new ProducerConfig(props);
200 | 127
201 | 128          producer = new Producer<String, String>(config);
202 | 129 }
203 |
204 |
205 |
206 | Note that the sequence of events we follow after sending the messages is to launch a thread that
207 | consumes the messages (in consumeMessages at line 48.) We then get the consumer to shut down cleanly
208 | by sending it a 'poison pill'
209 | ( see: http://books.google.com/books?id=EK43StEVfJIC&pg=PT172&lpg=PT172&dq=shut+down+poison+pill+queue&source=bl&ots=un-zA8wMgs&sig=EWSRAdzaFYlCBGc4NoGh8-TunIw&hl=en&sa=X&ei=qelmUsCeF6muyQGW-4DgAg&ved=0CHQQ6AEwCA#v=onepage&q=shut%20down%20poison%20pill%20queue&f=false )
210 |
211 | This ensures that the consumer gets a chance to process all pending messages and then call 'consumer.shutdown()' to cleanly shut down.
212 | We make sure that the consumer has completed its shutdown by joining its thread (line 19), and only then do we shut down the producer
213 | (line 20.)
214 |
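To make the poison pill hand-off concrete, here are lines 18-20 of Listing 1 again, annotated. (The comment on shutdownConsumers() is an inference; that method is not shown above, but the consumer loop at lines 52-61 only exits once it sees the "SHUTDOWN" message.)

    tkp.shutdownConsumers();                  // presumably publishes the "SHUTDOWN" poison pill to the topic
    tkp.kafkaMessageReceiverThread.join();    // consumer drains pending messages, sees the pill, calls consumer.shutdown()
    tkp.shutdown();                           // only now do we shut down the producer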
215 |
216 |
217 | ## Current Issues
218 |
219 | I have been struggling to find a Maven pom.xml recipe that will allow me to pull in an official version of
220 | Kafka from a public Maven repository. Kafka is a very recent project so many of the currently available on-line
221 | examples (as of this writing -- October of 2013) don't seem to build correctly out of the box (at least for me.) By contributing this project, at least the 'run out of the box' requirement should be met.
222 |
223 | Many examples depend on using Maven's install-file goal to get a Kafka jar that you build yourself from sources into your local
224 | repo ($HOME/.m2/repository). A recent stack exchange article
225 | (see: http://stackoverflow.com/questions/17037209/where-can-i-find-maven-repository-for-kafka)
226 | suggests an official Kafka .jar is available, but I haven't figured out the Maven incantations to have
227 | my build download this .jar.
228 |
229 | If someone could provide me with a patch for 'the right way' to do this with Maven I will update my project
230 | accordingly.... Hopefully it will serve as a useful resource for other beginning Kafka developers.
231 |
232 |
233 | For now, I have hacked my dependencies so that the version of Kafka I use is pulled from a work-in-progress
234 | version of a storm-kafka integration project. Well... it works for now, but I'm concerned the 'wip' versions
235 | below will be deprecated. Then this project will lose its dependencies and fail to build properly.
236 | Also, I really shouldn't be introducing storm for this simple Kafka example at this point in any case.
237 |
238 |
239 |
240 |         <dependency>
241 |             <groupId>storm</groupId>
242 |             <artifactId>storm</artifactId>
243 |             <version>0.9.0-wip17</version>
244 |         </dependency>
245 |         <dependency>
246 |             <groupId>storm</groupId>
247 |             <artifactId>storm-core</artifactId>
248 |             <version>0.9.0-wip17</version>
249 |         </dependency>
250 |         <dependency>
251 |             <groupId>storm</groupId>
252 |             <artifactId>storm-kafka</artifactId>
253 |             <version>0.9.0-wip16a-scala292</version>
254 |         </dependency>
255 |
256 |
--------------------------------------------------------------------------------
/esper+storm+kafka/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | com.buildlackey
6 | esper-and-kafka-spout-example
7 | kafka-input-spout-with-kafka-output-bolt-and-esper-bolt-example
8 | 1.0
9 | jar
10 | Example Illustrating a Kafka Consumer Spout, a Kafka Producer Bolt, and an Esper Streaming Query Bolt
11 |
12 |
13 |
14 | leadLackey
15 | Chris Bedford
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | com.restfb
24 | restfb
25 | 1.6.12
26 |
27 |
28 |
29 | org.twitter4j
30 | twitter4j-core
31 | [3.0,)
32 |
33 |
34 |
35 | com.aliasi
36 | lingpipe
37 | 4.0.1
38 |
39 |
40 |
41 | org.twitter4j
42 | twitter4j-stream
43 | [3.0,)
44 |
45 |
46 |
47 | commons-collections
48 | commons-collections
49 | 3.2.1
50 |
51 |
52 |
53 | com.google.code.gson
54 | gson
55 | 1.7.1
56 |
57 |
58 |
59 |
60 | storm
61 | storm
62 | 0.9.0-rc2
63 | provided
64 |
65 |
66 | storm
67 | storm-core
68 | 0.9.0-rc2
69 | provided
70 |
71 |
72 |
73 |
74 |
75 |
82 |
83 | org.clojars.brenden
84 | storm-kafka-0.8-plus
85 | 0.1.3-SNAPSHOT
86 |
87 |
88 | org.apache.kafka
89 | kafka_2.10
90 |
91 |
92 |
93 | org.scala-lang
94 | scala-library
95 |
96 |
97 |
98 |
99 |
100 |
101 | org.scala-lang
102 | scala-library
103 | 2.9.3
104 |
105 |
106 |
107 |
108 | org.apache.kafka
109 | kafka_2.9.2
110 | 0.8.0-beta1
111 |
112 |
113 | com.sun.jmx
114 | jmxri
115 |
116 |
117 |
118 | com.sun.jdmk
119 | jmxtools
120 |
121 |
122 |
123 | javax.jms
124 | jms
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
137 |
138 | com.yammer.metrics
139 | metrics-core
140 | 2.2.0
141 |
142 |
143 |
144 | slf4j
145 | slf4j-api
146 |
147 |
148 |
149 |
150 |
151 | com.101tec
152 | zkclient
153 | 0.3
154 |
155 |
156 | log4j
157 | log4j
158 |
159 |
160 |
161 |
162 |
163 | net.sf.jopt-simple
164 | jopt-simple
165 | 4.5
166 |
167 |
168 |
172 |
173 | org.apache.zookeeper
174 | zookeeper
175 | 3.3.3
176 |
177 |
178 | org.slf4j
179 | slf4j-log4j12
180 |
181 |
182 | log4j
183 | log4j
184 |
185 |
186 | log4j
187 | log4j
188 |
189 |
190 |
191 | com.sun.jmx
192 | jmxri
193 |
194 |
195 |
196 | com.sun.jdmk
197 | jmxtools
198 |
199 |
200 |
201 | javax.jms
202 | jms
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 | org.tomdz.storm
212 | storm-esper
213 | 0.8.1-SNAPSHOT
214 |
215 |
216 |
217 |
218 |
219 |
220 | com.netflix.curator
221 | curator-test
222 | 1.2.5
223 |
224 |
225 |
226 | org.slf4j
227 | slf4j-log4j12
228 |
229 |
230 | log4j
231 | log4j
232 |
233 |
234 |
235 |
236 |
237 |
238 | org.testng
239 | testng
240 | 6.1.1
241 | test
242 |
243 |
244 |
245 | org.easymock
246 | easymock
247 | 3.0
248 | test
249 |
250 |
251 |
252 |
253 |
254 | 2.2.1
255 |
256 |
257 |
258 |
259 |
260 | org.apache.maven.plugins
261 | maven-enforcer-plugin
262 | 1.1.1
263 |
264 |
265 | enforce-versions
266 |
267 | enforce
268 |
269 |
270 |
271 |
272 | 2.2.1
273 |
274 |
275 | 1.7
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 | org.apache.maven.plugins
285 | maven-compiler-plugin
286 | 3.1
287 |
288 | 1.7
289 | 1.7
290 |
291 |
292 |
293 |
294 | org.apache.maven.plugins
295 | maven-jar-plugin
296 | 2.4
297 |
298 |
299 | org.apache.maven.plugins
300 | maven-source-plugin
301 | 2.2
302 |
303 | true
304 |
305 |
306 |
307 | create-source-jar
308 |
309 | jar-no-fork
310 |
311 |
312 |
313 |
314 |
315 |
321 |
322 | maven-assembly-plugin
323 |
324 |
325 | src/main/assembly/dep.xml
326 |
327 |
328 |
329 | EsperFilteredTwitterFeedTopology
330 |
331 |
332 |
333 |
334 |
335 | make-assembly
336 | package
337 |
338 | single
339 |
340 |
341 |
342 |
343 |
344 |
345 |
349 |
350 | org.codehaus.gmaven
351 | gmaven-plugin
352 |
353 |
354 | package
355 |
356 | execute
357 |
358 |
359 |
360 | File targetDir = new File("${project.basedir.path}/target".toString())
361 | println "dir is ${targetDir.path}"
362 | String jarBaseName = "${project.artifactId}-${project.version}"
363 | File jarWithUnwantedStuff = new File(targetDir, "${jarBaseName}-jar.jar".toString())
364 |
365 | def explodedJarDir = new File(targetDir, "explodedJar".toString())
366 | def ant = new AntBuilder() // create an antbuilder
367 | ant.unzip(src: "${jarWithUnwantedStuff.path}",
368 | dest: explodedJarDir.path,
369 | overwrite: "false")
370 | File finalJar = new File(targetDir, "${jarBaseName}-deployable.jar")
371 | unwantedClassesDir = new File(explodedJarDir, "/org/slf4j/impl".toString())
372 | unwantedClassesDir.deleteDir()
373 | ant.zip(basedir: explodedJarDir.path, destFile: finalJar.path)
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 | clojars
385 | http://clojars.org/repo/
386 |
387 |
388 |
389 |
390 |
--------------------------------------------------------------------------------
/esper+storm+kafka/zookeeper.out:
--------------------------------------------------------------------------------
1 | 2013-10-27 15:24:21,823 [myid:] - INFO [main:QuorumPeerConfig@101] - Reading configuration from: /home/chris/esper/zookeeper/bin/../conf/zoo.cfg
2 | 2013-10-27 15:24:21,845 [myid:] - INFO [main:DatadirCleanupManager@78] - autopurge.snapRetainCount set to 3
3 | 2013-10-27 15:24:21,852 [myid:] - INFO [main:DatadirCleanupManager@79] - autopurge.purgeInterval set to 0
4 | 2013-10-27 15:24:21,855 [myid:] - INFO [main:DatadirCleanupManager@101] - Purge task is not scheduled.
5 | 2013-10-27 15:24:21,858 [myid:] - WARN [main:QuorumPeerMain@113] - Either no config or no quorum defined in config, running in standalone mode
6 | 2013-10-27 15:24:21,927 [myid:] - INFO [main:QuorumPeerConfig@101] - Reading configuration from: /home/chris/esper/zookeeper/bin/../conf/zoo.cfg
7 | 2013-10-27 15:24:21,933 [myid:] - INFO [main:ZooKeeperServerMain@95] - Starting server
8 | 2013-10-27 15:24:21,978 [myid:] - INFO [main:Environment@100] - Server environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
9 | 2013-10-27 15:24:21,980 [myid:] - INFO [main:Environment@100] - Server environment:host.name=ubuntu
10 | 2013-10-27 15:24:21,983 [myid:] - INFO [main:Environment@100] - Server environment:java.version=1.7.0_25
11 | 2013-10-27 15:24:21,984 [myid:] - INFO [main:Environment@100] - Server environment:java.vendor=Oracle Corporation
12 | 2013-10-27 15:24:21,986 [myid:] - INFO [main:Environment@100] - Server environment:java.home=/home/chris/Dropbox2/3rdparty/java/jdk1.7.0_25/jre
13 | 2013-10-27 15:24:21,987 [myid:] - INFO [main:Environment@100] - Server environment:java.class.path=/home/chris/esper/zookeeper/bin/../build/classes:/home/chris/esper/zookeeper/bin/../build/lib/*.jar:/home/chris/esper/zookeeper/bin/../lib/slf4j-log4j12-1.6.1.jar:/home/chris/esper/zookeeper/bin/../lib/slf4j-api-1.6.1.jar:/home/chris/esper/zookeeper/bin/../lib/netty-3.2.2.Final.jar:/home/chris/esper/zookeeper/bin/../lib/log4j-1.2.15.jar:/home/chris/esper/zookeeper/bin/../lib/jline-0.9.94.jar:/home/chris/esper/zookeeper/bin/../zookeeper-3.4.5.jar:/home/chris/esper/zookeeper/bin/../src/java/lib/*.jar:/home/chris/esper/zookeeper/bin/../conf:
14 | 2013-10-27 15:24:21,989 [myid:] - INFO [main:Environment@100] - Server environment:java.library.path=/usr/local/lib::/usr/java/packages/lib/i386:/lib:/usr/lib
15 | 2013-10-27 15:24:21,990 [myid:] - INFO [main:Environment@100] - Server environment:java.io.tmpdir=/tmp
16 | 2013-10-27 15:24:21,993 [myid:] - INFO [main:Environment@100] - Server environment:java.compiler=
17 | 2013-10-27 15:24:21,994 [myid:] - INFO [main:Environment@100] - Server environment:os.name=Linux
18 | 2013-10-27 15:24:21,995 [myid:] - INFO [main:Environment@100] - Server environment:os.arch=i386
19 | 2013-10-27 15:24:21,997 [myid:] - INFO [main:Environment@100] - Server environment:os.version=3.8.0-19-generic
20 | 2013-10-27 15:24:22,000 [myid:] - INFO [main:Environment@100] - Server environment:user.name=chris
21 | 2013-10-27 15:24:22,012 [myid:] - INFO [main:Environment@100] - Server environment:user.home=/home/chris
22 | 2013-10-27 15:24:22,014 [myid:] - INFO [main:Environment@100] - Server environment:user.dir=/home/chris/esper/cep/storm+kafka
23 | 2013-10-27 15:24:22,039 [myid:] - INFO [main:ZooKeeperServer@726] - tickTime set to 2000
24 | 2013-10-27 15:24:22,041 [myid:] - INFO [main:ZooKeeperServer@735] - minSessionTimeout set to -1
25 | 2013-10-27 15:24:22,042 [myid:] - INFO [main:ZooKeeperServer@744] - maxSessionTimeout set to -1
26 | 2013-10-27 15:24:22,107 [myid:] - INFO [main:NIOServerCnxnFactory@94] - binding to port 0.0.0.0/0.0.0.0:2181
27 | 2013-10-27 15:24:22,188 [myid:] - INFO [main:FileSnap@83] - Reading snapshot /tmp/data/version-2/snapshot.c
28 | 2013-10-27 15:24:30,338 [myid:] - INFO [main:FileTxnSnapLog@240] - Snapshotting: 0x5382 to /tmp/data/version-2/snapshot.5382
29 | 2013-10-27 15:24:52,002 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340016, timeout of 20000ms exceeded
30 | 2013-10-27 15:24:52,004 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340013, timeout of 20000ms exceeded
31 | 2013-10-27 15:24:52,004 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d34001a, timeout of 20000ms exceeded
32 | 2013-10-27 15:24:52,005 [myid:] - INFO [SessionTracker:ZooKeeperServer@325] - Expiring session 0x141f7697d340014, timeout of 20000ms exceeded
33 | 2013-10-27 15:24:52,006 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340016
34 | 2013-10-27 15:24:52,007 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340013
35 | 2013-10-27 15:24:52,007 [myid:] - INFO [SyncThread:0:FileTxnLog@199] - Creating new log file: log.5383
36 | 2013-10-27 15:24:52,007 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d34001a
37 | 2013-10-27 15:24:52,011 [myid:] - INFO [ProcessThread(sid:0 cport:-1)::PrepRequestProcessor@476] - Processed session termination for sessionid: 0x141f7697d340014
38 | 2013-10-27 15:29:09,756 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50662
39 | 2013-10-27 15:29:09,778 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50663
40 | 2013-10-27 15:29:09,784 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50662
41 | 2013-10-27 15:29:09,800 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50663
42 | 2013-10-27 15:29:09,807 [myid:] - INFO [Thread-1:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50662 (no session established for client)
43 | 2013-10-27 15:29:09,817 [myid:] - INFO [Thread-2:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50663 (no session established for client)
44 | 2013-10-27 15:29:11,536 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50664
45 | 2013-10-27 15:29:11,538 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50664
46 | 2013-10-27 15:29:11,541 [myid:] - INFO [Thread-3:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50664 (no session established for client)
47 | 2013-10-27 15:29:14,035 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50665
48 | 2013-10-27 15:29:14,037 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50665
49 | 2013-10-27 15:29:14,041 [myid:] - INFO [Thread-4:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50665 (no session established for client)
50 | 2013-10-27 15:29:15,213 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50666
51 | 2013-10-27 15:29:15,242 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:ZooKeeperServer@839] - Client attempting to establish new session at /127.0.0.1:50666
52 | 2013-10-27 15:29:15,264 [myid:] - INFO [SyncThread:0:ZooKeeperServer@595] - Established session 0x141fc04ba8d0000 with negotiated timeout 40000 for client /127.0.0.1:50666
53 | 2013-10-27 15:29:16,537 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50667
54 | 2013-10-27 15:29:16,538 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50667
55 | 2013-10-27 15:29:16,542 [myid:] - INFO [Thread-5:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50667 (no session established for client)
56 | 2013-10-27 15:29:19,037 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50668
57 | 2013-10-27 15:29:19,038 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50668
58 | 2013-10-27 15:29:19,042 [myid:] - INFO [Thread-6:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50668 (no session established for client)
59 | 2013-10-27 15:29:21,539 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50669
60 | 2013-10-27 15:29:21,542 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50669
61 | 2013-10-27 15:29:21,544 [myid:] - INFO [Thread-7:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50669 (no session established for client)
62 | 2013-10-27 15:29:24,039 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50670
63 | 2013-10-27 15:29:24,041 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50670
64 | 2013-10-27 15:29:24,045 [myid:] - INFO [Thread-8:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50670 (no session established for client)
65 | 2013-10-27 15:29:26,541 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50671
66 | 2013-10-27 15:29:26,542 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50671
67 | 2013-10-27 15:29:26,546 [myid:] - INFO [Thread-9:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50671 (no session established for client)
68 | 2013-10-27 15:29:29,042 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50672
69 | 2013-10-27 15:29:29,043 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50672
70 | 2013-10-27 15:29:29,052 [myid:] - INFO [Thread-10:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50672 (no session established for client)
71 | 2013-10-27 15:29:31,543 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50673
72 | 2013-10-27 15:29:31,544 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50673
73 | 2013-10-27 15:29:31,548 [myid:] - INFO [Thread-11:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50673 (no session established for client)
74 | 2013-10-27 15:29:34,044 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50674
75 | 2013-10-27 15:29:34,045 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50674
76 | 2013-10-27 15:29:34,049 [myid:] - INFO [Thread-12:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50674 (no session established for client)
77 | 2013-10-27 15:29:36,546 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50675
78 | 2013-10-27 15:29:36,547 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50675
79 | 2013-10-27 15:29:36,550 [myid:] - INFO [Thread-13:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50675 (no session established for client)
80 | 2013-10-27 15:29:39,046 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50676
81 | 2013-10-27 15:29:39,048 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50676
82 | 2013-10-27 15:29:39,053 [myid:] - INFO [Thread-14:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50676 (no session established for client)
83 | 2013-10-27 15:29:41,548 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50677
84 | 2013-10-27 15:29:41,550 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50677
85 | 2013-10-27 15:29:41,552 [myid:] - INFO [Thread-15:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50677 (no session established for client)
86 | 2013-10-27 15:29:44,053 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50678
87 | 2013-10-27 15:29:44,054 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50678
88 | 2013-10-27 15:29:44,057 [myid:] - INFO [Thread-16:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50678 (no session established for client)
89 | 2013-10-27 15:29:46,552 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50680
90 | 2013-10-27 15:29:46,553 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50680
91 | 2013-10-27 15:29:46,555 [myid:] - INFO [Thread-17:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50680 (no session established for client)
92 | 2013-10-27 15:29:49,051 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50681
93 | 2013-10-27 15:29:49,053 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50681
94 | 2013-10-27 15:29:49,055 [myid:] - INFO [Thread-18:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50681 (no session established for client)
95 | 2013-10-27 15:29:51,554 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50682
96 | 2013-10-27 15:29:51,556 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50682
97 | 2013-10-27 15:29:51,558 [myid:] - INFO [Thread-19:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50682 (no session established for client)
98 | 2013-10-27 15:29:54,054 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50683
99 | 2013-10-27 15:29:54,056 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50683
100 | 2013-10-27 15:29:54,058 [myid:] - INFO [Thread-20:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50683 (no session established for client)
101 | 2013-10-27 15:29:56,556 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50684
102 | 2013-10-27 15:29:56,558 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50684
103 | 2013-10-27 15:29:56,560 [myid:] - INFO [Thread-21:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50684 (no session established for client)
104 | 2013-10-27 15:29:59,057 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxnFactory@197] - Accepted socket connection from /127.0.0.1:50685
105 | 2013-10-27 15:29:59,059 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@821] - Processing ruok command from /127.0.0.1:50685
106 | 2013-10-27 15:29:59,060 [myid:] - INFO [Thread-22:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50685 (no session established for client)
107 | 2013-10-27 15:30:01,807 [myid:] - WARN [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@349] - caught end of stream exception
108 | EndOfStreamException: Unable to read additional data from client sessionid 0x141fc04ba8d0000, likely client has closed socket
109 | at org.apache.zookeeper.server.NIOServerCnxn.doIO(NIOServerCnxn.java:220)
110 | at org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:208)
111 | at java.lang.Thread.run(Thread.java:724)
112 | 2013-10-27 15:30:01,811 [myid:] - INFO [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181:NIOServerCnxn@1001] - Closed socket connection for client /127.0.0.1:50666 which had sessionid 0x141fc04ba8d0000
113 |
--------------------------------------------------------------------------------
/storm+kafka/README.md:
--------------------------------------------------------------------------------
1 | # Kafka Spout Example
2 |
3 |
4 | Kafka Spout Integration Test With a Local Storm Cluster, and In-Memory Kafka and Zookeeper Instances
5 |
6 |
7 | ## Description
8 |
9 | This example illustrates how to:
10 |
11 | * push messages into Kafka and retrieve those messages with a Kafka/Storm spout.
12 |
13 | * set up your Kafka spout so that it reads all messages in its configured topic from the very first message
14 | in that topic (the default behavior), or so that it reads only the messages that are emitted to the topic after
15 | the spout has been initialized (to get the latter behavior, specify the --fromCurrent option as shown below.)
16 |
17 | * use the in-process Zookeeper server that Storm's LocalCluster seems to 'hardwire'
18 | at port 2000 by default.
19 |
20 | NOTE: there does not seem to be any way to override LocalCluster's
21 | behavior of instantiating its own zookeeper instance by passing in our own Zookeeper
22 | instance and telling the LocalCluster about that instance via the Map argument
23 | passed to LocalCluster(Map)... Oh well... this example shows a work-around for that (see the snippet just below.)
24 |
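Concretely, the work-around is to treat localhost:2000 (the address LocalCluster's embedded zookeeper appears to listen on) as a known value and point both the in-memory Kafka broker and the Kafka spout at it. Both lines below are lifted from the listings further down:

    properties.put("zookeeper.connect", "localhost:2000");    // Kafka broker config (Listing 2, line 103)
    BrokerHosts brokerHosts = new ZkHosts("localhost:2000");  // Kafka spout config (Listing 1, line 139)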
25 |
26 | By keeping all test fixtures in memory (rather than depending on out-of-process servers
27 | being 'somehow' set up before the test) we make it very easy to get the basics of
28 | Kafka Storm integration working in the environments of other developers and/or build systems.
29 |
30 |
31 |
32 | ## Building and Running
33 |
34 | After downloading the project, cd to the directory in which this README is located, then issue the 2 commands below
35 | (note that the second command has two variants):
36 |
37 | mvn clean compile
38 |
39 | mvn exec:java -Dexec.mainClass=TestTopology
40 | .. or ...
41 | mvn exec:java -Dexec.mainClass=TestTopology -Dexec.args="--fromCurrent"
42 |
43 |
44 |
45 | If you see 'SUCCESSFUL COMPLETION' printed out towards the very end, then you know everything is working.
46 |
47 |
48 |
49 | ## Implementation Details
50 |
51 | The test program pumps a small set of random messages from a Kafka producer thread (started
52 | at line 77 of Listing 1) to a Kafka Spout consumer, and then asserts that the messages received are identical
53 | to the messages sent (see the verifyResults method of Listing 1, starting at line 122.)
54 |
55 |
56 | The main method creates an instance of the TestTopology class whose constructor instantiates an
57 | instance of a Storm LocalCluster. We use the Zookeeper server in that LocalCluster instance
58 | since there doesn't seem to be any way to instantiate our own Zookeeper and pass that into the
59 | LocalCluster (as mentioned above). Next, we wait for that Zookeeper instance to come up completely
60 | (line 69 of listing 1.) We then start our Kafka server using the Zookeeper instance created by LocalCluster.
61 | This is done by hard-coding the host/port value that LocalCluster's self-launched zookeeper
62 | server listens on (localhost:2000) into our Kafka server configuration. See lines 74 and 103 of Listing 2.
63 |
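ServerAndThreadCoordinationUtils.waitForServerUp (called at line 69 of Listing 1) is not shown in this README. As a rough sketch only, and not the project's actual source, such a helper can poll zookeeper with its 'ruok' four-letter-word command until it answers 'imok' or the timeout expires; the repeated 'Processing ruok command' entries in the zookeeper.out file checked into this repo are this kind of probe.

    // Hypothetical sketch of a waitForServerUp(host, port, timeoutMillis) helper.
    static boolean waitForServerUp(String host, int port, long timeoutMillis) {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (System.currentTimeMillis() < deadline) {
            java.net.Socket sock = null;
            try {
                sock = new java.net.Socket(host, port);
                sock.getOutputStream().write("ruok".getBytes());
                sock.getOutputStream().flush();
                byte[] reply = new byte[4];
                int n = sock.getInputStream().read(reply);
                if (n == 4 && "imok".equals(new String(reply, 0, n))) {
                    return true;             // zookeeper answered the probe, so it is up
                }
            } catch (java.io.IOException e) {
                // not listening yet; keep retrying until the deadline
            } finally {
                if (sock != null) {
                    try { sock.close(); } catch (java.io.IOException ignored) { }
                }
            }
            try { Thread.sleep(250); } catch (InterruptedException e) { return false; }
        }
        return false;
    }

With LocalCluster hard-wiring zookeeper to port 2000, the call in Listing 1 simply probes localhost:2000 for up to five seconds before we start the Kafka server.
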
64 | The Kafka producer thread kicked off at line 77 of listing 1 emits a batch of 4 messages
65 | BEFORE our topology is even initialized (line 40 of listing 2). After emitting that first batch
66 | the producer thread unleashes the countdown latch 'producerFinishedInitialBatchLatch'.
67 | This lets the main thread proceed from its wait at line 78 of listing 1. The main thread
68 | then sets up our test topology, which includes a Kafka spout configured to connect to the Zookeeper
69 | instance at port 2000. This is the same zookeeper instance that we use when we configure
70 | the Kafka server, so it seems the Kafka spout discovers the
71 | Kafka broker it needs to connect with via Zookeeper. Our topology wires the
72 | Kafka spout to our VerboseCollectorBolt instance whose only job is to dump each tuple it receives
73 | to the console and to collect each sentence it receives. In verifyResults (line 122 of
74 | listing 1) we check to make sure that what the VerboseCollectorBolt has recorded actually matches what
75 | we know we have sent.
76 |
77 | Note that after we set up our topology (line 80 of Listing 1), we give it a few seconds to launch, then
78 | we unleash the topologyStartedLatch which causes the KafkaProducer thread to proceed from its wait
79 | point at line 43 of Listing 2 and emit the second batch of messages.
80 |
81 |
82 | Listing 1, TestTopology.java
83 |
84 | 1 /*
85 | 2 * Author: cbedford
86 | 3 * Date: 10/22/13
87 | 4 * Time: 8:50 PM
88 | 5 */
89 | 6
90 | 7
91 | 8 import backtype.storm.Config;
92 | 9 import backtype.storm.LocalCluster;
93 | 10 import backtype.storm.spout.SchemeAsMultiScheme;
94 | 11 import backtype.storm.topology.TopologyBuilder;
95 | 12 import storm.kafka.*;
96 | 13
97 | 14 import java.util.ArrayList;
98 | 15 import java.util.List;
99 | 16 import java.util.Random;
100 | 17 import java.util.concurrent.CountDownLatch;
101 | 18
102 | 19 public class TestTopology {
103 | 20
104 | 21
105 | 22 final static int MAX_ALLOWED_TO_RUN_MILLISECS = 1000 * 90 /* seconds */;
106 | 23
107 | 24 CountDownLatch topologyStartedLatch = new CountDownLatch(1);
108 | 25
109 | 26 private static int STORM_KAFKA_FROM_READ_FROM_START = -2;
110 | 27 private static int STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET = -1;
111 | 28 private static int readFromMode = STORM_KAFKA_FROM_READ_FROM_START;
112 | 29 private int expectedNumMessages = 8;
113 | 30
114 | 31 private static final int SECOND = 1000;
115 | 32      private static List<String> messagesReceived = new ArrayList<String>();
116 | 33
117 | 34 private LocalCluster cluster = new LocalCluster();
118 | 35
119 | 36 private static final String TOPIC_NAME = "big-topix-" + new Random().nextInt();
120 | 37 volatile static boolean finishedCollecting = false;
121 | 38
122 | 39 private static String[] sentences = new String[]{
123 | 40 "one dog9 - saw the fox over the moon",
124 | 41 "two cats9 - saw the fox over the moon",
125 | 42 "four bears9 - saw the fox over the moon",
126 | 43 "five goats9 - saw the fox over the moon",
127 | 44 };
128 | 45
129 | 46 private KafkaProducer kafkaProducer = new KafkaProducer(sentences, TOPIC_NAME, topologyStartedLatch);
130 | 47
131 | 48
132 | 49 public static void recordRecievedMessage(String msg) {
133 | 50 synchronized (TestTopology.class) { // ensure visibility of list updates between threads
134 | 51 messagesReceived.add(msg);
135 | 52 }
136 | 53 }
137 | 54
138 | 55
139 | 56 public static void main(String[] args) {
140 | 57 TestTopology testTopology = new TestTopology();
141 | 58
142 | 59 if (args.length == 1 && args[0].equals("--fromCurrent")) {
143 | 60 readFromMode = STORM_KAFKA_FROM_READ_FROM_CURRENT_OFFSET;
144 | 61 testTopology.expectedNumMessages = 4;
145 | 62 }
146 | 63
147 | 64 testTopology.runTest();
148 | 65 }
149 | 66
150 | 67 private void runTest() {
151 | 68 ServerAndThreadCoordinationUtils.setMaxTimeToRunTimer(MAX_ALLOWED_TO_RUN_MILLISECS);
152 | 69 ServerAndThreadCoordinationUtils.waitForServerUp("localhost", 2000, 5 * SECOND); // Wait for zookeeper to come up
153 | 70
154 | 71 kafkaProducer.startKafkaServer();
155 | 72 kafkaProducer.createTopic(TOPIC_NAME);
156 | 73
157 | 74 try {
158 | 75
159 | 76
160 | 77 kafkaProducer.startProducer();
161 | 78 ServerAndThreadCoordinationUtils.await(kafkaProducer.producerFinishedInitialBatchLatch);
162 | 79
163 | 80 setupKafkaSpoutAndSubmitTopology();
164 | 81 try {
165 | 82 Thread.sleep(5000); // Would be nice to have a call back inform us when ready
166 | 83 } catch (InterruptedException e) {
167 | 84 e.printStackTrace();
168 | 85 }
169 | 86 ServerAndThreadCoordinationUtils.countDown(topologyStartedLatch);
170 | 87
171 | 88 awaitResults();
172 | 89 } catch (InterruptedException e) {
173 | 90 e.printStackTrace();
174 | 91 }
175 | 92
176 | 93 verifyResults();
177 | 94 shutdown();
178 | 95 System.out.println("SUCCESSFUL COMPLETION");
179 | 96 System.exit(0);
180 | 97 }
181 | 98
182 | 99
183 | 100
184 | 101 private void awaitResults() {
185 | 102 while (!finishedCollecting) {
186 | 103 try {
187 | 104 Thread.sleep(500);
188 | 105 } catch (InterruptedException e) {
189 | 106 e.printStackTrace();
190 | 107 }
191 | 108 }
192 | 109
193 | 110 // Sleep another couple of seconds in case any more messages than expected come into the bolt.
194 | 111 // In this case the bolt should throw a fatal error
195 | 112 try {
196 | 113 Thread.sleep(2000);
197 | 114 } catch (InterruptedException e) {
198 | 115 e.printStackTrace();
199 | 116 }
200 | 117
201 | 118
202 | 119 System.out.println("after await");
203 | 120 }
204 | 121
205 | 122 private void verifyResults() {
206 | 123 synchronized (TestTopology.class) { // ensure visibility of list updates between threads
207 | 124 int count = 0;
208 | 125 for (String msg : messagesReceived) {
209 | 126 if (msg.contains("cat") || msg.contains("dog") || msg.contains("bear") || msg.contains("goat")) {
210 | 127 count++;
211 | 128 }
212 | 129 }
213 | 130 if (count != expectedNumMessages) {
214 | 131 System.out.println(">>>>>>>>>>>>>>>>>>>>FAILURE - Did not receive expected messages");
215 | 132 System.exit(-1);
216 | 133 }
217 | 134
218 | 135 }
219 | 136 }
220 | 137
221 | 138 private void setupKafkaSpoutAndSubmitTopology() throws InterruptedException {
222 | 139 BrokerHosts brokerHosts = new ZkHosts("localhost:2000");
223 | 140
224 | 141 SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, TOPIC_NAME, "", "storm");
225 | 142 kafkaConfig.forceStartOffsetTime(readFromMode /* either earliest or current offset */);
226 | 143 kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
227 | 144
228 | 145
229 | 146 TopologyBuilder builder = new TopologyBuilder();
230 | 147 builder.setSpout("words", new KafkaSpout(kafkaConfig), 1);
231 | 148 VerboseCollectorBolt bolt = new VerboseCollectorBolt(expectedNumMessages);
232 | 149 builder.setBolt("print", bolt).shuffleGrouping("words");
233 | 150
234 | 151
235 | 152 Config config = new Config();
236 | 153
237 | 154 cluster.submitTopology("kafka-test", config, builder.createTopology());
238 | 155 }
239 | 156
240 | 157 private void shutdown() {
241 | 158 cluster.shutdown();
242 | 159 kafkaProducer.shutdown();
243 | 160 }
244 | 161
245 | 162
246 | 163
247 | 164 }
248 | 165
249 |
250 |
251 |
252 |
253 | Listing 2, KafkaProducer.java
254 |
255 | 1 import java.util.concurrent.CountDownLatch;
256 | 2 import com.google.common.io.Files;
257 | 3 import kafka.admin.CreateTopicCommand;
258 | 4 import kafka.javaapi.producer.Producer;
259 | 5 import kafka.producer.KeyedMessage;
260 | 6 import kafka.producer.ProducerConfig;
261 | 7 import kafka.server.KafkaConfig;
262 | 8 import kafka.server.KafkaServer;
263 | 9 import kafka.utils.MockTime;
264 | 10
265 | 11 import java.io.File;
266 | 12 import java.util.Properties;
267 | 13
268 | 14
269 | 15 public class KafkaProducer {
270 | 16
271 | 17 private KafkaServer kafkaServer = null;
272 | 18 private final String topicName;
273 | 19
274 | 20
275 | 21 CountDownLatch topologyStartedLatch;
276 | 22 public CountDownLatch producerFinishedInitialBatchLatch = new CountDownLatch(1);
277 | 23
278 | 24
279 | 25      Producer<String, String> producer;
280 | 26
281 | 27 private String[] sentences;
282 | 28
283 | 29 KafkaProducer(String[] sentences, String topicName, CountDownLatch topologyStartedLatch) {
284 | 30 this.sentences = sentences;
285 | 31 this.topicName = topicName;
286 | 32 this.topologyStartedLatch = topologyStartedLatch;
287 | 33 }
288 | 34
289 | 35 public Thread startProducer() {
290 | 36 Thread sender = new Thread(
291 | 37 new Runnable() {
292 | 38 @Override
293 | 39 public void run() {
294 | 40 emitBatch();
295 | 41 ServerAndThreadCoordinationUtils.
296 | 42 countDown(producerFinishedInitialBatchLatch);
297 | 43 ServerAndThreadCoordinationUtils.
298 | 44 await(topologyStartedLatch);
299 | 45 emitBatch(); // emit second batch after we know topology is up
300 | 46 }
301 | 47 },
302 | 48 "producerThread"
303 | 49 );
304 | 50 sender.start();
305 | 51 return sender;
306 | 52 }
307 | 53
308 | 54 private void emitBatch() {
309 | 55 Properties props = new Properties();
310 | 56 props.put("metadata.broker.list", "localhost:9092");
311 | 57 props.put("serializer.class", "kafka.serializer.StringEncoder");
312 | 58 props.put("request.required.acks", "1");
313 | 59 ProducerConfig config = new ProducerConfig(props);
314 | 60          Producer<String, String> producer = new Producer<String, String>(config);
315 | 61
316 | 62 for (String sentence : sentences) {
317 | 63              KeyedMessage<String, String> data =
318 | 64                      new KeyedMessage<String, String>(topicName, sentence);
319 | 65 producer.send(data);
320 | 66 }
321 | 67 producer.close();
322 | 68
323 | 69 }
324 | 70
325 | 71 public void createTopic(String topicName) {
326 | 72 String[] arguments = new String[8];
327 | 73 arguments[0] = "--zookeeper";
328 | 74 arguments[1] = "localhost:2000";
329 | 75 arguments[2] = "--replica";
330 | 76 arguments[3] = "1";
331 | 77 arguments[4] = "--partition";
332 | 78 arguments[5] = "1";
333 | 79 arguments[6] = "--topic";
334 | 80 arguments[7] = topicName;
335 | 81
336 | 82 CreateTopicCommand.main(arguments);
337 | 83 }
338 | 84
339 | 85 public void startKafkaServer() {
340 | 86 File tmpDir = Files.createTempDir();
341 | 87 Properties props = createProperties(tmpDir.getAbsolutePath(), 9092, 1);
342 | 88 KafkaConfig kafkaConfig = new KafkaConfig(props);
343 | 89
344 | 90 kafkaServer = new KafkaServer(kafkaConfig, new MockTime());
345 | 91 kafkaServer.startup();
346 | 92 }
347 | 93
348 | 94 public void shutdown() {
349 | 95 kafkaServer.shutdown();
350 | 96 }
351 | 97
352 | 98 private Properties createProperties(String logDir, int port, int brokerId) {
353 | 99 Properties properties = new Properties();
354 | 100 properties.put("port", port + "");
355 | 101 properties.put("broker.id", brokerId + "");
356 | 102 properties.put("log.dir", logDir);
357 | 103 properties.put("zookeeper.connect", "localhost:2000"); // Uses zookeeper created by LocalCluster
358 | 104 return properties;
359 | 105 }
360 | 106
361 | 107 }
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 | ## Current Issues
372 |
373 | I am not confident that the Maven dependencies that I have put together are optimal, but they seem to work for now.
374 |
375 | ## Acknowledgements
376 |
377 | Got a good start from this github repo: https://github.com/wurstmeister
378 |
379 |
380 |
381 |
--------------------------------------------------------------------------------