├── .gitignore
├── README.md
├── kafka-basics
├── pom.xml
└── src
│ └── main
│ └── java
│ └── kafka
│ └── tutorial1
│ ├── ConsumerDemo.java
│ ├── ConsumerDemoAssignSeek.java
│ ├── ConsumerDemoGroups.java
│ ├── ConsumerDemoWithThread.java
│ ├── ProducerDemo.java
│ ├── ProducerDemoKeys.java
│ └── ProducerDemoWithCallback.java
├── kafka-connect
├── connect-standalone.properties
├── connectors
│ └── kafka-connect-twitter
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── annotations-2.0.1.jar
│ │ ├── connect-utils-0.3.105.jar
│ │ ├── freemarker-2.3.25-incubating.jar
│ │ ├── guava-18.0.jar
│ │ ├── jackson-annotations-2.8.0.jar
│ │ ├── jackson-core-2.8.5.jar
│ │ ├── jackson-databind-2.8.5.jar
│ │ ├── javassist-3.19.0-GA.jar
│ │ ├── kafka-connect-twitter-0.2.26.jar
│ │ ├── reflections-0.9.10.jar
│ │ ├── twitter4j-core-4.0.6.jar
│ │ └── twitter4j-stream-4.0.6.jar
├── run.sh
└── twitter.properties
├── kafka-consumer-elasticsearch
├── pom.xml
└── src
│ └── main
│ └── java
│ └── com.github.simplesteph.kafka
│ └── tutorial3
│ └── ElasticSearchConsumer.java
├── kafka-producer-twitter
├── pom.xml
└── src
│ └── main
│ └── java
│ └── kafka
│ └── tutorial2
│ └── TwitterProducer.java
├── kafka-streams-filter-tweets
├── pom.xml
└── src
│ └── main
│ └── java
│ └── com
│ └── github
│ └── simplesteph
│ └── kafka
│ └── tutorial4
│ └── StreamsFilterTweets.java
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | target/
3 | *.iml
4 | .DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kafka for Beginners
2 |
3 | This is a companion repository for my [Kafka for Beginners](http://bit.ly/kafka-beginners-github) course.
4 |
5 | [Course link](http://bit.ly/kafka-beginners-github)
6 |
7 | # Content
8 | - Basics of Kafka
9 | - Twitter Producer
10 | - ElasticSearch Consumer
11 | - Kafka Streams 101
12 | - Kafka Connect Example
--------------------------------------------------------------------------------
/kafka-basics/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | kafka-beginners-course
7 | com.github.simplesteph
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | kafka-basics
13 |
14 |
15 |
16 |
17 |
18 | org.apache.kafka
19 | kafka-clients
20 | 2.0.0
21 |
22 |
23 |
24 |
25 | org.slf4j
26 | slf4j-simple
27 | 1.7.25
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemo.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerConfig;
4 | import org.apache.kafka.clients.consumer.ConsumerRecord;
5 | import org.apache.kafka.clients.consumer.ConsumerRecords;
6 | import org.apache.kafka.clients.consumer.KafkaConsumer;
7 | import org.apache.kafka.common.serialization.StringDeserializer;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | import java.time.Duration;
12 | import java.util.Arrays;
13 | import java.util.Properties;
14 |
15 | public class ConsumerDemo {
16 |
17 | public static void main(String[] args) {
18 |
19 | Logger logger = LoggerFactory.getLogger(ConsumerDemo.class.getName());
20 |
21 | String bootstrapServers = "127.0.0.1:9092";
22 | String groupId = "my-fourth-application";
23 | String topic = "first_topic";
24 |
25 | // create consumer configs
26 | Properties properties = new Properties();
27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
28 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
30 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
31 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
32 |
33 | // create consumer
34 | KafkaConsumer consumer = new KafkaConsumer(properties);
35 |
36 | // subscribe consumer to our topic(s)
37 | consumer.subscribe(Arrays.asList(topic));
38 |
39 | // poll for new data
40 | while(true){
41 | ConsumerRecords records =
42 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0
43 |
44 | for (ConsumerRecord record : records){
45 | logger.info("Key: " + record.key() + ", Value: " + record.value());
46 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset());
47 | }
48 | }
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoAssignSeek.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerConfig;
4 | import org.apache.kafka.clients.consumer.ConsumerRecord;
5 | import org.apache.kafka.clients.consumer.ConsumerRecords;
6 | import org.apache.kafka.clients.consumer.KafkaConsumer;
7 | import org.apache.kafka.common.TopicPartition;
8 | import org.apache.kafka.common.serialization.StringDeserializer;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | import java.time.Duration;
13 | import java.util.Arrays;
14 | import java.util.Properties;
15 |
16 | public class ConsumerDemoAssignSeek {
17 |
18 | public static void main(String[] args) {
19 |
20 | Logger logger = LoggerFactory.getLogger(ConsumerDemoAssignSeek.class.getName());
21 |
22 | String bootstrapServers = "127.0.0.1:9092";
23 | String topic = "first_topic";
24 |
25 | // create consumer configs
26 | Properties properties = new Properties();
27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
28 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
30 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
31 |
32 | // create consumer
33 | KafkaConsumer consumer = new KafkaConsumer(properties);
34 |
35 | // assign and seek are mostly used to replay data or fetch a specific message
36 |
37 | // assign
38 | TopicPartition partitionToReadFrom = new TopicPartition(topic, 0);
39 | long offsetToReadFrom = 15L;
40 | consumer.assign(Arrays.asList(partitionToReadFrom));
41 |
42 | // seek
43 | consumer.seek(partitionToReadFrom, offsetToReadFrom);
44 |
45 | int numberOfMessagesToRead = 5;
46 | boolean keepOnReading = true;
47 | int numberOfMessagesReadSoFar = 0;
48 |
49 | // poll for new data
50 | while(keepOnReading){
51 | ConsumerRecords records =
52 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0
53 |
54 | for (ConsumerRecord record : records){
55 | numberOfMessagesReadSoFar += 1;
56 | logger.info("Key: " + record.key() + ", Value: " + record.value());
57 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset());
58 | if (numberOfMessagesReadSoFar >= numberOfMessagesToRead){
59 | keepOnReading = false; // to exit the while loop
60 | break; // to exit the for loop
61 | }
62 | }
63 | }
64 |
65 | logger.info("Exiting the application");
66 |
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoGroups.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerConfig;
4 | import org.apache.kafka.clients.consumer.ConsumerRecord;
5 | import org.apache.kafka.clients.consumer.ConsumerRecords;
6 | import org.apache.kafka.clients.consumer.KafkaConsumer;
7 | import org.apache.kafka.common.serialization.StringDeserializer;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | import java.time.Duration;
12 | import java.util.Arrays;
13 | import java.util.Properties;
14 |
15 | public class ConsumerDemoGroups {
16 |
17 | public static void main(String[] args) {
18 |
19 | Logger logger = LoggerFactory.getLogger(ConsumerDemoGroups.class.getName());
20 |
21 | String bootstrapServers = "127.0.0.1:9092";
22 | String groupId = "my-fifth-application";
23 | String topic = "first_topic";
24 |
25 | // create consumer configs
26 | Properties properties = new Properties();
27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
28 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
30 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
31 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
32 |
33 | // create consumer
34 | KafkaConsumer consumer = new KafkaConsumer(properties);
35 |
36 | // subscribe consumer to our topic(s)
37 | consumer.subscribe(Arrays.asList(topic));
38 |
39 | // poll for new data
40 | while(true){
41 | ConsumerRecords records =
42 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0
43 |
44 | for (ConsumerRecord record : records){
45 | logger.info("Key: " + record.key() + ", Value: " + record.value());
46 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset());
47 | }
48 | }
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoWithThread.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerConfig;
4 | import org.apache.kafka.clients.consumer.ConsumerRecord;
5 | import org.apache.kafka.clients.consumer.ConsumerRecords;
6 | import org.apache.kafka.clients.consumer.KafkaConsumer;
7 | import org.apache.kafka.common.errors.WakeupException;
8 | import org.apache.kafka.common.serialization.StringDeserializer;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | import java.time.Duration;
13 | import java.util.Arrays;
14 | import java.util.Properties;
15 | import java.util.concurrent.CountDownLatch;
16 |
17 | public class ConsumerDemoWithThread {
18 |
19 | public static void main(String[] args) {
20 | new ConsumerDemoWithThread().run();
21 | }
22 |
23 | private ConsumerDemoWithThread() {
24 |
25 | }
26 |
27 | private void run() {
28 | Logger logger = LoggerFactory.getLogger(ConsumerDemoWithThread.class.getName());
29 |
30 | String bootstrapServers = "127.0.0.1:9092";
31 | String groupId = "my-sixth-application";
32 | String topic = "first_topic";
33 |
34 | // latch for dealing with multiple threads
35 | CountDownLatch latch = new CountDownLatch(1);
36 |
37 | // create the consumer runnable
38 | logger.info("Creating the consumer thread");
39 | Runnable myConsumerRunnable = new ConsumerRunnable(
40 | bootstrapServers,
41 | groupId,
42 | topic,
43 | latch
44 | );
45 |
46 | // start the thread
47 | Thread myThread = new Thread(myConsumerRunnable);
48 | myThread.start();
49 |
50 | // add a shutdown hook
51 | Runtime.getRuntime().addShutdownHook(new Thread(() -> {
52 | logger.info("Caught shutdown hook");
53 | ((ConsumerRunnable) myConsumerRunnable).shutdown();
54 | try {
55 | latch.await();
56 | } catch (InterruptedException e) {
57 | e.printStackTrace();
58 | }
59 | logger.info("Application has exited");
60 | }
61 |
62 | ));
63 |
64 | try {
65 | latch.await();
66 | } catch (InterruptedException e) {
67 | logger.error("Application got interrupted", e);
68 | } finally {
69 | logger.info("Application is closing");
70 | }
71 | }
72 |
73 | public class ConsumerRunnable implements Runnable {
74 |
75 | private CountDownLatch latch;
76 | private KafkaConsumer consumer;
77 | private Logger logger = LoggerFactory.getLogger(ConsumerRunnable.class.getName());
78 |
79 | public ConsumerRunnable(String bootstrapServers,
80 | String groupId,
81 | String topic,
82 | CountDownLatch latch) {
83 | this.latch = latch;
84 |
85 | // create consumer configs
86 | Properties properties = new Properties();
87 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
88 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
89 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
90 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
91 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
92 |
93 | // create consumer
94 | consumer = new KafkaConsumer(properties);
95 | // subscribe consumer to our topic(s)
96 | consumer.subscribe(Arrays.asList(topic));
97 | }
98 |
99 | @Override
100 | public void run() {
101 | // poll for new data
102 | try {
103 | while (true) {
104 | ConsumerRecords records =
105 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0
106 |
107 | for (ConsumerRecord record : records) {
108 | logger.info("Key: " + record.key() + ", Value: " + record.value());
109 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset());
110 | }
111 | }
112 | } catch (WakeupException e) {
113 | logger.info("Received shutdown signal!");
114 | } finally {
115 | consumer.close();
116 | // tell our main code we're done with the consumer
117 | latch.countDown();
118 | }
119 | }
120 |
121 | public void shutdown() {
122 | // the wakeup() method is a special method to interrupt consumer.poll()
123 | // it will throw the exception WakeUpException
124 | consumer.wakeup();
125 | }
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ProducerDemo.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.producer.KafkaProducer;
4 | import org.apache.kafka.clients.producer.ProducerConfig;
5 | import org.apache.kafka.clients.producer.ProducerRecord;
6 | import org.apache.kafka.common.serialization.StringSerializer;
7 |
8 | import java.util.Properties;
9 |
10 | public class ProducerDemo {
11 |
12 | public static void main(String[] args) {
13 |
14 | String bootstrapServers = "127.0.0.1:9092";
15 |
16 | // create Producer properties
17 | Properties properties = new Properties();
18 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
19 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
20 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
21 |
22 | // create the producer
23 | KafkaProducer producer = new KafkaProducer(properties);
24 |
25 | // create a producer record
26 | ProducerRecord record =
27 | new ProducerRecord("first_topic", "hello world");
28 |
29 | // send data - asynchronous
30 | producer.send(record);
31 |
32 | // flush data
33 | producer.flush();
34 | // flush and close producer
35 | producer.close();
36 |
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ProducerDemoKeys.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.producer.*;
4 | import org.apache.kafka.common.serialization.StringSerializer;
5 | import org.slf4j.Logger;
6 | import org.slf4j.LoggerFactory;
7 |
8 | import java.util.Properties;
9 | import java.util.concurrent.ExecutionException;
10 |
11 | public class ProducerDemoKeys {
12 |
13 | public static void main(String[] args) throws ExecutionException, InterruptedException {
14 |
15 | final Logger logger = LoggerFactory.getLogger(ProducerDemoKeys.class);
16 |
17 | String bootstrapServers = "127.0.0.1:9092";
18 |
19 | // create Producer properties
20 | Properties properties = new Properties();
21 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
22 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
23 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
24 |
25 | // create the producer
26 | KafkaProducer producer = new KafkaProducer(properties);
27 |
28 |
29 | for (int i=0; i<10; i++ ) {
30 | // create a producer record
31 |
32 | String topic = "first_topic";
33 | String value = "hello world " + Integer.toString(i);
34 | String key = "id_" + Integer.toString(i);
35 |
36 | ProducerRecord record =
37 | new ProducerRecord(topic, key, value);
38 |
39 | logger.info("Key: " + key); // log the key
40 | // id_0 is going to partition 1
41 | // id_1 partition 0
42 | // id_2 partition 2
43 | // id_3 partition 0
44 | // id_4 partition 2
45 | // id_5 partition 2
46 | // id_6 partition 0
47 | // id_7 partition 2
48 | // id_8 partition 1
49 | // id_9 partition 2
50 |
51 |
52 | // send data - asynchronous
53 | producer.send(record, new Callback() {
54 | public void onCompletion(RecordMetadata recordMetadata, Exception e) {
55 | // executes every time a record is successfully sent or an exception is thrown
56 | if (e == null) {
57 | // the record was successfully sent
58 | logger.info("Received new metadata. \n" +
59 | "Topic:" + recordMetadata.topic() + "\n" +
60 | "Partition: " + recordMetadata.partition() + "\n" +
61 | "Offset: " + recordMetadata.offset() + "\n" +
62 | "Timestamp: " + recordMetadata.timestamp());
63 | } else {
64 | logger.error("Error while producing", e);
65 | }
66 | }
67 | }).get(); // block the .send() to make it synchronous - don't do this in production!
68 | }
69 |
70 | // flush data
71 | producer.flush();
72 | // flush and close producer
73 | producer.close();
74 |
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/kafka-basics/src/main/java/kafka/tutorial1/ProducerDemoWithCallback.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial1;
2 |
3 | import org.apache.kafka.clients.producer.*;
4 | import org.apache.kafka.common.serialization.StringSerializer;
5 | import org.slf4j.Logger;
6 | import org.slf4j.LoggerFactory;
7 |
8 | import java.util.Properties;
9 |
10 | public class ProducerDemoWithCallback {
11 |
12 | public static void main(String[] args) {
13 |
14 | final Logger logger = LoggerFactory.getLogger(ProducerDemoWithCallback.class);
15 |
16 | String bootstrapServers = "127.0.0.1:9092";
17 |
18 | // create Producer properties
19 | Properties properties = new Properties();
20 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
21 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
22 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
23 |
24 | // create the producer
25 | KafkaProducer producer = new KafkaProducer(properties);
26 |
27 |
28 | for (int i=0; i<10; i++ ) {
29 | // create a producer record
30 | ProducerRecord record =
31 | new ProducerRecord("first_topic", "hello world " + Integer.toString(i));
32 |
33 | // send data - asynchronous
34 | producer.send(record, new Callback() {
35 | public void onCompletion(RecordMetadata recordMetadata, Exception e) {
36 | // executes every time a record is successfully sent or an exception is thrown
37 | if (e == null) {
38 | // the record was successfully sent
39 | logger.info("Received new metadata. \n" +
40 | "Topic:" + recordMetadata.topic() + "\n" +
41 | "Partition: " + recordMetadata.partition() + "\n" +
42 | "Offset: " + recordMetadata.offset() + "\n" +
43 | "Timestamp: " + recordMetadata.timestamp());
44 | } else {
45 | logger.error("Error while producing", e);
46 | }
47 | }
48 | });
49 | }
50 |
51 | // flush data
52 | producer.flush();
53 | // flush and close producer
54 | producer.close();
55 |
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/kafka-connect/connect-standalone.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # These are defaults. This file just demonstrates how to override some settings.
17 | bootstrap.servers=localhost:9092
18 |
19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka
21 | key.converter=org.apache.kafka.connect.json.JsonConverter
22 | value.converter=org.apache.kafka.connect.json.JsonConverter
23 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
24 | # it to
25 | key.converter.schemas.enable=true
26 | value.converter.schemas.enable=true
27 |
28 | offset.storage.file.filename=/tmp/connect.offsets
29 | # Flush much faster than normal, which is useful for testing/debugging
30 | offset.flush.interval.ms=10000
31 |
32 | # Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins
33 | # (connectors, converters, transformations). The list should consist of top level directories that include
34 | # any combination of:
35 | # a) directories immediately containing jars with plugins and their dependencies
36 | # b) uber-jars with plugins and their dependencies
37 | # c) directories immediately containing the package directory structure of classes of plugins and their dependencies
38 | # Note: symlinks will be followed to discover dependencies or plugins.
39 | # Examples:
40 | # plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors,
41 | plugin.path=connectors
42 |
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | This connector uses the twitter streaming api to listen for status update messages and
4 | convert them to a Kafka Connect struct on the fly. The goal is to match as much of the
5 | Twitter Status object as possible.
6 |
7 | # Configuration
8 |
9 | ## TwitterSourceConnector
10 |
11 | This Twitter Source connector is used to pull data from Twitter in real time.
12 |
13 | ```properties
14 | name=connector1
15 | tasks.max=1
16 | connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector
17 |
18 | # Set these required values
19 | twitter.oauth.accessTokenSecret=
20 | process.deletes=
21 | filter.keywords=
22 | kafka.status.topic=
23 | kafka.delete.topic=
24 | twitter.oauth.consumerSecret=
25 | twitter.oauth.accessToken=
26 | twitter.oauth.consumerKey=
27 | ```
28 |
29 | | Name | Description | Type | Default | Valid Values | Importance |
30 | |---------------------------------|---------------------------------------------------|----------|---------|--------------|------------|
31 | | filter.keywords | Twitter keywords to filter for. | list | | | high |
32 | | kafka.delete.topic | Kafka topic to write delete events to. | string | | | high |
33 | | kafka.status.topic | Kafka topic to write the statuses to. | string | | | high |
34 | | process.deletes | Should this connector process deletes. | boolean | | | high |
35 | | twitter.oauth.accessToken | OAuth access token | password | | | high |
36 | | twitter.oauth.accessTokenSecret | OAuth access token secret | password | | | high |
37 | | twitter.oauth.consumerKey | OAuth consumer key | password | | | high |
38 | | twitter.oauth.consumerSecret | OAuth consumer secret | password | | | high |
39 | | twitter.debug | Flag to enable debug logging for the twitter api. | boolean | false | | low |
40 |
41 |
42 | # Schemas
43 |
44 | ## com.github.jcustenborder.kafka.connect.twitter.Place
45 |
46 | Returns the place attached to this status
47 |
48 | | Name | Optional | Schema | Default Value | Documentation |
49 | |---------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------|
50 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
51 | | StreetAddress | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
52 | | CountryCode | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
53 | | Id | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
54 | | Country | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
55 | | PlaceType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
56 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
57 | | FullName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
58 |
59 | ## com.github.jcustenborder.kafka.connect.twitter.GeoLocation
60 |
61 | Returns the location that this tweet refers to if available.
62 |
63 | | Name | Optional | Schema | Default Value | Documentation |
64 | |-----------|----------|---------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|
65 | | Latitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the latitude of the geo location |
66 | | Longitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the longitude of the geo location |
67 |
68 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice
69 |
70 | Message that is received when a status is deleted from Twitter.
71 |
72 | | Name | Optional | Schema | Default Value | Documentation |
73 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------|
74 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
75 | | UserId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
76 |
77 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey
78 |
79 | Key for a message that is received when a status is deleted from Twitter.
80 |
81 | | Name | Optional | Schema | Default Value | Documentation |
82 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------|
83 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
84 |
85 | ## com.github.jcustenborder.kafka.connect.twitter.StatusKey
86 |
87 | Key for a Twitter status.
88 |
89 | | Name | Optional | Schema | Default Value | Documentation |
90 | |------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------|
91 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
92 |
93 | ## com.github.jcustenborder.kafka.connect.twitter.Status
94 |
95 | Twitter status message.
96 |
97 | | Name | Optional | Schema | Default Value | Documentation |
98 | |----------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------|
99 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | Return the created_at |
100 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the status |
101 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the status |
102 | | Source | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the source |
103 | | Truncated | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is truncated |
104 | | InReplyToStatusId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_tostatus_id |
105 | | InReplyToUserId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_user_id |
106 | | InReplyToScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the in_reply_to_screen_name |
107 | | GeoLocation | true | [com.github.jcustenborder.kafka.connect.twitter.GeoLocation](#com.github.jcustenborder.kafka.connect.twitter.GeoLocation) | | Returns The location that this tweet refers to if available. |
108 | | Place | true | [com.github.jcustenborder.kafka.connect.twitter.Place](#com.github.jcustenborder.kafka.connect.twitter.Place) | | Returns the place attached to this status |
109 | | Favorited | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is favorited |
110 | | Retweeted | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is retweeted |
111 | | FavoriteCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Indicates approximately how many times this Tweet has been "favorited" by Twitter users. |
112 | | User                 | false    | [com.github.jcustenborder.kafka.connect.twitter.User](#com.github.jcustenborder.kafka.connect.twitter.User)                                    |               | Return the user associated with the status. This can be null if the instance is from User.getStatus().                           |
114 | | Retweet | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
115 | | Contributors | false | Array of [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns an array of contributors, or null if no contributor is associated with this status. |
116 | | RetweetCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled. |
117 | | RetweetedByMe | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
118 | | CurrentUserRetweetId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled. |
119 | | PossiblySensitive | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
120 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the lang of the status text if available. |
121 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the tweet is withheld |
122 | | HashtagEntities      | true     | Array of [com.github.jcustenborder.kafka.connect.twitter.HashtagEntity](#com.github.jcustenborder.kafka.connect.twitter.HashtagEntity)         |               | Returns an array of hashtags mentioned in the tweet.                                                                             |
123 | | UserMentionEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity](#com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity) | | Returns an array of user mentions in the tweet. |
124 | | MediaEntities        | true     | Array of [com.github.jcustenborder.kafka.connect.twitter.MediaEntity](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity)             |               | Returns an array of MediaEntities if media are available in the tweet.                                                           |
125 | | SymbolEntities       | true     | Array of [com.github.jcustenborder.kafka.connect.twitter.SymbolEntity](#com.github.jcustenborder.kafka.connect.twitter.SymbolEntity)           |               | Returns an array of SymbolEntities if symbols are available in the tweet.                                                        |
126 | | URLEntities          | true     | Array of [com.github.jcustenborder.kafka.connect.twitter.URLEntity](#com.github.jcustenborder.kafka.connect.twitter.URLEntity)                 |               | Returns an array of URLEntity objects mentioned in the tweet.                                                                    |
127 |
128 | ## com.github.jcustenborder.kafka.connect.twitter.User
129 |
130 | Return the user associated with the status.
131 | This can be null if the instance is from User.getStatus().
132 |
133 | | Name | Optional | Schema | Default Value | Documentation |
134 | |--------------------------------|----------|----------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------|
135 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the user |
136 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name of the user |
137 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name of the user |
138 | | Location | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the location of the user |
139 | | Description | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the description of the user |
140 | | ContributorsEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user is enabling contributors |
141 | | ProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the profile image url of the user |
142 | | BiggerProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
143 | | MiniProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
144 | | OriginalProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
145 | | ProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
146 | | BiggerProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
147 | | MiniProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
148 | | OriginalProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
149 | | DefaultProfileImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not uploaded their own avatar |
150 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the url of the user |
151 | | Protected | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the user status is protected |
152 | | FollowersCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of followers |
153 | | ProfileBackgroundColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
154 | | ProfileTextColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
155 | | ProfileLinkColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
156 | | ProfileSidebarFillColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
157 | | ProfileSidebarBorderColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
158 | | ProfileUseBackgroundImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
159 | | DefaultProfile | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not altered the theme or background |
160 | | ShowAllInlineMedia | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
161 | | FriendsCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of users the user follows (AKA "followings") |
162 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | |
163 | | FavouritesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
164 | | UtcOffset | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
165 | | TimeZone | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
166 | | ProfileBackgroundImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
167 | | ProfileBackgroundImageUrlHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
168 | | ProfileBannerURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
169 | | ProfileBannerRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
170 | | ProfileBannerIPadURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
171 | | ProfileBannerIPadRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
172 | | ProfileBannerMobileURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
173 | | ProfileBannerMobileRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
174 | | ProfileBackgroundTiled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
175 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the preferred language of the user |
176 | | StatusesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
177 | | GeoEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
178 | | Verified | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
179 | | Translator | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | |
180 | | ListedCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of public lists the user is listed on, or -1 if the count is unavailable. |
181 | | FollowRequestSent | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Returns true if the authenticating user has requested to follow this user, otherwise false. |
182 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the user is withheld |
183 |
184 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant
185 |
186 | | Name | Optional | Schema | Default Value | Documentation |
187 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------|
188 | | Url | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
189 | | Bitrate | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
190 | | ContentType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
191 |
192 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size
193 |
194 | | Name | Optional | Schema | Default Value | Documentation |
195 | |--------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------|
196 | | Resize | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
197 | | Width | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
198 | | Height | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
199 |
200 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity
201 |
202 | | Name | Optional | Schema | Default Value | Documentation |
203 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------|
204 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
205 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
206 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
207 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | |
208 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
209 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. |
210 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. |
211 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. |
212 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | Returns size variations of the media. |
213 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. |
214 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
215 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
216 | | ExpandedURL            | true     | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING)                                                                                                                                             |               | Returns the expanded URL if the mentioned URL is shortened.                 |
217 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. |
218 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. |
219 | | DisplayURL             | true     | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING)                                                                                                                                             |               | Returns the display URL if the mentioned URL is shortened.                  |
220 |
221 | ## com.github.jcustenborder.kafka.connect.twitter.HashtagEntity
222 |
223 | | Name | Optional | Schema | Default Value | Documentation |
224 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------|
225 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the hashtag without #. |
226 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the hashtag. |
227 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the hashtag. |
228 |
229 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity
230 |
231 | | Name | Optional | Schema | Default Value | Documentation |
232 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------|
233 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. |
234 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. |
235 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. |
236 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | |
237 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. |
238 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
239 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | |
240 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | |
241 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | Returns size variations of the media. |
242 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | |
243 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
244 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
245 | | ExpandedURL            | true     | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING)                                                                                                                                             |               | Returns the expanded URL if the mentioned URL is shortened.                 |
246 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. |
247 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. |
248 | | DisplayURL             | true     | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING)                                                                                                                                             |               | Returns the display URL if the mentioned URL is shortened.                  |
249 |
250 | ## com.github.jcustenborder.kafka.connect.twitter.SymbolEntity
251 |
252 | | Name | Optional | Schema | Default Value | Documentation |
253 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------|
254 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the symbol. |
255 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the symbol. |
256 | | Text  | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the entity. |
257 |
258 | ## com.github.jcustenborder.kafka.connect.twitter.URLEntity
259 |
260 | | Name | Optional | Schema | Default Value | Documentation |
261 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------|
262 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
263 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. |
264 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if the mentioned URL is shortened. |
265 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. |
266 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. |
267 | | DisplayURL  | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if the mentioned URL is shortened. |
268 |
269 | ## com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity
270 |
271 | | Name | Optional | Schema | Default Value | Documentation |
272 | |------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------------|
273 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name mentioned in the status. |
274 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the user id mentioned in the status. |
275 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. |
276 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. |
277 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the user mention. |
278 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the user mention. |
279 |
280 |
281 | # Running in development
282 |
283 | ```
284 | mvn clean package
285 | export CLASSPATH="$(find target/ -type f -name '*.jar'| grep '\-package' | tr '\n' ':')"
286 | $CONFLUENT_HOME/bin/connect-standalone connect/connect-avro-docker.properties config/TwitterSourceConnector.properties
287 | ```
288 |
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/annotations-2.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/annotations-2.0.1.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/connect-utils-0.3.105.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/connect-utils-0.3.105.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/freemarker-2.3.25-incubating.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/freemarker-2.3.25-incubating.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/guava-18.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/guava-18.0.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/jackson-annotations-2.8.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-annotations-2.8.0.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/jackson-core-2.8.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-core-2.8.5.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/jackson-databind-2.8.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-databind-2.8.5.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/javassist-3.19.0-GA.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/javassist-3.19.0-GA.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/kafka-connect-twitter-0.2.26.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/kafka-connect-twitter-0.2.26.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/reflections-0.9.10.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/reflections-0.9.10.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/twitter4j-core-4.0.6.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/twitter4j-core-4.0.6.jar
--------------------------------------------------------------------------------
/kafka-connect/connectors/kafka-connect-twitter/twitter4j-stream-4.0.6.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/twitter4j-stream-4.0.6.jar
--------------------------------------------------------------------------------
/kafka-connect/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the Twitter source connector in standalone mode.
#
# The three invocations below are ALTERNATIVES for different Kafka installs —
# run exactly one. The originals were all uncommented, which would launch the
# connector three times in a row; only the first variant is left active.

# Confluent / package install (binary on PATH without extension):
connect-standalone connect-standalone.properties twitter.properties

# OR (Linux / macOS, plain Apache Kafka tarball):
# connect-standalone.sh connect-standalone.properties twitter.properties

# OR (Windows):
# connect-standalone.bat connect-standalone.properties twitter.properties
--------------------------------------------------------------------------------
/kafka-connect/twitter.properties:
--------------------------------------------------------------------------------
1 | name=TwitterSourceDemo
2 | tasks.max=1
3 | connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector
4 |
5 | # Set these required values
6 | process.deletes=false
7 | filter.keywords=bitcoin
8 | kafka.status.topic=twitter_status_connect
9 | kafka.delete.topic=twitter_deletes_connect
10 | # put your own credentials here - don't share with anyone
11 | twitter.oauth.consumerKey=
12 | twitter.oauth.consumerSecret=
13 | twitter.oauth.accessToken=
14 | twitter.oauth.accessTokenSecret=
--------------------------------------------------------------------------------
/kafka-consumer-elasticsearch/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | kafka-beginners-course
7 | com.github.simplesteph
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | kafka-consumer-elasticsearch
13 |
14 |
15 |
16 | org.elasticsearch.client
17 | elasticsearch-rest-high-level-client
18 | 6.4.0
19 |
20 |
21 |
22 |
23 | org.apache.kafka
24 | kafka-clients
25 | 2.0.0
26 |
27 |
28 |
29 |
30 | org.slf4j
31 | slf4j-simple
32 | 1.7.25
33 |
34 |
35 |
36 |
37 | com.google.code.gson
38 | gson
39 | 2.8.5
40 |
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/kafka-consumer-elasticsearch/src/main/java/com.github.simplesteph.kafka/tutorial3/ElasticSearchConsumer.java:
--------------------------------------------------------------------------------
1 | package com.github.simplesteph.kafka.tutorial3;
2 |
3 | import com.google.gson.JsonParser;
4 | import org.apache.http.HttpHost;
5 | import org.apache.http.auth.AuthScope;
6 | import org.apache.http.auth.UsernamePasswordCredentials;
7 | import org.apache.http.client.CredentialsProvider;
8 | import org.apache.http.impl.client.BasicCredentialsProvider;
9 | import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
10 | import org.apache.kafka.clients.consumer.ConsumerConfig;
11 | import org.apache.kafka.clients.consumer.ConsumerRecord;
12 | import org.apache.kafka.clients.consumer.ConsumerRecords;
13 | import org.apache.kafka.clients.consumer.KafkaConsumer;
14 | import org.apache.kafka.common.serialization.StringDeserializer;
15 | import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
16 | import org.elasticsearch.action.bulk.BulkRequest;
17 | import org.elasticsearch.action.bulk.BulkResponse;
18 | import org.elasticsearch.action.index.IndexRequest;
19 | import org.elasticsearch.action.index.IndexResponse;
20 | import org.elasticsearch.client.RequestOptions;
21 | import org.elasticsearch.client.RestClient;
22 | import org.elasticsearch.client.RestClientBuilder;
23 | import org.elasticsearch.client.RestHighLevelClient;
24 | import org.elasticsearch.common.xcontent.XContentType;
25 | import org.elasticsearch.index.mapper.ObjectMapper;
26 | import org.slf4j.Logger;
27 | import org.slf4j.LoggerFactory;
28 |
29 | import java.io.IOException;
30 | import java.time.Duration;
31 | import java.util.Arrays;
32 | import java.util.Properties;
33 |
34 | public class ElasticSearchConsumer {
35 |
36 | public static RestHighLevelClient createClient(){
37 |
38 | //////////////////////////
39 | /////////// IF YOU USE LOCAL ELASTICSEARCH
40 | //////////////////////////
41 |
42 | // String hostname = "localhost";
43 | // RestClientBuilder builder = RestClient.builder(new HttpHost(hostname,9200,"http"));
44 |
45 |
46 | //////////////////////////
47 | /////////// IF YOU USE BONSAI / HOSTED ELASTICSEARCH
48 | //////////////////////////
49 |
50 | // replace with your own credentials
51 | String hostname = ""; // localhost or bonsai url
52 | String username = ""; // needed only for bonsai
53 | String password = ""; // needed only for bonsai
54 |
55 | // credentials provider help supply username and password
56 | final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
57 | credentialsProvider.setCredentials(AuthScope.ANY,
58 | new UsernamePasswordCredentials(username, password));
59 |
60 | RestClientBuilder builder = RestClient.builder(
61 | new HttpHost(hostname, 443, "https"))
62 | .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
63 | @Override
64 | public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
65 | return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
66 | }
67 | });
68 |
69 | RestHighLevelClient client = new RestHighLevelClient(builder);
70 | return client;
71 | }
72 |
73 | public static KafkaConsumer createConsumer(String topic){
74 |
75 | String bootstrapServers = "127.0.0.1:9092";
76 | String groupId = "kafka-demo-elasticsearch";
77 |
78 | // create consumer configs
79 | Properties properties = new Properties();
80 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
81 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
82 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
83 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
84 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
85 | properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // disable auto commit of offsets
86 | properties.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "100"); // disable auto commit of offsets
87 |
88 | // create consumer
89 | KafkaConsumer consumer = new KafkaConsumer(properties);
90 | consumer.subscribe(Arrays.asList(topic));
91 |
92 | return consumer;
93 |
94 | }
95 |
96 | private static JsonParser jsonParser = new JsonParser();
97 |
98 | private static String extractIdFromTweet(String tweetJson){
99 | // gson library
100 | return jsonParser.parse(tweetJson)
101 | .getAsJsonObject()
102 | .get("id_str")
103 | .getAsString();
104 | }
105 |
106 | public static void main(String[] args) throws IOException {
107 | Logger logger = LoggerFactory.getLogger(ElasticSearchConsumer.class.getName());
108 | RestHighLevelClient client = createClient();
109 |
110 | KafkaConsumer consumer = createConsumer("twitter_tweets");
111 |
112 | while(true){
113 | ConsumerRecords records =
114 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0
115 |
116 | Integer recordCount = records.count();
117 | logger.info("Received " + recordCount + " records");
118 |
119 | BulkRequest bulkRequest = new BulkRequest();
120 |
121 | for (ConsumerRecord record : records){
122 |
123 | // 2 strategies
124 | // kafka generic ID
125 | // String id = record.topic() + "_" + record.partition() + "_" + record.offset();
126 |
127 | // twitter feed specific id
128 | try {
129 | String id = extractIdFromTweet(record.value());
130 |
131 | // where we insert data into ElasticSearch
132 | IndexRequest indexRequest = new IndexRequest(
133 | "twitter",
134 | "tweets",
135 | id // this is to make our consumer idempotent
136 | ).source(record.value(), XContentType.JSON);
137 |
138 | bulkRequest.add(indexRequest); // we add to our bulk request (takes no time)
139 | } catch (NullPointerException e){
140 | logger.warn("skipping bad data: " + record.value());
141 | }
142 |
143 | }
144 |
145 | if (recordCount > 0) {
146 | BulkResponse bulkItemResponses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
147 | logger.info("Committing offsets...");
148 | consumer.commitSync();
149 | logger.info("Offsets have been committed");
150 | try {
151 | Thread.sleep(1000);
152 | } catch (InterruptedException e) {
153 | e.printStackTrace();
154 | }
155 | }
156 | }
157 |
158 | // close the client gracefully
159 | // client.close();
160 |
161 | }
162 | }
163 |
--------------------------------------------------------------------------------
/kafka-producer-twitter/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | kafka-beginners-course
7 | com.github.simplesteph
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | kafka-producer-twitter
13 |
14 |
15 |
16 |
17 |
18 | org.apache.kafka
19 | kafka-clients
20 | 2.0.0
21 |
22 |
23 |
24 |
25 | org.slf4j
26 | slf4j-simple
27 | 1.7.25
28 |
29 |
30 |
31 |
32 | com.twitter
33 | hbc-core
34 | 2.2.0
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/kafka-producer-twitter/src/main/java/kafka/tutorial2/TwitterProducer.java:
--------------------------------------------------------------------------------
1 | package kafka.tutorial2;
2 |
3 | import com.google.common.collect.Lists;
4 | import com.twitter.hbc.ClientBuilder;
5 | import com.twitter.hbc.core.Client;
6 | import com.twitter.hbc.core.Constants;
7 | import com.twitter.hbc.core.Hosts;
8 | import com.twitter.hbc.core.HttpHosts;
9 | import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint;
10 | import com.twitter.hbc.core.processor.StringDelimitedProcessor;
11 | import com.twitter.hbc.httpclient.auth.Authentication;
12 | import com.twitter.hbc.httpclient.auth.OAuth1;
13 | import org.apache.kafka.clients.producer.*;
14 | import org.apache.kafka.common.serialization.StringSerializer;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | import java.util.List;
19 | import java.util.Properties;
20 | import java.util.concurrent.BlockingQueue;
21 | import java.util.concurrent.LinkedBlockingQueue;
22 | import java.util.concurrent.TimeUnit;
23 |
24 | public class TwitterProducer {
25 |
26 | Logger logger = LoggerFactory.getLogger(TwitterProducer.class.getName());
27 |
28 | // use your own credentials - don't share them with anyone
29 | String consumerKey = "";
30 | String consumerSecret = "";
31 | String token = "";
32 | String secret = "";
33 |
34 | List terms = Lists.newArrayList("bitcoin", "usa", "politics", "sport", "soccer");
35 |
36 |
37 | public TwitterProducer(){}
38 |
39 | public static void main(String[] args) {
40 | new TwitterProducer().run();
41 | }
42 |
43 | public void run(){
44 |
45 | logger.info("Setup");
46 |
47 | /** Set up your blocking queues: Be sure to size these properly based on expected TPS of your stream */
48 | BlockingQueue msgQueue = new LinkedBlockingQueue(1000);
49 |
50 | // create a twitter client
51 | Client client = createTwitterClient(msgQueue);
52 | // Attempts to establish a connection.
53 | client.connect();
54 |
55 | // create a kafka producer
56 | KafkaProducer producer = createKafkaProducer();
57 |
58 | // add a shutdown hook
59 | Runtime.getRuntime().addShutdownHook(new Thread(() -> {
60 | logger.info("stopping application...");
61 | logger.info("shutting down client from twitter...");
62 | client.stop();
63 | logger.info("closing producer...");
64 | producer.close();
65 | logger.info("done!");
66 | }));
67 |
68 | // loop to send tweets to kafka
69 | // on a different thread, or multiple different threads....
70 | while (!client.isDone()) {
71 | String msg = null;
72 | try {
73 | msg = msgQueue.poll(5, TimeUnit.SECONDS);
74 | } catch (InterruptedException e) {
75 | e.printStackTrace();
76 | client.stop();
77 | }
78 | if (msg != null){
79 | logger.info(msg);
80 | producer.send(new ProducerRecord<>("twitter_tweets", null, msg), new Callback() {
81 | @Override
82 | public void onCompletion(RecordMetadata recordMetadata, Exception e) {
83 | if (e != null) {
84 | logger.error("Something bad happened", e);
85 | }
86 | }
87 | });
88 | }
89 | }
90 | logger.info("End of application");
91 | }
92 |
93 | public Client createTwitterClient(BlockingQueue msgQueue){
94 |
95 | /** Declare the host you want to connect to, the endpoint, and authentication (basic auth or oauth) */
96 | Hosts hosebirdHosts = new HttpHosts(Constants.STREAM_HOST);
97 | StatusesFilterEndpoint hosebirdEndpoint = new StatusesFilterEndpoint();
98 |
99 | hosebirdEndpoint.trackTerms(terms);
100 |
101 | // These secrets should be read from a config file
102 | Authentication hosebirdAuth = new OAuth1(consumerKey, consumerSecret, token, secret);
103 |
104 | ClientBuilder builder = new ClientBuilder()
105 | .name("Hosebird-Client-01") // optional: mainly for the logs
106 | .hosts(hosebirdHosts)
107 | .authentication(hosebirdAuth)
108 | .endpoint(hosebirdEndpoint)
109 | .processor(new StringDelimitedProcessor(msgQueue));
110 |
111 | Client hosebirdClient = builder.build();
112 | return hosebirdClient;
113 | }
114 |
115 | public KafkaProducer createKafkaProducer(){
116 | String bootstrapServers = "127.0.0.1:9092";
117 |
118 | // create Producer properties
119 | Properties properties = new Properties();
120 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
121 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
122 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
123 |
124 | // create safe Producer
125 | properties.setProperty(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true");
126 | properties.setProperty(ProducerConfig.ACKS_CONFIG, "all");
127 | properties.setProperty(ProducerConfig.RETRIES_CONFIG, Integer.toString(Integer.MAX_VALUE));
128 | properties.setProperty(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "5"); // kafka 2.0 >= 1.1 so we can keep this as 5. Use 1 otherwise.
129 |
130 | // high throughput producer (at the expense of a bit of latency and CPU usage)
131 | properties.setProperty(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");
132 | properties.setProperty(ProducerConfig.LINGER_MS_CONFIG, "20");
133 | properties.setProperty(ProducerConfig.BATCH_SIZE_CONFIG, Integer.toString(32*1024)); // 32 KB batch size
134 |
135 | // create the producer
136 | KafkaProducer producer = new KafkaProducer(properties);
137 | return producer;
138 | }
139 | }
140 |
--------------------------------------------------------------------------------
/kafka-streams-filter-tweets/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | kafka-beginners-course
7 | com.github.simplesteph
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | kafka-streams-filter-tweets
13 |
14 |
15 |
16 |
17 | org.apache.kafka
18 | kafka-streams
19 | 2.0.0
20 |
21 |
22 |
23 |
24 |
25 | org.slf4j
26 | slf4j-simple
27 | 1.7.25
28 |
29 |
30 |
31 |
32 |
33 | com.google.code.gson
34 | gson
35 | 2.8.5
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/kafka-streams-filter-tweets/src/main/java/com/github/simplesteph/kafka/tutorial4/StreamsFilterTweets.java:
--------------------------------------------------------------------------------
1 | package com.github.simplesteph.kafka.tutorial4;
2 |
3 | import com.google.gson.JsonParser;
4 | import org.apache.kafka.common.protocol.types.Field;
5 | import org.apache.kafka.common.serialization.Serdes;
6 | import org.apache.kafka.streams.KafkaStreams;
7 | import org.apache.kafka.streams.StreamsBuilder;
8 | import org.apache.kafka.streams.StreamsConfig;
9 | import org.apache.kafka.streams.kstream.KStream;
10 |
11 | import java.util.Properties;
12 |
13 | public class StreamsFilterTweets {
14 |
15 | public static void main(String[] args) {
16 | // create properties
17 | Properties properties = new Properties();
18 | properties.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "127.0.0.1:9092");
19 | properties.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "demo-kafka-streams");
20 | properties.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class.getName());
21 | properties.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class.getName());
22 |
23 | // create a topology
24 | StreamsBuilder streamsBuilder = new StreamsBuilder();
25 |
26 | // input topic
27 | KStream inputTopic = streamsBuilder.stream("twitter_tweets");
28 | KStream filteredStream = inputTopic.filter(
29 | // filter for tweets which has a user of over 10000 followers
30 | (k, jsonTweet) -> extractUserFollowersInTweet(jsonTweet) > 10000
31 | );
32 | filteredStream.to("important_tweets");
33 |
34 | // build the topology
35 | KafkaStreams kafkaStreams = new KafkaStreams(
36 | streamsBuilder.build(),
37 | properties
38 | );
39 |
40 | // start our streams application
41 | kafkaStreams.start();
42 | }
43 |
44 | private static JsonParser jsonParser = new JsonParser();
45 |
46 | private static Integer extractUserFollowersInTweet(String tweetJson){
47 | // gson library
48 | try {
49 | return jsonParser.parse(tweetJson)
50 | .getAsJsonObject()
51 | .get("user")
52 | .getAsJsonObject()
53 | .get("followers_count")
54 | .getAsInt();
55 | }
56 | catch (NullPointerException e){
57 | return 0;
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.github.simplesteph
8 | kafka-beginners-course
9 | pom
10 | 1.0
11 |
12 |
13 |
14 | org.apache.maven.plugins
15 | maven-compiler-plugin
16 |
17 | 8
18 | 8
19 |
20 |
21 |
22 |
23 |
24 | kafka-basics
25 | kafka-producer-twitter
26 | kafka-consumer-elasticsearch
27 | kafka-streams-filter-tweets
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------