├── .gitignore ├── README.md ├── kafka-basics ├── pom.xml └── src │ └── main │ └── java │ └── kafka │ └── tutorial1 │ ├── ConsumerDemo.java │ ├── ConsumerDemoAssignSeek.java │ ├── ConsumerDemoGroups.java │ ├── ConsumerDemoWithThread.java │ ├── ProducerDemo.java │ ├── ProducerDemoKeys.java │ └── ProducerDemoWithCallback.java ├── kafka-connect ├── connect-standalone.properties ├── connectors │ └── kafka-connect-twitter │ │ ├── LICENSE │ │ ├── README.md │ │ ├── annotations-2.0.1.jar │ │ ├── connect-utils-0.3.105.jar │ │ ├── freemarker-2.3.25-incubating.jar │ │ ├── guava-18.0.jar │ │ ├── jackson-annotations-2.8.0.jar │ │ ├── jackson-core-2.8.5.jar │ │ ├── jackson-databind-2.8.5.jar │ │ ├── javassist-3.19.0-GA.jar │ │ ├── kafka-connect-twitter-0.2.26.jar │ │ ├── reflections-0.9.10.jar │ │ ├── twitter4j-core-4.0.6.jar │ │ └── twitter4j-stream-4.0.6.jar ├── run.sh └── twitter.properties ├── kafka-consumer-elasticsearch ├── pom.xml └── src │ └── main │ └── java │ └── com.github.simplesteph.kafka │ └── tutorial3 │ └── ElasticSearchConsumer.java ├── kafka-producer-twitter ├── pom.xml └── src │ └── main │ └── java │ └── kafka │ └── tutorial2 │ └── TwitterProducer.java ├── kafka-streams-filter-tweets ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── simplesteph │ └── kafka │ └── tutorial4 │ └── StreamsFilterTweets.java └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | *.iml 4 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kafka for Beginners 2 | 3 | This is a companion repository for my [Kafka for Beginners](http://bit.ly/kafka-beginners-github) 4 | 5 | [![course logo](https://udemy-images.udemy.com/course/480x270/1075642_b6d2_6.jpg)](http://bit.ly/kafka-beginners-github) 6 | 7 | # Content 8 | - Basics of Kafka 9 | - Twitter 
Producer 10 | - ElasticSearch Consumer 11 | - Kafka Streams 101 12 | - Kafka Connect Example -------------------------------------------------------------------------------- /kafka-basics/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | kafka-beginners-course 7 | com.github.simplesteph 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | kafka-basics 13 | 14 | 15 | 16 | 17 | 18 | org.apache.kafka 19 | kafka-clients 20 | 2.0.0 21 | 22 | 23 | 24 | 25 | org.slf4j 26 | slf4j-simple 27 | 1.7.25 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemo.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.serialization.StringDeserializer; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.time.Duration; 12 | import java.util.Arrays; 13 | import java.util.Properties; 14 | 15 | public class ConsumerDemo { 16 | 17 | public static void main(String[] args) { 18 | 19 | Logger logger = LoggerFactory.getLogger(ConsumerDemo.class.getName()); 20 | 21 | String bootstrapServers = "127.0.0.1:9092"; 22 | String groupId = "my-fourth-application"; 23 | String topic = "first_topic"; 24 | 25 | // create consumer configs 26 | Properties properties = new Properties(); 27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 28 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 30 
| properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); 31 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 32 | 33 | // create consumer 34 | KafkaConsumer consumer = new KafkaConsumer(properties); 35 | 36 | // subscribe consumer to our topic(s) 37 | consumer.subscribe(Arrays.asList(topic)); 38 | 39 | // poll for new data 40 | while(true){ 41 | ConsumerRecords records = 42 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0 43 | 44 | for (ConsumerRecord record : records){ 45 | logger.info("Key: " + record.key() + ", Value: " + record.value()); 46 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset()); 47 | } 48 | } 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoAssignSeek.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.TopicPartition; 8 | import org.apache.kafka.common.serialization.StringDeserializer; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.time.Duration; 13 | import java.util.Arrays; 14 | import java.util.Properties; 15 | 16 | public class ConsumerDemoAssignSeek { 17 | 18 | public static void main(String[] args) { 19 | 20 | Logger logger = LoggerFactory.getLogger(ConsumerDemoAssignSeek.class.getName()); 21 | 22 | String bootstrapServers = "127.0.0.1:9092"; 23 | String topic = "first_topic"; 24 | 25 | // create consumer configs 26 | Properties properties = new Properties(); 27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 28 | 
properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 30 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 31 | 32 | // create consumer 33 | KafkaConsumer consumer = new KafkaConsumer(properties); 34 | 35 | // assign and seek are mostly used to replay data or fetch a specific message 36 | 37 | // assign 38 | TopicPartition partitionToReadFrom = new TopicPartition(topic, 0); 39 | long offsetToReadFrom = 15L; 40 | consumer.assign(Arrays.asList(partitionToReadFrom)); 41 | 42 | // seek 43 | consumer.seek(partitionToReadFrom, offsetToReadFrom); 44 | 45 | int numberOfMessagesToRead = 5; 46 | boolean keepOnReading = true; 47 | int numberOfMessagesReadSoFar = 0; 48 | 49 | // poll for new data 50 | while(keepOnReading){ 51 | ConsumerRecords records = 52 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0 53 | 54 | for (ConsumerRecord record : records){ 55 | numberOfMessagesReadSoFar += 1; 56 | logger.info("Key: " + record.key() + ", Value: " + record.value()); 57 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset()); 58 | if (numberOfMessagesReadSoFar >= numberOfMessagesToRead){ 59 | keepOnReading = false; // to exit the while loop 60 | break; // to exit the for loop 61 | } 62 | } 63 | } 64 | 65 | logger.info("Exiting the application"); 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoGroups.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import 
org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.serialization.StringDeserializer; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.time.Duration; 12 | import java.util.Arrays; 13 | import java.util.Properties; 14 | 15 | public class ConsumerDemoGroups { 16 | 17 | public static void main(String[] args) { 18 | 19 | Logger logger = LoggerFactory.getLogger(ConsumerDemoGroups.class.getName()); 20 | 21 | String bootstrapServers = "127.0.0.1:9092"; 22 | String groupId = "my-fifth-application"; 23 | String topic = "first_topic"; 24 | 25 | // create consumer configs 26 | Properties properties = new Properties(); 27 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 28 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 29 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 30 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); 31 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 32 | 33 | // create consumer 34 | KafkaConsumer consumer = new KafkaConsumer(properties); 35 | 36 | // subscribe consumer to our topic(s) 37 | consumer.subscribe(Arrays.asList(topic)); 38 | 39 | // poll for new data 40 | while(true){ 41 | ConsumerRecords records = 42 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0 43 | 44 | for (ConsumerRecord record : records){ 45 | logger.info("Key: " + record.key() + ", Value: " + record.value()); 46 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset()); 47 | } 48 | } 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ConsumerDemoWithThread.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | 
import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.errors.WakeupException; 8 | import org.apache.kafka.common.serialization.StringDeserializer; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.time.Duration; 13 | import java.util.Arrays; 14 | import java.util.Properties; 15 | import java.util.concurrent.CountDownLatch; 16 | 17 | public class ConsumerDemoWithThread { 18 | 19 | public static void main(String[] args) { 20 | new ConsumerDemoWithThread().run(); 21 | } 22 | 23 | private ConsumerDemoWithThread() { 24 | 25 | } 26 | 27 | private void run() { 28 | Logger logger = LoggerFactory.getLogger(ConsumerDemoWithThread.class.getName()); 29 | 30 | String bootstrapServers = "127.0.0.1:9092"; 31 | String groupId = "my-sixth-application"; 32 | String topic = "first_topic"; 33 | 34 | // latch for dealing with multiple threads 35 | CountDownLatch latch = new CountDownLatch(1); 36 | 37 | // create the consumer runnable 38 | logger.info("Creating the consumer thread"); 39 | Runnable myConsumerRunnable = new ConsumerRunnable( 40 | bootstrapServers, 41 | groupId, 42 | topic, 43 | latch 44 | ); 45 | 46 | // start the thread 47 | Thread myThread = new Thread(myConsumerRunnable); 48 | myThread.start(); 49 | 50 | // add a shutdown hook 51 | Runtime.getRuntime().addShutdownHook(new Thread(() -> { 52 | logger.info("Caught shutdown hook"); 53 | ((ConsumerRunnable) myConsumerRunnable).shutdown(); 54 | try { 55 | latch.await(); 56 | } catch (InterruptedException e) { 57 | e.printStackTrace(); 58 | } 59 | logger.info("Application has exited"); 60 | } 61 | 62 | )); 63 | 64 | try { 65 | latch.await(); 66 | } catch (InterruptedException e) { 67 | logger.error("Application got interrupted", e); 68 | } finally { 69 | 
logger.info("Application is closing"); 70 | } 71 | } 72 | 73 | public class ConsumerRunnable implements Runnable { 74 | 75 | private CountDownLatch latch; 76 | private KafkaConsumer consumer; 77 | private Logger logger = LoggerFactory.getLogger(ConsumerRunnable.class.getName()); 78 | 79 | public ConsumerRunnable(String bootstrapServers, 80 | String groupId, 81 | String topic, 82 | CountDownLatch latch) { 83 | this.latch = latch; 84 | 85 | // create consumer configs 86 | Properties properties = new Properties(); 87 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 88 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 89 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 90 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); 91 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 92 | 93 | // create consumer 94 | consumer = new KafkaConsumer(properties); 95 | // subscribe consumer to our topic(s) 96 | consumer.subscribe(Arrays.asList(topic)); 97 | } 98 | 99 | @Override 100 | public void run() { 101 | // poll for new data 102 | try { 103 | while (true) { 104 | ConsumerRecords records = 105 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0 106 | 107 | for (ConsumerRecord record : records) { 108 | logger.info("Key: " + record.key() + ", Value: " + record.value()); 109 | logger.info("Partition: " + record.partition() + ", Offset:" + record.offset()); 110 | } 111 | } 112 | } catch (WakeupException e) { 113 | logger.info("Received shutdown signal!"); 114 | } finally { 115 | consumer.close(); 116 | // tell our main code we're done with the consumer 117 | latch.countDown(); 118 | } 119 | } 120 | 121 | public void shutdown() { 122 | // the wakeup() method is a special method to interrupt consumer.poll() 123 | // it will throw the exception WakeUpException 124 | 
consumer.wakeup(); 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ProducerDemo.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.kafka.common.serialization.StringSerializer; 7 | 8 | import java.util.Properties; 9 | 10 | public class ProducerDemo { 11 | 12 | public static void main(String[] args) { 13 | 14 | String bootstrapServers = "127.0.0.1:9092"; 15 | 16 | // create Producer properties 17 | Properties properties = new Properties(); 18 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 19 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 20 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 21 | 22 | // create the producer 23 | KafkaProducer producer = new KafkaProducer(properties); 24 | 25 | // create a producer record 26 | ProducerRecord record = 27 | new ProducerRecord("first_topic", "hello world"); 28 | 29 | // send data - asynchronous 30 | producer.send(record); 31 | 32 | // flush data 33 | producer.flush(); 34 | // flush and close producer 35 | producer.close(); 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ProducerDemoKeys.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.producer.*; 4 | import org.apache.kafka.common.serialization.StringSerializer; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import 
java.util.Properties; 9 | import java.util.concurrent.ExecutionException; 10 | 11 | public class ProducerDemoKeys { 12 | 13 | public static void main(String[] args) throws ExecutionException, InterruptedException { 14 | 15 | final Logger logger = LoggerFactory.getLogger(ProducerDemoKeys.class); 16 | 17 | String bootstrapServers = "127.0.0.1:9092"; 18 | 19 | // create Producer properties 20 | Properties properties = new Properties(); 21 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 22 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 23 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 24 | 25 | // create the producer 26 | KafkaProducer producer = new KafkaProducer(properties); 27 | 28 | 29 | for (int i=0; i<10; i++ ) { 30 | // create a producer record 31 | 32 | String topic = "first_topic"; 33 | String value = "hello world " + Integer.toString(i); 34 | String key = "id_" + Integer.toString(i); 35 | 36 | ProducerRecord record = 37 | new ProducerRecord(topic, key, value); 38 | 39 | logger.info("Key: " + key); // log the key 40 | // id_0 is going to partition 1 41 | // id_1 partition 0 42 | // id_2 partition 2 43 | // id_3 partition 0 44 | // id_4 partition 2 45 | // id_5 partition 2 46 | // id_6 partition 0 47 | // id_7 partition 2 48 | // id_8 partition 1 49 | // id_9 partition 2 50 | 51 | 52 | // send data - asynchronous 53 | producer.send(record, new Callback() { 54 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 55 | // executes every time a record is successfully sent or an exception is thrown 56 | if (e == null) { 57 | // the record was successfully sent 58 | logger.info("Received new metadata. 
\n" + 59 | "Topic:" + recordMetadata.topic() + "\n" + 60 | "Partition: " + recordMetadata.partition() + "\n" + 61 | "Offset: " + recordMetadata.offset() + "\n" + 62 | "Timestamp: " + recordMetadata.timestamp()); 63 | } else { 64 | logger.error("Error while producing", e); 65 | } 66 | } 67 | }).get(); // block the .send() to make it synchronous - don't do this in production! 68 | } 69 | 70 | // flush data 71 | producer.flush(); 72 | // flush and close producer 73 | producer.close(); 74 | 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /kafka-basics/src/main/java/kafka/tutorial1/ProducerDemoWithCallback.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial1; 2 | 3 | import org.apache.kafka.clients.producer.*; 4 | import org.apache.kafka.common.serialization.StringSerializer; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.util.Properties; 9 | 10 | public class ProducerDemoWithCallback { 11 | 12 | public static void main(String[] args) { 13 | 14 | final Logger logger = LoggerFactory.getLogger(ProducerDemoWithCallback.class); 15 | 16 | String bootstrapServers = "127.0.0.1:9092"; 17 | 18 | // create Producer properties 19 | Properties properties = new Properties(); 20 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 21 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 22 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 23 | 24 | // create the producer 25 | KafkaProducer producer = new KafkaProducer(properties); 26 | 27 | 28 | for (int i=0; i<10; i++ ) { 29 | // create a producer record 30 | ProducerRecord record = 31 | new ProducerRecord("first_topic", "hello world " + Integer.toString(i)); 32 | 33 | // send data - asynchronous 34 | producer.send(record, new Callback() { 35 | 
public void onCompletion(RecordMetadata recordMetadata, Exception e) { 36 | // executes every time a record is successfully sent or an exception is thrown 37 | if (e == null) { 38 | // the record was successfully sent 39 | logger.info("Received new metadata. \n" + 40 | "Topic:" + recordMetadata.topic() + "\n" + 41 | "Partition: " + recordMetadata.partition() + "\n" + 42 | "Offset: " + recordMetadata.offset() + "\n" + 43 | "Timestamp: " + recordMetadata.timestamp()); 44 | } else { 45 | logger.error("Error while producing", e); 46 | } 47 | } 48 | }); 49 | } 50 | 51 | // flush data 52 | producer.flush(); 53 | // flush and close producer 54 | producer.close(); 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /kafka-connect/connect-standalone.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # These are defaults. This file just demonstrates how to override some settings. 17 | bootstrap.servers=localhost:9092 18 | 19 | # The converters specify the format of data in Kafka and how to translate it into Connect data. 
Every Connect user will 20 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 21 | key.converter=org.apache.kafka.connect.json.JsonConverter 22 | value.converter=org.apache.kafka.connect.json.JsonConverter 23 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 24 | # it to 25 | key.converter.schemas.enable=true 26 | value.converter.schemas.enable=true 27 | 28 | offset.storage.file.filename=/tmp/connect.offsets 29 | # Flush much faster than normal, which is useful for testing/debugging 30 | offset.flush.interval.ms=10000 31 | 32 | # Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins 33 | # (connectors, converters, transformations). The list should consist of top level directories that include 34 | # any combination of: 35 | # a) directories immediately containing jars with plugins and their dependencies 36 | # b) uber-jars with plugins and their dependencies 37 | # c) directories immediately containing the package directory structure of classes of plugins and their dependencies 38 | # Note: symlinks will be followed to discover dependencies or plugins. 39 | # Examples: 40 | # plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, 41 | plugin.path=connectors 42 | -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This connector uses the twitter streaming api to listen for status update messages and 4 | convert them to a Kafka Connect struct on the fly. The goal is to match as much of the 5 | Twitter Status object as possible. 6 | 7 | # Configuration 8 | 9 | ## TwitterSourceConnector 10 | 11 | This Twitter Source connector is used to pull data from Twitter in realtime. 
12 | 13 | ```properties 14 | name=connector1 15 | tasks.max=1 16 | connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector 17 | 18 | # Set these required values 19 | twitter.oauth.accessTokenSecret= 20 | process.deletes= 21 | filter.keywords= 22 | kafka.status.topic= 23 | kafka.delete.topic= 24 | twitter.oauth.consumerSecret= 25 | twitter.oauth.accessToken= 26 | twitter.oauth.consumerKey= 27 | ``` 28 | 29 | | Name | Description | Type | Default | Valid Values | Importance | 30 | |---------------------------------|---------------------------------------------------|----------|---------|--------------|------------| 31 | | filter.keywords | Twitter keywords to filter for. | list | | | high | 32 | | kafka.delete.topic | Kafka topic to write delete events to. | string | | | high | 33 | | kafka.status.topic | Kafka topic to write the statuses to. | string | | | high | 34 | | process.deletes | Should this connector process deletes? | boolean | | | high | 35 | | twitter.oauth.accessToken | OAuth access token | password | | | high | 36 | | twitter.oauth.accessTokenSecret | OAuth access token secret | password | | | high | 37 | | twitter.oauth.consumerKey | OAuth consumer key | password | | | high | 38 | | twitter.oauth.consumerSecret | OAuth consumer secret | password | | | high | 39 | | twitter.debug | Flag to enable debug logging for the Twitter API. 
| boolean | false | | low | 40 | 41 | 42 | # Schemas 43 | 44 | ## com.github.jcustenborder.kafka.connect.twitter.Place 45 | 46 | Returns the place attached to this status 47 | 48 | | Name | Optional | Schema | Default Value | Documentation | 49 | |---------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| 50 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 51 | | StreetAddress | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 52 | | CountryCode | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 53 | | Id | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 54 | | Country | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 55 | | PlaceType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 56 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 57 | | FullName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 58 | 59 | ## com.github.jcustenborder.kafka.connect.twitter.GeoLocation 60 | 61 | Returns The location that this tweet refers to if available. 
62 | 63 | | Name | Optional | Schema | Default Value | Documentation | 64 | |-----------|----------|---------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------| 65 | | Latitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the latitude of the geo location | 66 | | Longitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the longitude of the geo location | 67 | 68 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice 69 | 70 | Message that is received when a status is deleted from Twitter. 71 | 72 | | Name | Optional | Schema | Default Value | Documentation | 73 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 74 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 75 | | UserId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 76 | 77 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey 78 | 79 | Key for a message that is received when a status is deleted from Twitter. 80 | 81 | | Name | Optional | Schema | Default Value | Documentation | 82 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 83 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 84 | 85 | ## com.github.jcustenborder.kafka.connect.twitter.StatusKey 86 | 87 | Key for a twitter status. 
88 | 89 | | Name | Optional | Schema | Default Value | Documentation | 90 | |------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 91 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 92 | 93 | ## com.github.jcustenborder.kafka.connect.twitter.Status 94 | 95 | Twitter status message. 96 | 97 | | Name | Optional | Schema | Default Value | Documentation | 98 | |----------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------| 99 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | Return the created_at | 100 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the status | 101 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the status | 102 | | Source | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the source | 103 | | Truncated | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is truncated | 104 | | InReplyToStatusId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_tostatus_id | 105 | | InReplyToUserId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_user_id | 106 | | 
InReplyToScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the in_reply_to_screen_name | 107 | | GeoLocation | true | [com.github.jcustenborder.kafka.connect.twitter.GeoLocation](#com.github.jcustenborder.kafka.connect.twitter.GeoLocation) | | Returns The location that this tweet refers to if available. | 108 | | Place | true | [com.github.jcustenborder.kafka.connect.twitter.Place](#com.github.jcustenborder.kafka.connect.twitter.Place) | | Returns the place attached to this status | 109 | | Favorited | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is favorited | 110 | | Retweeted | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is retweeted | 111 | | FavoriteCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Indicates approximately how many times this Tweet has been "favorited" by Twitter users. | 112 | | User | false | [com.github.jcustenborder.kafka.connect.twitter.User](#com.github.jcustenborder.kafka.connect.twitter.User) | | Return the user associated with the status. 113 | This can be null if the instance is from User.getStatus(). | 114 | | Retweet | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 115 | | Contributors | false | Array of [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns an array of contributors, or null if no contributor is associated with this status. 
| 116 | | RetweetCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled. | 117 | | RetweetedByMe | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 118 | | CurrentUserRetweetId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled. | 119 | | PossiblySensitive | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 120 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the lang of the status text if available. | 121 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the tweet is withheld | 122 | | HashtagEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.HashtagEntity](#com.github.jcustenborder.kafka.connect.twitter.HashtagEntity) | | Returns an array if hashtag mentioned in the tweet. | 123 | | UserMentionEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity](#com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity) | | Returns an array of user mentions in the tweet. | 124 | | MediaEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.MediaEntity](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity) | | Returns an array of MediaEntities if medias are available in the tweet. 
| 125 | | SymbolEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.SymbolEntity](#com.github.jcustenborder.kafka.connect.twitter.SymbolEntity) | | Returns an array of SymbolEntities if medias are available in the tweet. | 126 | | URLEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.URLEntity](#com.github.jcustenborder.kafka.connect.twitter.URLEntity) | | Returns an array if URLEntity mentioned in the tweet. | 127 | 128 | ## com.github.jcustenborder.kafka.connect.twitter.User 129 | 130 | Return the user associated with the status. 131 | This can be null if the instance is from User.getStatus(). 132 | 133 | | Name | Optional | Schema | Default Value | Documentation | 134 | |--------------------------------|----------|----------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------| 135 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the user | 136 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name of the user | 137 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name of the user | 138 | | Location | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the location of the user | 139 | | Description | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the description of the user | 140 | | ContributorsEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user is enabling 
contributors | 141 | | ProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the profile image url of the user | 142 | | BiggerProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 143 | | MiniProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 144 | | OriginalProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 145 | | ProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 146 | | BiggerProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 147 | | MiniProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 148 | | OriginalProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 149 | | DefaultProfileImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not uploaded their own avatar | 150 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the url of the user | 151 | | Protected | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the user status is protected | 152 | | FollowersCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of followers | 153 | | ProfileBackgroundColor | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 154 | | ProfileTextColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 155 | | ProfileLinkColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 156 | | ProfileSidebarFillColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 157 | | ProfileSidebarBorderColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 158 | | ProfileUseBackgroundImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 159 | | DefaultProfile | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not altered the theme or background | 160 | | ShowAllInlineMedia | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 161 | | FriendsCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of users the user follows (AKA "followings") | 162 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | | 163 | | FavouritesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 164 | | UtcOffset | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 165 | | TimeZone | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 166 | | ProfileBackgroundImageURL | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 167 | | ProfileBackgroundImageUrlHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 168 | | ProfileBannerURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 169 | | ProfileBannerRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 170 | | ProfileBannerIPadURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 171 | | ProfileBannerIPadRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 172 | | ProfileBannerMobileURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 173 | | ProfileBannerMobileRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 174 | | ProfileBackgroundTiled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 175 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the preferred language of the user | 176 | | StatusesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 177 | | GeoEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 178 | | Verified | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 179 | | Translator | true | 
[Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 180 | | ListedCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of public lists the user is listed on, or -1 if the count is unavailable. | 181 | | FollowRequestSent | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Returns true if the authenticating user has requested to follow this user, otherwise false. | 182 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the user is withheld | 183 | 184 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant 185 | 186 | | Name | Optional | Schema | Default Value | Documentation | 187 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| 188 | | Url | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 189 | | Bitrate | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 190 | | ContentType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 191 | 192 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size 193 | 194 | | Name | Optional | Schema | Default Value | Documentation | 195 | |--------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 196 | | Resize | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 197 | | Width | true | 
[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 198 | | Height | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 199 | 200 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity 201 | 202 | | Name | Optional | Schema | Default Value | Documentation | 203 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 204 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 205 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 206 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 207 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | | 208 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 209 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | 210 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. 
| 211 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. | 212 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | Returns size variations of the media. | 213 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. | 214 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 215 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 216 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 217 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | 218 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 219 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. 
| 220 | 221 | ## com.github.jcustenborder.kafka.connect.twitter.HashtagEntity 222 | 223 | | Name | Optional | Schema | Default Value | Documentation | 224 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------| 225 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the hashtag without #. | 226 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the hashtag. | 227 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the hashtag. | 228 | 229 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity 230 | 231 | | Name | Optional | Schema | Default Value | Documentation | 232 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 233 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | 234 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. | 235 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. 
| 236 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | | 237 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. | 238 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 239 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 240 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 241 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | Returns size variations of the media. | 242 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 243 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 244 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 245 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 246 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. 
| 247 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 248 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | 249 | 250 | ## com.github.jcustenborder.kafka.connect.twitter.SymbolEntity 251 | 252 | | Name | Optional | Schema | Default Value | Documentation | 253 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------| 254 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the symbol. | 255 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the symbol. | 256 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the entity | 257 | 258 | ## com.github.jcustenborder.kafka.connect.twitter.URLEntity 259 | 260 | | Name | Optional | Schema | Default Value | Documentation | 261 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 262 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 263 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. 
| 264 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 265 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | 266 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 267 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | 268 | 269 | ## com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity 270 | 271 | | Name | Optional | Schema | Default Value | Documentation | 272 | |------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------------| 273 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name mentioned in the status. | 274 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the user id mentioned in the status. | 275 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. | 276 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. 
| 277 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the user mention. | 278 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the user mention. | 279 | 280 | 281 | # Running in development 282 | 283 | ``` 284 | mvn clean package 285 | export CLASSPATH="$(find target/ -type f -name '*.jar'| grep '\-package' | tr '\n' ':')" 286 | $CONFLUENT_HOME/bin/connect-standalone connect/connect-avro-docker.properties config/TwitterSourceConnector.properties 287 | ``` 288 | -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/annotations-2.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/annotations-2.0.1.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/connect-utils-0.3.105.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/connect-utils-0.3.105.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/freemarker-2.3.25-incubating.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/freemarker-2.3.25-incubating.jar 
-------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/guava-18.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/guava-18.0.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/jackson-annotations-2.8.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-annotations-2.8.0.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/jackson-core-2.8.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-core-2.8.5.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/jackson-databind-2.8.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/jackson-databind-2.8.5.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/javassist-3.19.0-GA.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/javassist-3.19.0-GA.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/kafka-connect-twitter-0.2.26.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/kafka-connect-twitter-0.2.26.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/reflections-0.9.10.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/reflections-0.9.10.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/twitter4j-core-4.0.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/twitter4j-core-4.0.6.jar -------------------------------------------------------------------------------- /kafka-connect/connectors/kafka-connect-twitter/twitter4j-stream-4.0.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacsek/kafka-beginner-learnings/394a1b2aa947c44440e2f0e09dcf0b97cb06fd05/kafka-connect/connectors/kafka-connect-twitter/twitter4j-stream-4.0.6.jar -------------------------------------------------------------------------------- /kafka-connect/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # run the twitter connector 3 | connect-standalone connect-standalone.properties twitter.properties 4 | # OR (linux / mac OSX) 5 | connect-standalone.sh connect-standalone.properties twitter.properties 6 | # OR (Windows) 7 | connect-standalone.bat connect-standalone.properties twitter.properties -------------------------------------------------------------------------------- /kafka-connect/twitter.properties: -------------------------------------------------------------------------------- 1 | name=TwitterSourceDemo 2 | tasks.max=1 3 | connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector 4 | 5 | # Set these required values 6 | process.deletes=false 7 | filter.keywords=bitcoin 8 | kafka.status.topic=twitter_status_connect 9 | kafka.delete.topic=twitter_deletes_connect 10 | # put your own credentials here - don't share with anyone 11 | twitter.oauth.consumerKey= 12 | twitter.oauth.consumerSecret= 13 | twitter.oauth.accessToken= 14 | twitter.oauth.accessTokenSecret= -------------------------------------------------------------------------------- /kafka-consumer-elasticsearch/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | kafka-beginners-course 7 | com.github.simplesteph 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | kafka-consumer-elasticsearch 13 | 14 | 15 | 16 | org.elasticsearch.client 17 | elasticsearch-rest-high-level-client 18 | 6.4.0 19 | 20 | 21 | 22 | 23 | org.apache.kafka 24 | kafka-clients 25 | 2.0.0 26 | 27 | 28 | 29 | 30 | org.slf4j 31 | slf4j-simple 32 | 1.7.25 33 | 34 | 35 | 36 | 37 | com.google.code.gson 38 | gson 39 | 2.8.5 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /kafka-consumer-elasticsearch/src/main/java/com.github.simplesteph.kafka/tutorial3/ElasticSearchConsumer.java: 
-------------------------------------------------------------------------------- 1 | package com.github.simplesteph.kafka.tutorial3; 2 | 3 | import com.google.gson.JsonParser; 4 | import org.apache.http.HttpHost; 5 | import org.apache.http.auth.AuthScope; 6 | import org.apache.http.auth.UsernamePasswordCredentials; 7 | import org.apache.http.client.CredentialsProvider; 8 | import org.apache.http.impl.client.BasicCredentialsProvider; 9 | import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; 10 | import org.apache.kafka.clients.consumer.ConsumerConfig; 11 | import org.apache.kafka.clients.consumer.ConsumerRecord; 12 | import org.apache.kafka.clients.consumer.ConsumerRecords; 13 | import org.apache.kafka.clients.consumer.KafkaConsumer; 14 | import org.apache.kafka.common.serialization.StringDeserializer; 15 | import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; 16 | import org.elasticsearch.action.bulk.BulkRequest; 17 | import org.elasticsearch.action.bulk.BulkResponse; 18 | import org.elasticsearch.action.index.IndexRequest; 19 | import org.elasticsearch.action.index.IndexResponse; 20 | import org.elasticsearch.client.RequestOptions; 21 | import org.elasticsearch.client.RestClient; 22 | import org.elasticsearch.client.RestClientBuilder; 23 | import org.elasticsearch.client.RestHighLevelClient; 24 | import org.elasticsearch.common.xcontent.XContentType; 25 | import org.elasticsearch.index.mapper.ObjectMapper; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.io.IOException; 30 | import java.time.Duration; 31 | import java.util.Arrays; 32 | import java.util.Properties; 33 | 34 | public class ElasticSearchConsumer { 35 | 36 | public static RestHighLevelClient createClient(){ 37 | 38 | ////////////////////////// 39 | /////////// IF YOU USE LOCAL ELASTICSEARCH 40 | ////////////////////////// 41 | 42 | // String hostname = "localhost"; 43 | // RestClientBuilder builder = RestClient.builder(new 
HttpHost(hostname,9200,"http")); 44 | 45 | 46 | ////////////////////////// 47 | /////////// IF YOU USE BONSAI / HOSTED ELASTICSEARCH 48 | ////////////////////////// 49 | 50 | // replace with your own credentials 51 | String hostname = ""; // localhost or bonsai url 52 | String username = ""; // needed only for bonsai 53 | String password = ""; // needed only for bonsai 54 | 55 | // credentials provider help supply username and password 56 | final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); 57 | credentialsProvider.setCredentials(AuthScope.ANY, 58 | new UsernamePasswordCredentials(username, password)); 59 | 60 | RestClientBuilder builder = RestClient.builder( 61 | new HttpHost(hostname, 443, "https")) 62 | .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() { 63 | @Override 64 | public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) { 65 | return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); 66 | } 67 | }); 68 | 69 | RestHighLevelClient client = new RestHighLevelClient(builder); 70 | return client; 71 | } 72 | 73 | public static KafkaConsumer createConsumer(String topic){ 74 | 75 | String bootstrapServers = "127.0.0.1:9092"; 76 | String groupId = "kafka-demo-elasticsearch"; 77 | 78 | // create consumer configs 79 | Properties properties = new Properties(); 80 | properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 81 | properties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 82 | properties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 83 | properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId); 84 | properties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 85 | properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // disable auto commit of offsets 86 | 
properties.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "100"); // cap records returned per poll() call 87 | 88 | // create consumer 89 | KafkaConsumer consumer = new KafkaConsumer(properties); 90 | consumer.subscribe(Arrays.asList(topic)); 91 | 92 | return consumer; 93 | 94 | } 95 | 96 | private static JsonParser jsonParser = new JsonParser(); 97 | 98 | private static String extractIdFromTweet(String tweetJson){ 99 | // gson library 100 | return jsonParser.parse(tweetJson) 101 | .getAsJsonObject() 102 | .get("id_str") 103 | .getAsString(); 104 | } 105 | 106 | public static void main(String[] args) throws IOException { 107 | Logger logger = LoggerFactory.getLogger(ElasticSearchConsumer.class.getName()); 108 | RestHighLevelClient client = createClient(); 109 | 110 | KafkaConsumer consumer = createConsumer("twitter_tweets"); 111 | 112 | while(true){ 113 | ConsumerRecords records = 114 | consumer.poll(Duration.ofMillis(100)); // new in Kafka 2.0.0 115 | 116 | Integer recordCount = records.count(); 117 | logger.info("Received " + recordCount + " records"); 118 | 119 | BulkRequest bulkRequest = new BulkRequest(); 120 | 121 | for (ConsumerRecord record : records){ 122 | 123 | // 2 strategies 124 | // kafka generic ID 125 | // String id = record.topic() + "_" + record.partition() + "_" + record.offset(); 126 | 127 | // twitter feed specific id 128 | try { 129 | String id = extractIdFromTweet(record.value()); 130 | 131 | // where we insert data into ElasticSearch 132 | IndexRequest indexRequest = new IndexRequest( 133 | "twitter", 134 | "tweets", 135 | id // this is to make our consumer idempotent 136 | ).source(record.value(), XContentType.JSON); 137 | 138 | bulkRequest.add(indexRequest); // we add to our bulk request (takes no time) 139 | } catch (NullPointerException e){ 140 | logger.warn("skipping bad data: " + record.value()); 141 | } 142 | 143 | } 144 | 145 | if (recordCount > 0) { 146 | BulkResponse bulkItemResponses = client.bulk(bulkRequest,
RequestOptions.DEFAULT); 147 | logger.info("Committing offsets..."); 148 | consumer.commitSync(); 149 | logger.info("Offsets have been committed"); 150 | try { 151 | Thread.sleep(1000); 152 | } catch (InterruptedException e) { 153 | e.printStackTrace(); 154 | } 155 | } 156 | } 157 | 158 | // close the client gracefully 159 | // client.close(); 160 | 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /kafka-producer-twitter/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | kafka-beginners-course 7 | com.github.simplesteph 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | kafka-producer-twitter 13 | 14 | 15 | 16 | 17 | 18 | org.apache.kafka 19 | kafka-clients 20 | 2.0.0 21 | 22 | 23 | 24 | 25 | org.slf4j 26 | slf4j-simple 27 | 1.7.25 28 | 29 | 30 | 31 | 32 | com.twitter 33 | hbc-core 34 | 2.2.0 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /kafka-producer-twitter/src/main/java/kafka/tutorial2/TwitterProducer.java: -------------------------------------------------------------------------------- 1 | package kafka.tutorial2; 2 | 3 | import com.google.common.collect.Lists; 4 | import com.twitter.hbc.ClientBuilder; 5 | import com.twitter.hbc.core.Client; 6 | import com.twitter.hbc.core.Constants; 7 | import com.twitter.hbc.core.Hosts; 8 | import com.twitter.hbc.core.HttpHosts; 9 | import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; 10 | import com.twitter.hbc.core.processor.StringDelimitedProcessor; 11 | import com.twitter.hbc.httpclient.auth.Authentication; 12 | import com.twitter.hbc.httpclient.auth.OAuth1; 13 | import org.apache.kafka.clients.producer.*; 14 | import org.apache.kafka.common.serialization.StringSerializer; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | import java.util.List; 19 | import java.util.Properties; 20 | import java.util.concurrent.BlockingQueue; 21 | import 
java.util.concurrent.LinkedBlockingQueue; 22 | import java.util.concurrent.TimeUnit; 23 | 24 | public class TwitterProducer { 25 | 26 | Logger logger = LoggerFactory.getLogger(TwitterProducer.class.getName()); 27 | 28 | // use your own credentials - don't share them with anyone 29 | String consumerKey = ""; 30 | String consumerSecret = ""; 31 | String token = ""; 32 | String secret = ""; 33 | 34 | List terms = Lists.newArrayList("bitcoin", "usa", "politics", "sport", "soccer"); 35 | 36 | 37 | public TwitterProducer(){} 38 | 39 | public static void main(String[] args) { 40 | new TwitterProducer().run(); 41 | } 42 | 43 | public void run(){ 44 | 45 | logger.info("Setup"); 46 | 47 | /** Set up your blocking queues: Be sure to size these properly based on expected TPS of your stream */ 48 | BlockingQueue msgQueue = new LinkedBlockingQueue(1000); 49 | 50 | // create a twitter client 51 | Client client = createTwitterClient(msgQueue); 52 | // Attempts to establish a connection. 53 | client.connect(); 54 | 55 | // create a kafka producer 56 | KafkaProducer producer = createKafkaProducer(); 57 | 58 | // add a shutdown hook 59 | Runtime.getRuntime().addShutdownHook(new Thread(() -> { 60 | logger.info("stopping application..."); 61 | logger.info("shutting down client from twitter..."); 62 | client.stop(); 63 | logger.info("closing producer..."); 64 | producer.close(); 65 | logger.info("done!"); 66 | })); 67 | 68 | // loop to send tweets to kafka 69 | // on a different thread, or multiple different threads.... 
70 | while (!client.isDone()) { 71 | String msg = null; 72 | try { 73 | msg = msgQueue.poll(5, TimeUnit.SECONDS); 74 | } catch (InterruptedException e) { 75 | e.printStackTrace(); 76 | client.stop(); 77 | } 78 | if (msg != null){ 79 | logger.info(msg); 80 | producer.send(new ProducerRecord<>("twitter_tweets", null, msg), new Callback() { 81 | @Override 82 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 83 | if (e != null) { 84 | logger.error("Something bad happened", e); 85 | } 86 | } 87 | }); 88 | } 89 | } 90 | logger.info("End of application"); 91 | } 92 | 93 | public Client createTwitterClient(BlockingQueue msgQueue){ 94 | 95 | /** Declare the host you want to connect to, the endpoint, and authentication (basic auth or oauth) */ 96 | Hosts hosebirdHosts = new HttpHosts(Constants.STREAM_HOST); 97 | StatusesFilterEndpoint hosebirdEndpoint = new StatusesFilterEndpoint(); 98 | 99 | hosebirdEndpoint.trackTerms(terms); 100 | 101 | // These secrets should be read from a config file 102 | Authentication hosebirdAuth = new OAuth1(consumerKey, consumerSecret, token, secret); 103 | 104 | ClientBuilder builder = new ClientBuilder() 105 | .name("Hosebird-Client-01") // optional: mainly for the logs 106 | .hosts(hosebirdHosts) 107 | .authentication(hosebirdAuth) 108 | .endpoint(hosebirdEndpoint) 109 | .processor(new StringDelimitedProcessor(msgQueue)); 110 | 111 | Client hosebirdClient = builder.build(); 112 | return hosebirdClient; 113 | } 114 | 115 | public KafkaProducer createKafkaProducer(){ 116 | String bootstrapServers = "127.0.0.1:9092"; 117 | 118 | // create Producer properties 119 | Properties properties = new Properties(); 120 | properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 121 | properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 122 | properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 123 | 124 | // 
create safe Producer 125 | properties.setProperty(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true"); 126 | properties.setProperty(ProducerConfig.ACKS_CONFIG, "all"); 127 | properties.setProperty(ProducerConfig.RETRIES_CONFIG, Integer.toString(Integer.MAX_VALUE)); 128 | properties.setProperty(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "5"); // kafka 2.0 >= 1.1 so we can keep this as 5. Use 1 otherwise. 129 | 130 | // high throughput producer (at the expense of a bit of latency and CPU usage) 131 | properties.setProperty(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy"); 132 | properties.setProperty(ProducerConfig.LINGER_MS_CONFIG, "20"); 133 | properties.setProperty(ProducerConfig.BATCH_SIZE_CONFIG, Integer.toString(32*1024)); // 32 KB batch size 134 | 135 | // create the producer 136 | KafkaProducer producer = new KafkaProducer(properties); 137 | return producer; 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /kafka-streams-filter-tweets/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | kafka-beginners-course 7 | com.github.simplesteph 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | kafka-streams-filter-tweets 13 | 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-streams 19 | 2.0.0 20 | 21 | 22 | 23 | 24 | 25 | org.slf4j 26 | slf4j-simple 27 | 1.7.25 28 | 29 | 30 | 31 | 32 | 33 | com.google.code.gson 34 | gson 35 | 2.8.5 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /kafka-streams-filter-tweets/src/main/java/com/github/simplesteph/kafka/tutorial4/StreamsFilterTweets.java: -------------------------------------------------------------------------------- 1 | package com.github.simplesteph.kafka.tutorial4; 2 | 3 | import com.google.gson.JsonParser; 4 | import org.apache.kafka.common.protocol.types.Field; 5 | import org.apache.kafka.common.serialization.Serdes; 6 | import 
org.apache.kafka.streams.KafkaStreams; 7 | import org.apache.kafka.streams.StreamsBuilder; 8 | import org.apache.kafka.streams.StreamsConfig; 9 | import org.apache.kafka.streams.kstream.KStream; 10 | 11 | import java.util.Properties; 12 | 13 | public class StreamsFilterTweets { 14 | 15 | public static void main(String[] args) { 16 | // create properties 17 | Properties properties = new Properties(); 18 | properties.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "127.0.0.1:9092"); 19 | properties.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "demo-kafka-streams"); 20 | properties.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class.getName()); 21 | properties.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class.getName()); 22 | 23 | // create a topology 24 | StreamsBuilder streamsBuilder = new StreamsBuilder(); 25 | 26 | // input topic 27 | KStream inputTopic = streamsBuilder.stream("twitter_tweets"); 28 | KStream filteredStream = inputTopic.filter( 29 | // filter for tweets which has a user of over 10000 followers 30 | (k, jsonTweet) -> extractUserFollowersInTweet(jsonTweet) > 10000 31 | ); 32 | filteredStream.to("important_tweets"); 33 | 34 | // build the topology 35 | KafkaStreams kafkaStreams = new KafkaStreams( 36 | streamsBuilder.build(), 37 | properties 38 | ); 39 | 40 | // start our streams application 41 | kafkaStreams.start(); 42 | } 43 | 44 | private static JsonParser jsonParser = new JsonParser(); 45 | 46 | private static Integer extractUserFollowersInTweet(String tweetJson){ 47 | // gson library 48 | try { 49 | return jsonParser.parse(tweetJson) 50 | .getAsJsonObject() 51 | .get("user") 52 | .getAsJsonObject() 53 | .get("followers_count") 54 | .getAsInt(); 55 | } 56 | catch (NullPointerException e){ 57 | return 0; 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.github.simplesteph 8 | kafka-beginners-course 9 | pom 10 | 1.0 11 | 12 | 13 | 14 | org.apache.maven.plugins 15 | maven-compiler-plugin 16 | 17 | 8 18 | 8 19 | 20 | 21 | 22 | 23 | 24 | kafka-basics 25 | kafka-producer-twitter 26 | kafka-consumer-elasticsearch 27 | kafka-streams-filter-tweets 28 | 29 | 30 | 31 | 32 | --------------------------------------------------------------------------------