├── KakfaTest
│   ├── .settings
│   │   ├── org.eclipse.core.resources.prefs
│   │   ├── org.eclipse.jdt.core.prefs
│   │   └── org.eclipse.m2e.core.prefs
│   ├── pom.xml
│   └── src
│       ├── main
│       │   └── java
│       │       └── com
│       │           └── binod
│       │               ├── KakfaTest
│       │               │   ├── App.java
│       │               │   ├── KafkaTopicReader.java
│       │               │   └── KafkaTopicWriter.java
│       │               └── kafka
│       │                   └── json
│       │                       ├── Consumer.java
│       │                       ├── ConsumerLoop.java
│       │                       ├── Contact.java
│       │                       └── Producer.java
│       └── test
│           └── java
│               └── com
│                   └── binod
│                       └── KakfaTest
│                           └── AppTest.java
├── README.md
├── SparkDemo
│   ├── .settings
│   │   ├── org.eclipse.core.resources.prefs
│   │   ├── org.eclipse.jdt.core.prefs
│   │   └── org.eclipse.m2e.core.prefs
│   ├── pom.xml
│   └── src
│       ├── main
│       │   └── java
│       │       ├── binod
│       │       │   └── Demo
│       │       │       ├── SparkKafkaConsumer.java
│       │       │       ├── SparkKafkaProducer.java
│       │       │       └── WordCountSpark.java
│       │       └── practice1
│       │           └── RDD_First.java
│       └── test
│           └── java
│               └── binod
│                   └── Demo
│                       └── AppTest.java
├── kafka-command.txt
├── spark-command.txt
├── students.txt
└── test.csv

--------------------------------------------------------------------------------
/KakfaTest/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

--------------------------------------------------------------------------------
/KakfaTest/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7

--------------------------------------------------------------------------------
/KakfaTest/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

--------------------------------------------------------------------------------
/KakfaTest/pom.xml:
--------------------------------------------------------------------------------
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.binod</groupId>
  <artifactId>KakfaTest</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>KakfaTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>0.8.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>0.8.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-json</artifactId>
      <version>1.0.0</version>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.9.8</version>
    </dependency>
  </dependencies>
</project>

--------------------------------------------------------------------------------
/KakfaTest/src/main/java/com/binod/KakfaTest/App.java:
--------------------------------------------------------------------------------
package com.binod.KakfaTest;

/**
 * Hello world!
 *
 */
public class App
{
    public static void main( String[] args )
    {
        System.out.println( "Hello World!" );
    }
}

--------------------------------------------------------------------------------
/KakfaTest/src/main/java/com/binod/KakfaTest/KafkaTopicReader.java:
--------------------------------------------------------------------------------
package com.binod.KakfaTest;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaTopicReader extends Thread {

    private final ConsumerConnector consumer;
    public static String topicName = "binod";

    public KafkaTopicReader() {
        System.out.println("** Initialize **");
        Properties props = new Properties();
        props.put("zookeeper.connect", "kafka-server-ip-address:2181");
        props.put("group.id", "group_binod_test");
        ConsumerConfig consumerConfig = new ConsumerConfig(props);
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(consumerConfig);
    }

    public static void main(String[] args) {
        System.out.println("******* Consumer Started ***************");
        KafkaTopicReader demo = new KafkaTopicReader();
        demo.start();
    }

    @Override
    public void run() {
        // Ask for a single stream for the topic, then block on its iterator.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topicName, Integer.valueOf(1));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = consumerMap.get(topicName).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            System.out.println(new String(it.next().message()));
        }
    }
}

--------------------------------------------------------------------------------
/KakfaTest/src/main/java/com/binod/KakfaTest/KafkaTopicWriter.java:
--------------------------------------------------------------------------------
package com.binod.KakfaTest;

import java.util.Date;
import java.util.Properties;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.common.serialization.StringSerializer;

public class KafkaTopicWriter {

    Properties props = new Properties();
    private static int numberOfRecords = 10;

    public void init() {
        props.setProperty("bootstrap.servers", "kafka-server-ip-address:9092");
        props.setProperty("kafka.topic.name", "binod");
        KafkaProducer<String, byte[]> producer =
                new KafkaProducer<String, byte[]>(this.props, new StringSerializer(), new ByteArraySerializer());

        Callback callback = new Callback() {
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null)
                    e.printStackTrace();
            }
        };

        for (int i = 1; i <= numberOfRecords; i++) {
            byte[] payload = (i + " Binod Suman From Eclipse " + new Date()).getBytes();
            ProducerRecord<String, byte[]> record =
                    new ProducerRecord<String, byte[]>(props.getProperty("kafka.topic.name"), payload);
            producer.send(record);
            // producer.send(record, callback); // Callbacks for records sent to the same partition are guaranteed to
            // execute in order.
        }

        producer.close();
    }

    public static void main(String[] args) {
        KafkaTopicWriter kafkaWriter = new KafkaTopicWriter();
        kafkaWriter.init();
    }
}

--------------------------------------------------------------------------------
/KakfaTest/src/main/java/com/binod/kafka/json/Consumer.java:
--------------------------------------------------------------------------------
package com.binod.kafka.json;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;

import java.util.Arrays;
import java.util.Properties;
import java.util.Scanner;

/** Consumes JSON Contact records from the topic; type "exit" to stop. **/
public class Consumer {

    private static Scanner in;

    public static void main(String[] args) throws InterruptedException {

        //String topicName = argv[0];
        //String groupId = argv[1];
        String topicName = "binod"; // Kafka topic name
        String groupId = "group_binod_test";

        in = new Scanner(System.in);

        ConsumerThread consumerRunnable = new ConsumerThread(topicName, groupId);
        consumerRunnable.start();
        String line = "";
        while (!line.equals("exit")) {
            line = in.next();
        }
        consumerRunnable.getKafkaConsumer().wakeup();
        System.out.println("Stopping consumer .....");
        consumerRunnable.join();
    }

    private static class ConsumerThread extends Thread {
        private String topicName;
        private String groupId;
        private KafkaConsumer<byte[], JsonNode> kafkaConsumer;

        public ConsumerThread(String topicName, String groupId) {
            this.topicName = topicName;
            this.groupId = groupId;
        }

        public void run() {
            Properties configProperties = new Properties();
            configProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-server-ip-address:9092");
            configProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer");
            configProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonDeserializer");
            configProperties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
            configProperties.put(ConsumerConfig.CLIENT_ID_CONFIG, "simple");

            // Figure out where to start processing messages from
            kafkaConsumer = new KafkaConsumer<byte[], JsonNode>(configProperties);
            kafkaConsumer.subscribe(Arrays.asList(topicName));
            ObjectMapper mapper = new ObjectMapper();

            // Start processing messages: each record value is already a JsonNode,
            // so map it back to a Contact object and print it.
            try {
                while (true) {
                    ConsumerRecords<byte[], JsonNode> records = kafkaConsumer.poll(Long.MAX_VALUE);
                    for (ConsumerRecord<byte[], JsonNode> record : records) {
                        JsonNode jsonNode = record.value();
                        System.out.println(mapper.treeToValue(jsonNode, Contact.class));
                    }
                }
            } catch (WakeupException ex) {
                System.out.println("Exception caught " + ex.getMessage());
            } catch (JsonProcessingException e) {
                e.printStackTrace();
            } finally {
                kafkaConsumer.close();
System.out.println("After closing KafkaConsumer"); 87 | } 88 | } 89 | public KafkaConsumer getKafkaConsumer(){ 90 | return this.kafkaConsumer; 91 | } 92 | } 93 | } 94 | 95 | 96 | } 97 | -------------------------------------------------------------------------------- /KakfaTest/src/main/java/com/binod/kafka/json/ConsumerLoop.java: -------------------------------------------------------------------------------- 1 | package com.binod.kafka.json; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Properties; 6 | import java.util.Set; 7 | 8 | import org.apache.kafka.clients.consumer.ConsumerRecord; 9 | import org.apache.kafka.clients.consumer.ConsumerRecords; 10 | import org.apache.kafka.clients.consumer.KafkaConsumer; 11 | import org.apache.kafka.common.serialization.StringDeserializer; 12 | 13 | import com.fasterxml.jackson.databind.JsonNode; 14 | 15 | /** Some issue is there in this code, not working **/ 16 | 17 | 18 | public class ConsumerLoop implements Runnable { 19 | private final KafkaConsumer consumer; 20 | private final String[] topics; 21 | private final int id; 22 | 23 | public ConsumerLoop(int id, 24 | String groupId, 25 | String[] topics) { 26 | this.id = id; 27 | this.topics = topics; 28 | Properties props = new Properties(); 29 | props.put("bootstrap.servers", "kafka-server-ip-address:9092"); 30 | props.put("group.id", groupId); 31 | props.put("key.deserializer", StringDeserializer.class.getName()); 32 | props.put("value.deserializer", StringDeserializer.class.getName()); 33 | this.consumer = new KafkaConsumer<>(props); 34 | } 35 | 36 | @Override 37 | public void run() { 38 | try { 39 | consumer.subscribe(topics); 40 | 41 | while (true) { 42 | Map> records = consumer.poll(Long.MAX_VALUE); 43 | Set keySet = records.keySet(); 44 | for(String key : keySet){ 45 | // for (ConsumerRecord record : records.values()) { 46 | // ConsumerRecords record = records.get(key); 47 | ConsumerRecords recordtmep = records.get(key); 48 | JsonNode jsonNode = recordtmep.toString(); 49 | System.out.println(mapper.treeToValue(jsonNode,Contact.class)); 50 | 51 | } 52 | } 53 | } catch (Exception e) { 54 | // ignore for shutdown 55 | } finally { 56 | consumer.close(); 57 | } 58 | } 59 | 60 | public void shutdown() { 61 | consumer.wakeup(); 62 | } 63 | } -------------------------------------------------------------------------------- /KakfaTest/src/main/java/com/binod/kafka/json/Contact.java: -------------------------------------------------------------------------------- 1 | package com.binod.kafka.json; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | 5 | import java.util.StringTokenizer; 6 | 7 | public class Contact { 8 | 9 | private int contactId; 10 | private String firstName; 11 | private String lastName; 12 | 13 | public Contact(){} 14 | 15 | public void parseString(String csvStr){ 16 | StringTokenizer st = new StringTokenizer(csvStr,","); 17 | contactId = Integer.parseInt(st.nextToken()); 18 | firstName = st.nextToken(); 19 | lastName = st.nextToken(); 20 | } 21 | 22 | public Contact(int contactId, String firstName, String lastName) { 23 | super(); 24 | this.contactId = contactId; 25 | this.firstName = firstName; 26 | this.lastName = lastName; 27 | } 28 | 29 | public int getContactId() { 30 | return contactId; 31 | } 32 | public void setContactId(int contactId) { 33 | this.contactId = contactId; 34 | } 35 | public String getFirstName() { 36 | return firstName; 37 | } 38 | public void setFirstName(String firstName) { 39 | this.firstName = firstName; 40 | } 
    public String getLastName() {
        return lastName;
    }
    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    @Override
    public String toString() {
        return "Contact{" +
                "contactId=" + contactId +
                ", firstName='" + firstName + '\'' +
                ", lastName='" + lastName + '\'' +
                '}';
    }

    public static void main(String[] argv) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        Contact contact = new Contact();
        contact.setContactId(1);
        contact.setFirstName("Sachin");
        contact.setLastName("Tendulkar");
        System.out.println(mapper.writeValueAsString(contact));
        contact.parseString("1,Rahul,Dravid");
        System.out.println(mapper.writeValueAsString(contact));
    }
}

--------------------------------------------------------------------------------
/KakfaTest/src/main/java/com/binod/kafka/json/Producer.java:
--------------------------------------------------------------------------------
package com.binod.kafka.json;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.connect.json.JsonSerializer;

import java.util.Properties;
import java.util.Scanner;

public class Producer {

    private static Scanner in;

    public static void main(String[] argv) throws Exception {
        /*if (argv.length != 1) {
            System.err.println("Please specify 1 parameter ");
            System.exit(-1);
        }*/
        // String topicName = argv[0];
        String topicName = "binod"; // Kafka topic name
        in = new Scanner(System.in);
        System.out.println("Enter message (type exit to quit)");

        // Configure the producer: keys are byte arrays, values are JSON nodes.
        Properties configProperties = new Properties();
        configProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-server-ip-address:9092");
        configProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
        configProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonSerializer");

        org.apache.kafka.clients.producer.Producer<byte[], JsonNode> producer =
                new KafkaProducer<byte[], JsonNode>(configProperties);

        ObjectMapper objectMapper = new ObjectMapper();

        String line = in.nextLine();
        while (!line.equals("exit")) {
            // Each input line is "id,firstName,lastName"; convert it to a JSON tree and send it.
            Contact contact = new Contact();
            contact.parseString(line);
            JsonNode jsonNode = objectMapper.valueToTree(contact);
            System.out.println(jsonNode);
            ProducerRecord<byte[], JsonNode> rec = new ProducerRecord<byte[], JsonNode>(topicName, jsonNode);
            producer.send(rec);
            System.out.println(rec);
            line = in.nextLine();
        }
        in.close();
        producer.close();
    }
}

--------------------------------------------------------------------------------
/KakfaTest/src/test/java/com/binod/KakfaTest/AppTest.java:
--------------------------------------------------------------------------------
package com.binod.KakfaTest;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Unit test for simple App.
 */
public class AppTest
    extends TestCase
{
    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Rigorous Test :-)
     */
    public void testApp()
    {
        assertTrue( true );
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# kafka-spark-integration
Kafka and Spark integration. All code is in Maven projects.

This repository has Java code for:
* How to send messages to a Kafka topic (Producer)
* How to receive messages from a Kafka topic (Consumer)
* How to send messages from Kafka to a Spark stream
* How Spark Streaming takes data from a Kafka topic

Message -> Kafka -> Spark Streaming -> RDD
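For a quick end-to-end check of that flow, a minimal standalone producer like the sketch below can feed the `binod` topic that `SparkKafkaConsumer` subscribes to. This is a sketch, not part of the project: the class name `QuickSmokeTest` is made up for illustration, and the broker address is the same placeholder used throughout this repository.

```java
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

// Hypothetical smoke test: push a few string messages into the "binod" topic.
public class QuickSmokeTest {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "kafka-server-ip-address:9092"); // placeholder broker address
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 1; i <= 5; i++) {
            producer.send(new ProducerRecord<>("binod", "message-" + i));
        }
        producer.close(); // flushes any pending sends before exiting
    }
}
```

Run it while `SparkKafkaConsumer` is up; each 20-second micro-batch should then print the five messages.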
--------------------------------------------------------------------------------
/SparkDemo/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

--------------------------------------------------------------------------------
/SparkDemo/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.8

--------------------------------------------------------------------------------
/SparkDemo/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

--------------------------------------------------------------------------------
/SparkDemo/pom.xml:
--------------------------------------------------------------------------------
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>binod</groupId>
  <artifactId>Demo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>Demo</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>postgresql</groupId>
      <artifactId>postgresql</artifactId>
      <version>9.1-901.jdbc4</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.6.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.10</artifactId>
      <version>1.6.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka_2.10</artifactId>
      <version>1.6.2</version>
    </dependency>

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>0.8.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>0.8.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.10</artifactId>
      <version>1.3.1</version>
    </dependency>
  </dependencies>
</project>

--------------------------------------------------------------------------------
/SparkDemo/src/main/java/binod/Demo/SparkKafkaConsumer.java:
--------------------------------------------------------------------------------
package binod.Demo;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import kafka.serializer.StringDecoder;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

public class SparkKafkaConsumer {

    public static void main(String[] args) {

        System.out.println("Spark Streaming started now .....");

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(20000));

        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "kafka-server-ip-address:9092");
        Set<String> topics = Collections.singleton("binod");

        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
                String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

        // Print every record of each micro-batch; record._2 is the message value.
        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- Received new data RDD " + rdd.partitions().size() + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });

        ssc.start();
        ssc.awaitTermination();

        System.out.println("Spark Streaming ending now .....");
    }
}

--------------------------------------------------------------------------------
/SparkDemo/src/main/java/binod/Demo/SparkKafkaProducer.java:
--------------------------------------------------------------------------------
package binod.Demo;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import kafka.serializer.StringDecoder;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

// Lazily-initialized singleton producer with Integer keys and String values.
class MyKafkaProducer {
    private static Producer<Integer, String> producer = null;

    private MyKafkaProducer() {}

    public static Producer<Integer, String> getProducer() {
        if (producer == null) {
            Properties prop = new Properties();
            prop.put("bootstrap.servers", "kafka-server-ip-address:9092");
            prop.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
            prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
            producer = new KafkaProducer<Integer, String>(prop);
        }
        return producer;
    }
}

public class SparkKafkaProducer {

    public static void main(String[] args) {

        System.out.println("Spark Streaming started now .....");

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(20000));

        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "kafka-server-ip-address:9092");
        Set<String> topics = Collections.singleton("test");

        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
                String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- New RDD with " + rdd.partitions().size() + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });

        ssc.start();
        ssc.awaitTermination();

        System.out.println("Spark Streaming ending now .....");
    }
}

--------------------------------------------------------------------------------
/SparkDemo/src/main/java/binod/Demo/WordCountSpark.java:
--------------------------------------------------------------------------------
package binod.Demo;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class WordCountSpark {

    private static List<String> data = Arrays.asList(new String[] {
            "Binod Suman",
            "Suman Binod",
            "binod Suman",
            "Pramod Suman"
    });

    /*private static List<String> data = Arrays.asList(new String[] {
            "{\"contactId\":101,\"firstName\":\"Binod\",\"lastName\":\"Suman\"}"
    });*/

    //{"contactId":101,"firstName":"Ishan","lastName":"Suman"}

    public static void main(String[] args) {
        System.out.println("Another Test");
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("Test Spark");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // Split each line into words, pair each word with 1, then sum counts per word.
        JavaRDD<String> input = sc.parallelize(data);
        JavaPairRDD<String, Integer> result = input
                //.filter(line -> line.contains("Binod"))
                .flatMap(in -> Arrays.asList(in.split(" ")))
                .mapToPair(x -> new Tuple2<String, Integer>(x, 1))
                .reduceByKey((x, y) -> x + y);

        System.out.println("**************************************");
        System.out.println(result.collect());
        System.out.println("**************************************");

        sc.stop();
    }
}

--------------------------------------------------------------------------------
/SparkDemo/src/main/java/practice1/RDD_First.java:
--------------------------------------------------------------------------------
package practice1;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class RDD_First {

    private static List<String> names = Arrays.asList(new String[] {
            "Binod", "Pramod", "Binod", "binod", "pramod", "Sanjay"
    });

    public static void main(String[] args) {

        SparkConf conf = new SparkConf();
        conf.setAppName("Demo");
        conf.setMaster("local");

        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> data = sc.parallelize(names);
        System.out.println(data.countByValue());
        System.out.println("Ends here");
        sc.stop();
    }
}

--------------------------------------------------------------------------------
/SparkDemo/src/test/java/binod/Demo/AppTest.java:
--------------------------------------------------------------------------------
package binod.Demo;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Unit test for simple App.
 */
public class AppTest
    extends TestCase
{
    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Rigorous Test :-)
     */
    public void testApp()
    {
        assertTrue( true );
    }
}
--------------------------------------------------------------------------------
/kafka-command.txt:
--------------------------------------------------------------------------------
Start Zookeeper
>bin\windows\zookeeper-server-start.bat config\zookeeper.properties

Start Kafka Broker
>bin\windows\kafka-server-start.bat config\server.properties

Create topic
>bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test

List topics
>bin\windows\kafka-topics.bat --list --zookeeper localhost:2181

Start Producer
>bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic test

Send message
How are you
Binod Suman Academy

Receive message
>bin\windows\kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic test --from-beginning
How are you
Binod Suman Academy

For the latest version (2.13), use the shell scripts:

./zookeeper-server-start.sh ../config/zookeeper.properties
./kafka-server-start.sh ../config/server.properties
./kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic test
./kafka-topics.sh --list --bootstrap-server localhost:9092

./kafka-console-producer.sh --broker-list localhost:9092 --topic test
./kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning

--------------------------------------------------------------------------------
/spark-command.txt:
--------------------------------------------------------------------------------
How to start the shell
spark-shell

Find the version
sc.version

val distData = sc.parallelize(List(1,2,3,4,5))
val squared = distData.map(a => a * a)

var names = sc.textFile("students.txt")
names.collect()
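The spark-shell session above also has a direct Java equivalent using the SparkDemo project's
Spark 1.6 dependency. This is a sketch for comparison only: the class name ShellEquivalent is
made up, and it assumes students.txt sits in the working directory.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

// Java version of the shell session above (Spark 1.6 API).
public class ShellEquivalent {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("shell-equivalent").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // val distData = sc.parallelize(List(1,2,3,4,5)); val squared = distData.map(a => a * a)
        JavaRDD<Integer> distData = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
        JavaRDD<Integer> squared = distData.map(a -> a * a);
        System.out.println(squared.collect()); // [1, 4, 9, 16, 25]

        // var names = sc.textFile("students.txt"); names.collect()
        JavaRDD<String> names = sc.textFile("students.txt");
        System.out.println(names.collect());

        sc.stop();
    }
}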
--------------------------------------------------------------------------------
/students.txt:
--------------------------------------------------------------------------------
Binod
steve
Bill
Narayan
Binod
Nandan
Bill
Sunder

--------------------------------------------------------------------------------
/test.csv:
--------------------------------------------------------------------------------
step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
1,PAYMENT,9839.64,C1231006815,170136,160296.36,M1979787155,0,0,0,0
1,PAYMENT,1864.28,C1666544295,21249,19384.72,M2044282225,0,0,0,0
1,TRANSFER,181,C1305486145,181,0,C553264065,0,0,1,0
1,CASH_OUT,181,C840083671,181,0,C38997010,21182,0,1,0
1,PAYMENT,11668.14,C2048537720,41554,29885.86,M1230701703,0,0,0,0
1,PAYMENT,7817.71,C90045638,53860,46042.29,M573487274,0,0,0,0
1,PAYMENT,7107.77,C154988899,183195,176087.23,M408069119,0,0,0,0
1,PAYMENT,7861.64,C1912850431,176087.23,168225.59,M633326333,0,0,0,0
1,PAYMENT,4024.36,C1265012928,2671,0,M1176932104,0,0,0,0
1,DEBIT,5337.77,C712410124,41720,36382.23,C195600860,41898,40348.79,0,0
1,DEBIT,9644.94,C1900366749,4465,0,C997608398,10845,157982.12,0,0
1,PAYMENT,3099.97,C249177573,20771,17671.03,M2096539129,0,0,0,0
1,PAYMENT,2560.74,C1648232591,5070,2509.26,M972865270,0,0,0,0
1,PAYMENT,11633.76,C1716932897,10127,0,M801569151,0,0,0,0
1,PAYMENT,4098.78,C1026483832,503264,499165.22,M1635378213,0,0,0,0
1,CASH_OUT,229133.94,C905080434,15325,0,C476402209,5083,51513.44,0,0
1,PAYMENT,1563.82,C761750706,450,0,M1731217984,0,0,0,0
1,PAYMENT,1157.86,C1237762639,21156,19998.14,M1877062907,0,0,0,0
1,PAYMENT,671.64,C2033524545,15123,14451.36,M473053293,0,0,0,0
1,TRANSFER,215310.3,C1670993182,705,0,C1100439041,22425,0,0,0
1,PAYMENT,1373.43,C20804602,13854,12480.57,M1344519051,0,0,0,0
1,DEBIT,9302.79,C1566511282,11299,1996.21,C1973538135,29832,16896.7,0,0
1,DEBIT,1065.41,C1959239586,1817,751.59,C515132998,10330,0,0,0
1,PAYMENT,3876.41,C504336483,67852,63975.59,M1404932042,0,0,0,0
1,TRANSFER,311685.89,C1984094095,10835,0,C932583850,6267,2719172.89,0,0
1,PAYMENT,6061.13,C1043358826,443,0,M1558079303,0,0,0,0
1,PAYMENT,9478.39,C1671590089,116494,107015.61,M58488213,0,0,0,0
1,PAYMENT,8009.09,C1053967012,10968,2958.91,M295304806,0,0,0,0
1,PAYMENT,8901.99,C1632497828,2958.91,0,M33419717,0,0,0,0
--------------------------------------------------------------------------------