├── .gitignore
├── README.md
├── connect-elasticsearch-sink.properties
├── connect-file-source.properties
├── connect-standalone.properties
├── docker-compose.yml
├── openrecipes.json
├── pom.xml
├── run_standalone.sh
└── src
    └── main
        ├── java
        │   └── com
        │       └── hannesstockner
        │           └── connect
        │               └── es
        │                   ├── ElasticsearchSinkConnector.java
        │                   └── ElasticsearchSinkTask.java
        └── resources
            └── logback.xml

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# IDEA and Mac OS X hidden files
.DS_Store
.idea
*.iml
*.iws
*.ipr

# Project files
*.class
*.settings
*.project
*.classpath
*.log

# Package files
*.cache
*.jar
*.war
*.ear
/*.launch
/*.tmproj
stacktrace.log

# Folders to ignore
/project/target/
/bin/
/state/
state/
/out/**
/test/reports
/target/
target/
/log/
checkpoint_*

# Folders
/cruise-output/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Kafka Connect Elasticsearch

kafka-connect-elasticsearch is a Kafka Connector for loading data from Kafka into Elasticsearch.
More background can be found in the article [Kafka and Elastic Search, A Perfect Match](https://qbox.io/blog/kafka-and-elasticsearch-a-perfect-match-1).

# Prerequisites

- a Linux or macOS console (not tested on Windows, but adaptable with little effort)
- a Git client to fetch the project
- Docker Compose
- Apache Maven

Clone the project:
```
git clone https://github.com/hannesstockner/kafka-connect-elasticsearch kafka-connect-elasticsearch
```

# Quickstart

Build a package of the code:
```
mvn clean package
```
Open a console and export the Docker host IP as an environment variable:
```
export DOCKER_IP={YOUR_DOCKER_IP_ADDRESS}
```
Start the Docker containers:
```
docker-compose up
```
Open another console window and export the DOCKER_IP environment variable again.

Run the connector:
```
./run_standalone.sh
```
Go to http://{YOUR_DOCKER_IP_ADDRESS}:9200/kafka_recipes/_search to check your imported recipes.
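The same check works from the command line; a quick sketch, assuming the containers are up, the connector has had a moment to import the data, and `DOCKER_IP` is still exported in your shell:
```
# Ask Elasticsearch for the first few imported recipes, pretty-printed
curl "http://$DOCKER_IP:9200/kafka_recipes/_search?pretty"
```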
--------------------------------------------------------------------------------
/connect-elasticsearch-sink.properties:
--------------------------------------------------------------------------------
name=local-elasticsearch-sink
connector.class=com.hannesstockner.connect.es.ElasticsearchSinkConnector
tasks.max=1
es.host=docker
topics=recipes
index.prefix=kafka_
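# Note: es.host is a placeholder; run_standalone.sh rewrites it to $DOCKER_IP
# (via sed) before the worker starts.
# index.prefix is prepended to the topic name by ElasticsearchSinkTask, so
# records from the "recipes" topic land in the "kafka_recipes" index queried
# in the README.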
--------------------------------------------------------------------------------
/connect-file-source.properties:
--------------------------------------------------------------------------------
name=local-file-source
connector.class=org.apache.kafka.connect.file.FileStreamSourceConnector
tasks.max=1
file=openrecipes.json
topic=recipes

--------------------------------------------------------------------------------
/connect-standalone.properties:
--------------------------------------------------------------------------------
bootstrap.servers=docker:9092

key.converter=org.apache.kafka.connect.json.JsonConverter
value.converter=org.apache.kafka.connect.json.JsonConverter
key.converter.schemas.enable=false
value.converter.schemas.enable=false

internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false

offset.storage.file.filename=/tmp/connect.offsets
# Flush much faster than normal, which is useful for testing/debugging
offset.flush.interval.ms=10000

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
kafka:
  image: flozano/kafka:0.9.0.0
  ports:
    - "2181:2181"
    - "9092:9092"
  environment:
    ADVERTISED_HOST: ${DOCKER_IP}
elasticsearch:
  image: elasticsearch
  ports:
    - "9200:9200"
    - "9300:9300"

--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <groupId>com.hannesstockner.kafka-connect-elasticsearch</groupId>
  <artifactId>kafka-connect-elasticsearch</artifactId>
  <packaging>jar</packaging>
  <version>1.0.0-SNAPSHOT</version>
  <name>kafka-connect-elasticsearch</name>
  <description>
    A Kafka Connect Elasticsearch connector for copying data from Kafka to Elasticsearch.
  </description>

  <licenses>
    <license>
      <name>Apache License 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <scm>
    <connection>scm:git:git://github.com/hannesstockner/kafka-connect-elasticsearch.git</connection>
    <developerConnection>scm:git:git@github.com:hannesstockner/kafka-connect-elasticsearch.git</developerConnection>
    <url>https://github.com/hannesstockner/kafka-connect-elasticsearch</url>
    <tag>HEAD</tag>
  </scm>

  <properties>
    <kafka.version>0.9.0.0</kafka.version>
    <elasticsearch.version>2.1.1</elasticsearch.version>
    <junit.version>4.12</junit.version>
    <slf4j.version>1.7.13</slf4j.version>
    <logback.version>1.1.3</logback.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-api</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-json</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-file</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>connect-runtime</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch</artifactId>
      <version>${elasticsearch.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>${logback.version}</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-core</artifactId>
      <version>${logback.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.5.1</version>
        <inherited>true</inherited>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4.1</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.18.1</version>
        <configuration>
          <argLine>-Djava.awt.headless=true</argLine>
          <forkMode>pertest</forkMode>
        </configuration>
      </plugin>
    </plugins>
    <resources>
      <resource>
        <directory>src/main/resources</directory>
        <filtering>true</filtering>
      </resource>
    </resources>
  </build>
</project>

--------------------------------------------------------------------------------
/run_standalone.sh:
--------------------------------------------------------------------------------
#!/bin/sh

if [ -z "$DOCKER_IP" ]; then
  echo "Need to set DOCKER_IP"
  exit 1
fi

CONNECT_PROCESS_FILE=connect-standalone.properties
CONNECT_FILE_SOURCE=connect-file-source.properties
CONNECT_ES_SINK=connect-elasticsearch-sink.properties

# Point the worker and the sink at the Docker host
# (-i.bak keeps a backup file and works with both GNU and BSD sed)
sed -i.bak -e "s/bootstrap.servers=.*/bootstrap.servers=$DOCKER_IP:9092/g" $CONNECT_PROCESS_FILE
sed -i.bak -e "s/es.host=.*/es.host=$DOCKER_IP/g" $CONNECT_ES_SINK

CLASSPATH=target/*-jar-with-dependencies.jar
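
# Optional pre-flight check (assumes curl is available): fail fast if
# Elasticsearch isn't reachable, since the sink task connects to it on startup
if ! curl -s "http://$DOCKER_IP:9200/" > /dev/null; then
  echo "Elasticsearch not reachable at $DOCKER_IP:9200"
  exit 1
fi
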
exec java -cp $CLASSPATH org.apache.kafka.connect.cli.ConnectStandalone $CONNECT_PROCESS_FILE $CONNECT_FILE_SOURCE $CONNECT_ES_SINK

--------------------------------------------------------------------------------
/src/main/java/com/hannesstockner/connect/es/ElasticsearchSinkConnector.java:
--------------------------------------------------------------------------------
package com.hannesstockner.connect.es;

import org.apache.kafka.common.utils.AppInfoParser;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.sink.SinkConnector;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ElasticsearchSinkConnector extends SinkConnector {

  public static final String ES_HOST = "es.host";
  public static final String INDEX_PREFIX = "index.prefix";

  private String esHost;
  private String indexPrefix;

  @Override
  public String version() {
    return AppInfoParser.getVersion();
  }

  @Override
  public void start(Map<String, String> props) {
    esHost = props.get(ES_HOST);
    indexPrefix = props.get(INDEX_PREFIX);
  }

  @Override
  public Class<? extends Task> taskClass() {
    return ElasticsearchSinkTask.class;
  }

  @Override
  public List<Map<String, String>> taskConfigs(int maxTasks) {
    // Every task gets the same configuration: the Elasticsearch host and index prefix
    List<Map<String, String>> configs = new ArrayList<>();
    for (int i = 0; i < maxTasks; i++) {
      Map<String, String> config = new HashMap<>();
      if (esHost != null) {
        config.put(ES_HOST, esHost);
      }
      if (indexPrefix != null) {
        config.put(INDEX_PREFIX, indexPrefix);
      }
      configs.add(config);
    }
    return configs;
  }

  @Override
  public void stop() {
    // nothing to clean up
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/hannesstockner/connect/es/ElasticsearchSinkTask.java:
--------------------------------------------------------------------------------
package com.hannesstockner.connect.es;

import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

public class ElasticsearchSinkTask extends SinkTask {

  private static final Logger log = LoggerFactory.getLogger(ElasticsearchSinkTask.class);

  private static final String TYPE = "kafka";

  private String indexPrefix;
  private Client client;

  @Override
  public void start(Map<String, String> props) {
    final String esHost = props.get(ElasticsearchSinkConnector.ES_HOST);
    indexPrefix = props.get(ElasticsearchSinkConnector.INDEX_PREFIX);
    try {
      client = TransportClient
        .builder()
        .build()
        .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(esHost), 9300));

      // Disable date and numeric detection for all indices matching the prefix,
      // so heterogeneous recipe fields are indexed as plain strings
      client
        .admin()
        .indices()
        .preparePutTemplate("kafka_template")
        .setTemplate(indexPrefix + "*")
        .addMapping(TYPE, new HashMap<String, Object>() {{
          put("date_detection", false);
          put("numeric_detection", false);
        }})
        .get();
    } catch (UnknownHostException ex) {
      throw new ConnectException("Couldn't connect to Elasticsearch host", ex);
    }
  }

  @Override
  public void put(Collection<SinkRecord> records) {
    for (SinkRecord record : records) {
      log.info("Processing record of type {}: {}", record.value().getClass(), record.value());

      // Index one document per record; the index name is derived from the topic
      client
        .prepareIndex(indexPrefix + record.topic(), TYPE)
        .setSource(record.value().toString())
        .get();
    }
  }

  @Override
  public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
    // Nothing to do: every record is written synchronously in put()
  }

  @Override
  public void stop() {
    client.close();
  }

  @Override
  public String version() {
    return new ElasticsearchSinkConnector().version();
  }
}

--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<configuration>

  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
    <encoder>
      <pattern>%d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n</pattern>
    </encoder>
  </appender>

  <root level="INFO">
    <appender-ref ref="STDOUT"/>
  </root>

</configuration>

--------------------------------------------------------------------------------