├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── docker ├── bootstrap.sh ├── elasticsearch-task.json └── example.txt ├── examples ├── commands ├── connect-distributed.properties ├── connect-standalone.properties └── kafkaconnectsink.properties ├── pom.xml └── src └── main ├── java └── org │ └── apache │ └── kafka │ └── connect │ └── es │ ├── KafkaElasticSearchSinkConnector.java │ ├── KafkaElasticSearchSinkConnectorConfig.java │ └── KafkaElasticSearchSinkTask.java └── resources └── kafka-elasticsearch-sink-version.properties /.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax. 2 | syntax: glob 3 | *.ser 4 | *.class 5 | *~ 6 | *.bak 7 | #*.off 8 | *.old 9 | *.log 10 | 11 | # eclipse conf file 12 | .settings 13 | .classpath 14 | .project 15 | .manager 16 | .scala_dependencies 17 | 18 | # idea 19 | .idea 20 | *.iml 21 | 22 | # building 23 | target 24 | build 25 | null 26 | tmp 27 | temp 28 | test-output 29 | build.log 30 | 31 | # other scm 32 | .svn 33 | .CVS 34 | .hg* 35 | 36 | # switch to regexp syntax. 37 | # syntax: regexp 38 | # ^\.pc/ 39 | 40 | #SHITTY output not in target directory 41 | /dependency-reduced-pom.xml 42 | 43 | # Documentation autogenerated 44 | javadoc 45 | apidocs -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.1.0 (upcoming) 4 | 5 | * Sink to save Kafka stream data into Elasticsearch 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dbtucker/cp-kafka-connect:3.0.0 2 | 3 | ARG VERSION 4 | ADD "target/kafka-elasticsearch-sink-${VERSION}-jar-with-dependencies.jar" /etc/kafka-connect/jars/ 5 | 6 | # Delete library dependecies for hdfs and jdbc connect 7 | RUN rm -rf /usr/share/java/kafka-connect-hdfs 8 | RUN rm -rf /usr/share/java/kafka-connect-jdbc 9 | 10 | # Delete library conflict with netty in elasticsearch library 11 | RUN rm /usr/share/java/confluent-common/netty-3.2.2.Final.jar 12 | 13 | # Task in json format, this task will be sent to the Kafka Connect API 14 | ADD docker/elasticsearch-task.json /etc/kafka-connect-tasks/ 15 | 16 | # Entrypoint 17 | ADD docker/bootstrap.sh /etc/bootstrap.sh 18 | RUN chown root:root /etc/bootstrap.sh 19 | RUN chmod 700 /etc/bootstrap.sh 20 | 21 | ENTRYPOINT ["/etc/bootstrap.sh"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | © 2017 Stratio Big Data Inc., Sucursal en España. All rights reserved. 2 | 3 | This software – including all its source code – contains proprietary information of Stratio Big Data Inc., Sucursal en España and may not be revealed, sold, transferred, modified, distributed or otherwise made available, licensed or sublicensed to third parties; nor reverse engineered, disassembled or decompiled, without express written authorization from Stratio Big Data Inc., Sucursal en España. 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | [![Coverage Status](https://coveralls.io/repos/github/Stratio/kafka-elasticsearch-sink/badge.svg?branch=master)](https://coveralls.io/github/Stratio/kafka-elasticsearch-sink?branch=master)
2 |
3 | # kafka-elasticsearch-sink
4 |
5 | kafka-elasticsearch-sink is a library for generating Kafka Connect tasks that write data to Elasticsearch.
6 |
7 |
8 | ## Requirements
9 |
10 | This library requires Kafka 0.10.0 and Elasticsearch 2.0.2.
11 |
12 |
13 | ## Using the library
14 |
15 | The ElasticsearchSinkTask can be configured with the following properties.
16 |
17 | ```
18 | connector.class=org.apache.kafka.connect.es.KafkaElasticSearchSinkConnector
19 | topics=metadata
20 | action.type=insert
21 | elasticsearch.cluster.name=dg-cluster
22 | elasticsearch.hosts=127.0.0.1:9300
23 | elasticsearch.index=dg-metadata
24 | elasticsearch.mapping.type=type-v0
25 | elasticsearch.bulk.size=100000
26 | ```
27 |
28 | By default the library inserts data from Kafka topics; in addition, it is possible to create Kafka Connect tasks
29 | that perform updates, deletes or upserts on the Elasticsearch indexes.
30 |
31 | It is necessary to add the plugin jar to the connectors directory, and this directory should be added to the classpath:
32 |
33 | ```
34 | mkdir ${KAFKA_PATH}/libs_connect
35 | cp -r target/kafka-elasticsearch-sink-0.0.1-SNAPSHOT-jar-with-dependencies.jar ${KAFKA_PATH}/libs_connect/
36 | export CLASSPATH=${KAFKA_PATH}/libs_connect/*
37 | ```
38 |
39 | To make this change persistent, the user can add this export to the ".bashrc" file.
40 |
41 |
42 | ### Build
43 |
44 | `mvn clean package`
45 |
46 |
47 |
-------------------------------------------------------------------------------- /docker/bootstrap.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -o nounset \
4 | -o verbose \
5 | -o xtrace
6 |
7 | [ -f /etc/confluent/docker/apply-mesos-overrides ] && \
8 | . /etc/confluent/docker/apply-mesos-overrides
9 |
10 | set -o errexit
11 |
12 | echo "===> ENV Variables ..."
13 | env | sort
14 |
15 | echo "===> User"
16 | id
17 |
18 | echo "===> Configuring ..."
19 | /etc/confluent/docker/configure
20 |
21 | if [[ ! -v CONNECT_TASK_NAME ]]; then
22 | CONNECT_TASK_NAME=kafka-connect-task-name
23 | fi
24 |
25 | if [[ ! -v KAFKA_TOPICS ]]; then
26 | KAFKA_TOPICS=topictest
27 | fi
28 |
29 | if [[ ! -v KAFKA_TOPIC_REPLICATION_FACTOR ]]; then
30 | KAFKA_TOPIC_REPLICATION_FACTOR=1
31 | fi
32 |
33 | if [[ ! -v KAFKA_TOPIC_PARTITIONS ]]; then
34 | KAFKA_TOPIC_PARTITIONS=1
35 | fi
36 |
37 | if [[ ! -v KAFKA_TOPICS ]]; then
38 | KAFKA_TOPICS=topictest
39 | fi
40 |
41 | if [[ ! -v ELASTICSEARCH_HOSTS ]]; then
42 | ELASTICSEARCH_HOSTS=localhost:9300
43 | fi
44 |
45 | if [[ ! -v ELASTICSEARCH_CLUSTER_NAME ]]; then
46 | ELASTICSEARCH_CLUSTER_NAME=elasticsearch
47 | fi
48 |
49 | if [[ ! -v ELASTICSEARCH_INDEX ]]; then
50 | ELASTICSEARCH_INDEX=connect-index
51 | fi
52 |
53 | if [[ ! -v ELASTICSEARCH_MAPPING_TYPE ]]; then
54 | ELASTICSEARCH_MAPPING_TYPE=mapping-v1
55 | fi
56 |
57 | if [[ ! -v ELASTICSEARCH_BULK_SIZE ]]; then
58 | ELASTICSEARCH_BULK_SIZE=250
59 | fi
60 |
61 | if [[ ! -v ELASTICSEARCH_ID_FIELD ]]; then
62 | ELASTICSEARCH_ID_FIELD=id
63 | fi
64 |
65 | if [[ ! -v ELASTICSEARCH_ACTION_TYPE ]]; then
66 | ELASTICSEARCH_ACTION_TYPE=insert
67 | fi
68 |
69 | if [[ !
-v CONNECT_CONVERTER_ENABLE_SCHEMAS ]]; then 70 | CONNECT_CONVERTER_ENABLE_SCHEMAS=false 71 | fi 72 | if [[ ! -v CONNECT_INTERNAL_CONVERTER_ENABLE_SCHEMAS ]]; then 73 | CONNECT_INTERNAL_CONVERTER_ENABLE_SCHEMAS=false 74 | fi 75 | 76 | CONNECT_CONF_FILE=/etc/kafka-connect/kafka-connect.properties 77 | 78 | if [[ "${CONNECT_CONVERTER_ENABLE_SCHEMAS}" == "false" ]]; then 79 | sed -i "s|key.converter.schemas.enable=true*|key.converter.schemas.enable=false|" $CONNECT_CONF_FILE 80 | sed -i "s|value.converter.schemas.enable=true*|value.converter.schemas.enable=false|" $CONNECT_CONF_FILE 81 | echo "key.converter.schemas.enable=false" >> $CONNECT_CONF_FILE 82 | echo "value.converter.schemas.enable=false" >> $CONNECT_CONF_FILE 83 | fi 84 | 85 | if [[ "${CONNECT_INTERNAL_CONVERTER_ENABLE_SCHEMAS}" == "false" ]]; then 86 | sed -i "s|internal.key.converter.schemas.enable=true*|internal.key.converter.schemas.enable=false|" $CONNECT_CONF_FILE 87 | sed -i "s|internal.value.converter.schemas.enable=true*|internal.value.converter.schemas.enable=false|" $CONNECT_CONF_FILE 88 | echo "internal.key.converter.schemas.enable=false" >> $CONNECT_CONF_FILE 89 | echo "internal.value.converter.schemas.enable=false" >> $CONNECT_CONF_FILE 90 | fi 91 | 92 | echo "===> Running preflight checks ... " 93 | /etc/confluent/docker/ensure 94 | 95 | echo "===> Creating topics in Kafka" 96 | topics=$(echo $KAFKA_TOPICS | tr "," "\n") 97 | for topic in $topics 98 | do 99 | echo "===> Creating topic $topic in Kafka" 100 | /usr/bin/kafka-topics --create --zookeeper $CONNECT_ZOOKEEPER_CONNECT --replication-factor $KAFKA_TOPIC_REPLICATION_FACTOR --partitions $KAFKA_TOPIC_PARTITIONS --topic $topic || echo "The topic $topic exists" 101 | done 102 | 103 | echo "===> Launching Kafka Connect ... " 104 | exec /etc/confluent/docker/launch & 105 | 106 | echo "===> Waiting for TCP connection to Kafka Connect ... " 107 | while ! nc -w 1 127.0.0.1 8083 108 | do 109 | sleep 1s 110 | done 111 | echo -n "done!" 112 | echo "===> Kafka Connect Up" 113 | 114 | echo "===> Sending tasks to Kafka Connect ..." 115 | TASK_FILE=/etc/kafka-connect-tasks/elasticsearch-task.json 116 | 117 | echo "===> Applying configuration to ${TASK_FILE}" 118 | sed -i "s|\"name\":\"kafka.*|\"name\":\"${CONNECT_TASK_NAME}\",|" $TASK_FILE 119 | sed -i "s|\"topics.*|\"topics\":\"${KAFKA_TOPICS}\",|" $TASK_FILE 120 | sed -i "s|\"action.type.*|\"action.type\":\"${ELASTICSEARCH_ACTION_TYPE}\",|" $TASK_FILE 121 | sed -i "s|\"elasticsearch.hosts.*|\"elasticsearch.hosts\":\"${ELASTICSEARCH_HOSTS}\",|" $TASK_FILE 122 | sed -i "s|\"elasticsearch.cluster.name.*|\"elasticsearch.cluster.name\":\"${ELASTICSEARCH_CLUSTER_NAME}\",|" $TASK_FILE 123 | sed -i "s|\"elasticsearch.index.*|\"elasticsearch.index\":\"${ELASTICSEARCH_INDEX}\",|" $TASK_FILE 124 | sed -i "s|\"elasticsearch.mapping.type.*|\"elasticsearch.mapping.type\":\"${ELASTICSEARCH_MAPPING_TYPE}\",|" $TASK_FILE 125 | sed -i "s|\"elasticsearch.id.field.*|\"elasticsearch.id.field\":\"${ELASTICSEARCH_ID_FIELD}\",|" $TASK_FILE 126 | sed -i "s|\"elasticsearch.bulk.size.*|\"elasticsearch.bulk.size\":\"${ELASTICSEARCH_BULK_SIZE}\"|" $TASK_FILE 127 | 128 | echo "===> Sending task to Kafka Connect ... 
" 129 | curl -H "Content-Type: application/json; charset=UTF-8" -X POST http://localhost:8083/connectors -d @${TASK_FILE} 130 | 131 | tail -f /dev/null 132 | -------------------------------------------------------------------------------- /docker/elasticsearch-task.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"kafka-connect-task-name", 3 | "config":{ 4 | "connector.class":"org.apache.kafka.connect.es.KafkaElasticSearchSinkConnector", 5 | "topics":"topictest", 6 | "action.type":"insert", 7 | "elasticsearch.cluster.name":"elasticsearch", 8 | "elasticsearch.hosts":"localhost:9300", 9 | "elasticsearch.index":"connect-index", 10 | "elasticsearch.mapping.type":"mapping-v1", 11 | "elasticsearch.idField":"id", 12 | "elasticsearch.bulk.size":"250" 13 | } 14 | } -------------------------------------------------------------------------------- /docker/example.txt: -------------------------------------------------------------------------------- 1 | First build the library jar: 2 | mvn clean install 3 | 4 | 5 | Build the Docker container: 6 | docker build --build-arg VERSION=0.1.0-SNAPSHOT -f Dockerfile -t kafka-elasticsearch-sink . 7 | 8 | 9 | Run the generated Docker container: 10 | docker run -it 11 | --env CONNECT_BOOTSTRAP_SERVERS=kafka:9092 12 | --env CONNECT_GROUP_ID=mesos-connect-group 13 | --env CONNECT_CONFIG_STORAGE_TOPIC=mesos-connect-configs 14 | --env CONNECT_OFFSET_STORAGE_TOPIC=mesos-connect-offsets 15 | --env CONNECT_STATUS_STORAGE_TOPIC=mesos-connect-status 16 | --env CONNECT_KEY_CONVERTER=org.apache.kafka.connect.json.JsonConverter 17 | --env CONNECT_VALUE_CONVERTER=org.apache.kafka.connect.json.JsonConverter 18 | --env CONNECT_INTERNAL_KEY_CONVERTER=org.apache.kafka.connect.json.JsonConverter 19 | --env CONNECT_INTERNAL_VALUE_CONVERTER=org.apache.kafka.connect.json.JsonConverter 20 | --env CONNECT_REST_ADVERTISED_HOST_NAME=localhost 21 | --env CONNECT_ZOOKEEPER_CONNECT=zookeeper:2181 22 | --env CONNECT_TASK_NAME=connector-dg 23 | --env KAFKA_TOPICS=metadata 24 | --env ELASTICSEARCH_CLUSTER_NAME=dg-cluster 25 | --env ELASTICSEARCH_HOSTS=elasticsearch:9300 26 | --env ELASTICSEARCH_INDEX=dg-metadata 27 | --env ELASTICSEARCH_MAPPING_TYPE=type-v0 28 | --env ELASTICSEARCH_BULK_SIZE=250 29 | --env ELASTICSEARCH_ACTION_TYPE=insert 30 | --env ELASTICSEARCH_ID_FIELD=id 31 | --net=host -p 8083:8083 kafka-elasticsearch-sink -------------------------------------------------------------------------------- /examples/commands: -------------------------------------------------------------------------------- 1 | *** Add the plugins library directory to the Classpath *** 2 | 3 | export CLASSPATH=${KAFKA_PATH}/libs_connect/* 4 | persistent -> .bashrc 5 | 6 | 7 | *** Copy kafka connect library *** 8 | 9 | mkdir ${KAFKA_PATH}/libs_connect 10 | cp -r target/kafka-elasticsearch-sink-0.0.1-SNAPSHOT-jar-with-dependencies.jar ${KAFKA_PATH}/libs_connect/ 11 | cp -r target/kafka-elasticsearch-sink-0.0.1-SNAPSHOT-jar-with-dependencies.jar ${KAFKA_PATH}/libs/ 12 | 13 | 14 | *** Run Connect distributed *** 15 | 16 | ${KAFKA_PATH}/bin/connect-distributed.sh examples/connect-distributed.properties 17 | 18 | 19 | *** Run Connect standalone *** 20 | 21 | ${KAFKA_PATH}/bin/connect-standalone.sh examples/connect-standalone.properties examples/kafkaconnectsink.properties 22 | 23 | 24 | *** Create and run Elasticsearch Connector in Kafka Connect when running in distributed mode *** 25 | 26 | curl -H "Content-Type:application/json" -X POST 
http://localhost:8160/connectors -d '{ 27 | "name": "connector-dg", 28 | "config":{ 29 | "connector.class":"org.apache.kafka.connect.es.KafkaElasticSearchSinkConnector", 30 | "topics":"metadata", 31 | "elasticsearch.cluster.name":"dg-cluster", 32 | "elasticsearch.hosts":"localhost:9300", 33 | "elasticsearch.index":"dg-metadata", 34 | "elasticsearch.mapping.type":"type-v0", 35 | "elasticsearch.bulk.size":250 36 | } 37 | }' 38 | 39 | 40 | *** Create connect origin topics *** 41 | 42 | ${KAFKA_PATH}/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic metadata 43 | 44 | 45 | *** Delete connect origin topics *** 46 | 47 | ${KAFKA_PATH}/bin/kafka-topics.sh --delete --zookeeper localhost:2181 --topic metadata 48 | 49 | 50 | *** Producer *** 51 | 52 | ${KAFKA_PATH}/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic metadata 53 | 54 | event -> {"field1" : "1"} 55 | 56 | 57 | *** Create Elasticsearch Index and Mapping (the library create the mapping and the index automatically)*** 58 | 59 | curl -XPOST localhost:9200/dg-cluster -d '{ 60 | "settings" : { 61 | "number_of_shards" : 1 62 | }, 63 | "mappings" : { 64 | "type1" : { 65 | "properties" : { 66 | "type-v0" : { "type" : "string", "index" : "not_analyzed" } 67 | } 68 | } 69 | } 70 | }' 71 | -------------------------------------------------------------------------------- /examples/connect-distributed.properties: -------------------------------------------------------------------------------- 1 | bootstrap.servers=localhost:9092 2 | 3 | group.id=connect-metadata 4 | 5 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will 6 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 7 | key.converter=org.apache.kafka.connect.json.JsonConverter 8 | value.converter=org.apache.kafka.connect.json.JsonConverter 9 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 10 | # it to 11 | schemas.enable=false 12 | key.converter.schemas.enable=false 13 | value.converter.schemas.enable=false 14 | 15 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will 16 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format. 17 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 18 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 19 | internal.key.converter.schemas.enable=false 20 | internal.value.converter.schemas.enable=false 21 | 22 | # Flush much faster than normal, which is useful for testing/debugging 23 | offset.flush.interval.ms=5000 24 | 25 | rest.advertised.host.name=localhost 26 | rest.advertised.port=8160 27 | rest.host.name=localhost 28 | rest.port=8160 29 | 30 | # Topic to use for storing offsets. This topic should have many partitions and be replicated. 31 | offset.storage.topic=connect-offsets 32 | 33 | # Topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated topic. 34 | # You may need to manually create the topic to ensure single partition for the config topic as auto created topics may have multiple partitions. 35 | config.storage.topic=connect-configs 36 | 37 | # Topic to use for storing statuses. This topic can have multiple partitions and should be replicated. 
38 | status.storage.topic=connect-status 39 | 40 | #OFFSET SETTINGS 41 | 42 | consumer.consumer.timeout.ms=5000 43 | consumer.auto.commit.interval.ms=5000 44 | consumer.enable.auto.commit=true 45 | 46 | 47 | -------------------------------------------------------------------------------- /examples/connect-standalone.properties: -------------------------------------------------------------------------------- 1 | bootstrap.servers=localhost:9092 2 | 3 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will 4 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 5 | key.converter=org.apache.kafka.connect.json.JsonConverter 6 | value.converter=org.apache.kafka.connect.json.JsonConverter 7 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 8 | # it to 9 | key.converter.schemas.enable=false 10 | value.converter.schemas.enable=false 11 | 12 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will 13 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format. 14 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 15 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 16 | internal.key.converter.schemas.enable=false 17 | internal.value.converter.schemas.enable=false 18 | 19 | offset.storage.file.filename=/tmp/connect.offsets 20 | # Flush much faster than normal, which is useful for testing/debugging 21 | offset.flush.interval.ms=10000 22 | -------------------------------------------------------------------------------- /examples/kafkaconnectsink.properties: -------------------------------------------------------------------------------- 1 | name=es-connect-sink 2 | connector.class=org.apache.kafka.connect.es.KafkaElasticSearchSinkConnector 3 | topics=metadata 4 | elasticsearch.cluster.name=dg-cluster 5 | elasticsearch.hosts=127.0.0.1:9300 6 | elasticsearch.index=dg-metadata 7 | elasticsearch.mapping.type=type-v0 8 | elasticsearch.bulk.size=1000000 9 | 10 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.stratio 7 | parent 8 | 0.8.2 9 | 10 | 11 | kafka-elasticsearch-sink 12 | com.stratio 13 | 0.1.0-SNAPSHOT 14 | 15 | 16 | 17 | confluent 18 | http://packages.confluent.io/maven/ 19 | 20 | 21 | 22 | 23 | https://github.com/Stratio/kafka-elasticsearch-sink 24 | scm:git:git://github.com/Stratio/kafka-elasticsearch-sink.git 25 | scm:git:git@github.com/Stratio/kafka-elasticsearch-sink.git 26 | HEAD 27 | 28 | 29 | 30 | src/main/java 31 | src/test/java 32 | 33 | 34 | org.apache.maven.plugins 35 | maven-surefire-plugin 36 | 37 | 38 | org.apache.maven.plugins 39 | maven-failsafe-plugin 40 | 41 | 42 | org.jacoco 43 | jacoco-maven-plugin 44 | 45 | 46 | org/apache/spark/streaming/datasource/** 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-source-plugin 53 | 2.2.1 54 | 55 | 56 | attach-sources 57 | 58 | jar 59 | 60 | 61 | 62 | 63 | 64 | org.apache.maven.plugins 65 | maven-compiler-plugin 66 | 3.3 67 | true 68 | 69 | 1.8 70 | 1.8 71 | 72 | 73 | 74 | org.apache.maven.plugins 75 | maven-assembly-plugin 76 | 2.6 77 | 78 | 79 | jar-with-dependencies 80 | 81 | 82 | 83 | 84 | make-assembly 85 | package 86 | 87 
| single 88 | 89 | 90 | 91 | 92 | 93 | com.mycila 94 | license-maven-plugin 95 | 96 | 97 | **/README 98 | **/src/test/resources/** 99 | **/src/main/resources/** 100 | **/*.csv 101 | **/*.json 102 | **/*.conf 103 | **/*.txt 104 | **/*.properties 105 | **/jetty* 106 | **/node*/** 107 | **/.tmp/** 108 | **/*.scss 109 | **/*.woff 110 | **/*.woff2 111 | **/*.ttf 112 | **/*.svg 113 | **/*.eot 114 | **/*.otf 115 | **/*.htaccess 116 | **/*.jshintrc 117 | **/*.html 118 | 119 | 120 | 121 | 122 | 123 | check 124 | 125 | validate 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | com.fasterxml.jackson.core 136 | jackson-core 137 | 2.6.3 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | org.apache.kafka 146 | connect-api 147 | 0.10.0.0-cp1 148 | provided 149 | 150 | 151 | javax.json 152 | javax.json-api 153 | 1.0 154 | 155 | 156 | org.slf4j 157 | slf4j-api 158 | 1.7.6 159 | compile 160 | 161 | 162 | org.apache.avro 163 | avro 164 | 1.7.7 165 | 166 | 167 | com.fasterxml.jackson.core 168 | jackson-databind 169 | 170 | 171 | 172 | 173 | com.fasterxml.jackson.core 174 | jackson-core 175 | 176 | 177 | org.elasticsearch 178 | elasticsearch 179 | 2.0.2 180 | 181 | 182 | com.fasterxml.jackson.core 183 | jackson-databind 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /src/main/java/org/apache/kafka/connect/es/KafkaElasticSearchSinkConnector.java: -------------------------------------------------------------------------------- 1 | package org.apache.kafka.connect.es; 2 | 3 | import org.apache.kafka.common.config.ConfigDef; 4 | import org.apache.kafka.common.utils.AppInfoParser; 5 | import org.apache.kafka.connect.connector.Task; 6 | import org.apache.kafka.connect.errors.ConnectException; 7 | import org.apache.kafka.connect.sink.SinkConnector; 8 | 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | 15 | /** 16 | * HdfsSinkConnector is a Kafka Connect Connector implementation that ingest data from Kafka to HDFS. 
17 | */
18 | public class KafkaElasticSearchSinkConnector extends SinkConnector {
19 |
20 | Map<String, String> configProperties;
21 | KafkaElasticSearchSinkConnectorConfig config;
22 |
23 | @Override
24 | public String version() {
25 | return AppInfoParser.getVersion();
26 | }
27 |
28 | @Override
29 | public void start(Map<String, String> props) throws ConnectException {
30 | try {
31 | configProperties = props;
32 | config = new KafkaElasticSearchSinkConnectorConfig(props);
33 | //} catch (ConfigException e) {
34 | } catch (Exception e) {
35 | throw new ConnectException("Couldn't start KafkaElasticSearchSinkConnector due to configuration error", e);
36 | }
37 | }
38 |
39 | @Override
40 | public Class<? extends Task> taskClass() {
41 | return KafkaElasticSearchSinkTask.class;
42 | }
43 |
44 | @Override
45 | public List<Map<String, String>> taskConfigs(int maxTasks) {
46 | List<Map<String, String>> taskConfigs = new ArrayList<>();
47 | Map<String, String> taskProps = new HashMap<>();
48 | taskProps.putAll(configProperties);
49 | for (int i = 0; i < maxTasks; i++) {
50 | taskConfigs.add(taskProps);
51 | }
52 | return taskConfigs;
53 | }
54 |
55 | @Override
56 | public void stop() throws ConnectException {
57 |
58 | }
59 |
60 | @Override
61 | public ConfigDef config() {
62 | return KafkaElasticSearchSinkConnectorConfig.config;
63 | }
64 | }
-------------------------------------------------------------------------------- /src/main/java/org/apache/kafka/connect/es/KafkaElasticSearchSinkConnectorConfig.java: --------------------------------------------------------------------------------
1 | package org.apache.kafka.connect.es;
2 |
3 | import java.util.Map;
4 |
5 | import org.apache.kafka.common.config.AbstractConfig;
6 | import org.apache.kafka.common.config.ConfigDef;
7 | import org.apache.kafka.common.config.ConfigDef.Importance;
8 | import org.apache.kafka.common.config.ConfigDef.Type;
9 |
10 | import java.util.HashMap;
11 |
12 |
13 | public class KafkaElasticSearchSinkConnectorConfig extends AbstractConfig {
14 |
15 | static final String CLUSTER_NAME = "elasticsearch.cluster.name";
16 | static final String HOSTS = "elasticsearch.hosts";
17 | static final String BULK_SIZE = "elasticsearch.bulk.size";
18 | static final String MAPPING_TYPE = "elasticsearch.mapping.type";
19 | static final String TOPICS = "topics";
20 | static final String INDEX = "elasticsearch.index";
21 | static final String ID_FIELD = "elasticsearch.idField";
22 | static final String ACTION_TYPE = "action.type";
23 |
24 | private static final String DEFAULT_ACTION_TYPE = "insert";
25 | private static final String DEFAULT_ID_FIELD = "id";
26 | private static final Integer DEFAULT_BULK_SIZE = 250;
27 |
28 | private static final String CLUSTER_NAME_DOC = "Elasticsearch cluster name";
29 | private static final String HOSTS_DOC = "A comma-separated list of Elasticsearch hosts, each including the port after a colon. For " +
30 | "example localhost:9300";
31 | private static final String BULK_SIZE_DOC = "The number of messages to be bulk indexed into Elasticsearch";
32 | private static final String MAPPING_TYPE_DOC = "The mapping type associated with the topic indexes";
33 | private static final String TOPICS_DOC = "The Kafka topics to read from";
34 | private static final String INDEX_DOC = "The name of the Elasticsearch index";
35 | private static final String ID_FIELD_DOC = "The id field used when the action type is delete, update or upsert";
36 | private static final String ACTION_TYPE_DOC = "The action type that determines how the messages should be processed." +
37 | " Default is: insert. The following options are available:\n"
38 | + "insert : Creates documents in ES with the values in the received message\n"
39 | + "update : Updates documents in ES with the values in the received message based on the id field\n" +
40 | "upsert : Updates documents in ES, creating them if they do not exist, with the values in the received " +
41 | "message based on the id field\n"
42 | + "delete : Deletes documents from ES based on the id field in the received message";
43 |
44 | static ConfigDef config = new ConfigDef().define(HOSTS, Type.STRING, Importance.HIGH, HOSTS_DOC)
45 | .define(CLUSTER_NAME, Type.STRING, Importance.HIGH, CLUSTER_NAME_DOC)
46 | .define(INDEX, Type.STRING, Importance.HIGH, INDEX_DOC)
47 | .define(BULK_SIZE, Type.INT, DEFAULT_BULK_SIZE, Importance.HIGH, BULK_SIZE_DOC)
48 | .define(TOPICS, Type.STRING, Importance.HIGH, TOPICS_DOC)
49 | .define(MAPPING_TYPE, Type.STRING, Importance.HIGH, MAPPING_TYPE_DOC)
50 | .define(ID_FIELD, Type.STRING, DEFAULT_ID_FIELD, Importance.MEDIUM, ID_FIELD_DOC)
51 | .define(ACTION_TYPE, Type.STRING, DEFAULT_ACTION_TYPE, Importance.HIGH, ACTION_TYPE_DOC);
52 |
53 | KafkaElasticSearchSinkConnectorConfig(Map<String, String> props) {
54 | super(config, props);
55 | }
56 |
57 | enum ActionType {
58 |
59 | DELETE("delete"), UPDATE("update"), UPSERT("upsert"), INSERT("insert");
60 |
61 | private String actionType;
62 | private static Map<String, ActionType> ACTIONS = init();
63 |
64 | ActionType(String actionType) {
65 | this.actionType = actionType;
66 | }
67 |
68 | public String toValue() {
69 | return actionType;
70 | }
71 |
72 | public static ActionType toType(String value) {
73 | return ACTIONS.get(value);
74 | }
75 |
76 | public static Map<String, ActionType> init() {
77 | Map<String, ActionType> actions = new HashMap<>();
78 | ActionType[] types = values();
79 | for (ActionType type : types) {
80 | actions.put(type.name().toLowerCase(), type);
81 | }
82 | return actions;
83 | }
84 | }
85 |
86 | ActionType getActionType(String actionType) {
87 | return ActionType.toType(actionType);
88 | }
89 |
90 | }
-------------------------------------------------------------------------------- /src/main/java/org/apache/kafka/connect/es/KafkaElasticSearchSinkTask.java: --------------------------------------------------------------------------------
1 | package org.apache.kafka.connect.es;
2 |
3 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
4 | import org.apache.kafka.common.TopicPartition;
5 | import org.apache.kafka.common.config.ConfigException;
6 | import org.apache.kafka.connect.errors.ConnectException;
7 | import org.apache.kafka.connect.errors.RetriableException;
8 | import org.apache.kafka.connect.sink.SinkRecord;
9 | import org.apache.kafka.connect.sink.SinkTask;
10 | import org.elasticsearch.action.bulk.BulkItemResponse;
11 | import org.elasticsearch.action.bulk.BulkRequestBuilder;
12 | import org.elasticsearch.action.bulk.BulkResponse;
13 | import org.elasticsearch.action.index.IndexRequest;
14 | import org.elasticsearch.action.update.UpdateRequest;
15 | import org.elasticsearch.client.Client;
16 | import org.elasticsearch.client.Requests;
17 | import org.elasticsearch.client.transport.TransportClient;
18 | import org.elasticsearch.common.settings.Settings;
19 | import org.elasticsearch.common.transport.InetSocketTransportAddress;
20 | import org.slf4j.Logger;
21 | import org.slf4j.LoggerFactory;
22 | import org.apache.kafka.connect.es.KafkaElasticSearchSinkConnectorConfig.*;
23 |
24 | import java.net.InetAddress;
25 | import java.util.*;
26 |
27 | /**
28 | * ElasticsearchSinkTask is a Task that takes records loaded from
Kafka and writes them to
29 | * Elasticsearch.
30 | */
31 | public class KafkaElasticSearchSinkTask extends SinkTask {
32 |
33 | private static final Logger log = LoggerFactory.getLogger(KafkaElasticSearchSinkTask.class);
34 |
35 | private KafkaElasticSearchSinkConnectorConfig config;
36 | private Client client;
37 | private Map<String, String> topicIndexes;
38 | private Map<String, String> topicMappings;
39 | private ActionType actionType;
40 | private Integer bulkSize;
41 | private String idField;
42 |
43 | public KafkaElasticSearchSinkTask() {
44 | topicIndexes = new HashMap<>(0);
45 | topicMappings = new HashMap<>(0);
46 | }
47 |
48 | @Override
49 | public String version() {
50 | return new KafkaElasticSearchSinkConnector().version();
51 | }
52 |
53 | /**
54 | * Start the Task. Handles configuration parsing and one-time setup of the task.
55 | *
56 | * @param props initial configuration
57 | */
58 | @Override
59 | public void start(Map<String, String> props) {
60 |
61 | try {
62 | config = new KafkaElasticSearchSinkConnectorConfig(props);
63 | } catch (ConfigException e) {
64 | throw new ConnectException("Couldn't start " + KafkaElasticSearchSinkConnector.class.getName() + " due to configuration error.", e);
65 | }
66 |
67 | String clusterName = config.getString(KafkaElasticSearchSinkConnectorConfig.CLUSTER_NAME);
68 | String hosts = config.getString(KafkaElasticSearchSinkConnectorConfig.HOSTS);
69 | String topics = config.getString(KafkaElasticSearchSinkConnectorConfig.TOPICS);
70 | String indexes = config.getString(KafkaElasticSearchSinkConnectorConfig.INDEX);
71 | String mappingTypes = config.getString(KafkaElasticSearchSinkConnectorConfig.MAPPING_TYPE);
72 | actionType = config.getActionType(config.getString(KafkaElasticSearchSinkConnectorConfig.ACTION_TYPE));
73 | idField = config.getString(KafkaElasticSearchSinkConnectorConfig.ID_FIELD);
74 | bulkSize = config.getInt(KafkaElasticSearchSinkConnectorConfig.BULK_SIZE);
75 |
76 | List<String> hostsList = new ArrayList<>(Arrays.asList(hosts.replaceAll(" ", "").split(",")));
77 | List<String> topicsList = Arrays.asList(topics.replaceAll(" ", "").split(","));
78 | List<String> indexesList = Arrays.asList(indexes.replaceAll(" ", "").split(","));
79 | List<String> mappingTypesList = Arrays.asList(mappingTypes.replaceAll(" ", "").split(","));
80 |
81 | if (topicsList.size() != indexesList.size()) {
82 | throw new ConnectException("The number of indexes should be the same as the number of topics");
83 | }
84 |
85 | for (int i = 0; i < topicsList.size(); i++) {
86 | topicIndexes.put(topicsList.get(i), indexesList.get(i));
87 | topicMappings.put(topicsList.get(i), mappingTypesList.get(i));
88 | }
89 |
90 | try {
91 | Settings settings = Settings.settingsBuilder().put("cluster.name", clusterName).build();
92 | client = TransportClient.builder().settings(settings).build();
93 | for (String host : hostsList) {
94 | String address;
95 | Integer port;
96 | String[] hostArray = host.split(":");
97 | address = hostArray[0];
98 |
99 | try {
100 | port = Integer.parseInt(hostArray[1]);
101 | } catch (Exception e) {
102 | port = 9300;
103 | }
104 | log.info("address " + address + " port " + port);
105 | ((TransportClient) client).addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(address), port));
106 | }
107 | } catch (Exception e) {
108 | throw new ConnectException("Could not connect to the Elasticsearch hosts", e);
109 | }
110 |
111 | }
112 |
113 | /**
114 | * Put the records in the sink.
115 | *
116 | * @param sinkRecords the set of records to send.
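 * Records are grouped into bulk requests of at most elasticsearch.bulk.size actions; each bulk request is
 * executed synchronously and individual item failures are logged.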
117 | */
118 | @Override
119 | public void put(Collection<SinkRecord> sinkRecords) {
120 | try {
121 | List<SinkRecord> records = new ArrayList<>(sinkRecords);
122 |
123 | for (int i = 0; i < records.size(); i++) {
124 | BulkRequestBuilder bulkRequest = client.prepareBulk().setRefresh(Boolean.TRUE);
125 |
126 | for (int j = 0; j < bulkSize && i < records.size(); j++, i++) {
127 | SinkRecord record = records.get(i);
128 | Map<String, Object> jsonMap = (Map<String, Object>) record.value();
129 |
130 | if (!jsonMap.isEmpty()) {
131 | String index = topicIndexes.get(record.topic());
132 | String mappingType = topicMappings.get(record.topic());
133 | String id;
134 |
135 | switch (actionType) {
136 | case INSERT:
137 | bulkRequest.add(client.prepareIndex(index, mappingType).setSource(jsonMap));
138 | break;
139 | case DELETE:
140 | id = (String) jsonMap.get(idField);
141 | bulkRequest.add(Requests.deleteRequest(index).type(mappingType).id(id));
142 | break;
143 | case UPDATE:
144 | id = (String) jsonMap.get(idField);
145 | bulkRequest.add(new UpdateRequest(index, mappingType, id).doc(jsonMap));
146 | break;
147 | case UPSERT:
148 | id = (String) jsonMap.get(idField);
149 | IndexRequest indexRequest = new IndexRequest(index, mappingType, id).source(jsonMap);
150 | bulkRequest.add(new UpdateRequest(index, mappingType, id).doc(jsonMap)
151 | .upsert(indexRequest));
152 | break;
153 | }
154 | }
155 | }
156 | i--; // the inner loop already advanced i past this batch; compensate for the outer loop's i++
157 | if (bulkRequest.numberOfActions() > 0) {
158 | BulkResponse bulkResponse = bulkRequest.execute().actionGet();
159 | if (bulkResponse.hasFailures()) {
160 | for (BulkItemResponse item : bulkResponse) {
161 | log.error(item.getFailureMessage());
162 | }
163 | }
164 | }
165 | }
166 | } catch (Exception e) {
167 | throw new RetriableException("Elasticsearch not connected", e);
168 | }
169 | }
170 |
171 | @Override
172 | public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
173 | }
174 |
175 | @Override
176 | public void stop() {
177 | client.close();
178 | }
179 |
180 | public KafkaElasticSearchSinkConnectorConfig getConfig() {
181 | return config;
182 | }
183 |
184 | public void setConfig(KafkaElasticSearchSinkConnectorConfig config) {
185 | this.config = config;
186 | }
187 |
188 | public Client getClient() {
189 | return client;
190 | }
191 |
192 | public void setClient(Client client) {
193 | this.client = client;
194 | }
195 |
196 | public Map<String, String> getTopicIndexes() {
197 | return topicIndexes;
198 | }
199 |
200 | public void setTopicIndexes(Map<String, String> topicIndexes) {
201 | this.topicIndexes = topicIndexes;
202 | }
203 |
204 | public Map<String, String> getTopicMappings() {
205 | return topicMappings;
206 | }
207 |
208 | public void setTopicMappings(Map<String, String> topicMappings) {
209 | this.topicMappings = topicMappings;
210 | }
211 |
212 | public ActionType getActionType() {
213 | return actionType;
214 | }
215 |
216 | public void setActionType(ActionType actionType) {
217 | this.actionType = actionType;
218 | }
219 |
220 | public Integer getBulkSize() {
221 | return bulkSize;
222 | }
223 |
224 | public void setBulkSize(Integer bulkSize) {
225 | this.bulkSize = bulkSize;
226 | }
227 |
228 | public String getIdField() {
229 | return idField;
230 | }
231 |
232 | public void setIdField(String idField) {
233 | this.idField = idField;
234 | }
235 | }
-------------------------------------------------------------------------------- /src/main/resources/kafka-elasticsearch-sink-version.properties: --------------------------------------------------------------------------------
1 | version=${project.version}
--------------------------------------------------------------------------------
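To tie the configuration keys and the task implementation together, here is a minimal, hypothetical smoke test (not part of the repository) that drives KafkaElasticSearchSinkTask directly. It assumes an Elasticsearch 2.x node is reachable on localhost:9300 with the default cluster name "elasticsearch", and that the connector jar with its dependencies is on the classpath; the class name, topic and index names below are illustrative only.

```
import org.apache.kafka.connect.es.KafkaElasticSearchSinkTask;
import org.apache.kafka.connect.sink.SinkRecord;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class SinkTaskSmokeTest {

    public static void main(String[] args) {
        // Same keys as in examples/kafkaconnectsink.properties
        Map<String, String> props = new HashMap<>();
        props.put("topics", "metadata");
        props.put("elasticsearch.cluster.name", "elasticsearch");
        props.put("elasticsearch.hosts", "localhost:9300");
        props.put("elasticsearch.index", "dg-metadata");
        props.put("elasticsearch.mapping.type", "type-v0");
        props.put("elasticsearch.bulk.size", "250");
        props.put("action.type", "insert");

        KafkaElasticSearchSinkTask task = new KafkaElasticSearchSinkTask();
        task.start(props);

        // The task expects schemaless JSON, i.e. a Map value, which is what the
        // JsonConverter produces when value.converter.schemas.enable=false.
        Map<String, Object> value = new HashMap<>();
        value.put("id", "1");
        value.put("field1", "value1");

        SinkRecord record = new SinkRecord("metadata", 0, null, null, null, value, 0L);
        task.put(Collections.singletonList(record));
        task.stop();
    }
}
```

Because the bulk request is sent with refresh enabled, the indexed document should be visible immediately, for example with `curl 'localhost:9200/dg-metadata/_search?pretty'`.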