├── .gitignore
├── README.md
├── config
│   └── connect-elasticsearch-schema-sink.properties
├── pom.xml
└── src
    └── main
        └── java
            └── org
                └── apache
                    └── kafka
                        └── connect
                            └── elasticsearchschema
                                ├── ElasticsearchSinkConnector.java
                                └── ElasticsearchSinkTask.java

/.gitignore:
--------------------------------------------------------------------------------
/target/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Kafka Connect Elasticsearch Connector

kafka-connect-elastic-search-sink is a [Kafka Connector](http://kafka.apache.org/090/documentation.html#connect)
for loading data from Kafka into Elasticsearch, with Avro and Schema Registry integration.


# Development

To build a development version you'll need a recent version of Kafka. You can build
kafka-connect-elastic-search-sink with Maven using the standard lifecycle phases.
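
For example, a plain package build (a sketch that assumes Maven 3 and a JDK 8 toolchain,
matching the compiler settings in the pom) bundles the connector and its dependencies into a
single jar via the assembly plugin:

```sh
mvn clean package
# expected artifact: target/connect-elasticsearch-schema-sink-1.0-jar-with-dependencies.jar
```

Put that jar on the classpath of your Kafka Connect worker before starting the connector.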


# Testing
--------------------------------------------------------------------------------
/config/connect-elasticsearch-schema-sink.properties:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
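
# Usage sketch (assumes a standard Kafka distribution and that the packaged connector jar
# is on the Connect worker's classpath):
#   bin/connect-standalone.sh config/connect-standalone.properties \
#       config/connect-elasticsearch-schema-sink.properties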

name=elasticsearch-schema-sink
connector.class=org.apache.kafka.connect.elasticsearchschema.ElasticsearchSinkConnector
tasks.max=1
# topics from which to consume messages
topics=topic1,topic2,topic3
# indexes to which the messages of the corresponding topics are written
elasticsearch.indexes=index_for_topic1,index_for_topic2,index_for_topic3
elasticsearch.cluster.name=test-cluster
elasticsearch.hosts=localhost:9300
# document type name to use; optional, default value: the Avro schema name
elasticsearch.document.name=document
elasticsearch.bulk.size=10
# format of the date suffix appended to the index name; optional
# if empty or null, no suffix is used
date.format=yyyy.MM.dd
# separator between the index name and the date suffix; optional
# default value: "-"
suffix.separator=-
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.apache.kafka</groupId>
    <artifactId>connect-elasticsearch-schema-sink</artifactId>
    <version>1.0</version>

    <repositories>
        <repository>
            <id>confluent</id>
            <url>http://packages.confluent.io/maven/</url>
        </repository>
    </repositories>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <inherited>true</inherited>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.6</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-api</artifactId>
            <version>0.10.0.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.6</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.7.7</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>2.3.4</version>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/src/main/java/org/apache/kafka/connect/elasticsearchschema/ElasticsearchSinkConnector.java:
--------------------------------------------------------------------------------
package org.apache.kafka.connect.elasticsearchschema;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.utils.AppInfoParser;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.SinkConnector;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * ElasticsearchSinkConnector implements the Connector interface to send Kafka
 * data to Elasticsearch.
 *
 * @author Andrea Patelli
 */
public class ElasticsearchSinkConnector extends SinkConnector {
    public static final String CLUSTER_NAME = "elasticsearch.cluster.name";
    public static final String HOSTS = "elasticsearch.hosts";
    public static final String BULK_SIZE = "elasticsearch.bulk.size";
    public static final String INDEXES = "elasticsearch.indexes";
    public static final String DOCUMENT_NAME = "elasticsearch.document.name";
    public static final String TOPICS = "topics";
    public static final String DATE_FORMAT = "date.format";
    public static final String SUFFIX_SEPARATOR = "suffix.separator";

    private String clusterName;
    private String hosts;
    private String bulkSize;
    private String documentName;
    private String topics;
    private String indexes;
    private String dateFormat;
    private String suffixSeparator;

    /**
     * Get the version of this connector.
     *
     * @return the version, formatted as a String
     */
    @Override
    public String version() {
        return AppInfoParser.getVersion();
    }

    /**
     * Start this Connector. This method will only be called on a clean Connector, i.e. it has
     * either just been instantiated and initialized or {@link #stop()} has been invoked.
     *
     * @param props configuration settings
     */
    @Override
    public void start(Map<String, String> props) {
        clusterName = props.get(CLUSTER_NAME);
        hosts = props.get(HOSTS);
        bulkSize = props.get(BULK_SIZE);
        documentName = props.get(DOCUMENT_NAME);
        suffixSeparator = props.get(SUFFIX_SEPARATOR);
        if (clusterName == null || clusterName.isEmpty()) {
            throw new ConnectException("ElasticsearchSinkConnector configuration must include 'elasticsearch.cluster.name' setting");
        }
        if (hosts == null || hosts.isEmpty()) {
            throw new ConnectException("ElasticsearchSinkConnector configuration must include 'elasticsearch.hosts' setting");
        }
        if (bulkSize == null || bulkSize.isEmpty()) {
            throw new ConnectException("ElasticsearchSinkConnector configuration must include 'elasticsearch.bulk.size' setting");
        }
        if (documentName == null) {
            documentName = "";
        }

        topics = props.get(TOPICS);
        indexes = props.get(INDEXES);
        if (topics == null || topics.isEmpty()) {
            throw new ConnectException("ElasticsearchSinkConnector configuration must include 'topics' setting");
        }
        if (indexes == null || indexes.isEmpty()) {
            throw new ConnectException("ElasticsearchSinkConnector configuration must include 'elasticsearch.indexes' setting");
        }

        if (suffixSeparator == null) {
            suffixSeparator = "-";
        }
        dateFormat = props.get(DATE_FORMAT);
    }

    /**
     * Returns the Task implementation for this Connector.
     */
    @Override
    public Class<? extends Task> taskClass() {
        return ElasticsearchSinkTask.class;
    }

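    // Note: every configuration map returned by taskConfigs() below is an identical copy of the
    // connector configuration; the Connect framework decides how the subscribed topic partitions
    // are balanced across the tasks it creates from those configurations.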
    /**
     * Returns a set of configurations for Tasks based on the current configuration,
     * producing at most {@code maxTasks} configurations.
     *
     * @param maxTasks maximum number of configurations to generate
     * @return configurations for Tasks
     */
    @Override
    public List<Map<String, String>> taskConfigs(int maxTasks) {
        ArrayList<Map<String, String>> configs = new ArrayList<>();
        for (int i = 0; i < maxTasks; i++) {
            Map<String, String> config = new HashMap<>();
            config.put(CLUSTER_NAME, clusterName);
            config.put(HOSTS, hosts);
            config.put(BULK_SIZE, bulkSize);
            config.put(DOCUMENT_NAME, documentName);
            config.put(INDEXES, indexes);
            config.put(TOPICS, topics);
            if (dateFormat != null)
                config.put(DATE_FORMAT, dateFormat);
            config.put(SUFFIX_SEPARATOR, suffixSeparator);
            configs.add(config);
        }
        return configs;
    }

    /**
     * Stop this connector.
     */
    @Override
    public void stop() {
        // Nothing to do
    }

    @Override
    public ConfigDef config() {
        // No ConfigDef is defined yet; return an empty one so workers that validate
        // connector configurations do not trip over a null value.
        return new ConfigDef();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/apache/kafka/connect/elasticsearchschema/ElasticsearchSinkTask.java:
--------------------------------------------------------------------------------
package org.apache.kafka.connect.elasticsearchschema;

import java.net.InetAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * ElasticsearchSinkTask is a Task that takes records loaded from Kafka and sends them to
 * Elasticsearch.
 *
 * @author Andrea Patelli
 */
public class ElasticsearchSinkTask extends SinkTask {
    private static final Logger log = LoggerFactory.getLogger(ElasticsearchSinkTask.class);

    private String clusterName;
    private String hosts;
    private Integer bulkSize;
    private String documentName;
    private String indexes;
    private String topics;
    private String dateFormat;
    private String suffixSeparator;

    Client client;

    // Mapping from Kafka topic name to the Elasticsearch index its records are written to.
    public Map<String, String> mapping;

    public ElasticsearchSinkTask() {
    }

    @Override
    public String version() {
        return new ElasticsearchSinkConnector().version();
    }

    /**
     * Start the Task. Handles configuration parsing and one-time setup of the task.
     *
     * @param props initial configuration
     */
    @Override
    public void start(Map<String, String> props) {
        mapping = new HashMap<>(0);
        clusterName = props.get(ElasticsearchSinkConnector.CLUSTER_NAME);
        hosts = props.get(ElasticsearchSinkConnector.HOSTS);
        documentName = props.get(ElasticsearchSinkConnector.DOCUMENT_NAME);
        topics = props.get(ElasticsearchSinkConnector.TOPICS);
        indexes = props.get(ElasticsearchSinkConnector.INDEXES);
        dateFormat = props.get(ElasticsearchSinkConnector.DATE_FORMAT);
        suffixSeparator = props.get(ElasticsearchSinkConnector.SUFFIX_SEPARATOR);

        try {
            bulkSize = Integer.parseInt(props.get(ElasticsearchSinkConnector.BULK_SIZE));
        } catch (Exception e) {
            throw new ConnectException("Setting elasticsearch.bulk.size must be an integer");
        }

        List<String> hostsList = new ArrayList<>(Arrays.asList(hosts.replaceAll(" ", "").split(",")));

        List<String> topicsList = Arrays.asList(topics.replaceAll(" ", "").split(","));
        List<String> indexesList = Arrays.asList(indexes.replaceAll(" ", "").split(","));

        if (topicsList.size() != indexesList.size()) {
            throw new ConnectException("The number of indexes must be the same as the number of topics");
        }

        // Build the topic -> index mapping; positions in the two comma-separated lists must match.
        for (int i = 0; i < topicsList.size(); i++) {
            mapping.put(topicsList.get(i), indexesList.get(i));
        }

        try {
            Settings settings = Settings.settingsBuilder()
                    .put("cluster.name", clusterName).build();

            client = TransportClient.builder().settings(settings).build();

            for (String host : hostsList) {
                String address;
                Integer port;
                String[] hostArray = host.split(":");
                address = hostArray[0];
                try {
                    port = Integer.parseInt(hostArray[1]);
                } catch (Exception e) {
                    // No (or invalid) port specified: fall back to the default transport port.
                    port = 9300;
                }
                ((TransportClient) client).addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(address), port));
            }
        } catch (Exception e) {
            throw new ConnectException("Unable to connect to the Elasticsearch hosts", e);
        }
    }

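    // Write path (see put() below): each batch handed to the task is split into bulk requests of
    // at most 'elasticsearch.bulk.size' records. Every record is indexed with its Kafka offset as
    // the document id, so a re-delivered record normally overwrites the previously written
    // document instead of creating a duplicate (offsets of different partitions of the same topic
    // can collide on the same id, though).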
    /**
     * Put the records in the sink.
     *
     * @param sinkRecords the set of records to send.
     */
    @Override
    public void put(Collection<SinkRecord> sinkRecords) {
        try {
            List<SinkRecord> records = new ArrayList<>(sinkRecords);
            for (int i = 0; i < records.size(); i++) {
                BulkRequestBuilder bulkRequest = client.prepareBulk();
                // Fill one bulk request with up to bulkSize records.
                for (int j = 0; j < bulkSize && i < records.size(); j++, i++) {
                    SinkRecord record = records.get(i);
                    Map<String, Object> jsonMap = toJsonMap((Struct) record.value());
                    String topic = record.topic();
                    StringBuilder index = new StringBuilder()
                            .append(mapping.get(topic));
                    if (dateFormat != null && !dateFormat.isEmpty()) {
                        index
                                .append(suffixSeparator)
                                .append(new SimpleDateFormat(dateFormat).format(new java.util.Date()));
                    }
                    bulkRequest.add(
                            client
                                    .prepareIndex(
                                            index.toString(),
                                            documentName.isEmpty() ? ((Struct) record.value()).schema().name() : documentName,
                                            Long.toString(record.kafkaOffset())
                                    )
                                    .setSource(jsonMap)
                    );
                }
                // The inner loop stops one past the last record it consumed; step back so the
                // outer loop's increment does not skip a record.
                i--;
                BulkResponse bulkResponse = bulkRequest.get();
                if (bulkResponse.hasFailures()) {
                    for (BulkItemResponse item : bulkResponse) {
                        log.error(item.getFailureMessage());
                    }
                }
            }
        } catch (Exception e) {
            // Ask the framework to retry the whole batch later instead of killing the task.
            log.error("Failed to write records to Elasticsearch", e);
            throw new RetriableException("Failed to write records to Elasticsearch", e);
        }
    }

    @Override
    public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
        // Nothing to do: put() executes each bulk request synchronously, so all records handed to
        // this task have already been written by the time flush() is called.
    }

    @Override
    public void stop() {
        // Close the connection to the Elasticsearch cluster.
        if (client != null)
            client.close();
    }

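    // Illustrative example (the field names here are hypothetical, not taken from this project):
    // for a record whose Struct value has fields {id: INT64, name: STRING}, toJsonMap() returns a
    // map like {id=42, name="foo"}; put() then indexes it into
    // "<index-for-topic><suffix.separator><formatted-date>", using the schema name (or
    // 'elasticsearch.document.name', if set) as the document type and the Kafka offset as the id.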
    /**
     * Converts a Connect Struct into a map from field name to value, usable as the source of an
     * Elasticsearch index request. Nested structs are converted recursively.
     */
    private Map<String, Object> toJsonMap(Struct struct) {
        Map<String, Object> jsonMap = new HashMap<>(0);
        List<Field> fields = struct.schema().fields();
        for (Field field : fields) {
            // TODO: Decimal ??
            String fieldName = field.name();
            Schema.Type fieldType = field.schema().type();
            String schemaName = field.schema().name();
            switch (fieldType) {
                case STRING:
                    jsonMap.put(fieldName, struct.getString(fieldName));
                    break;
                case INT32:
                    if (Date.LOGICAL_NAME.equals(schemaName)
                            || Time.LOGICAL_NAME.equals(schemaName)) {
                        // Logical date/time values are surfaced by Connect as java.util.Date.
                        jsonMap.put(fieldName, (java.util.Date) struct.get(fieldName));
                    } else {
                        jsonMap.put(fieldName, struct.getInt32(fieldName));
                    }
                    break;
                case INT16:
                    jsonMap.put(fieldName, struct.getInt16(fieldName));
                    break;
                case INT64:
                    if (Timestamp.LOGICAL_NAME.equals(schemaName)) {
                        jsonMap.put(fieldName, (java.util.Date) struct.get(fieldName));
                    } else {
                        jsonMap.put(fieldName, struct.getInt64(fieldName));
                    }
                    break;
                case FLOAT32:
                    jsonMap.put(fieldName, struct.getFloat32(fieldName));
                    break;
                case STRUCT:
                    jsonMap.put(fieldName, toJsonMap(struct.getStruct(fieldName)));
                    break;
            }
        }
        return jsonMap;
    }
}

--------------------------------------------------------------------------------