├── README.md ├── data ├── dummy_data.kcat └── sample2.json ├── docker-compose.yml ├── load_sample_data.sh ├── run_datagen.sh └── test_notes.adoc /README.md: -------------------------------------------------------------------------------- 1 | See related post here: https://rmoff.net/2019/10/07/kafka-connect-and-elasticsearch/ 2 | -------------------------------------------------------------------------------- /data/dummy_data.kcat: -------------------------------------------------------------------------------- 1 | {"batt":100,"lon":-1.8125752571133549,"acc":65,"p":98.489105224609375,"bs":3,"vac":10,"lat":53.955233261289684,"t":"t","conn":"w","event_ts":1569334836000,"alt":98,"type":"location","tid":"C1"} 2 | {"cog":193,"batt":45,"lon":-78.74988541880019,"acc":16,"p":100.14521789550781,"bs":1,"vel":0,"vac":3,"lat":35.66231724270073,"conn":"w","event_ts":1569330852000,"tid":"RM","type":"location","alt":104} 3 | {"batt":100,"lon":-1.8125821847113142,"acc":65,"bs":2,"p":98.428581237792969,"vac":10,"lat":53.955247763099669,"conn":"w","event_ts":1569334000160,"alt":98,"type":"location","tid":"A4"} 4 | -------------------------------------------------------------------------------- /data/sample2.json: -------------------------------------------------------------------------------- 1 | { "ModelResult": { "CoverType": "ExampleCover", "Version": "v02", "ModelBuild": "1.0.62", "Model": "ExampleModel", "QuoteIdentifier": "BDA2EC9831284A018F5DB383F97EA82E", "ParentOperationId": "|4d5e4efbadf04f41b4d2aba08a8182b3.", "Guid": "BDA2EC9831284A018F5DB383F97EA82E_Example", "NumericResult": "1to3", "Result": "CpH" } } -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | services: 4 | zookeeper: 5 | image: confluentinc/cp-zookeeper:5.4.0-beta1 6 | container_name: zookeeper 7 | environment: 8 | ZOOKEEPER_CLIENT_PORT: 2181 9 | 
ZOOKEEPER_TICK_TIME: 2000 10 | 11 | kafka: 12 | image: confluentinc/cp-enterprise-kafka:5.4.0-beta1 13 | container_name: kafka 14 | depends_on: 15 | - zookeeper 16 | ports: 17 | # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,- 18 | # An important note about accessing Kafka from clients on other machines: 19 | # ----------------------------------------------------------------------- 20 | # 21 | # The config used here exposes port 9092 for _external_ connections to the broker 22 | # i.e. those from _outside_ the docker network. This could be from the host machine 23 | # running docker, or maybe further afield if you've got a more complicated setup. 24 | # If the latter is true, you will need to change the value 'localhost' in 25 | # KAFKA_ADVERTISED_LISTENERS to one that is resolvable to the docker host from those 26 | # remote clients 27 | # 28 | # For connections _internal_ to the docker network, such as from other services 29 | # and components, use kafka:29092. 
30 | # 31 | # See https://rmoff.net/2018/08/02/kafka-listeners-explained/ for details 32 | # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,- 33 | # 34 | - 9092:9092 35 | environment: 36 | KAFKA_BROKER_ID: 1 37 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 38 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 39 | KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT 40 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 41 | KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true" 42 | KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter 43 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 44 | KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100 45 | CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka:29092 46 | CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181 47 | CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 48 | CONFLUENT_METRICS_ENABLE: 'true' 49 | CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' 50 | 51 | schema-registry: 52 | image: confluentinc/cp-schema-registry:5.4.0-beta1 53 | container_name: schema-registry 54 | ports: 55 | - "8081:8081" 56 | depends_on: 57 | - zookeeper 58 | - kafka 59 | environment: 60 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 61 | SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181 62 | 63 | ksql-server: 64 | image: confluentinc/cp-ksql-server:5.4.0-dist-1 65 | container_name: ksql-server 66 | ports: 67 | - 8088:8088 68 | depends_on: 69 | - kafka 70 | environment: 71 | KSQL_BOOTSTRAP_SERVERS: kafka:29092 72 | KSQL_LISTENERS: http://0.0.0.0:8088 73 | KSQL_KSQL_SERVICE_ID: confluent_rmoff_01 74 | KSQL_CUB_KAFKA_TIMEOUT: 300 75 | KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8081 76 | # -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v 77 | # Useful settings for development/laptop use - modify as needed for Prod 78 | KSQL_KSQL_COMMIT_INTERVAL_MS: 2000 79 | # KSQL_KSQL_SINK_PARTITIONS: 1 80 | 
KSQL_KSQL_CACHE_MAX_BYTES_BUFFERING: 10000000 81 | KSQL_KSQL_STREAMS_AUTO_OFFSET_RESET: earliest 82 | 83 | ksql-cli: 84 | image: confluentinc/cp-ksql-cli:5.4.0-beta1 85 | container_name: ksql-cli 86 | depends_on: 87 | - ksql-server 88 | entrypoint: /bin/sh 89 | tty: true 90 | 91 | kafka-connect-540: 92 | image: confluentinc/cp-kafka-connect:5.4.0-beta1 93 | container_name: kafka-connect-540 94 | depends_on: 95 | - zookeeper 96 | - kafka 97 | - schema-registry 98 | ports: 99 | - 8083:8083 100 | environment: 101 | CONNECT_BOOTSTRAP_SERVERS: "kafka:29092" 102 | CONNECT_REST_PORT: 8083 103 | CONNECT_GROUP_ID: kafka-connect-540 104 | CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-540-configs 105 | CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-540-offsets 106 | CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-540-status 107 | CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 108 | CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 109 | CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: "false" 110 | CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false" 111 | CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 112 | CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 113 | CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01" 114 | CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO" 115 | CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR" 116 | CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n" 117 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" 118 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" 119 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" 120 | CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components 121 | 122 | # kafka-connect-531: 123 | # image: confluentinc/cp-kafka-connect:5.3.1 124 | # container_name: kafka-connect-531 125 | # depends_on: 126 | # - zookeeper 127 | # - kafka 128 | # - schema-registry 
129 | # ports: 130 | # - 18083:8083 131 | # environment: 132 | # CONNECT_BOOTSTRAP_SERVERS: "kafka:29092" 133 | # CONNECT_REST_PORT: 8083 134 | # CONNECT_GROUP_ID: kafka-connect-531 135 | # CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-531-configs 136 | # CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-531-offsets 137 | # CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-531-status 138 | # CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 139 | # CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 140 | # CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: "false" 141 | # CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false" 142 | # CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 143 | # CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 144 | # CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01" 145 | # CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO" 146 | # CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR" 147 | # CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n" 148 | # CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" 149 | # CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" 150 | # CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" 151 | # CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components 152 | 153 | kafka-connect-530: 154 | image: confluentinc/cp-kafka-connect:5.3.0 155 | container_name: kafka-connect-530 156 | depends_on: 157 | - zookeeper 158 | - kafka 159 | - schema-registry 160 | ports: 161 | - 28083:8083 162 | environment: 163 | CONNECT_BOOTSTRAP_SERVERS: "kafka:29092" 164 | CONNECT_REST_PORT: 8083 165 | CONNECT_GROUP_ID: kafka-connect-530 166 | CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-530-configs 167 | CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-530-offsets 168 | CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-530-status 169 | CONNECT_KEY_CONVERTER: 
"org.apache.kafka.connect.json.JsonConverter" 170 | CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 171 | CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: "false" 172 | CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false" 173 | CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 174 | CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 175 | CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01" 176 | CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO" 177 | CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR" 178 | CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n" 179 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" 180 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" 181 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" 182 | CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components 183 | 184 | # kafka-connect-522: 185 | # image: confluentinc/cp-kafka-connect:5.2.2 186 | # container_name: kafka-connect-522 187 | # depends_on: 188 | # - zookeeper 189 | # - kafka 190 | # - schema-registry 191 | # ports: 192 | # - 38083:8083 193 | # environment: 194 | # CONNECT_BOOTSTRAP_SERVERS: "kafka:29092" 195 | # CONNECT_REST_PORT: 8083 196 | # CONNECT_GROUP_ID: kafka-connect-522 197 | # CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-522-configs 198 | # CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-522-offsets 199 | # CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-522-status 200 | # CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 201 | # CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 202 | # CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: "false" 203 | # CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false" 204 | # CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 205 | # CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 206 | # 
CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect-01" 207 | # CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO" 208 | # CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR" 209 | # CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n" 210 | # CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" 211 | # CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" 212 | # CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" 213 | # CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components 214 | 215 | # elasticsearch5: 216 | # image: docker.elastic.co/elasticsearch/elasticsearch:5.6.16 217 | # container_name: elasticsearch5 218 | # ports: 219 | # - 9200:9200 220 | # environment: 221 | # xpack.security.enabled: "false" 222 | # ES_JAVA_OPTS: "-Xms1g -Xmx1g" 223 | # discovery.type: "single-node" 224 | # # volumes: 225 | # # - ./data/container_data/elasticserarch:/usr/share/elasticsearch/data 226 | 227 | elasticsearch6: 228 | image: docker.elastic.co/elasticsearch/elasticsearch:6.8.3 229 | container_name: elasticsearch6 230 | ports: 231 | - 19200:9200 232 | environment: 233 | xpack.security.enabled: "false" 234 | ES_JAVA_OPTS: "-Xms1g -Xmx1g" 235 | discovery.type: "single-node" 236 | # volumes: 237 | # - ./data/container_data/elasticserarch:/usr/share/elasticsearch/data 238 | 239 | elasticsearch7: 240 | image: docker.elastic.co/elasticsearch/elasticsearch:7.4.0 241 | container_name: elasticsearch7 242 | ports: 243 | - 29200:9200 244 | environment: 245 | xpack.security.enabled: "false" 246 | ES_JAVA_OPTS: "-Xms1g -Xmx1g" 247 | discovery.type: "single-node" 248 | # volumes: 249 | # - ./data/container_data/elasticserarch:/usr/share/elasticsearch/data 250 | -------------------------------------------------------------------------------- /load_sample_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kafkacat -b localhost:9092 -t sample_topic -P -T -l 
./data/dummy_data.kcat 4 | 5 | kafkacat -b localhost:9092 -t sample_topic2 -P -T -l ./data/sample2.json 6 | -------------------------------------------------------------------------------- /run_datagen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while [ 1 -eq 1 ] 4 | do awk '{print $0;system("sleep 0.75");}' ./data/dummy_data.kcat | \ 5 | kafkacat -b localhost:9092 -P -t sample_topic 6 | done; -------------------------------------------------------------------------------- /test_notes.adoc: -------------------------------------------------------------------------------- 1 | = Testing Kafka and Elasticsearch 2 | Robin Moffatt 3 | v0.01, 9 October 2019 4 | 5 | :toc: 6 | 7 | Environment was rebuilt after each top-level section. 8 | 9 | == No type name, no template 10 | 11 | === Elasticsearch 7 12 | 13 | [source,bash] 14 | ---- 15 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 16 | http://localhost:8083/connectors/sink-5.4.0-es7-notypename/config \ 17 | -d '{ 18 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 19 | "connection.url": "http://elasticsearch7:9200", 20 | "type.name": "", 21 | "topics": "sample_topic", 22 | "key.ignore": "true", 23 | "schema.ignore": "true", 24 | "transforms.renameTopic.regex": "(.*)", 25 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-notypename", 26 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 27 | "transforms": "renameTopic" 28 | }' 29 | ---- 30 | 31 | === Elasticsearch <= 6 32 | 33 | [source,bash] 34 | ---- 35 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 36 | http://localhost:8083/connectors/sink-5.4.0-es6-notypename/config \ 37 | -d '{ 38 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 39 | "connection.url": "http://elasticsearch6:9200", 40 | "type.name": "", 41 | "topics": 
"sample_topic", 42 | "key.ignore": "true", 43 | "schema.ignore": "true", 44 | "transforms.renameTopic.regex": "(.*)", 45 | "transforms.renameTopic.replacement": "$1-5.4.0-es6-notypename", 46 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 47 | "transforms": "renameTopic" 48 | }' 49 | ---- 50 | 51 | Fails : `"type":"action_request_validation_exception","reason":"Validation Failed: 1: type is missing` 52 | 53 | == Type name given, no template 54 | 55 | === Elasticsearch 7 56 | 57 | [source,bash] 58 | ---- 59 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 60 | http://localhost:8083/connectors/sink-5.4.0-es7-typenamegiven/config \ 61 | -d '{ 62 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 63 | "connection.url": "http://elasticsearch7:9200", 64 | "type.name": "foobarwibble", 65 | "topics": "sample_topic", 66 | "key.ignore": "true", 67 | "schema.ignore": "true", 68 | "transforms.renameTopic.regex": "(.*)", 69 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-typenamegiven", 70 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 71 | "transforms": "renameTopic" 72 | }' 73 | ---- 74 | 75 | === Elasticsearch 6 76 | 77 | [source,bash] 78 | ---- 79 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 80 | http://localhost:8083/connectors/sink-5.4.0-es6-typenamegiven/config \ 81 | -d '{ 82 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 83 | "connection.url": "http://elasticsearch6:9200", 84 | "type.name": "foobarwibble", 85 | "topics": "sample_topic", 86 | "key.ignore": "true", 87 | "schema.ignore": "true", 88 | "transforms.renameTopic.regex": "(.*)", 89 | "transforms.renameTopic.replacement": "$1-5.4.0-es6-typenamegiven", 90 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 91 | "transforms": "renameTopic" 92 | }' 93 | 
---- 94 | 95 | 96 | Templates to define mappings: 97 | 98 | [source,bash] 99 | ---- 100 | docker exec elasticsearch7 curl -s -XPUT "http://localhost:9200/_template/kafkaconnect/" -H 'Content-Type: application/json' -d' 101 | { 102 | "template": "*", 103 | "settings": { "number_of_shards": 1, "number_of_replicas": 0 }, 104 | "mappings": { "dynamic_templates": [ { "dates": { "match": "*_ts", "mapping": { "type": "date" } } } ] } 105 | }' 106 | ---- 107 | 108 | [source,bash] 109 | ---- 110 | docker exec elasticsearch6 curl -s -XPUT "http://localhost:9200/_template/kafkaconnect/" -H 'Content-Type: application/json' -d' 111 | { 112 | "template": "*", 113 | "settings": { "number_of_shards": 1, "number_of_replicas": 0 }, 114 | "mappings": { "_default_": { "dynamic_templates": [ { "dates": { "match": "*_ts", "mapping": { "type": "date" } } } ] } } 115 | }' 116 | ---- 117 | 118 | 119 | == No type name, template exists 120 | 121 | === Elasticsearch 7 122 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 123 | http://localhost:8083/connectors/sink-5.4.0-es7-notypename-templateexists/config \ 124 | -d '{ 125 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 126 | "connection.url": "http://elasticsearch7:9200", 127 | "type.name": "", 128 | "topics": "sample_topic", 129 | "key.ignore": "true", 130 | "schema.ignore": "true", 131 | "transforms.renameTopic.regex": "(.*)", 132 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-notypename-templateexists", 133 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 134 | "transforms": "renameTopic" 135 | }' 136 | 137 | ✅Works. 138 | 139 | === Elasticsearch <= 6 140 | 141 | N/A because `type.name` is mandatory with Elasticsearch <= 6. 
142 | 143 | == Type name given, template exists 144 | 145 | === Elasticsearch 7 146 | 147 | [source,bash] 148 | ---- 149 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 150 | http://localhost:8083/connectors/sink-5.4.0-es7-typenamegiven-templateexists/config \ 151 | -d '{ 152 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 153 | "connection.url": "http://elasticsearch7:9200", 154 | "type.name": "foobarwibble", 155 | "topics": "sample_topic", 156 | "key.ignore": "true", 157 | "schema.ignore": "true", 158 | "transforms.renameTopic.regex": "(.*)", 159 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-typenamegiven-templateexists", 160 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 161 | "transforms": "renameTopic" 162 | }' 163 | ---- 164 | 165 | ❌Fails: 166 | 167 | ``` 168 | kafka-connect-540 | [2019-10-09 10:57:05,746] ERROR [sink-5.4.0-es7-typenamegiven-templateexists|task-0] Encountered an illegal document error when executing batch 3 of 1 records. Error was [{"type":"illegal_argument_exception","reason":"Rejecting mapping update to [sample_topic-5.4.0-es7-typenamegiven-templateexists] as the final mapping would have more than 1 type: [_doc, foobarwibble]"}] (to ignore future records like this change the configuration property 'behavior.on.malformed.documents' from 'fail' to 'ignore'). 
(io.confluent.connect.elasticsearch.bulk.BulkProcessor:421) 169 | ``` 170 | 171 | Solution: use blank `type.name`, or use `"type.name": "_doc"` 172 | 173 | [source,bash] 174 | ---- 175 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 176 | http://localhost:8083/connectors/sink-5.4.0-es7-typenamegiven-templateexists/config \ 177 | -d '{ 178 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 179 | "connection.url": "http://elasticsearch7:9200", 180 | "type.name": "_doc", 181 | "topics": "sample_topic", 182 | "key.ignore": "true", 183 | "schema.ignore": "true", 184 | "transforms.renameTopic.regex": "(.*)", 185 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-typenamegiven-templateexists", 186 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 187 | "transforms": "renameTopic" 188 | }' 189 | ---- 190 | 191 | ✅Works. 192 | 193 | === Elasticsearch <= 6 194 | 195 | [source,bash] 196 | ---- 197 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 198 | http://localhost:8083/connectors/sink-5.4.0-es6-typenamegiven-templateexists/config \ 199 | -d '{ 200 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 201 | "connection.url": "http://elasticsearch6:9200", 202 | "type.name": "foobarwibble", 203 | "topics": "sample_topic", 204 | "key.ignore": "true", 205 | "schema.ignore": "true", 206 | "transforms.renameTopic.regex": "(.*)", 207 | "transforms.renameTopic.replacement": "$1-5.4.0-es6-typenamegiven-templateexists", 208 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 209 | "transforms": "renameTopic" 210 | }' 211 | ---- 212 | 213 | ✅Works. 214 | 215 | == Avro and schemas 216 | 217 | Templates to define mappings (these vary from the earlier ones because the field names are uppercase). 
218 | 219 | [source,bash] 220 | ---- 221 | docker exec elasticsearch7 curl -s -XPUT "http://localhost:9200/_template/kafkaconnect/" -H 'Content-Type: application/json' -d' 222 | { 223 | "template": "*", 224 | "settings": { "number_of_shards": 1, "number_of_replicas": 0 }, 225 | "mappings": { "dynamic_templates": [ { "dates": { "match": "*_TS", "mapping": { "type": "date" } } } ] } 226 | }' 227 | ---- 228 | 229 | [source,bash] 230 | ---- 231 | docker exec elasticsearch6 curl -s -XPUT "http://localhost:9200/_template/kafkaconnect/" -H 'Content-Type: application/json' -d' 232 | { 233 | "template": "*", 234 | "settings": { "number_of_shards": 1, "number_of_replicas": 0 }, 235 | "mappings": { "_default_": { "dynamic_templates": [ { "dates": { "match": "*_TS", "mapping": { "type": "date" } } } ] } } 236 | }' 237 | ---- 238 | 239 | Re-serialise JSON to Avro using KSQL: 240 | 241 | [source,sql] 242 | ---- 243 | CREATE STREAM SAMPLE_JSON 244 | (TID VARCHAR, BATT INTEGER, LON DOUBLE, LAT DOUBLE, 245 | TST BIGINT, ALT INTEGER, COG INTEGER, VEL INTEGER, 246 | P DOUBLE, BS INTEGER, CONN VARCHAR, ACC INTEGER, 247 | T VARCHAR, VAC INTEGER, INREGIONS VARCHAR, TYPE VARCHAR, EVENT_TS BIGINT) 248 | WITH (KAFKA_TOPIC = 'sample_topic', VALUE_FORMAT='JSON'); 249 | 250 | CREATE STREAM SAMPLE_AVRO WITH (VALUE_FORMAT='AVRO') AS SELECT * FROM SAMPLE_JSON; 251 | ---- 252 | 253 | === Avro - schema.ignore=true - Elasticsearch 7 254 | 255 | [source,bash] 256 | ---- 257 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 258 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema/config \ 259 | -d '{ 260 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 261 | "connection.url": "http://elasticsearch7:9200", 262 | "type.name": "", 263 | "topics": "SAMPLE_AVRO", 264 | "key.ignore": "true", 265 | "schema.ignore": "true", 266 | "value.converter":"io.confluent.connect.avro.AvroConverter", 267 | 
"value.converter.schema.registry.url":"http://schema-registry:8081", 268 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 269 | "transforms.renameTopic.regex": "(.*)", 270 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema", 271 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 272 | "transforms": "renameTopic" 273 | }' 274 | ---- 275 | 276 | ✅Works. 277 | 278 | === Avro - schema.ignore=false - Elasticsearch 7 279 | 280 | [source,bash] 281 | ---- 282 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 283 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false/config \ 284 | -d '{ 285 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 286 | "connection.url": "http://elasticsearch7:9200", 287 | "type.name": "", 288 | "topics": "SAMPLE_AVRO", 289 | "key.ignore": "true", 290 | "schema.ignore": "false", 291 | "value.converter":"io.confluent.connect.avro.AvroConverter", 292 | "value.converter.schema.registry.url":"http://schema-registry:8081", 293 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 294 | "transforms.renameTopic.regex": "(.*)", 295 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false", 296 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 297 | "transforms": "renameTopic" 298 | }' 299 | ---- 300 | 301 | ❌Fails : `"type":"action_request_validation_exception","reason":"Validation Failed: 1: mapping type is missing;` 302 | 303 | Specify type name as `_doc`: 304 | 305 | [source,bash] 306 | ---- 307 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 308 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false-withtypename/config \ 309 | -d '{ 310 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 311 | "connection.url": 
"http://elasticsearch7:9200", 312 | "type.name": "_doc", 313 | "topics": "SAMPLE_AVRO", 314 | "key.ignore": "true", 315 | "schema.ignore": "false", 316 | "value.converter":"io.confluent.connect.avro.AvroConverter", 317 | "value.converter.schema.registry.url":"http://schema-registry:8081", 318 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 319 | "transforms.renameTopic.regex": "(.*)", 320 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false-withtypename", 321 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 322 | "transforms": "renameTopic" 323 | }' 324 | ---- 325 | 326 | ✅Works. 327 | 328 | Specify type name as something other than `_doc`: 329 | 330 | [source,bash] 331 | ---- 332 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 333 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false-withtypename2/config \ 334 | -d '{ 335 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 336 | "connection.url": "http://elasticsearch7:9200", 337 | "type.name": "foobarwibble", 338 | "topics": "SAMPLE_AVRO", 339 | "key.ignore": "true", 340 | "schema.ignore": "false", 341 | "value.converter":"io.confluent.connect.avro.AvroConverter", 342 | "value.converter.schema.registry.url":"http://schema-registry:8081", 343 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 344 | "transforms.renameTopic.regex": "(.*)", 345 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false-withtypename2", 346 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 347 | "transforms": "renameTopic" 348 | }' 349 | ---- 350 | 351 | ❌Doesn't work, because `Rejecting mapping update to [sample_avro-5.4.0-es7-avro-ignore-schema-false-withtypename2] as the final mapping would have more than 1 type: [_doc, foobarwibble]` 352 | 353 | Remove the template: 354 | 355 | 
[source,bash] 356 | ---- 357 | docker exec -it elasticsearch7 curl -XDELETE "http://localhost:9200/_template/kafkaconnect" 358 | ---- 359 | 360 | Retry with `_doc` type.name, no template: 361 | 362 | [source,bash] 363 | ---- 364 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 365 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false-withtypename-no-template/config \ 366 | -d '{ 367 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 368 | "connection.url": "http://elasticsearch7:9200", 369 | "type.name": "_doc", 370 | "topics": "SAMPLE_AVRO", 371 | "key.ignore": "true", 372 | "schema.ignore": "false", 373 | "value.converter":"io.confluent.connect.avro.AvroConverter", 374 | "value.converter.schema.registry.url":"http://schema-registry:8081", 375 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 376 | "transforms.renameTopic.regex": "(.*)", 377 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false-withtypename-no-template", 378 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 379 | "transforms": "renameTopic" 380 | }' 381 | ---- 382 | 383 | ✅Works. 
384 | 385 | Retry with non-`_doc` type.name, no template: 386 | 387 | [source,bash] 388 | ---- 389 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 390 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false-withrandomtypename-no-template/config \ 391 | -d '{ 392 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 393 | "connection.url": "http://elasticsearch7:9200", 394 | "type.name": "foobarwibble", 395 | "topics": "SAMPLE_AVRO", 396 | "key.ignore": "true", 397 | "schema.ignore": "false", 398 | "value.converter":"io.confluent.connect.avro.AvroConverter", 399 | "value.converter.schema.registry.url":"http://schema-registry:8081", 400 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 401 | "transforms.renameTopic.regex": "(.*)", 402 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false-withrandomtypename-no-template", 403 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 404 | "transforms": "renameTopic" 405 | }' 406 | ---- 407 | 408 | ✅Works. 
409 | 410 | == Wrong deserialisers 411 | 412 | === Reading Avro with JSON converter 413 | 414 | [source,bash] 415 | ---- 416 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 417 | http://localhost:8083/connectors/sink-5.4.0-es7-01/config \ 418 | -d '{ 419 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 420 | "connection.url": "http://elasticsearch7:9200", 421 | "type.name": "foobarwibble", 422 | "topics": "SAMPLE_AVRO", 423 | "key.ignore": "true", 424 | "schema.ignore": "false", 425 | "value.converter":"org.apache.kafka.connect.json.JsonConverter", 426 | "key.converter":"org.apache.kafka.connect.json.JsonConverter", 427 | "transforms.renameTopic.regex": "(.*)", 428 | "transforms.renameTopic.replacement": "$1-sink-5.4.0-es7-01", 429 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 430 | "transforms": "renameTopic" 431 | }' 432 | ---- 433 | 434 | === Reading JSON with Avro converter 435 | 436 | [source,bash] 437 | ---- 438 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 439 | http://localhost:8083/connectors/sink-5.4.0-es7-01/config \ 440 | -d '{ 441 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 442 | "connection.url": "http://elasticsearch7:9200", 443 | "type.name": "foobarwibble", 444 | "topics": "sample_topic", 445 | "key.ignore": "true", 446 | "schema.ignore": "false", 447 | "value.converter":"io.confluent.connect.avro.AvroConverter", 448 | "value.converter.schema.registry.url":"http://schema-registry:8081", 449 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 450 | "transforms.renameTopic.regex": "(.*)", 451 | "transforms.renameTopic.replacement": "$1-sink-5.4.0-es7-01", 452 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 453 | "transforms": "renameTopic" 454 | }' 455 | ---- 456 | 457 | === Reading String with Avro 
converter 458 | 459 | [source,bash] 460 | ---- 461 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 462 | http://localhost:8083/connectors/sink-5.4.0-es7-avro-ignore-schema-false-withrandomtypename-no-template/config \ 463 | -d '{ 464 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 465 | "connection.url": "http://elasticsearch7:9200", 466 | "type.name": "foobarwibble", 467 | "topics": "SAMPLE_AVRO", 468 | "key.ignore": "true", 469 | "schema.ignore": "false", 470 | "value.converter":"io.confluent.connect.avro.AvroConverter", 471 | "value.converter.schema.registry.url":"http://schema-registry:8081", 472 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 473 | "transforms.renameTopic.regex": "(.*)", 474 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-avro-ignore-schema-false-withrandomtypename-no-template", 475 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 476 | "transforms": "renameTopic" 477 | }' 478 | ---- 479 | 480 | == Reading JSON data 481 | 482 | [source,bash] 483 | ---- 484 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 485 | http://localhost:8083/connectors/sink-5.4.0-es7-sample-schemasignore-true/config \ 486 | -d '{ 487 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 488 | "connection.url": "http://elasticsearch7:9200", 489 | "type.name": "", 490 | "topics": "sample_topic2", 491 | "key.ignore": "true", 492 | "schema.ignore": "true", 493 | "transforms.renameTopic.regex": "(.*)", 494 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-sample-schemasignore-true", 495 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 496 | "transforms": "renameTopic" 497 | }' 498 | ---- 499 | 500 | 501 | [source,bash] 502 | ---- 503 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 504 | 
http://localhost:8083/connectors/sink-5.4.0-es7-sample-schemasignore-false-schemasenable-false/config \ 505 | -d '{ 506 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 507 | "connection.url": "http://elasticsearch7:9200", 508 | "type.name": "", 509 | "topics": "sample_topic2", 510 | "key.ignore": "true", 511 | "schema.ignore": "false", 512 | "transforms.renameTopic.regex": "(.*)", 513 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-sample-schemasignore-false-schemasenable-false", 514 | "value.converter":"org.apache.kafka.connect.json.JsonConverter", 515 | "value.converter.schemas.enable":"false", 516 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 517 | "transforms": "renameTopic" 518 | }' 519 | ---- 520 | 521 | 522 | [source,bash] 523 | ---- 524 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 525 | http://localhost:8083/connectors/sink-5.4.0-es7-sample-schemasignore-false-schemasenable-true/config \ 526 | -d '{ 527 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 528 | "connection.url": "http://elasticsearch7:9200", 529 | "type.name": "", 530 | "topics": "sample_topic2", 531 | "key.ignore": "true", 532 | "schema.ignore": "false", 533 | "transforms.renameTopic.regex": "(.*)", 534 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-sample-schemasignore-false-schemasenable-true", 535 | "value.converter":"org.apache.kafka.connect.json.JsonConverter", 536 | "value.converter.schemas.enable":"true", 537 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 538 | "transforms": "renameTopic" 539 | }' 540 | ---- 541 | 542 | == Using a StringConverter when you shouldn't 543 | 544 | [source,bash] 545 | ---- 546 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 547 | http://localhost:8083/connectors/sink-5.4.0-es7-string-schemaignore-true/config \ 548 | -d 
'{ 549 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 550 | "connection.url": "http://elasticsearch7:9200", 551 | "type.name": "", 552 | "topics": "sample_topic2", 553 | "key.ignore": "true", 554 | "schema.ignore": "true", 555 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 556 | "value.converter":"org.apache.kafka.connect.storage.StringConverter", 557 | "transforms.renameTopic.regex": "(.*)", 558 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-string-schemaignore-true", 559 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 560 | "transforms": "renameTopic" 561 | }' 562 | ---- 563 | 564 | 565 | [source,bash] 566 | ---- 567 | docker exec kafka-connect-540 curl -s -i -X PUT -H "Content-Type:application/json" \ 568 | http://localhost:8083/connectors/sink-5.4.0-es7-string-schemaignore-false/config \ 569 | -d '{ 570 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 571 | "connection.url": "http://elasticsearch7:9200", 572 | "type.name": "", 573 | "topics": "sample_topic2", 574 | "key.ignore": "true", 575 | "schema.ignore": "false", 576 | "key.converter":"org.apache.kafka.connect.storage.StringConverter", 577 | "value.converter":"org.apache.kafka.connect.storage.StringConverter", 578 | "transforms.renameTopic.regex": "(.*)", 579 | "transforms.renameTopic.replacement": "$1-5.4.0-es7-string-schemaignore-false", 580 | "transforms.renameTopic.type": "org.apache.kafka.connect.transforms.RegexRouter", 581 | "transforms": "renameTopic" 582 | }' 583 | ---- 584 | 585 | --------------------------------------------------------------------------------