├── .gitignore ├── Jenkinsfile ├── LICENSE ├── README.md ├── bin └── debug.sh ├── config ├── AvroExample.properties ├── CSVExample.json ├── CSVExample.properties ├── CSVSchemaGenerator.properties ├── ELFTesting.properties ├── JsonExample.properties └── connect-avro-docker.properties ├── docker-compose.yml ├── pom.xml └── src ├── main └── java │ └── com │ └── github │ └── jcustenborder │ └── kafka │ └── connect │ └── spooldir │ ├── AbstractCleanUpPolicy.java │ ├── AbstractSchemaGenerator.java │ ├── AbstractSourceConnector.java │ ├── AbstractSourceConnectorConfig.java │ ├── AbstractSourceTask.java │ ├── AbstractSpoolDirSourceConnector.java │ ├── AbstractSpoolDirSourceConnectorConfig.java │ ├── AbstractSpoolDirSourceTask.java │ ├── AbstractTaskPartitionerPredicate.java │ ├── CsvSchemaGenerator.java │ ├── FileComparator.java │ ├── InputFile.java │ ├── InputFileDequeue.java │ ├── JsonSchemaGenerator.java │ ├── Metadata.java │ ├── SpoolDirAvroSourceConnector.java │ ├── SpoolDirAvroSourceConnectorConfig.java │ ├── SpoolDirAvroSourceTask.java │ ├── SpoolDirBinaryFileSourceConnector.java │ ├── SpoolDirBinaryFileSourceConnectorConfig.java │ ├── SpoolDirBinaryFileSourceTask.java │ ├── SpoolDirCsvSourceConnector.java │ ├── SpoolDirCsvSourceConnectorConfig.java │ ├── SpoolDirCsvSourceTask.java │ ├── SpoolDirJsonSourceConnector.java │ ├── SpoolDirJsonSourceConnectorConfig.java │ ├── SpoolDirJsonSourceTask.java │ ├── SpoolDirLineDelimitedSourceConnector.java │ ├── SpoolDirLineDelimitedSourceConnectorConfig.java │ ├── SpoolDirLineDelimitedSourceTask.java │ ├── SpoolDirSchemaLessJsonSourceConnector.java │ ├── SpoolDirSchemaLessJsonSourceConnectorConfig.java │ ├── SpoolDirSchemaLessJsonSourceTask.java │ ├── elf │ ├── SchemaConversion.java │ ├── SchemaConversionBuilder.java │ ├── SpoolDirELFSourceConnector.java │ ├── SpoolDirELFSourceConnectorConfig.java │ ├── SpoolDirELFSourceTask.java │ └── converters │ │ ├── LocalDateLogFieldConverter.java │ │ ├── LocalTimeLogFieldConverter.java │ │ ├── LogFieldConverter.java │ │ ├── LogFieldConverterFactory.java │ │ ├── PrimitiveLogFieldConverter.java │ │ └── TimestampLogFieldConverter.java │ └── package-info.java └── test ├── java └── com │ └── github │ └── jcustenborder │ └── kafka │ └── connect │ └── spooldir │ ├── AbstractCleanUpPolicyTest.java │ ├── AbstractSchemaGeneratorTest.java │ ├── AbstractSpoolDirSourceConnectorTest.java │ ├── AbstractSpoolDirSourceTaskTest.java │ ├── ByNameAbstractTaskPartitionerPredicateTest.java │ ├── CsvSchemaGeneratorTest.java │ ├── DeleteCleanupPolicySubDirsNoRetainTest.java │ ├── DeleteCleanupPolicySubDirsRetainTest.java │ ├── DeleteCleanupPolicyTest.java │ ├── DocumentationTest.java │ ├── FileComparatorTest.java │ ├── JsonSchemaGeneratorTest.java │ ├── MinimumFileAgePredicateTest.java │ ├── MoveByDateCleanupPolicySubDirsNoRetainTest.java │ ├── MoveByDateCleanupPolicySubDirsRetainTest.java │ ├── MoveByDateCleanupPolicyTest.java │ ├── MoveCleanupPolicySubDirsNoRetainTest.java │ ├── MoveCleanupPolicySubDirsRetainTest.java │ ├── MoveCleanupPolicyTest.java │ ├── NamedTest.java │ ├── NoneCleanupPolicyTest.java │ ├── ProcessingFileExistsPredicateTest.java │ ├── SpoolDirAvroSourceTaskTest.java │ ├── SpoolDirBinaryFileSourceTaskTest.java │ ├── SpoolDirCsvSourceConnectorConfigTest.java │ ├── SpoolDirCsvSourceConnectorTest.java │ ├── SpoolDirCsvSourceTaskSubDirsNoRetainTest.java │ ├── SpoolDirCsvSourceTaskSubDirsRetainTest.java │ ├── SpoolDirCsvSourceTaskTest.java │ ├── SpoolDirJsonSourceConnectorTest.java │ ├── SpoolDirJsonSourceTaskTest.java │ 
├── SpoolDirLineDelimitedSourceTaskTest.java │ ├── SpoolDirSchemaLessJsonSourceTaskTest.java │ ├── TestCase.java │ ├── TestDataUtils.java │ └── elf │ ├── SchemaConversionBuilderTest.java │ └── SpoolDirELFSourceTaskTest.java └── resources ├── com └── github │ └── jcustenborder │ └── kafka │ └── connect │ └── spooldir │ ├── SpoolBinaryFileSourceConnector │ ├── binary.json │ └── fromXML.json │ ├── SpoolDirBinaryFileSourceConnector │ ├── binary.json │ └── fromXML.json │ ├── SpoolDirCsvSourceConnector │ ├── schema.json │ ├── schemaheaders.json │ └── tsv.json │ ├── SpoolDirJsonSourceConnector │ └── test.json │ ├── SpoolDirLineDelimitedSourceConnector │ └── fix.json │ ├── avro │ ├── FieldsMatch.data │ └── FieldsMatch.json │ ├── binary │ ├── DataHasMoreFields.data │ └── DataHasMoreFields.json │ ├── csv │ ├── BlankLines.data │ ├── BlankLines.json │ ├── DataHasMoreFields.data │ ├── DataHasMoreFields.json │ ├── FieldsMatch.data │ ├── FieldsMatch.json │ ├── FileModeFieldFieldsMatch.data │ ├── FileModeFieldFieldsMatch.json │ ├── SchemaHasMoreFields.data │ ├── SchemaHasMoreFields.json │ ├── SourceOffset.data │ ├── SourceOffset.json │ ├── WithHeaderSkipLines.data │ ├── WithHeaderSkipLines.json │ ├── WithoutHeader.data │ └── WithoutHeader.json │ ├── elf │ ├── SpoolDirELFSourceConnector │ │ └── example.json │ └── elf │ │ ├── FieldsMatch.data │ │ └── FieldsMatch.json │ ├── json │ ├── DataHasMoreFields.data │ ├── DataHasMoreFields.json │ ├── FieldsMatch.data │ ├── FieldsMatch.json │ ├── FileModeFieldFieldsMatch.data │ ├── FileModeFieldFieldsMatch.json │ ├── SchemaHasMoreFields.data │ ├── SchemaHasMoreFields.json │ ├── SourceOffset.data │ └── SourceOffset.json │ └── schemalessjson │ ├── DataHasMoreFields.data │ └── DataHasMoreFields.json └── logback.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | *.iml 3 | .okhttpcache 4 | ELFTesting.properties 5 | .checkstyle 6 | .factorypath 7 | .idea/ 8 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy 2 | @Library('jenkins-pipeline') import com.github.jcustenborder.jenkins.pipeline.KafkaConnectPipeline 3 | 4 | def pipe = new KafkaConnectPipeline() 5 | pipe.execute() -------------------------------------------------------------------------------- /bin/debug.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | : ${INPUT_PATH:='/tmp/spooldir/input'} 19 | : ${ERROR_PATH:='/tmp/spooldir/error'} 20 | : ${FINISHED_PATH:='/tmp/spooldir/finished'} 21 | : ${DEBUG_SUSPEND_FLAG:='y'} 22 | export KAFKA_DEBUG='n' 23 | export DEBUG_SUSPEND_FLAG='n' 24 | # export KAFKA_OPTS='-agentpath:/Applications/YourKit-Java-Profiler-2017.02.app/Contents/Resources/bin/mac/libyjpagent.jnilib=disablestacktelemetry,exceptions=disable,delay=10000' 25 | set -e 26 | 27 | # mvn clean package 28 | 29 | if [ ! -d "${INPUT_PATH}" ]; then 30 | mkdir -p "${INPUT_PATH}" 31 | fi 32 | 33 | if [ ! -d "${ERROR_PATH}" ]; then 34 | mkdir -p "${ERROR_PATH}" 35 | fi 36 | 37 | if [ ! -d "${FINISHED_PATH}" ]; then 38 | mkdir -p "${FINISHED_PATH}" 39 | fi 40 | 41 | cp /Users/jeremy/Downloads/csv-spooldir-source.csv "${INPUT_PATH}/csv-spooldir-source.csv" 42 | # cp src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data "${INPUT_PATH}/FieldsMatch.csv" 43 | # cp src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data "${INPUT_PATH}/FieldsMatch.json" 44 | # connect-standalone config/connect-avro-docker.properties config/CSVSchemaGenerator.properties 45 | # connect-standalone config/connect-avro-docker.properties config/JsonExample.properties 46 | # connect-standalone config/connect-avro-docker.properties config/AvroExample.properties 47 | 48 | export DOCKER_IMAGE="confluentinc/cp-kafka-connect:5.5.2-1-ubi8" 49 | 50 | docker run --rm --network=kafka-connect-spooldir_default \ 51 | -p "5005:5005" \ 52 | -v "/tmp/spooldir:/tmp/spooldir" \ 53 | -v "$(pwd)/config:/config" \ 54 | -v "$(pwd)/target/kafka-connect-target/usr/share/kafka-connect:/plugins" \ 55 | "${DOCKER_IMAGE}" /bin/connect-standalone /config/connect-avro-docker.properties /config/CSVSchemaGenerator.properties 56 | 57 | -------------------------------------------------------------------------------- /config/AvroExample.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | name=AvroSpoolDir 18 | tasks.max=1 19 | connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirAvroSourceConnector 20 | input.file.pattern=^.*\.avro$ 21 | 22 | halt.on.error=false 23 | topic=testing 24 | 25 | input.path=/Users/jeremy/data/stackoverflow 26 | finished.path=/tmp/spooldir/finished 27 | error.path=/tmp/spooldir/error 28 | batch.size = 5000 29 | cleanup.policy = NONE 30 | -------------------------------------------------------------------------------- /config/CSVExample.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CsvSpoolDir", 3 | "config": { 4 | "tasks.max": "1", 5 | "connector.class": "com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector", 6 | "input.file.pattern": "^.*\\.csv$", 7 | "halt.on.error": "false", 8 | "topic": "testing", 9 | "csv.first.row.as.header": "true", 10 | "csv.null.field.indicator": "EMPTY_SEPARATORS", 11 | "input.path": "/tmp/spooldir/input", 12 | "finished.path": "/tmp/spooldir/finished", 13 | "error.path": "/tmp/spooldir/error", 14 | "key.schema": "{\"name\":\"com.example.users.UserKey\",\"type\":\"STRUCT\",\"isOptional\":false,\"fieldSchemas\":{\"id\":{\"type\":\"INT64\",\"isOptional\":false}}}", 15 | "value.schema": "{\"name\":\"com.example.users.User\",\"type\":\"STRUCT\",\"isOptional\":false,\"fieldSchemas\":{\"id\":{\"type\":\"INT64\",\"isOptional\":false},\"first_name\":{\"type\":\"STRING\",\"isOptional\":true},\"last_name\":{\"type\":\"STRING\",\"isOptional\":true},\"email\":{\"type\":\"STRING\",\"isOptional\":true},\"gender\":{\"type\":\"STRING\",\"isOptional\":true},\"ip_address\":{\"type\":\"STRING\",\"isOptional\":true},\"last_login\":{\"name\":\"org.apache.kafka.connect.data.Timestamp\",\"type\":\"INT64\",\"version\":1,\"isOptional\":true},\"account_balance\":{\"name\":\"org.apache.kafka.connect.data.Decimal\",\"type\":\"BYTES\",\"version\":1,\"parameters\":{\"scale\":\"2\"},\"isOptional\":true},\"country\":{\"type\":\"STRING\",\"isOptional\":true},\"favorite_color\":{\"type\":\"STRING\",\"isOptional\":true}}}" 16 | } 17 | } -------------------------------------------------------------------------------- /config/CSVExample.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # 16 | 17 | name=CsvSpoolDir 18 | tasks.max=1 19 | connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector 20 | input.file.pattern=^.*\.csv$ 21 | 22 | halt.on.error=false 23 | topic=testing 24 | key.schema={"name":"com.example.users.UserKey","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false}}} 25 | value.schema={"name":"com.example.users.User","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false},"first_name":{"type":"STRING","isOptional":true},"last_name":{"type":"STRING","isOptional":true},"email":{"type":"STRING","isOptional":true},"gender":{"type":"STRING","isOptional":true},"ip_address":{"type":"STRING","isOptional":true},"last_login":{"name":"org.apache.kafka.connect.data.Timestamp","type":"INT64","version":1,"isOptional":true},"account_balance":{"name":"org.apache.kafka.connect.data.Decimal","type":"BYTES","version":1,"parameters":{"scale":"2"},"isOptional":true},"country":{"type":"STRING","isOptional":true},"favorite_color":{"type":"STRING","isOptional":true}}} 26 | csv.first.row.as.header=true 27 | csv.null.field.indicator=EMPTY_SEPARATORS 28 | 29 | input.path=/tmp/spooldir/input 30 | finished.path=/tmp/spooldir/finished 31 | error.path=/tmp/spooldir/error 32 | batch.size = 5000 33 | cleanup.policy = DELETE 34 | file.buffer.size.bytes = 1048576 -------------------------------------------------------------------------------- /config/CSVSchemaGenerator.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | name=CsvSpoolDir 18 | tasks.max=1 19 | connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector 20 | input.file.pattern=^.*\.csv$ 21 | 22 | halt.on.error=false 23 | topic=testing 24 | csv.first.row.as.header=true 25 | csv.null.field.indicator=EMPTY_SEPARATORS 26 | 27 | input.path=/tmp/spooldir/input 28 | finished.path=/tmp/spooldir/finished 29 | error.path=/tmp/spooldir/error 30 | batch.size = 5000 31 | cleanup.policy = DELETE 32 | file.buffer.size.bytes = 1048576 33 | 34 | schema.generation.enabled=true -------------------------------------------------------------------------------- /config/ELFTesting.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | name=elftesting 18 | tasks.max=1 19 | connector.class=com.github.jcustenborder.kafka.connect.spooldir.elf.SpoolDirELFSourceConnector 20 | input.file.pattern=^.*\.gz$ 21 | finished.path=/Users/jeremy/data/confluent/logs/packages/finished 22 | input.path=/Users/jeremy/data/confluent/logs/packages 23 | error.path=/Users/jeremy/data/confluent/logs/packages/error 24 | halt.on.error=true 25 | topic=cloudfront 26 | schema.generation.enabled=true 27 | -------------------------------------------------------------------------------- /config/JsonExample.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | name=JsonSpoolDir 18 | tasks.max=1 19 | connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirJsonSourceConnector 20 | input.file.pattern=^.*\.json$ 21 | finished.path=/tmp/spooldir/finished 22 | input.path=/tmp/spooldir/input 23 | error.path=/tmp/spooldir/error 24 | halt.on.error=false 25 | topic=testing 26 | key.schema={"name":"com.example.users.UserKey","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false}}} 27 | value.schema={"name":"com.example.users.User","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false},"first_name":{"type":"STRING","isOptional":true},"last_name":{"type":"STRING","isOptional":true},"email":{"type":"STRING","isOptional":true},"gender":{"type":"STRING","isOptional":true},"ip_address":{"type":"STRING","isOptional":true},"last_login":{"name":"org.apache.kafka.connect.data.Timestamp","type":"INT64","version":1,"isOptional":true},"account_balance":{"name":"org.apache.kafka.connect.data.Decimal","type":"BYTES","version":1,"parameters":{"scale":"2"},"isOptional":true},"country":{"type":"STRING","isOptional":true},"favorite_color":{"type":"STRING","isOptional":true}}} -------------------------------------------------------------------------------- /config/connect-avro-docker.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | group.id=foo 17 | bootstrap.servers=kafka:9092 18 | key.converter=io.confluent.connect.avro.AvroConverter 19 | key.converter.schema.registry.url=http://schema-registry:8081 20 | value.converter=io.confluent.connect.avro.AvroConverter 21 | value.converter.schema.registry.url=http://schema-registry:8081 22 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 23 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 24 | internal.key.converter.schemas.enable=false 25 | internal.value.converter.schemas.enable=false 26 | offset.storage.file.filename=/tmp/connect.offsets 27 | plugin.path=target/kafka-connect-target/usr/share/kafka-connect,/plugins 28 | 29 | config.storage.replication.factor=1 30 | config.storage.topic=connect_config 31 | offset.storage.replication.factor=1 32 | offset.storage.topic=connect_offset 33 | status.storage.replication.factor=1 34 | status.storage.topic=connect_status -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | version: "2" 18 | services: 19 | zookeeper: 20 | image: confluentinc/cp-zookeeper:6.0.0 21 | ports: 22 | - "2181:2181" 23 | environment: 24 | ZOOKEEPER_CLIENT_PORT: 2181 25 | kafka: 26 | image: confluentinc/cp-kafka:6.0.0 27 | depends_on: 28 | - zookeeper 29 | ports: 30 | - "9092:9092" 31 | environment: 32 | KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181" 33 | KAFKA_ADVERTISED_LISTENERS: "plaintext://kafka:9092" 34 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 35 | schema-registry: 36 | image: confluentinc/cp-schema-registry:6.0.0 37 | depends_on: 38 | - kafka 39 | - zookeeper 40 | ports: 41 | - "8081:8081" 42 | environment: 43 | SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: "zookeeper:2181" 44 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 45 | connect: 46 | image: confluentinc/cp-kafka-connect:5.5.1-1-ubi8 47 | depends_on: 48 | - kafka 49 | - zookeeper 50 | - schema-registry 51 | ports: 52 | - "5005:5005" 53 | environment: 54 | CONNECT_BOOTSTRAP_SERVERS: kafka:9092 55 | CONNECT_GROUP_ID: spooldir 56 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 19 | import org.apache.kafka.connect.source.SourceConnector; 20 | 21 | import java.util.ArrayList; 22 | import java.util.LinkedHashMap; 23 | import java.util.List; 24 | import java.util.Map; 25 | 26 | public abstract class AbstractSourceConnector<CONF extends AbstractSourceConnectorConfig> extends SourceConnector { 27 | protected Map<String, String> settings; 28 | protected CONF config; 29 | protected abstract CONF config(Map<String, String> settings); 30 | 31 | @Override 32 | public void start(Map<String, String> settings) { 33 | this.config = config(settings); 34 | this.settings = settings; 35 | } 36 | 37 | @Override 38 | public List<Map<String, String>> taskConfigs(int taskCount) { 39 | List<Map<String, String>> result = new ArrayList<>(); 40 | 41 | for (int i = 0; i < taskCount; i++) { 42 | Map<String, String> taskConfig = new LinkedHashMap<>(this.settings); 43 | taskConfig.put(AbstractSourceConnectorConfig.TASK_INDEX_CONF, Integer.toString(i)); 44 | taskConfig.put(AbstractSourceConnectorConfig.TASK_COUNT_CONF, Integer.toString(taskCount)); 45 | result.add(taskConfig); 46 | } 47 | 48 | return result; 49 | } 50 | 51 | @Override 52 | public void stop() { 53 | 54 | } 55 | 56 | @Override 57 | public String version() { 58 | return VersionUtil.version(this.getClass()); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 19 | import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory; 20 | import com.google.common.base.Preconditions; 21 | import com.google.common.collect.HashMultimap; 22 | import com.google.common.collect.Multimap; 23 | import org.apache.kafka.connect.data.Schema; 24 | import org.apache.kafka.connect.errors.ConnectException; 25 | import org.apache.kafka.connect.errors.DataException; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.io.File; 30 | import java.io.IOException; 31 | import java.util.Arrays; 32 | import java.util.Collection; 33 | import java.util.HashMap; 34 | import java.util.LinkedHashMap; 35 | import java.util.List; 36 | import java.util.Map; 37 | import java.util.stream.Collectors; 38 | 39 | public abstract class AbstractSpoolDirSourceConnector<CONF extends AbstractSpoolDirSourceConnectorConfig> extends AbstractSourceConnector<CONF> { 40 | private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceConnector.class); 41 | 42 | protected abstract AbstractSchemaGenerator<CONF> generator(Map<String, String> settings); 43 | 44 | @Override 45 | public String version() { 46 | return VersionUtil.version(this.getClass()); 47 | } 48 | 49 | @Override 50 | public void start(final Map<String, String> input) { 51 | super.start(input); 52 | this.config = config(input); 53 | final Map<String, String> settings = new LinkedHashMap<>(input); 54 | 55 | if (this.config.schemasRequired() && (null == this.config.valueSchema || null == this.config.keySchema)) { 56 | log.info("Key or Value schema was not defined. Running schema generator."); 57 | AbstractSchemaGenerator<CONF> generator = generator(settings); 58 | 59 | try { 60 | List<File> inputFiles = Arrays.stream(this.config.inputPath.listFiles(this.config.inputFilenameFilter)) 61 | .limit(5) 62 | .collect(Collectors.toList()); 63 | Preconditions.checkState( 64 | !inputFiles.isEmpty(), 65 | "Could not find any input file(s) to infer schema from."
66 | ); 67 | 68 | Map<String, Map.Entry<Schema, Schema>> schemas = new HashMap<>(); 69 | Multimap<String, File> schemaToFiles = HashMultimap.create(); 70 | 71 | for (File inputFile : inputFiles) { 72 | Map.Entry<Schema, Schema> schemaEntry = generator.generate(inputFile, this.config.keyFields); 73 | String schema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaEntry.getValue()); 74 | schemaToFiles.put(schema, inputFile); 75 | schemas.put(schema, schemaEntry); 76 | } 77 | 78 | Map<String, Collection<File>> schemaToFilesMap = schemaToFiles.asMap(); 79 | if (1 != schemaToFilesMap.keySet().size()) { 80 | StringBuilder builder = new StringBuilder(); 81 | builder.append("More than one schema was found for the input pattern.\n"); 82 | for (String schema : schemaToFilesMap.keySet()) { 83 | builder.append("Schema: "); 84 | builder.append(schema); 85 | builder.append("\n"); 86 | 87 | for (File f : schemaToFilesMap.get(schema)) { 88 | builder.append(" "); 89 | builder.append(f); 90 | builder.append("\n"); 91 | } 92 | } 93 | 94 | throw new DataException(builder.toString()); 95 | } 96 | 97 | Map.Entry<Schema, Schema> schemaPair = null; 98 | for (Map.Entry<Schema, Schema> s : schemas.values()) { 99 | schemaPair = s; 100 | break; 101 | } 102 | 103 | if (null == schemaPair) { 104 | throw new DataException("Schema could not be generated."); 105 | } 106 | 107 | final String keySchema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaPair.getKey()); 108 | log.info("Setting {} to {}", AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, keySchema); 109 | final String valueSchema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaPair.getValue()); 110 | log.info("Setting {} to {}", AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, valueSchema); 111 | settings.put(AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, keySchema); 112 | settings.put(AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, valueSchema); 113 | } catch (IOException e) { 114 | throw new ConnectException("Exception thrown while generating schema", e); 115 | } 116 | this.settings = settings; 117 | } 118 | 119 | this.settings = settings; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTask.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.data.Parser; 19 | import com.github.jcustenborder.kafka.connect.utils.data.type.DateTypeParser; 20 | import com.github.jcustenborder.kafka.connect.utils.data.type.TimeTypeParser; 21 | import com.github.jcustenborder.kafka.connect.utils.data.type.TimestampTypeParser; 22 | import com.github.jcustenborder.kafka.connect.utils.data.type.TypeParser; 23 | import com.google.common.collect.ImmutableMap; 24 | import org.apache.kafka.connect.data.Date; 25 | import org.apache.kafka.connect.data.Schema; 26 | import org.apache.kafka.connect.data.SchemaAndValue; 27 | import org.apache.kafka.connect.data.Struct; 28 | import org.apache.kafka.connect.data.Time; 29 | import org.apache.kafka.connect.data.Timestamp; 30 | import org.apache.kafka.connect.source.SourceRecord; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import java.util.List; 35 | import java.util.Map; 36 | 37 | public abstract class AbstractSpoolDirSourceTask<CONF extends AbstractSpoolDirSourceConnectorConfig> extends AbstractSourceTask<CONF> { 38 | private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceTask.class); 39 | protected Parser parser; 40 | 41 | @Override 42 | public void start(Map<String, String> settings) { 43 | super.start(settings); 44 | 45 | this.parser = new Parser(); 46 | Map<Schema, TypeParser> dateTypeParsers = ImmutableMap.of( 47 | Timestamp.SCHEMA, new TimestampTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats), 48 | Date.SCHEMA, new DateTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats), 49 | Time.SCHEMA, new TimeTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats) 50 | ); 51 | 52 | for (Map.Entry<Schema, TypeParser> kvp : dateTypeParsers.entrySet()) { 53 | this.parser.registerTypeParser(kvp.getKey(), kvp.getValue()); 54 | } 55 | } 56 | 57 | protected void addRecord(List<SourceRecord> records, SchemaAndValue key, SchemaAndValue value) { 58 | final Long timestamp; 59 | 60 | switch (this.config.timestampMode) { 61 | case FIELD: 62 | Struct valueStruct = (Struct) value.value(); 63 | log.trace("addRecord() - Reading date from timestamp field '{}'", this.config.timestampField); 64 | final java.util.Date date = (java.util.Date) valueStruct.get(this.config.timestampField); 65 | timestamp = date.getTime(); 66 | break; 67 | case FILE_TIME: 68 | timestamp = this.inputFile.lastModified(); 69 | break; 70 | case PROCESS_TIME: 71 | timestamp = null; 72 | break; 73 | default: 74 | throw new UnsupportedOperationException( 75 | String.format("Unsupported timestamp mode. %s", this.config.timestampMode) 76 | ); 77 | } 78 | 79 | SourceRecord sourceRecord = record( 80 | key, 81 | value, 82 | timestamp 83 | ); 84 | recordCount++; 85 | records.add(sourceRecord); 86 | } 87 | 88 | } -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractTaskPartitionerPredicate.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.hash.Hashing; 19 | import org.apache.kafka.common.config.ConfigException; 20 | 21 | import java.io.File; 22 | import java.util.function.Predicate; 23 | 24 | abstract class AbstractTaskPartitionerPredicate implements Predicate<File> { 25 | final int index; 26 | final int count; 27 | 28 | protected AbstractTaskPartitionerPredicate(int index, int count) { 29 | this.index = index; 30 | this.count = count; 31 | } 32 | 33 | public static Predicate<File> create(AbstractSourceConnectorConfig config) { 34 | Predicate<File> result; 35 | 36 | if (config.taskCount == 1) { 37 | result = new None(config.taskIndex, config.taskCount); 38 | } else { 39 | switch (config.taskPartitioner) { 40 | case ByName: 41 | result = new ByName(config.taskIndex, config.taskCount); 42 | break; 43 | default: 44 | throw new ConfigException( 45 | AbstractSourceConnectorConfig.TASK_PARTITIONER_CONF, 46 | config.taskPartitioner.toString(), 47 | "Unsupported value" 48 | ); 49 | } 50 | } 51 | 52 | return result; 53 | } 54 | 55 | /** 56 | * This implementation is used to bypass the check. 57 | */ 58 | static class None extends AbstractTaskPartitionerPredicate { 59 | None(int index, int count) { 60 | super(index, count); 61 | } 62 | 63 | @Override 64 | public boolean test(File file) { 65 | return true; 66 | } 67 | } 68 | 69 | /** 70 | * Assigns each file to exactly one task by hashing the file name and taking the modulus of the task count. 71 | */ 72 | static class ByName extends AbstractTaskPartitionerPredicate { 73 | 74 | protected ByName(int index, int count) { 75 | super(index, count); 76 | } 77 | 78 | @Override 79 | public boolean test(File file) { 80 | final int hashcode = Math.abs( 81 | Hashing.adler32() 82 | .hashUnencodedChars(file.getName()) 83 | .asInt() 84 | ); 85 | return (hashcode % this.count) == index; 86 | } 87 | } 88 | 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.opencsv.CSVReader; 19 | import com.opencsv.CSVReaderBuilder; 20 | import com.opencsv.ICSVParser; 21 | import org.apache.kafka.connect.data.Schema; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.io.InputStreamReader; 28 | import java.util.LinkedHashMap; 29 | import java.util.Map; 30 | 31 | public class CsvSchemaGenerator extends AbstractSchemaGenerator<SpoolDirCsvSourceConnectorConfig> { 32 | private static final Logger log = LoggerFactory.getLogger(CsvSchemaGenerator.class); 33 | 34 | public CsvSchemaGenerator(Map<String, String> settings) { 35 | super(settings); 36 | } 37 | 38 | @Override 39 | protected SpoolDirCsvSourceConnectorConfig config(Map<String, String> settings) { 40 | return new SpoolDirCsvSourceConnectorConfig(false, settings); 41 | } 42 | 43 | @Override 44 | protected Map<String, Schema.Type> determineFieldTypes(InputStream inputStream) throws IOException { 45 | Map<String, Schema.Type> typeMap = new LinkedHashMap<>(); 46 | ICSVParser parserBuilder = this.config.createCSVParserBuilder(); 47 | try (InputStreamReader reader = new InputStreamReader(inputStream)) { 48 | CSVReaderBuilder readerBuilder = this.config.createCSVReaderBuilder(reader, parserBuilder); 49 | try (CSVReader csvReader = readerBuilder.build()) { 50 | String[] headers = null; 51 | 52 | if (this.config.firstRowAsHeader) { 53 | headers = csvReader.readNext(); 54 | } 55 | 56 | String[] row = csvReader.readNext(); 57 | 58 | if (null == headers) { 59 | headers = new String[row.length]; 60 | for (int i = 1; i <= row.length; i++) { 61 | headers[(i - 1)] = String.format("column%02d", i); 62 | } 63 | } 64 | 65 | for (String s : headers) { 66 | typeMap.put(s, Schema.Type.STRING); 67 | } 68 | } 69 | } 70 | return typeMap; 71 | } 72 | 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.collect.ComparisonChain; 19 | 20 | import java.io.File; 21 | import java.util.Comparator; 22 | import java.util.List; 23 | 24 | class FileComparator implements Comparator<File> { 25 | final List<AbstractSourceConnectorConfig.FileAttribute> attributes; 26 | 27 | FileComparator(List<AbstractSourceConnectorConfig.FileAttribute> attributes) { 28 | this.attributes = attributes; 29 | } 30 | 31 | @Override 32 | public int compare(File f1, File f2) { 33 | ComparisonChain chain = ComparisonChain.start(); 34 | 35 | for (AbstractSourceConnectorConfig.FileAttribute fileAttribute : this.attributes) { 36 | switch (fileAttribute) { 37 | case NameAsc: 38 | chain = chain.compare(f1.getName(), f2.getName()); 39 | break; 40 | case NameDesc: 41 | chain = chain.compare(f2.getName(), f1.getName()); 42 | break; 43 | case LengthAsc: // Smaller files first. 44 | chain = chain.compare(f1.length(), f2.length()); 45 | break; 46 | case LengthDesc: // We prefer larger files first. 47 | chain = chain.compare(f2.length(), f1.length()); 48 | break; 49 | case LastModifiedAsc: 50 | chain = chain.compare(f1.lastModified(), f2.lastModified()); 51 | break; 52 | case LastModifiedDesc: 53 | chain = chain.compare(f2.lastModified(), f1.lastModified()); 54 | break; 55 | default: 56 | throw new UnsupportedOperationException( 57 | String.format("%s is not a supported FileAttribute.", fileAttribute) 58 | ); 59 | } 60 | } 61 | return chain.result(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGenerator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.fasterxml.jackson.core.JsonFactory; 19 | import com.fasterxml.jackson.core.JsonParser; 20 | import com.fasterxml.jackson.databind.JsonNode; 21 | import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory; 22 | import org.apache.kafka.connect.data.Schema; 23 | 24 | import java.io.IOException; 25 | import java.io.InputStream; 26 | import java.util.Iterator; 27 | import java.util.LinkedHashMap; 28 | import java.util.Map; 29 | 30 | public class JsonSchemaGenerator extends AbstractSchemaGenerator<SpoolDirJsonSourceConnectorConfig> { 31 | public JsonSchemaGenerator(Map<String, String> settings) { 32 | super(settings); 33 | } 34 | 35 | @Override 36 | protected SpoolDirJsonSourceConnectorConfig config(Map<String, String> settings) { 37 | return new SpoolDirJsonSourceConnectorConfig(false, settings); 38 | } 39 | 40 | @Override 41 | protected Map<String, Schema.Type> determineFieldTypes(InputStream inputStream) throws IOException { 42 | Map<String, Schema.Type> typeMap = new LinkedHashMap<>(); 43 | 44 | JsonFactory factory = new JsonFactory(); 45 | try (JsonParser parser = factory.createParser(inputStream)) { 46 | Iterator<JsonNode> iterator = ObjectMapperFactory.INSTANCE.readValues(parser, JsonNode.class); 47 | while (iterator.hasNext()) { 48 | JsonNode node = iterator.next(); 49 | if (node.isObject()) { 50 | Iterator<String> fieldNames = node.fieldNames(); 51 | while (fieldNames.hasNext()) { 52 | typeMap.put(fieldNames.next(), Schema.Type.STRING); 53 | } 54 | break; 55 | } 56 | } 57 | } 58 | 59 | return typeMap; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/Metadata.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.collect.ImmutableMap; 19 | import com.google.common.io.Files; 20 | import org.apache.kafka.connect.header.ConnectHeaders; 21 | import org.apache.kafka.connect.header.Headers; 22 | 23 | import java.io.File; 24 | import java.util.Date; 25 | import java.util.LinkedHashMap; 26 | import java.util.Map; 27 | 28 | /** 29 | * Class is used to write metadata for the InputFile.
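 * The metadata is exposed to downstream consumers as record headers; see the headers(long) method and HEADER_DESCRIPTIONS for the full list.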
30 | */ 31 | class Metadata { 32 | static final String HEADER_PATH = "file.path"; 33 | static final String HEADER_PARENT_DIR_NAME = "file.parent.dir.name"; 34 | static final String HEADER_NAME = "file.name"; 35 | static final String HEADER_NAME_WITHOUT_EXTENSION = "file.name.without.extension"; 36 | static final String HEADER_LAST_MODIFIED = "file.last.modified"; 37 | static final String HEADER_LENGTH = "file.length"; 38 | static final String HEADER_OFFSET = "file.offset"; 39 | static final String HEADER_FILE_RELATIVE_PATH = "file.relative.path"; 40 | 41 | final String path; 42 | final String name; 43 | final String nameWithoutExtension; 44 | final Date lastModified; 45 | final long length; 46 | final String relativePath; 47 | String parentDirName = null; 48 | 49 | 50 | public static final Map<String, String> HEADER_DESCRIPTIONS; 51 | 52 | static { 53 | Map<String, String> result = new LinkedHashMap<>(); 54 | result.put(HEADER_PATH, "The absolute path to the file ingested."); 55 | result.put(HEADER_PARENT_DIR_NAME, "The parent directory name of the file ingested."); 56 | result.put(HEADER_NAME, "The name part of the file ingested."); 57 | result.put(HEADER_NAME_WITHOUT_EXTENSION, "The file name without the extension part of the file."); 58 | result.put(HEADER_LAST_MODIFIED, "The last modified date of the file."); 59 | result.put(HEADER_LENGTH, "The size of the file in bytes."); 60 | result.put(HEADER_OFFSET, "The offset for this piece of data within the file."); 61 | result.put(HEADER_FILE_RELATIVE_PATH, "The file's parent sub-directory relative to the input.path."); 62 | HEADER_DESCRIPTIONS = ImmutableMap.copyOf(result); 63 | } 64 | 65 | public static final String HEADER_DOCS; 66 | 67 | static { 68 | StringBuilder builder = new StringBuilder(); 69 | 70 | HEADER_DESCRIPTIONS.forEach((key, value) -> { 71 | builder.append("* `"); 72 | builder.append(key); 73 | builder.append("` - "); 74 | builder.append(value); 75 | builder.append('\n'); 76 | }); 77 | HEADER_DOCS = builder.toString(); 78 | } 79 | 80 | 81 | 82 | public Metadata(File file, String relativePath) { 83 | this.path = file.getAbsolutePath(); 84 | this.name = file.getName(); 85 | this.lastModified = new Date(file.lastModified()); 86 | this.length = file.length(); 87 | this.nameWithoutExtension = Files.getNameWithoutExtension(this.name); 88 | 89 | if (file.getParentFile() != null) { 90 | this.parentDirName = file.getParentFile().getName(); 91 | } 92 | 93 | this.relativePath = relativePath; 94 | } 95 | 96 | /** 97 | * Method is used to copy metadata from the file to the record headers. 98 | * @param offset The offset of this piece of data within the file. 99 | * @return Returns a Headers object populated with the metadata from the file.
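 *         The returned headers also include the supplied offset as the file.offset header.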
100 | */ 101 | public Headers headers(long offset) { 102 | ConnectHeaders headers = new ConnectHeaders(); 103 | headers.addString(HEADER_NAME, this.name); 104 | headers.addString(HEADER_NAME_WITHOUT_EXTENSION, this.nameWithoutExtension); 105 | headers.addString(HEADER_PATH, this.path); 106 | headers.addString(HEADER_PARENT_DIR_NAME, this.parentDirName); 107 | headers.addLong(HEADER_LENGTH, this.length); 108 | headers.addLong(HEADER_OFFSET, offset); 109 | headers.addTimestamp(HEADER_LAST_MODIFIED, this.lastModified); 110 | 111 | if (this.relativePath != null) { 112 | headers.addString(HEADER_FILE_RELATIVE_PATH, this.relativePath); 113 | } 114 | 115 | return headers; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.config.Description; 19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant; 20 | import com.github.jcustenborder.kafka.connect.utils.config.Title; 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.connect.connector.Task; 23 | 24 | import java.util.Map; 25 | 26 | @Title("Avro Source Connector") 27 | @Description("This connector is used to read Avro data files from the file system and write their contents " + 28 | "to Kafka. The schema of the file is used to read the data and produce it to Kafka.") 29 | @DocumentationImportant("This connector has a dependency on the Confluent Schema Registry, specifically kafka-connect-avro-converter. " + 30 | "This dependency is not shipped along with the connector to avoid potential version mismatch issues.
" + 31 | "The easiest way to ensure this component is available is to use one of the Confluent packages or containers for deployment.") 32 | public class SpoolDirAvroSourceConnector extends AbstractSourceConnector { 33 | @Override 34 | protected SpoolDirAvroSourceConnectorConfig config(Map settings) { 35 | return new SpoolDirAvroSourceConnectorConfig(settings); 36 | } 37 | 38 | @Override 39 | public Class taskClass() { 40 | return SpoolDirAvroSourceTask.class; 41 | } 42 | 43 | @Override 44 | public ConfigDef config() { 45 | return SpoolDirAvroSourceConnectorConfig.config(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import org.apache.kafka.common.config.ConfigDef; 19 | 20 | import java.util.Map; 21 | 22 | public class SpoolDirAvroSourceConnectorConfig extends AbstractSourceConnectorConfig { 23 | 24 | 25 | public SpoolDirAvroSourceConnectorConfig(Map originals) { 26 | super(config(), originals, true); 27 | } 28 | 29 | public static ConfigDef config() { 30 | return AbstractSourceConnectorConfig.config(true); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTask.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import io.confluent.connect.avro.AvroData; 19 | import org.apache.avro.file.DataFileReader; 20 | import org.apache.avro.generic.GenericContainer; 21 | import org.apache.avro.generic.GenericDatumReader; 22 | import org.apache.avro.io.DatumReader; 23 | import org.apache.kafka.connect.data.SchemaAndValue; 24 | import org.apache.kafka.connect.source.SourceRecord; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import java.io.IOException; 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | import java.util.Map; 32 | 33 | public class SpoolDirAvroSourceTask extends AbstractSourceTask<SpoolDirAvroSourceConnectorConfig> { 34 | private static final Logger log = LoggerFactory.getLogger(SpoolDirAvroSourceTask.class); 35 | long recordOffset; 36 | AvroData avroData = new AvroData(1024); 37 | DataFileReader<GenericContainer> dataFileReader; 38 | DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(); 39 | 40 | 41 | @Override 42 | protected SpoolDirAvroSourceConnectorConfig config(Map<String, String> settings) { 43 | return new SpoolDirAvroSourceConnectorConfig(settings); 44 | } 45 | 46 | @Override 47 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException { 48 | if (null != this.dataFileReader) { 49 | this.dataFileReader.close(); 50 | } 51 | inputFile.startProcessing(); 52 | this.dataFileReader = new DataFileReader<>(inputFile.file(), datumReader); 53 | this.recordOffset = 0; 54 | 55 | if (null != lastOffset) { 56 | while (recordOffset < lastOffset && this.dataFileReader.hasNext()) { 57 | this.dataFileReader.next(); 58 | recordOffset++; 59 | } 60 | } 61 | 62 | } 63 | 64 | @Override 65 | protected List<SourceRecord> process() throws IOException { 66 | int recordCount = 0; 67 | List<SourceRecord> records = new ArrayList<>(this.config.batchSize); 68 | GenericContainer container = null; 69 | while (recordCount < this.config.batchSize && dataFileReader.hasNext()) { 70 | container = dataFileReader.next(container); 71 | SchemaAndValue value = avroData.toConnectData(this.dataFileReader.getSchema(), container); 72 | SourceRecord sourceRecord = record(null, value, null); 73 | records.add(sourceRecord); 74 | recordCount++; 75 | recordOffset++; 76 | } 77 | return records; 78 | } 79 | 80 | @Override 81 | protected long recordOffset() { 82 | return recordOffset; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.config.Description; 19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant; 20 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationWarning; 21 | import com.github.jcustenborder.kafka.connect.utils.config.Title; 22 | import org.apache.kafka.common.config.ConfigDef; 23 | import org.apache.kafka.connect.connector.Task; 24 | 25 | import java.util.Map; 26 | 27 | @Title("Binary File Source Connector") 28 | @Description("This connector is used to read an entire file as a byte array and write the data to Kafka.") 29 | @DocumentationImportant("The recommended converter to use is the ByteArrayConverter. " + 30 | "Example: `value.converter=org.apache.kafka.connect.storage.ByteArrayConverter`") 31 | @DocumentationWarning("Large files will be read as a single byte array. This means that the process could " + 32 | "run out of memory or try to send a message to Kafka that is greater than the max message size. If this happens " + 33 | "an exception will be thrown.") 34 | public class SpoolDirBinaryFileSourceConnector extends AbstractSourceConnector<SpoolDirBinaryFileSourceConnectorConfig> { 35 | @Override 36 | protected SpoolDirBinaryFileSourceConnectorConfig config(Map<String, String> settings) { 37 | return new SpoolDirBinaryFileSourceConnectorConfig(settings); 38 | } 39 | 40 | @Override 41 | public Class<? extends Task> taskClass() { 42 | return SpoolDirBinaryFileSourceTask.class; 43 | } 44 | 45 | @Override 46 | public ConfigDef config() { 47 | return SpoolDirBinaryFileSourceConnectorConfig.config(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import org.apache.kafka.common.config.ConfigDef;
19 | 
20 | import java.util.Map;
21 | 
22 | public class SpoolDirBinaryFileSourceConnectorConfig extends AbstractSourceConnectorConfig {
23 | public SpoolDirBinaryFileSourceConnectorConfig(Map<String, String> originals) {
24 | super(config(), originals, true);
25 | }
26 | 
27 | public static ConfigDef config() {
28 | return AbstractSourceConnectorConfig.config(true);
29 | }
30 | }
31 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTask.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.google.common.io.ByteStreams;
19 | import org.apache.kafka.connect.data.Schema;
20 | import org.apache.kafka.connect.data.SchemaAndValue;
21 | import org.apache.kafka.connect.source.SourceRecord;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 | 
25 | import java.io.IOException;
26 | import java.util.ArrayList;
27 | import java.util.List;
28 | import java.util.Map;
29 | 
30 | public class SpoolDirBinaryFileSourceTask extends AbstractSourceTask {
31 | private static final Logger log = LoggerFactory.getLogger(SpoolDirBinaryFileSourceTask.class);
32 | 
33 | @Override
34 | protected SpoolDirBinaryFileSourceConnectorConfig config(Map<String, String> settings) {
35 | return new SpoolDirBinaryFileSourceConnectorConfig(settings);
36 | }
37 | 
38 | @Override
39 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
40 | inputFile.openStream();
41 | }
42 | 
43 | @Override
44 | protected List<SourceRecord> process() throws IOException {
45 | List<SourceRecord> records = new ArrayList<>(1);
46 | 
47 | if (this.inputFile.inputStream().available() > 0) {
48 | byte[] buffer = ByteStreams.toByteArray(this.inputFile.inputStream());
49 | records.add(
50 | record(
51 | null,
52 | new SchemaAndValue(Schema.BYTES_SCHEMA, buffer),
53 | null
54 | )
55 | );
56 | }
57 | return records;
58 | }
59 | 
60 | @Override
61 | protected long recordOffset() {
62 | return 0;
63 | }
64 | }
65 | 
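The config above only layers the shared `AbstractSourceConnectorConfig` options. As a hedged sketch, here is how it could be constructed in code; the topic and paths are placeholders, and the constant names mirror the test setup later in this repository (the sketch assumes it lives in the connector's package).

import com.google.common.collect.ImmutableMap;

import java.util.Map;

public class BinaryConfigSketch {
  public static void main(String[] args) {
    // Placeholder values; only the config keys come from the project itself.
    Map<String, String> settings = ImmutableMap.<String, String>builder()
        .put(SpoolDirBinaryFileSourceConnectorConfig.TOPIC_CONF, "binary-files")
        .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_CONFIG, "/tmp/spooldir/input")
        .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.*\\.bin$")
        .put(SpoolDirBinaryFileSourceConnectorConfig.ERROR_PATH_CONFIG, "/tmp/spooldir/error")
        .put(SpoolDirBinaryFileSourceConnectorConfig.FINISHED_PATH_CONFIG, "/tmp/spooldir/finished")
        .build();
    SpoolDirBinaryFileSourceConnectorConfig config = new SpoolDirBinaryFileSourceConnectorConfig(settings);
    System.out.println(config);
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.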
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.Description;
19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
20 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip;
21 | import com.github.jcustenborder.kafka.connect.utils.config.Title;
22 | import org.apache.kafka.common.config.ConfigDef;
23 | import org.apache.kafka.connect.connector.Task;
24 | 
25 | import java.util.Map;
26 | 
27 | @Title("CSV Source Connector")
28 | @Description("The SpoolDirCsvSourceConnector will monitor the directory specified in `input.path` for files and read them as CSV, " +
29 | "converting each of the records to the strongly typed equivalent specified in `key.schema` and `value.schema`.")
30 | @DocumentationTip("To get a starting point for a schema, you can use the following command to generate an all-String schema. This " +
31 | "will give you the basic structure of a schema. From there you can change the types to match what you expect.\n\n" +
32 | ".. code-block:: bash\n\n" +
33 | " mvn clean package\n" +
34 | " export CLASSPATH=\"$(find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir -type f -name '*.jar' | tr '\\n' ':')\"\n" +
35 | " kafka-run-class com.github.jcustenborder.kafka.connect.spooldir.AbstractSchemaGenerator -t csv -f src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data -c config/CSVExample.properties -i id\n" +
36 | "")
37 | @DocumentationImportant("There are some caveats to running this connector with `" + SpoolDirCsvSourceConnectorConfig.SCHEMA_GENERATION_ENABLED_CONF +
38 | " = true`. If schema generation is enabled the connector will start by reading one of the files that match `" + SpoolDirCsvSourceConnectorConfig.INPUT_FILE_PATTERN_CONF +
39 | "` in the path specified by `" + SpoolDirCsvSourceConnectorConfig.INPUT_PATH_CONFIG + "`. If there are no files when the connector starts or is restarted, " +
40 | "the connector will fail to start. If there are different fields in other files, they will not be detected. The recommended path is to specify a schema that the " +
41 | "files will be parsed with. This will ensure that data written by this connector to Kafka will be consistent across files that have inconsistent columns. 
For example, " +
42 | "if some files have an optional column that is not always included, create a schema that includes the column marked as optional.")
43 | public class SpoolDirCsvSourceConnector extends AbstractSpoolDirSourceConnector {
44 | @Override
45 | protected SpoolDirCsvSourceConnectorConfig config(Map<String, String> settings) {
46 | return new SpoolDirCsvSourceConnectorConfig(false, settings);
47 | }
48 | 
49 | @Override
50 | protected AbstractSchemaGenerator<?> generator(Map<String, String> settings) {
51 | return new CsvSchemaGenerator(settings);
52 | }
53 | 
54 | @Override
55 | public Class<? extends Task> taskClass() {
56 | return SpoolDirCsvSourceTask.class;
57 | }
58 | 
59 | @Override
60 | public ConfigDef config() {
61 | return SpoolDirCsvSourceConnectorConfig.config();
62 | }
63 | }
64 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.Description;
19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
20 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationNote;
21 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip;
22 | import com.github.jcustenborder.kafka.connect.utils.config.Title;
23 | import org.apache.kafka.common.config.ConfigDef;
24 | import org.apache.kafka.connect.connector.Task;
25 | 
26 | import java.util.Map;
27 | 
28 | @Title("Json Source Connector")
29 | @Description("This connector is used to stream JSON files from a directory " +
30 | "while converting the data based on the schema supplied in the configuration.")
31 | @DocumentationTip("To get a starting point for a schema, you can use the following command to generate an all-String schema. This " +
32 | "will give you the basic structure of a schema. From there you can change the types to match what you expect.\n\n" +
33 | ".. code-block:: bash\n\n" +
34 | " mvn clean package\n" +
35 | " export CLASSPATH=\"$(find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir -type f -name '*.jar' | tr '\\n' ':')\"\n" +
36 | " kafka-run-class com.github.jcustenborder.kafka.connect.spooldir.AbstractSchemaGenerator -t json -f src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data -c config/JsonExample.properties -i id\n" +
37 | "")
38 | @DocumentationImportant("There are some caveats to running this connector with `" + SpoolDirCsvSourceConnectorConfig.SCHEMA_GENERATION_ENABLED_CONF +
39 | " = true`. 
If schema generation is enabled the connector will start by reading one of the files that match `" + SpoolDirCsvSourceConnectorConfig.INPUT_FILE_PATTERN_CONF +
40 | "` in the path specified by `" + SpoolDirCsvSourceConnectorConfig.INPUT_PATH_CONFIG + "`. If there are no files when the connector starts or is restarted, " +
41 | "the connector will fail to start. If there are different fields in other files, they will not be detected. The recommended path is to specify a schema that the " +
42 | "files will be parsed with. This will ensure that data written by this connector to Kafka will be consistent across files that have inconsistent columns. For example, " +
43 | "if some files have an optional column that is not always included, create a schema that includes the column marked as optional.")
44 | @DocumentationNote("If you want to import JSON node by node from a file and do not care about schemas, do not use this connector with schema generation enabled. " +
45 | "Take a look at the Schema Less Json Source Connector.")
46 | public class SpoolDirJsonSourceConnector extends AbstractSpoolDirSourceConnector {
47 | @Override
48 | protected SpoolDirJsonSourceConnectorConfig config(Map<String, String> settings) {
49 | return new SpoolDirJsonSourceConnectorConfig(false, settings);
50 | }
51 | 
52 | @Override
53 | protected AbstractSchemaGenerator<?> generator(Map<String, String> settings) {
54 | return new JsonSchemaGenerator(settings);
55 | }
56 | 
57 | @Override
58 | public Class<? extends Task> taskClass() {
59 | return SpoolDirJsonSourceTask.class;
60 | }
61 | 
62 | @Override
63 | public ConfigDef config() {
64 | return SpoolDirJsonSourceConnectorConfig.config();
65 | }
66 | }
67 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import org.apache.kafka.common.config.ConfigDef;
19 | 
20 | import java.util.Map;
21 | 
22 | class SpoolDirJsonSourceConnectorConfig extends AbstractSpoolDirSourceConnectorConfig {
23 | public SpoolDirJsonSourceConnectorConfig(final boolean isTask, Map<String, String> settings) {
24 | super(isTask, true, config(), settings);
25 | }
26 | 
27 | @Override
28 | public boolean schemasRequired() {
29 | return true;
30 | }
31 | 
32 | public static ConfigDef config() {
33 | return AbstractSpoolDirSourceConnectorConfig.config(true);
34 | }
35 | 
36 | }
37 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTask.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.fasterxml.jackson.core.JsonFactory;
19 | import com.fasterxml.jackson.core.JsonParser;
20 | import com.fasterxml.jackson.databind.JsonNode;
21 | import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
22 | import org.apache.kafka.connect.data.Field;
23 | import org.apache.kafka.connect.data.SchemaAndValue;
24 | import org.apache.kafka.connect.data.Struct;
25 | import org.apache.kafka.connect.errors.DataException;
26 | import org.apache.kafka.connect.source.SourceRecord;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 | 
30 | import java.io.IOException;
31 | import java.io.InputStream;
32 | import java.util.ArrayList;
33 | import java.util.Iterator;
34 | import java.util.List;
35 | import java.util.Map;
36 | 
37 | public class SpoolDirJsonSourceTask extends AbstractSpoolDirSourceTask {
38 | private static final Logger log = LoggerFactory.getLogger(SpoolDirJsonSourceTask.class);
39 | JsonFactory jsonFactory;
40 | JsonParser jsonParser;
41 | Iterator<JsonNode> iterator;
42 | long offset;
43 | 
44 | @Override
45 | protected SpoolDirJsonSourceConnectorConfig config(Map<String, String> settings) {
46 | return new SpoolDirJsonSourceConnectorConfig(true, settings);
47 | }
48 | 
49 | @Override
50 | public void start(Map<String, String> settings) {
51 | super.start(settings);
52 | this.jsonFactory = new JsonFactory();
53 | }
54 | 
55 | @Override
56 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
57 | if (null != jsonParser) {
58 | log.trace("configure() - Closing existing json parser.");
59 | jsonParser.close();
60 | }
61 | InputStream inputStream = inputFile.openStream();
62 | this.jsonParser = this.jsonFactory.createParser(inputStream);
63 | this.iterator = ObjectMapperFactory.INSTANCE.readValues(this.jsonParser, JsonNode.class);
64 | this.offset = -1;
65 | 
66 | if (null != lastOffset) {
67 | int skippedRecords = 0;
68 | while (this.iterator.hasNext() && 
skippedRecords < lastOffset) { // skip exactly lastOffset already-processed records
69 | next();
70 | skippedRecords++;
71 | }
72 | log.trace("configure() - Skipped {} record(s).", skippedRecords);
73 | log.info("configure() - Starting on offset {}", this.offset);
74 | }
75 | 
76 | }
77 | 
78 | JsonNode next() {
79 | this.offset++;
80 | return this.iterator.next();
81 | }
82 | 
83 | @Override
84 | protected List<SourceRecord> process() {
85 | List<SourceRecord> records = new ArrayList<>(this.config.batchSize);
86 | 
87 | while (this.iterator.hasNext() && records.size() < this.config.batchSize) {
88 | JsonNode node = next();
89 | 
90 | Struct valueStruct = new Struct(this.config.valueSchema);
91 | Struct keyStruct = new Struct(this.config.keySchema);
92 | log.trace("process() - input = {}", node);
93 | for (Field field : this.config.valueSchema.fields()) {
94 | JsonNode fieldNode = node.get(field.name());
95 | log.trace("process() - field: {} input = '{}'", field.name(), fieldNode);
96 | Object fieldValue;
97 | try {
98 | fieldValue = this.parser.parseJsonNode(field.schema(), fieldNode);
99 | log.trace("process() - field: {} output = '{}'", field.name(), fieldValue);
100 | valueStruct.put(field, fieldValue);
101 | 
102 | Field keyField = this.config.keySchema.field(field.name());
103 | if (null != keyField) {
104 | log.trace("process() - Setting key field '{}' to '{}'", keyField.name(), fieldValue);
105 | keyStruct.put(keyField, fieldValue);
106 | }
107 | } catch (Exception ex) {
108 | String message = String.format("Exception thrown while parsing data for '%s'. offset=%s", field.name(), this.recordOffset());
109 | throw new DataException(message, ex);
110 | }
111 | }
112 | 
113 | addRecord(
114 | records,
115 | new SchemaAndValue(keyStruct.schema(), keyStruct),
116 | new SchemaAndValue(valueStruct.schema(), valueStruct)
117 | );
118 | }
119 | 
120 | return records;
121 | }
122 | 
123 | @Override
124 | protected long recordOffset() {
125 | return this.offset;
126 | }
127 | }
128 | 
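A compact sketch of the per-field loop above, using plain Jackson and Connect's `Struct` directly. The schema and input document are invented for illustration, and the hand-rolled coercion stands in for the connect-utils `parser.parseJsonNode` call the task actually uses.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;

public class JsonToStructSketch {
  public static void main(String[] args) throws Exception {
    // Invented value schema; real deployments supply this via value.schema.
    Schema valueSchema = SchemaBuilder.struct().name("Example")
        .field("id", Schema.INT64_SCHEMA)
        .field("name", Schema.STRING_SCHEMA)
        .build();
    JsonNode node = new ObjectMapper().readTree("{\"id\": 1, \"name\": \"alice\"}");
    Struct struct = new Struct(valueSchema);
    for (Field field : valueSchema.fields()) {
      JsonNode fieldNode = node.get(field.name());
      // Simplified coercion; the connector handles the full range of schema types.
      Object value = field.schema().type() == Schema.Type.INT64
          ? (Object) fieldNode.asLong()
          : fieldNode.asText();
      struct.put(field, value);
    }
    struct.validate();
    System.out.println(struct);
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.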
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.Description;
19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
20 | import com.github.jcustenborder.kafka.connect.utils.config.Title;
21 | import org.apache.kafka.common.config.ConfigDef;
22 | import org.apache.kafka.connect.connector.Task;
23 | 
24 | import java.util.Map;
25 | 
26 | @Title("Line Delimited Source Connector")
27 | @Description("This connector is used to read a file line by line and write the data to Kafka.")
28 | @DocumentationImportant("The recommended converter to use is the StringConverter. " +
29 | "Example: `value.converter=org.apache.kafka.connect.storage.StringConverter`")
30 | public class SpoolDirLineDelimitedSourceConnector extends AbstractSourceConnector {
31 | @Override
32 | protected SpoolDirLineDelimitedSourceConnectorConfig config(Map<String, String> settings) {
33 | return new SpoolDirLineDelimitedSourceConnectorConfig(settings);
34 | }
35 | 
36 | @Override
37 | public Class<? extends Task> taskClass() {
38 | return SpoolDirLineDelimitedSourceTask.class;
39 | }
40 | 
41 | @Override
42 | public ConfigDef config() {
43 | return SpoolDirLineDelimitedSourceConnectorConfig.config();
44 | }
45 | }
46 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder;
19 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigUtils;
20 | import org.apache.kafka.common.config.ConfigDef;
21 | 
22 | import java.nio.charset.Charset;
23 | import java.util.Map;
24 | 
25 | public class SpoolDirLineDelimitedSourceConnectorConfig extends AbstractSourceConnectorConfig {
26 | public static final String CHARSET_CONF = "file.charset";
27 | static final String CHARSET_DOC = "Character set to read the file with.";
28 | static final String CHARSET_DEFAULT = Charset.defaultCharset().name();
29 | static final String CHARSET_DISPLAY = "File character set.";
30 | 
31 | public final Charset charset;
32 | 
33 | public SpoolDirLineDelimitedSourceConnectorConfig(Map<String, String> originals) {
34 | super(config(), originals, true);
35 | this.charset = ConfigUtils.charset(this, CHARSET_CONF);
36 | }
37 | 
38 | public static ConfigDef config() {
39 | return AbstractSourceConnectorConfig.config(true)
40 | .define(
41 | ConfigKeyBuilder.of(CHARSET_CONF, ConfigDef.Type.STRING)
42 | .defaultValue(CHARSET_DEFAULT)
43 | .validator(SpoolDirCsvSourceConnectorConfig.CharsetValidator.of())
44 | .importance(ConfigDef.Importance.LOW)
45 | .documentation(CHARSET_DOC)
46 | .displayName(CHARSET_DISPLAY)
47 | .width(ConfigDef.Width.LONG)
48 | .build()
49 | );
50 | }
51 | }
52 | 
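Before the task itself (next file), here is a small self-contained sketch of the same batching pattern it uses: read at most `batch.size` lines per poll and let `LineNumberReader.getLineNumber()` serve as the record offset. The in-memory reader and batch size are stand-ins for the real input file and configuration.

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

public class LineBatchSketch {
  public static void main(String[] args) throws IOException {
    int batchSize = 2; // stand-in for the configured batch size
    try (LineNumberReader reader = new LineNumberReader(new StringReader("a\nb\nc\n"))) {
      List<String> batch = new ArrayList<>(batchSize);
      String line;
      // Stop at the batch limit or end of file, whichever comes first.
      while (batch.size() < batchSize && null != (line = reader.readLine())) {
        batch.add(line);
      }
      // getLineNumber() doubles as the offset, just as in the task that follows.
      System.out.println(batch + " offset=" + reader.getLineNumber());
    }
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTask.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.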
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import org.apache.kafka.connect.data.Schema;
19 | import org.apache.kafka.connect.data.SchemaAndValue;
20 | import org.apache.kafka.connect.source.SourceRecord;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 | 
24 | import java.io.IOException;
25 | import java.util.ArrayList;
26 | import java.util.List;
27 | import java.util.Map;
28 | 
29 | public class SpoolDirLineDelimitedSourceTask extends AbstractSourceTask {
30 | private static final Logger log = LoggerFactory.getLogger(SpoolDirLineDelimitedSourceTask.class);
31 | 
32 | @Override
33 | protected SpoolDirLineDelimitedSourceConnectorConfig config(Map<String, String> settings) {
34 | return new SpoolDirLineDelimitedSourceConnectorConfig(settings);
35 | }
36 | 
37 | @Override
38 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
39 | this.inputFile.openLineNumberReader(this.config.charset);
40 | }
41 | 
42 | @Override
43 | protected List<SourceRecord> process() throws IOException {
44 | int recordCount = 0;
45 | List<SourceRecord> records = new ArrayList<>(this.config.batchSize);
46 | String line = null;
47 | while (recordCount < this.config.batchSize && null != (line = this.inputFile.lineNumberReader().readLine())) {
48 | SourceRecord record = record(
49 | null,
50 | new SchemaAndValue(Schema.STRING_SCHEMA, line),
51 | null
52 | );
53 | records.add(record);
54 | recordCount++;
55 | }
56 | return records;
57 | }
58 | 
59 | @Override
60 | protected long recordOffset() {
61 | long result = -1L;
62 | 
63 | if (null != this.inputFile && null != this.inputFile.lineNumberReader()) {
64 | result = this.inputFile.lineNumberReader().getLineNumber();
65 | }
66 | 
67 | return result;
68 | }
69 | }
70 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.Description;
19 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
20 | import com.github.jcustenborder.kafka.connect.utils.config.Title;
21 | import org.apache.kafka.common.config.ConfigDef;
22 | import org.apache.kafka.connect.connector.Task;
23 | 
24 | import java.util.Map;
25 | 
26 | @Title("Schema Less Json Source Connector")
27 | @Description("This connector is used to stream JSON files from a directory. " +
28 | "This connector will read each file node by node, writing each node as a record to Kafka."
+
29 | "For example, if your data file contains several JSON objects, the connector will read from { to } " +
30 | "for each object and write each object to Kafka.")
31 | @DocumentationImportant("This connector does not try to convert the JSON records to a schema. " +
32 | "The recommended converter to use is the StringConverter. " +
33 | "Example: `value.converter=org.apache.kafka.connect.storage.StringConverter`")
34 | public class SpoolDirSchemaLessJsonSourceConnector extends AbstractSourceConnector {
35 | @Override
36 | protected SpoolDirSchemaLessJsonSourceConnectorConfig config(Map<String, String> settings) {
37 | return new SpoolDirSchemaLessJsonSourceConnectorConfig(settings);
38 | }
39 | 
40 | @Override
41 | public Class<? extends Task> taskClass() {
42 | return SpoolDirSchemaLessJsonSourceTask.class;
43 | }
44 | 
45 | @Override
46 | public ConfigDef config() {
47 | return SpoolDirSchemaLessJsonSourceConnectorConfig.config();
48 | }
49 | }
50 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder;
19 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigUtils;
20 | import org.apache.kafka.common.config.ConfigDef;
21 | 
22 | import java.nio.charset.Charset;
23 | import java.util.Map;
24 | 
25 | public class SpoolDirSchemaLessJsonSourceConnectorConfig extends AbstractSourceConnectorConfig {
26 | public static final String CHARSET_CONF = "file.charset";
27 | static final String CHARSET_DOC = "Character set to read the file with.";
28 | static final String CHARSET_DEFAULT = Charset.defaultCharset().name();
29 | static final String CHARSET_DISPLAY = "File character set.";
30 | 
31 | public final Charset charset;
32 | 
33 | public SpoolDirSchemaLessJsonSourceConnectorConfig(Map<String, String> originals) {
34 | super(config(), originals, true);
35 | this.charset = ConfigUtils.charset(this, CHARSET_CONF);
36 | }
37 | 
38 | public static ConfigDef config() {
39 | return AbstractSourceConnectorConfig.config(true)
40 | .define(
41 | ConfigKeyBuilder.of(CHARSET_CONF, ConfigDef.Type.STRING)
42 | .defaultValue(CHARSET_DEFAULT)
43 | .validator(SpoolDirCsvSourceConnectorConfig.CharsetValidator.of())
44 | .importance(ConfigDef.Importance.LOW)
45 | .documentation(CHARSET_DOC)
46 | .displayName(CHARSET_DISPLAY)
47 | .width(ConfigDef.Width.LONG)
48 | .build()
49 | );
50 | }
51 | }
52 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTask.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir;
17 | 
18 | import com.fasterxml.jackson.core.JsonParser;
19 | import com.fasterxml.jackson.databind.JsonNode;
20 | import com.fasterxml.jackson.databind.MappingIterator;
21 | import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
22 | import org.apache.kafka.connect.data.Schema;
23 | import org.apache.kafka.connect.data.SchemaAndValue;
24 | import org.apache.kafka.connect.source.SourceRecord;
25 | import org.slf4j.Logger;
26 | import org.slf4j.LoggerFactory;
27 | 
28 | import java.io.IOException;
29 | import java.io.InputStream;
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | import java.util.Map;
33 | 
34 | public class SpoolDirSchemaLessJsonSourceTask extends AbstractSourceTask {
35 | private static final Logger log = LoggerFactory.getLogger(SpoolDirSchemaLessJsonSourceTask.class);
36 | 
37 | @Override
38 | protected SpoolDirSchemaLessJsonSourceConnectorConfig config(Map<String, String> settings) {
39 | return new SpoolDirSchemaLessJsonSourceConnectorConfig(settings);
40 | }
41 | 
42 | JsonParser parser;
43 | MappingIterator<JsonNode> nodeIterator;
44 | 
45 | long recordOffset;
46 | 
47 | @Override
48 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
49 | if (null != this.parser) {
50 | this.parser.close();
51 | }
52 | this.recordOffset = 0;
53 | InputStream inputStream = inputFile.openStream();
54 | this.parser = ObjectMapperFactory.INSTANCE.getJsonFactory().createParser(inputStream);
55 | this.nodeIterator = ObjectMapperFactory.INSTANCE.readValues(this.parser, JsonNode.class);
56 | 
57 | }
58 | 
59 | @Override
60 | protected List<SourceRecord> process() throws IOException {
61 | int recordCount = 0;
62 | List<SourceRecord> records = new ArrayList<>(this.config.batchSize);
63 | while (recordCount < this.config.batchSize && this.nodeIterator.hasNext()) {
64 | JsonNode node = this.nodeIterator.next();
65 | String value = ObjectMapperFactory.INSTANCE.writeValueAsString(node);
66 | SourceRecord record = record(
67 | null,
68 | new SchemaAndValue(Schema.STRING_SCHEMA, value),
69 | null
70 | );
71 | records.add(record);
72 | recordCount++;
73 | recordOffset++;
74 | }
75 | return records;
76 | }
77 | 
78 | @Override
79 | protected long recordOffset() {
80 | return this.recordOffset;
81 | }
82 | }
83 | 
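A minimal sketch of the node-by-node read the task above performs, using a plain `ObjectMapper` in place of the shared `ObjectMapperFactory`. The concatenated JSON input is invented for illustration.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;

public class SchemaLessJsonSketch {
  public static void main(String[] args) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    // Two concatenated JSON documents, as they might appear in one input file.
    String input = "{\"id\":1}{\"id\":2}";
    MappingIterator<JsonNode> nodes = mapper.readerFor(JsonNode.class).readValues(input);
    while (nodes.hasNext()) {
      // Each node is re-serialized to a String, which is what the task writes to Kafka.
      System.out.println(mapper.writeValueAsString(nodes.next()));
    }
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversion.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.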
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf;
17 | 
18 | import com.github.jcustenborder.kafka.connect.spooldir.elf.converters.LogFieldConverter;
19 | import com.github.jcustenborder.parsers.elf.LogEntry;
20 | import org.apache.kafka.connect.data.Schema;
21 | import org.apache.kafka.connect.data.SchemaAndValue;
22 | import org.apache.kafka.connect.data.Struct;
23 | import org.slf4j.Logger;
24 | import org.slf4j.LoggerFactory;
25 | 
26 | import java.util.List;
27 | 
28 | public class SchemaConversion {
29 | private static final Logger log = LoggerFactory.getLogger(SchemaConversion.class);
30 | private final Schema valueSchema;
31 | private final List<LogFieldConverter> valueConverters;
32 | 
33 | SchemaConversion(Schema valueSchema, List<LogFieldConverter> valueConverters) {
34 | this.valueSchema = valueSchema;
35 | this.valueConverters = valueConverters;
36 | }
37 | 
38 | 
39 | static SchemaAndValue convert(Schema schema, List<LogFieldConverter> converters, LogEntry entry) {
40 | final SchemaAndValue result;
41 | if (null == schema) {
42 | result = SchemaAndValue.NULL;
43 | } else {
44 | Struct struct = new Struct(schema);
45 | for (LogFieldConverter converter : converters) {
46 | converter.convert(entry, struct);
47 | }
48 | struct.validate();
49 | result = new SchemaAndValue(schema, struct);
50 | }
51 | return result;
52 | }
53 | 
54 | public SchemaAndValue convert(LogEntry entry) {
55 | final SchemaAndValue value = convert(this.valueSchema, this.valueConverters, entry);
56 | return value;
57 | }
58 | }
59 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilder.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf;
17 | 
18 | import com.github.jcustenborder.kafka.connect.spooldir.elf.converters.LogFieldConverter;
19 | import com.github.jcustenborder.kafka.connect.spooldir.elf.converters.LogFieldConverterFactory;
20 | import com.github.jcustenborder.parsers.elf.ElfParser;
21 | import com.google.common.base.Preconditions;
22 | import org.apache.kafka.connect.data.Schema;
23 | import org.apache.kafka.connect.data.SchemaBuilder;
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 | 
27 | import java.time.LocalDate;
28 | import java.time.LocalTime;
29 | import java.util.ArrayList;
30 | import java.util.List;
31 | import java.util.Map;
32 | 
33 | public class SchemaConversionBuilder {
34 | private static final Logger log = LoggerFactory.getLogger(SchemaConversionBuilder.class);
35 | final ElfParser parser;
36 | 
37 | public SchemaConversionBuilder(ElfParser parser) {
38 | this.parser = parser;
39 | }
40 | 
41 | static String normalizeFieldName(String fieldName) {
42 | Preconditions.checkNotNull(fieldName, "fieldName cannot be null.");
43 | final String result = fieldName.replace('(', '_')
44 | .replace(")", "")
45 | .replace('-', '_')
46 | .toLowerCase();
47 | return result;
48 | }
49 | 
50 | 
51 | public SchemaConversion build() {
52 | log.trace("build() - Building SchemaConversion");
53 | 
54 | final SchemaBuilder valueBuilder = SchemaBuilder.struct();
55 | valueBuilder.name("com.github.jcustenborder.kafka.connect.spooldir.LogEntry");
56 | 
57 | LogFieldConverterFactory factory = new LogFieldConverterFactory();
58 | List<LogFieldConverter> valueConverters = new ArrayList<>();
59 | 
60 | for (Map.Entry<String, Class<?>> entry : this.parser.fieldTypes().entrySet()) {
61 | final String logFieldName = entry.getKey();
62 | final Class<?> logFieldClass = entry.getValue();
63 | final String connectFieldName = normalizeFieldName(logFieldName);
64 | log.trace("build() - Mapping log field '{}' to schema field '{}'", logFieldName, connectFieldName);
65 | final LogFieldConverter converter = factory.create(
66 | valueBuilder,
67 | logFieldClass,
68 | logFieldName,
69 | connectFieldName
70 | );
71 | valueConverters.add(converter);
72 | }
73 | 
74 | if (LocalDate.class.equals(this.parser.fieldTypes().get("date")) && LocalTime.class.equals(this.parser.fieldTypes().get("time"))) {
75 | log.trace("build() - found date and time field. Creating datetime field.");
76 | final LogFieldConverter converter = factory.createDateTime(
77 | valueBuilder,
78 | "date",
79 | "time",
80 | "datetime"
81 | );
82 | valueConverters.add(converter);
83 | }
84 | 
85 | final Schema valueSchema = valueBuilder.build();
86 | 
87 | return new SchemaConversion(valueSchema, valueConverters);
88 | }
89 | }
90 | 
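For illustration, the same normalization applied to a few typical Extended Log File Format field names (the sample names are chosen here, not taken from a real log):

public class FieldNameSketch {
  public static void main(String[] args) {
    for (String name : new String[]{"cs(User-Agent)", "sc-status", "time-taken"}) {
      // Mirrors normalizeFieldName above: parentheses and dashes become underscores.
      String normalized = name.replace('(', '_').replace(")", "").replace('-', '_').toLowerCase();
      System.out.println(name + " -> " + normalized); // e.g. cs(User-Agent) -> cs_user_agent
    }
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.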
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf;
17 | 
18 | import com.github.jcustenborder.kafka.connect.spooldir.AbstractSourceConnectorConfig;
19 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
20 | import com.github.jcustenborder.kafka.connect.utils.config.Description;
21 | import com.github.jcustenborder.kafka.connect.utils.config.Title;
22 | import org.apache.kafka.common.config.ConfigDef;
23 | import org.apache.kafka.connect.connector.Task;
24 | import org.apache.kafka.connect.source.SourceConnector;
25 | 
26 | import java.util.ArrayList;
27 | import java.util.LinkedHashMap;
28 | import java.util.List;
29 | import java.util.Map;
30 | 
31 | @Title("Extended Log File Format Source Connector")
32 | @Description("This connector is used to stream Extended Log File Format " +
33 | "files from a directory while converting the data to a strongly typed schema.")
34 | public class SpoolDirELFSourceConnector extends SourceConnector {
35 | 
36 | @Override
37 | public List<Map<String, String>> taskConfigs(int taskCount) {
38 | List<Map<String, String>> result = new ArrayList<>();
39 | 
40 | for (int i = 0; i < taskCount; i++) {
41 | Map<String, String> taskConfig = new LinkedHashMap<>(this.settings);
42 | taskConfig.put(AbstractSourceConnectorConfig.TASK_INDEX_CONF, Integer.toString(i));
43 | taskConfig.put(AbstractSourceConnectorConfig.TASK_COUNT_CONF, Integer.toString(taskCount));
44 | result.add(taskConfig);
45 | }
46 | 
47 | return result;
48 | }
49 | 
50 | @Override
51 | public void stop() {
52 | 
53 | }
54 | 
55 | @Override
56 | public String version() {
57 | return VersionUtil.version(this.getClass());
58 | }
59 | 
60 | Map<String, String> settings;
61 | 
62 | @Override
63 | public void start(Map<String, String> settings) {
64 | SpoolDirELFSourceConnectorConfig config = new SpoolDirELFSourceConnectorConfig(settings); // constructed eagerly so invalid settings fail fast
65 | this.settings = settings;
66 | }
67 | 
68 | @Override
69 | public Class<? extends Task> taskClass() {
70 | return SpoolDirELFSourceTask.class;
71 | }
72 | 
73 | @Override
74 | public ConfigDef config() {
75 | return SpoolDirELFSourceConnectorConfig.config(true);
76 | }
77 | }
78 | 
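The task index and count handed out above feed the file-partitioning rule described in this plugin's docs (`hash() % totalTasks == taskNumber`). Below is a hedged sketch of that rule which assumes the file name is what gets hashed; the real predicate lives in `AbstractTaskPartitionerPredicate` and may hash a different attribute.

public class TaskPartitionSketch {
  public static void main(String[] args) {
    int totalTasks = 3; // stand-in for the task.count handed to each task
    for (String fileName : new String[]{"orders-1.log", "orders-2.log", "orders-3.log"}) {
      // Illustrative only: each file is owned by exactly one task index.
      int owner = Math.abs(fileName.hashCode()) % totalTasks;
      System.out.printf("%s -> task %d of %d%n", fileName, owner, totalTasks);
    }
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.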
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf;
17 | 
18 | import com.github.jcustenborder.kafka.connect.spooldir.AbstractSourceConnectorConfig;
19 | import org.apache.kafka.common.config.ConfigDef;
20 | 
21 | import java.util.Map;
22 | 
23 | class SpoolDirELFSourceConnectorConfig extends AbstractSourceConnectorConfig {
24 | 
25 | public SpoolDirELFSourceConnectorConfig(Map<String, String> settings) {
26 | super(config(true), settings, true);
27 | }
28 | 
29 | public static ConfigDef config(boolean bufferedInputStream) {
30 | return AbstractSourceConnectorConfig.config(bufferedInputStream);
31 | }
32 | }
33 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTask.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf;
17 | 
18 | import com.github.jcustenborder.kafka.connect.spooldir.AbstractSourceTask;
19 | import com.github.jcustenborder.kafka.connect.spooldir.InputFile;
20 | import com.github.jcustenborder.parsers.elf.ElfParser;
21 | import com.github.jcustenborder.parsers.elf.ElfParserBuilder;
22 | import com.github.jcustenborder.parsers.elf.LogEntry;
23 | import org.apache.kafka.connect.data.SchemaAndValue;
24 | import org.apache.kafka.connect.errors.ConnectException;
25 | import org.apache.kafka.connect.source.SourceRecord;
26 | import org.slf4j.Logger;
27 | import org.slf4j.LoggerFactory;
28 | 
29 | import java.io.IOException;
30 | import java.io.InputStream;
31 | import java.util.ArrayList;
32 | import java.util.List;
33 | import java.util.Map;
34 | 
35 | public class SpoolDirELFSourceTask extends AbstractSourceTask {
36 | private static final Logger log = LoggerFactory.getLogger(SpoolDirELFSourceTask.class);
37 | ElfParser parser;
38 | ElfParserBuilder parserBuilder;
39 | SchemaConversion conversion;
40 | long offset;
41 | 
42 | @Override
43 | protected SpoolDirELFSourceConnectorConfig config(Map<String, String> settings) {
44 | return new SpoolDirELFSourceConnectorConfig(settings);
45 | }
46 | 
47 | @Override
48 | public void start(Map<String, String> settings) {
49 | super.start(settings);
50 | this.parserBuilder = ElfParserBuilder.of();
51 | }
52 | 
53 | 
54 | @Override
55 | protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
56 | if (null != this.parser) {
57 | log.trace("configure() - Closing existing parser.");
58 | this.parser.close();
59 | }
60 | InputStream inputStream = inputFile.openStream();
61 | this.parser = this.parserBuilder.build(inputStream);
62 | SchemaConversionBuilder builder = new SchemaConversionBuilder(this.parser);
63 | this.conversion = builder.build();
64 | 
65 | this.offset = -1;
66 | 
67 | if (null != lastOffset) {
68 | int skippedRecords = 0;
69 | while (skippedRecords < lastOffset && 
null != next()) { // count first, so no entry beyond lastOffset is consumed
70 | skippedRecords++;
71 | }
72 | log.trace("configure() - Skipped {} record(s).", skippedRecords);
73 | log.info("configure() - Starting on offset {}", this.offset);
74 | }
75 | }
76 | 
77 | LogEntry next() throws IOException {
78 | this.offset++;
79 | return this.parser.next();
80 | }
81 | 
82 | @Override
83 | protected List<SourceRecord> process() {
84 | int recordCount = 0;
85 | List<SourceRecord> records = new ArrayList<>(this.config.batchSize);
86 | 
87 | LogEntry entry;
88 | try {
89 | while (recordCount < this.config.batchSize && null != (entry = next())) { // check capacity first so a consumed entry is never discarded
90 | log.trace("process() - Processing LogEntry: {}", entry);
91 | SchemaAndValue value = conversion.convert(entry);
92 | SourceRecord record = record(SchemaAndValue.NULL, value, null);
93 | records.add(record);
94 | recordCount++;
95 | }
96 | } catch (IOException ex) {
97 | throw new ConnectException(ex);
98 | }
99 | return records;
100 | }
101 | 
102 | @Override
103 | protected long recordOffset() {
104 | return this.offset;
105 | }
106 | }
107 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalDateLogFieldConverter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf.converters;
17 | 
18 | import org.apache.kafka.connect.data.Field;
19 | 
20 | import java.sql.Date;
21 | import java.time.Instant;
22 | import java.time.LocalDate;
23 | import java.time.ZoneId;
24 | 
25 | public class LocalDateLogFieldConverter extends LogFieldConverter {
26 | private static final ZoneId ZONE_ID = ZoneId.of("UTC");
27 | 
28 | @Override
29 | protected Object convert(Object input) {
30 | final LocalDate localDate = (LocalDate) input;
31 | final Instant instant = localDate.atStartOfDay(ZONE_ID).toInstant();
32 | return Date.from(instant);
33 | }
34 | 
35 | public LocalDateLogFieldConverter(String logFieldName, Field field) {
36 | super(logFieldName, field);
37 | }
38 | }
39 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalTimeLogFieldConverter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir.elf.converters; 17 | 18 | import org.apache.kafka.connect.data.Field; 19 | 20 | import java.sql.Date; 21 | import java.time.Instant; 22 | import java.time.LocalDate; 23 | import java.time.LocalTime; 24 | import java.time.ZoneOffset; 25 | 26 | public class LocalTimeLogFieldConverter extends LogFieldConverter { 27 | private static final LocalDate EPOCH_DATE = LocalDate.ofEpochDay(0); 28 | 29 | @Override 30 | protected Object convert(Object input) { 31 | final LocalTime localTime = (LocalTime) input; 32 | final Instant instant = localTime.atDate(EPOCH_DATE).toInstant(ZoneOffset.UTC); 33 | return Date.from(instant); 34 | } 35 | 36 | public LocalTimeLogFieldConverter(String logFieldName, Field field) { 37 | super(logFieldName, field); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LogFieldConverter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir.elf.converters; 17 | 18 | import com.github.jcustenborder.parsers.elf.LogEntry; 19 | import org.apache.kafka.connect.data.Field; 20 | import org.apache.kafka.connect.data.Struct; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | public abstract class LogFieldConverter { 25 | private static final Logger log = LoggerFactory.getLogger(LogFieldConverter.class); 26 | protected final String logFieldName; 27 | protected final Field field; 28 | 29 | protected abstract Object convert(Object input); 30 | 31 | public LogFieldConverter(String logFieldName, Field field) { 32 | this.logFieldName = logFieldName; 33 | this.field = field; 34 | } 35 | 36 | public void convert(LogEntry logEntry, Struct struct) { 37 | final Object input = logEntry.fieldData().get(this.logFieldName); 38 | final Object output; 39 | if (null == input) { 40 | output = null; 41 | } else { 42 | output = convert(input); 43 | } 44 | 45 | log.trace("convert() - Setting {} to {}", field.name(), output); 46 | struct.put(this.field, output); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/PrimitiveLogFieldConverter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir.elf.converters; 17 | 18 | import org.apache.kafka.connect.data.Field; 19 | 20 | public class PrimitiveLogFieldConverter extends LogFieldConverter { 21 | @Override 22 | protected Object convert(Object input) { 23 | return input; 24 | } 25 | 26 | public PrimitiveLogFieldConverter(String logFieldName, Field field) { 27 | super(logFieldName, field); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/TimestampLogFieldConverter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */
16 | package com.github.jcustenborder.kafka.connect.spooldir.elf.converters;
17 | 
18 | import com.github.jcustenborder.parsers.elf.LogEntry;
19 | import org.apache.kafka.connect.data.Field;
20 | import org.apache.kafka.connect.data.Struct;
21 | 
22 | import java.sql.Date;
23 | import java.time.Instant;
24 | import java.time.LocalDate;
25 | import java.time.LocalTime;
26 | import java.time.ZoneOffset;
27 | 
28 | public class TimestampLogFieldConverter extends LogFieldConverter {
29 | private final String timeField;
30 | private final String dateField;
31 | 
32 | public TimestampLogFieldConverter(Field field, String timeField, String dateField) {
33 | super(null, field);
34 | this.timeField = timeField;
35 | this.dateField = dateField;
36 | }
37 | 
38 | @Override
39 | protected Object convert(Object input) {
40 | return null; // unused: this converter overrides convert(LogEntry, Struct) below
41 | }
42 | 
43 | @Override
44 | public void convert(LogEntry logEntry, Struct struct) {
45 | final LocalDate date = (LocalDate) logEntry.fieldData().get(this.dateField);
46 | final LocalTime time = (LocalTime) logEntry.fieldData().get(this.timeField);
47 | 
48 | final Object value;
49 | 
50 | if (null == date || null == time) {
51 | value = null;
52 | } else {
53 | final Instant instant = time.atDate(date).toInstant(ZoneOffset.UTC);
54 | value = Date.from(instant);
55 | }
56 | struct.put(this.field, value);
57 | }
58 | }
59 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/jcustenborder/kafka/connect/spooldir/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | @Introduction(
17 | "This Kafka Connect connector provides the capability to watch a directory for files and " +
18 | "read the data as new files are written to the input directory. Each of the records in the " +
19 | "input file will be converted based on the user-supplied schema. The connectors in this project " +
20 | "handle many different use cases, such as ingesting JSON, CSV, TSV, Avro, or binary files.")
21 | @Title("Spool Dir")
22 | @DocumentationWarning("Running these connectors with multiple tasks requires a shared volume across " +
23 | "all of the Kafka Connect workers. Kafka Connect does not have a mechanism for synchronization of " +
24 | "tasks. Because of this, each task selects which files it will process using the following " +
25 | "algorithm: `hash(fileName) % totalTasks == taskNumber`. If you are not using a shared volume " +
26 | "this could cause issues where files are not processed. Using more than one task could also affect " +
27 | "the order in which the data is written to Kafka.")
28 | @PluginOwner("jcustenborder")
29 | @PluginName("kafka-connect-spooldir")
30 | @DocumentationNote("Each of the connectors in this plugin emits the following headers for each record " +
31 | "written to Kafka. 
\n\n" + 32 | "* `file.path` - The absolute path to the file ingested.\n" + 33 | "* `file.name` - The name part of the file ingested.\n" + 34 | "* `file.name.without.extension` - The file name without the extension part of the file.\n" + 35 | "* `file.last.modified` - The last modified date of the file.\n" + 36 | "* `file.length` - The size of the file in bytes.\n" + 37 | "* `file.offset` - The offset for this piece of data within the file.\n" 38 | ) 39 | package com.github.jcustenborder.kafka.connect.spooldir; 40 | 41 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationNote; 42 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationWarning; 43 | import com.github.jcustenborder.kafka.connect.utils.config.Introduction; 44 | import com.github.jcustenborder.kafka.connect.utils.config.PluginName; 45 | import com.github.jcustenborder.kafka.connect.utils.config.PluginOwner; 46 | import com.github.jcustenborder.kafka.connect.utils.config.Title; -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicyTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.collect.ImmutableMap; 4 | import com.google.common.io.Files; 5 | import org.junit.jupiter.api.AfterEach; 6 | import org.junit.jupiter.api.BeforeEach; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.File; 10 | import java.io.IOException; 11 | import java.io.InputStreamReader; 12 | import java.io.LineNumberReader; 13 | import java.util.Objects; 14 | 15 | import static org.junit.jupiter.api.Assertions.assertFalse; 16 | import static org.junit.jupiter.api.Assertions.assertTrue; 17 | import static org.mockito.Mockito.mock; 18 | import static org.mockito.Mockito.only; 19 | import static org.mockito.Mockito.verify; 20 | 21 | public abstract class AbstractCleanUpPolicyTest { 22 | 23 | InputFile inputFile; 24 | File inputPath; 25 | File finishedPath; 26 | File errorPath; 27 | String inputPathSubDir; 28 | protected T cleanupPolicy; 29 | 30 | protected abstract T create( 31 | InputFile inputFile, File errorPath, File finishedPath 32 | ); 33 | 34 | protected String defineInputPathSubDir() { 35 | return null; 36 | } 37 | 38 | protected ImmutableMap.Builder getConnectorConfigMap() { 39 | return new ImmutableMap.Builder() 40 | .put(SpoolDirBinaryFileSourceConnectorConfig.TOPIC_CONF, "foo") 41 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_CONFIG, this.inputPath.toString()) 42 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.$") 43 | .put(SpoolDirBinaryFileSourceConnectorConfig.ERROR_PATH_CONFIG, this.errorPath.toString()) 44 | .put(SpoolDirBinaryFileSourceConnectorConfig.FINISHED_PATH_CONFIG, this.finishedPath.toString()); 45 | } 46 | 47 | @BeforeEach 48 | public void before() throws IOException { 49 | this.errorPath = Files.createTempDir(); 50 | this.finishedPath = Files.createTempDir(); 51 | this.inputPath = Files.createTempDir(); 52 | this.inputPathSubDir = defineInputPathSubDir(); 53 | 54 | File tempFileParentPathDir = this.inputPath; 55 | if (this.inputPathSubDir != null) { 56 | tempFileParentPathDir = new File(this.inputPath, this.inputPathSubDir); 57 | tempFileParentPathDir.mkdirs(); 58 | } 59 | 60 | File inputFile = File.createTempFile("input", "file", tempFileParentPathDir); 61 | 62 | SpoolDirBinaryFileSourceConnectorConfig 
config = 63 | new SpoolDirBinaryFileSourceConnectorConfig(getConnectorConfigMap().build()); 64 | 65 | this.inputFile = new InputFile(config, inputFile); 66 | this.inputFile.inputStreamReader = mock(InputStreamReader.class); 67 | this.inputFile.lineNumberReader = mock(LineNumberReader.class); 68 | this.cleanupPolicy = create(this.inputFile, this.errorPath, this.finishedPath); 69 | } 70 | 71 | protected File getTargetFilePath(File containerPath, InputFile inputFile) { 72 | String subDir = (this.defineInputPathSubDir() != null ? this.defineInputPathSubDir() : ""); 73 | return new File(new File(containerPath, subDir), inputFile.getName()); 74 | } 75 | 76 | @Test 77 | public void error() throws IOException { 78 | assertTrue(this.inputFile.exists(), "Input file should exist"); 79 | this.cleanupPolicy.error(); 80 | assertFalse(this.inputFile.exists(), "Input file should not exist"); 81 | File erroredFile = this.getTargetFilePath(this.errorPath, this.inputFile); 82 | assertTrue(erroredFile.exists(), "errored file should exist."); 83 | } 84 | 85 | void delete(File file) { 86 | if (file.isDirectory()) { 87 | for (File child : Objects.requireNonNull(file.listFiles())) { 88 | delete(child); 89 | } 90 | } 91 | 92 | file.delete(); 93 | } 94 | 95 | @AfterEach 96 | public void after() throws IOException { 97 | delete(this.finishedPath); 98 | delete(this.errorPath); 99 | delete(this.inputPath); 100 | verify(this.inputFile.inputStreamReader, only()).close(); 101 | verify(this.inputFile.lineNumberReader, only()).close(); 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGeneratorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.io.Files; 19 | import org.junit.jupiter.api.BeforeEach; 20 | 21 | import java.io.File; 22 | import java.util.LinkedHashMap; 23 | import java.util.Map; 24 | 25 | public class AbstractSchemaGeneratorTest { 26 | protected Map<String, String> settings; 27 | File tempRoot; 28 | File inputPath; 29 | File finishedPath; 30 | File errorPath; 31 | 32 | @BeforeEach 33 | public void createTempDir() { 34 | this.tempRoot = Files.createTempDir(); 35 | this.inputPath = new File(this.tempRoot, "input"); 36 | this.inputPath.mkdirs(); 37 | this.finishedPath = new File(this.tempRoot, "finished"); 38 | this.finishedPath.mkdirs(); 39 | this.errorPath = new File(this.tempRoot, "error"); 40 | this.errorPath.mkdirs(); 41 | 42 | this.settings = new LinkedHashMap<>(); 43 | this.settings.put(AbstractSourceConnectorConfig.INPUT_PATH_CONFIG, this.inputPath.getAbsolutePath()); 44 | this.settings.put(AbstractSourceConnectorConfig.FINISHED_PATH_CONFIG, this.finishedPath.getAbsolutePath()); 45 | this.settings.put(AbstractSourceConnectorConfig.ERROR_PATH_CONFIG, this.errorPath.getAbsolutePath()); 46 | this.settings.put(AbstractSourceConnectorConfig.TOPIC_CONF, "dummy"); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.io.Files; 19 | import org.junit.jupiter.api.AfterEach; 20 | import org.junit.jupiter.api.BeforeEach; 21 | import org.junit.jupiter.api.Test; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.nio.file.FileVisitResult; 28 | import java.nio.file.Path; 29 | import java.nio.file.SimpleFileVisitor; 30 | import java.nio.file.attribute.BasicFileAttributes; 31 | import java.util.LinkedHashMap; 32 | import java.util.Map; 33 | 34 | import static org.junit.jupiter.api.Assertions.assertNotNull; 35 | 36 | public abstract class AbstractSpoolDirSourceConnectorTest<T extends AbstractSpoolDirSourceConnector> { 37 | private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceConnectorTest.class); 38 | protected T connector; 39 | protected Map<String, String> settings; 40 | File tempRoot; 41 | File inputPath; 42 | File finishedPath; 43 | File errorPath; 44 | 45 | protected abstract T createConnector(); 46 | 47 | @BeforeEach 48 | public void before() { 49 | this.connector = createConnector(); 50 | } 51 | 52 | @Test 53 | public void taskClass() { 54 | assertNotNull(this.connector.taskClass()); 55 | } 56 | 57 | @BeforeEach 58 | public void createTempDir() { 59 | this.tempRoot = Files.createTempDir(); 60 | this.inputPath = new File(this.tempRoot, "input"); 61 | this.inputPath.mkdirs(); 62 | this.finishedPath = new File(this.tempRoot, "finished"); 63 | this.finishedPath.mkdirs(); 64 | this.errorPath = new File(this.tempRoot, "error"); 65 | this.errorPath.mkdirs(); 66 | 67 | this.settings = new LinkedHashMap<>(); 68 | this.settings.put(AbstractSourceConnectorConfig.INPUT_PATH_CONFIG, this.inputPath.getAbsolutePath()); 69 | this.settings.put(AbstractSourceConnectorConfig.FINISHED_PATH_CONFIG, this.finishedPath.getAbsolutePath()); 70 | this.settings.put(AbstractSourceConnectorConfig.ERROR_PATH_CONFIG, this.errorPath.getAbsolutePath()); 71 | this.settings.put(AbstractSourceConnectorConfig.TOPIC_CONF, "dummy"); 72 | this.settings.put(AbstractSpoolDirSourceConnectorConfig.SCHEMA_GENERATION_ENABLED_CONF, "true"); 73 | } 74 | 75 | @AfterEach 76 | public void cleanupTempDir() throws IOException { 77 | java.nio.file.Files.walkFileTree(this.tempRoot.toPath(), new SimpleFileVisitor<Path>() { 78 | @Override 79 | public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { 80 | log.trace("cleanupTempDir() - Removing {}", file); 81 | java.nio.file.Files.delete(file); 82 | return FileVisitResult.CONTINUE; 83 | } 84 | 85 | @Override 86 | public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { 87 | log.trace("cleanupTempDir() - Removing {}", file); 88 | java.nio.file.Files.delete(file); 89 | return FileVisitResult.CONTINUE; 90 | } 91 | }); 92 | } 93 | 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ByNameAbstractTaskPartitionerPredicateTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.DynamicTest; 4 | import org.junit.jupiter.api.TestFactory; 5 | 6 | import java.io.File; 7 | import java.util.ArrayList; 8 | import java.util.LinkedHashSet; 9 | import java.util.List; 10 | import java.util.Set; 11 | import java.util.UUID; 12 | import java.util.stream.IntStream; 13 | 
import java.util.stream.Stream; 14 | 15 | import static org.junit.jupiter.api.Assertions.assertEquals; 16 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 17 | 18 | public class ByNameAbstractTaskPartitionerPredicateTest { 19 | protected List<File> input; 20 | 21 | 22 | @TestFactory 23 | public Stream<DynamicTest> test() { 24 | List<File> files = new ArrayList<>(500); 25 | for (int i = 0; i < 500; i++) { 26 | files.add(new File(UUID.randomUUID().toString())); 27 | } 28 | return IntStream.range(2, 50).boxed().map(count -> dynamicTest(count.toString(), () -> { 29 | Set<File> queue = new LinkedHashSet<>(files); 30 | for (int index = 0; index <= count; index++) { 31 | AbstractTaskPartitionerPredicate.ByName predicate = new AbstractTaskPartitionerPredicate.ByName(index, count); 32 | files.stream() 33 | .filter(predicate) 34 | .forEach(queue::remove); 35 | } 36 | assertEquals(0, queue.size(), "Queue should be empty"); 37 | })); 38 | } 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGeneratorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import org.apache.kafka.connect.data.Schema; 19 | import org.apache.kafka.connect.data.SchemaBuilder; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.io.File; 23 | import java.io.IOException; 24 | import java.util.Arrays; 25 | import java.util.Map; 26 | 27 | import static com.github.jcustenborder.kafka.connect.utils.AssertSchema.assertSchema; 28 | 29 | public class CsvSchemaGeneratorTest extends AbstractSchemaGeneratorTest { 30 | 31 | @Test 32 | public void foo() throws IOException { 33 | File inputFile = new File("src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data"); 34 | this.settings.put(SpoolDirCsvSourceConnectorConfig.CSV_FIRST_ROW_AS_HEADER_CONF, "true"); 35 | CsvSchemaGenerator schemaGenerator = new CsvSchemaGenerator(settings); 36 | Map.Entry<Schema, Schema> kvp = schemaGenerator.generate(inputFile, Arrays.asList("id")); 37 | final Schema expectedKeySchema = SchemaBuilder.struct() 38 | .name("com.github.jcustenborder.kafka.connect.model.Key") 39 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 40 | .build(); 41 | 42 | final Schema expectedValueSchema = SchemaBuilder.struct() 43 | .name("com.github.jcustenborder.kafka.connect.model.Value") 44 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 45 | .field("first_name", Schema.OPTIONAL_STRING_SCHEMA) 46 | .field("last_name", Schema.OPTIONAL_STRING_SCHEMA) 47 | .field("email", Schema.OPTIONAL_STRING_SCHEMA) 48 | .field("gender", Schema.OPTIONAL_STRING_SCHEMA) 49 | .field("ip_address", Schema.OPTIONAL_STRING_SCHEMA) 50 | .field("last_login", Schema.OPTIONAL_STRING_SCHEMA) 51 | .field("account_balance", Schema.OPTIONAL_STRING_SCHEMA) 52 | .field("country", Schema.OPTIONAL_STRING_SCHEMA) 53 | .field("favorite_color", Schema.OPTIONAL_STRING_SCHEMA) 54 | .build(); 55 | 56 | assertSchema(expectedKeySchema, kvp.getKey(), "key schema does not match."); 57 | assertSchema(expectedValueSchema, kvp.getValue(), "value schema does not match."); 58 | } 59 | 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsNoRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertFalse; 11 | 12 | public class DeleteCleanupPolicySubDirsNoRetainTest extends DeleteCleanupPolicyTest { 13 | @Override 14 | protected String defineInputPathSubDir() { 15 | return "test/01/02/03"; 16 | } 17 | 18 | protected ImmutableMap.Builder<String, String> getConnectorConfigMap() { 19 | return super.getConnectorConfigMap() 20 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 21 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "false"); 22 | } 23 | 24 | @Test 25 | public void success() throws IOException { 26 | super.success(); 27 | 28 | assertFalse(new File(this.inputPath, this.defineInputPathSubDir()).exists(), 29 | "The input.path sub-directory " + this.defineInputPathSubDir() + " should not exist"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- 
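Note: the SubDirs cleanup-policy tests above and below exercise `input.path.walk.recursively` together with `cleanup.policy.maintain.relative.path`. A minimal sketch of the relative-path handling these tests observe, assuming hypothetical `inputRoot` and `targetRoot` arguments (illustrative only, not the connector's actual cleanup code):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

final class RelativePathSketch {
  // When maintainRelativePath is true, the file's path relative to inputRoot
  // (e.g. test/01/02/03) is re-created under targetRoot; when false, the file
  // lands directly in targetRoot and the emptied input sub-directories can be pruned.
  static void moveTo(Path inputRoot, Path targetRoot, Path file, boolean maintainRelativePath) throws IOException {
    Path target = maintainRelativePath
        ? targetRoot.resolve(inputRoot.relativize(file))
        : targetRoot.resolve(file.getFileName());
    Files.createDirectories(target.getParent());
    Files.move(file, target);
  }
}

The tests assert the observable side effect: with the setting at `false` the `test/01/02/03` tree is expected to be gone from input.path after cleanup, and with `true` it is expected to remain.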
/src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertTrue; 11 | 12 | public class DeleteCleanupPolicySubDirsRetainTest extends DeleteCleanupPolicyTest { 13 | @Override 14 | protected String defineInputPathSubDir() { 15 | return "test/01/02/03"; 16 | } 17 | 18 | protected ImmutableMap.Builder<String, String> getConnectorConfigMap() { 19 | return super.getConnectorConfigMap() 20 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 21 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "true"); 22 | } 23 | 24 | @Test 25 | public void success() throws IOException { 26 | super.success(); 27 | 28 | assertTrue(new File(this.inputPath, this.defineInputPathSubDir()).exists(), 29 | "The input.path sub-directory " + this.defineInputPathSubDir() + " should exist"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicyTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertFalse; 9 | import static org.junit.jupiter.api.Assertions.assertTrue; 10 | 11 | public class DeleteCleanupPolicyTest extends AbstractCleanUpPolicyTest<AbstractCleanUpPolicy.Delete> { 12 | @Override 13 | protected AbstractCleanUpPolicy.Delete create(InputFile inputFile, File errorPath, File finishedPath) { 14 | return new AbstractCleanUpPolicy.Delete(inputFile, errorPath, finishedPath); 15 | } 16 | 17 | @Test 18 | public void success() throws IOException { 19 | assertTrue(this.inputFile.exists(), "Input file should exist"); 20 | this.cleanupPolicy.success(); 21 | assertFalse(this.inputFile.exists(), "Input file should not exist"); 22 | 23 | if (!(cleanupPolicy instanceof AbstractCleanUpPolicy.Delete)) { 24 | File finishedFile = new File(this.finishedPath, this.inputFile.getName()); 25 | assertTrue(finishedFile.exists(), "finishedPath file should exist."); 26 | } else { 27 | File finishedFile = new File(this.finishedPath, this.inputFile.getName()); 28 | assertFalse(finishedFile.exists(), "finishedPath file should not exist."); 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DocumentationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.BaseDocumentationTest; 19 | 20 | public class DocumentationTest extends BaseDocumentationTest { 21 | 22 | } 23 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparatorTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import com.google.common.io.Files; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | import java.util.Arrays; 12 | import java.util.Date; 13 | import java.util.List; 14 | 15 | import static org.junit.jupiter.api.Assertions.assertEquals; 16 | 17 | public class FileComparatorTest { 18 | File tempDirectory; 19 | 20 | @BeforeEach 21 | public void before() { 22 | this.tempDirectory = Files.createTempDir(); 23 | } 24 | 25 | File createFile(String name) throws IOException { 26 | return createFile(name, new Date().getTime(), 0); 27 | } 28 | 29 | File createFile(String name, long date) throws IOException { 30 | return createFile(name, date, 0); 31 | } 32 | 33 | File createFile(String name, long date, long length) throws IOException { 34 | File result = new File(tempDirectory, name); 35 | 36 | if (length == 0) { 37 | Files.touch(result); 38 | } else { 39 | Files.write( 40 | new byte[(int) length], 41 | result 42 | ); 43 | } 44 | result.setLastModified(date); 45 | return result; 46 | } 47 | 48 | 49 | List<File> sort(List<File> files, AbstractSourceConnectorConfig.FileAttribute... attributes) { 50 | List<File> result = new ArrayList<>(files); 51 | FileComparator comparator = new FileComparator(ImmutableList.copyOf(attributes)); 52 | result.sort(comparator); 53 | return result; 54 | } 55 | 56 | List<File> expected(List<File> files, int... 
indexes) { 57 | List<File> result = new ArrayList<>(); 58 | for (int index : indexes) { 59 | result.add(files.get(index)); 60 | } 61 | return result; 62 | } 63 | 64 | @Test 65 | public void existingFunctionality() throws IOException { 66 | List<File> input = Arrays.asList( 67 | createFile("File1.csv"), 68 | createFile("File2.csv"), 69 | createFile("File3.csv") 70 | ); 71 | 72 | List<File> expected = expected(input, 0, 1, 2); 73 | List<File> actual = sort(input, AbstractSourceConnectorConfig.FileAttribute.NameAsc); 74 | assertEquals(expected, actual); 75 | } 76 | 77 | @Test 78 | public void sortByLastModified() throws IOException { 79 | long lastModified = new Date().getTime(); 80 | 81 | List<File> input = Arrays.asList( 82 | createFile("File1.csv", lastModified -= 1000L), 83 | createFile("File2.csv", lastModified -= 1000L), 84 | createFile("File3.csv", lastModified -= 1000L) 85 | ); 86 | 87 | List<File> expected = expected(input, 2, 1, 0); 88 | List<File> actual = sort(input, AbstractSourceConnectorConfig.FileAttribute.LastModifiedAsc); 89 | assertEquals(expected, actual); 90 | } 91 | @Test 92 | public void sortBySize() throws IOException { 93 | long lastModified = new Date().getTime(); 94 | long length = 10000; 95 | 96 | List<File> input = Arrays.asList( 97 | createFile("File1.csv", lastModified -= 1000L, length -= 1000L), 98 | createFile("File2.csv", lastModified -= 1000L, length -= 1000L), 99 | createFile("File3.csv", lastModified -= 1000L, length -= 1000L) 100 | ); 101 | 102 | List<File> expected = expected(input, 0, 1, 2); 103 | List<File> actual = sort(input, AbstractSourceConnectorConfig.FileAttribute.LengthDesc); 104 | assertEquals(expected, actual); 105 | } 106 | 107 | 108 | } 109 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGeneratorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import java.util.HashMap; 19 | import org.apache.kafka.connect.data.Schema; 20 | import org.apache.kafka.connect.data.SchemaBuilder; 21 | import org.junit.jupiter.api.Test; 22 | 23 | import java.io.File; 24 | import java.io.IOException; 25 | import java.util.Arrays; 26 | import java.util.Map; 27 | 28 | import static com.github.jcustenborder.kafka.connect.utils.AssertSchema.assertSchema; 29 | 30 | public class JsonSchemaGeneratorTest extends AbstractSchemaGeneratorTest { 31 | 32 | @Test 33 | public void schema() throws IOException { 34 | File inputFile = new File("src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data"); 35 | JsonSchemaGenerator schemaGenerator = new JsonSchemaGenerator(settings); 36 | Map.Entry<Schema, Schema> kvp = schemaGenerator.generate(inputFile, Arrays.asList("id")); 37 | final Schema expectedKeySchema = SchemaBuilder.struct() 38 | .name("com.github.jcustenborder.kafka.connect.model.Key") 39 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 40 | .build(); 41 | 42 | final Schema expectedValueSchema = SchemaBuilder.struct() 43 | .name("com.github.jcustenborder.kafka.connect.model.Value") 44 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 45 | .field("first_name", Schema.OPTIONAL_STRING_SCHEMA) 46 | .field("last_name", Schema.OPTIONAL_STRING_SCHEMA) 47 | .field("email", Schema.OPTIONAL_STRING_SCHEMA) 48 | .field("gender", Schema.OPTIONAL_STRING_SCHEMA) 49 | .field("ip_address", Schema.OPTIONAL_STRING_SCHEMA) 50 | .field("last_login", Schema.OPTIONAL_STRING_SCHEMA) 51 | .field("account_balance", Schema.OPTIONAL_STRING_SCHEMA) 52 | .field("country", Schema.OPTIONAL_STRING_SCHEMA) 53 | .field("favorite_color", Schema.OPTIONAL_STRING_SCHEMA) 54 | .build(); 55 | 56 | assertSchema(expectedKeySchema, kvp.getKey(), "key schema does not match."); 57 | assertSchema(expectedValueSchema, kvp.getValue(), "value schema does not match."); 58 | } 59 | 60 | 61 | @Test 62 | public void schemaWithCustomSchemaName() throws IOException { 63 | File inputFile = new File("src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data"); 64 | Map<String, String> configs = new HashMap<>(settings); 65 | configs.put(AbstractSpoolDirSourceConnectorConfig.SCHEMA_GENERATION_KEY_NAME_CONF, "com.foo.key"); 66 | configs.put(AbstractSpoolDirSourceConnectorConfig.SCHEMA_GENERATION_VALUE_NAME_CONF, "com.foo.value"); 67 | JsonSchemaGenerator schemaGenerator = new JsonSchemaGenerator(configs); 68 | Map.Entry<Schema, Schema> kvp = schemaGenerator.generate(inputFile, Arrays.asList("id")); 69 | final Schema expectedKeySchema = SchemaBuilder.struct() 70 | .name("com.foo.key") 71 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 72 | .build(); 73 | 74 | final Schema expectedValueSchema = SchemaBuilder.struct() 75 | .name("com.foo.value") 76 | .field("id", Schema.OPTIONAL_STRING_SCHEMA) 77 | .field("first_name", Schema.OPTIONAL_STRING_SCHEMA) 78 | .field("last_name", Schema.OPTIONAL_STRING_SCHEMA) 79 | .field("email", Schema.OPTIONAL_STRING_SCHEMA) 80 | .field("gender", Schema.OPTIONAL_STRING_SCHEMA) 81 | .field("ip_address", Schema.OPTIONAL_STRING_SCHEMA) 82 | .field("last_login", Schema.OPTIONAL_STRING_SCHEMA) 83 | .field("account_balance", Schema.OPTIONAL_STRING_SCHEMA) 84 | .field("country", Schema.OPTIONAL_STRING_SCHEMA) 85 | .field("favorite_color", Schema.OPTIONAL_STRING_SCHEMA) 86 | .build(); 87 | 88 | assertSchema(expectedKeySchema, kvp.getKey(), "key schema does not match."); 89 | assertSchema(expectedValueSchema, 
kvp.getValue(), "value schema does not match."); 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MinimumFileAgePredicateTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.apache.kafka.common.utils.Time; 4 | import org.junit.jupiter.api.AfterEach; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertFalse; 12 | import static org.junit.jupiter.api.Assertions.assertTrue; 13 | import static org.mockito.Mockito.mock; 14 | import static org.mockito.Mockito.when; 15 | 16 | public class MinimumFileAgePredicateTest { 17 | File inputFile; 18 | 19 | 20 | static final String EXTENSION = "processing"; 21 | 22 | @BeforeEach 23 | public void before() throws IOException { 24 | this.inputFile = File.createTempFile("test", "file"); 25 | } 26 | 27 | @AfterEach 28 | public void after() throws IOException { 29 | if (null != this.inputFile && this.inputFile.exists()) { 30 | this.inputFile.delete(); 31 | } 32 | } 33 | 34 | Time time(long milliseconds) { 35 | Time time = mock(Time.class); 36 | when(time.milliseconds()).thenReturn(milliseconds); 37 | return time; 38 | } 39 | 40 | @Test 41 | public void notOldEnough() throws IOException { 42 | long timestamp = 1559653835123L; 43 | Time time = time(timestamp); 44 | this.inputFile.setLastModified(timestamp); 45 | InputFileDequeue.MinimumFileAgePredicate predicate = new InputFileDequeue.MinimumFileAgePredicate( 46 | 1000, 47 | time 48 | ); 49 | assertFalse(predicate.test(this.inputFile), "File should not be old enough"); 50 | } 51 | 52 | @Test 53 | public void oldEnough() throws IOException { 54 | long timestamp = 1559653835123L; 55 | this.inputFile.setLastModified(timestamp); 56 | timestamp += 5000L; 57 | Time time = time(timestamp); 58 | InputFileDequeue.MinimumFileAgePredicate predicate = new InputFileDequeue.MinimumFileAgePredicate( 59 | 1000, 60 | time 61 | ); 62 | assertTrue(predicate.test(this.inputFile), "File should be old enough"); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsNoRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertFalse; 11 | 12 | public class MoveByDateCleanupPolicySubDirsNoRetainTest extends MoveByDateCleanupPolicyTest { 13 | @Override 14 | protected String defineInputPathSubDir() { 15 | return "test/01/02/03"; 16 | } 17 | 18 | protected ImmutableMap.Builder getConnectorConfigMap() { 19 | return super.getConnectorConfigMap() 20 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 21 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "false"); 22 | } 23 | 24 | @Test 25 | public void success() throws IOException { 26 | super.success(); 27 | 28 | assertFalse(new File(this.inputPath,this.defineInputPathSubDir()).exists(), 29 
| "The input.path sub-directory "+this.defineInputPathSubDir()+" should not exist"); 30 | 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | import static org.junit.jupiter.api.Assertions.assertTrue; 10 | 11 | public class MoveByDateCleanupPolicySubDirsRetainTest extends MoveByDateCleanupPolicyTest { 12 | @Override 13 | protected String defineInputPathSubDir() { 14 | return "test/01/02/03"; 15 | } 16 | 17 | protected ImmutableMap.Builder getConnectorConfigMap() { 18 | return super.getConnectorConfigMap() 19 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 20 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "true"); 21 | } 22 | 23 | @Test 24 | public void success() throws IOException { 25 | super.success(); 26 | 27 | assertTrue(new File(this.inputPath,this.defineInputPathSubDir()).exists(), 28 | "The input.path sub-directory "+this.defineInputPathSubDir()+" should exist"); 29 | 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicyTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.nio.file.Path; 8 | import java.nio.file.Paths; 9 | import java.text.SimpleDateFormat; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertFalse; 12 | import static org.junit.jupiter.api.Assertions.assertTrue; 13 | 14 | public class MoveByDateCleanupPolicyTest extends AbstractCleanUpPolicyTest { 15 | @Override 16 | protected AbstractCleanUpPolicy.MoveByDate create(InputFile inputFile, File errorPath, File finishedPath) { 17 | return new AbstractCleanUpPolicy.MoveByDate(inputFile, errorPath, finishedPath); 18 | } 19 | 20 | @Test 21 | public void success() throws IOException { 22 | SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd"); 23 | Path subDirectory = Paths.get(this.finishedPath.getAbsolutePath(), dateFormatter.format(this.inputFile.lastModified())); 24 | File finishedFile = this.getTargetFilePath(subDirectory.toFile(), this.inputFile); 25 | 26 | assertTrue(this.inputFile.exists(), "Input file should exist"); 27 | assertFalse(finishedFile.exists(), "Finished file should not exist"); 28 | 29 | this.cleanupPolicy.success(); 30 | 31 | assertFalse(this.inputFile.exists(), "Input file should not exist"); 32 | assertTrue(finishedFile.exists(), "Finished file should exist"); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsNoRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 
| import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertFalse; 11 | 12 | public class MoveCleanupPolicySubDirsNoRetainTest extends MoveCleanupPolicyTest { 13 | 14 | @Override 15 | protected String defineInputPathSubDir() { 16 | return "test/01/02/03"; 17 | } 18 | 19 | protected ImmutableMap.Builder<String, String> getConnectorConfigMap() { 20 | return super.getConnectorConfigMap() 21 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 22 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "false"); 23 | } 24 | 25 | @Test 26 | public void success() throws IOException { 27 | super.success(); 28 | 29 | assertFalse(new File(this.inputPath, this.defineInputPathSubDir()).exists(), 30 | "The input.path sub-directory " + this.defineInputPathSubDir() + " should not exist"); 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsRetainTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import com.google.common.collect.ImmutableMap; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertTrue; 11 | 12 | public class MoveCleanupPolicySubDirsRetainTest extends MoveCleanupPolicyTest { 13 | 14 | @Override 15 | protected String defineInputPathSubDir() { 16 | return "test/01/02/03"; 17 | } 18 | 19 | protected ImmutableMap.Builder<String, String> getConnectorConfigMap() { 20 | return super.getConnectorConfigMap() 21 | .put(SpoolDirBinaryFileSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true") 22 | .put(SpoolDirBinaryFileSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "true"); 23 | } 24 | 25 | @Test 26 | public void success() throws IOException { 27 | super.success(); 28 | 29 | assertTrue(new File(this.inputPath, this.defineInputPathSubDir()).exists(), 30 | "The input.path sub-directory " + this.defineInputPathSubDir() + " should exist"); 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicyTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertFalse; 9 | import static org.junit.jupiter.api.Assertions.assertTrue; 10 | 11 | public class MoveCleanupPolicyTest extends AbstractCleanUpPolicyTest<AbstractCleanUpPolicy.Move> { 12 | @Override 13 | protected AbstractCleanUpPolicy.Move create(InputFile inputFile, File errorPath, File finishedPath) { 14 | return new AbstractCleanUpPolicy.Move(inputFile, errorPath, finishedPath); 15 | } 16 | 17 | @Test 18 | public void success() throws IOException { 19 | File finishedFile = this.getTargetFilePath(this.finishedPath, this.inputFile); 20 | assertTrue(this.inputFile.exists(), "Input file should exist"); 21 | assertFalse(finishedFile.exists(), "Finished file should not exist"); 22 | this.cleanupPolicy.success(); 23 | assertFalse(this.inputFile.exists(), "Input file should not exist"); 24 | assertTrue(finishedFile.exists(), "Finished file should exist"); 25 | } 26
| } 27 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NamedTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import java.nio.file.Path; 19 | 20 | public interface NamedTest { 21 | void path(Path path); 22 | } 23 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NoneCleanupPolicyTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertTrue; 9 | 10 | public class NoneCleanupPolicyTest extends AbstractCleanUpPolicyTest<AbstractCleanUpPolicy.None> { 11 | @Override 12 | protected AbstractCleanUpPolicy.None create(InputFile inputFile, File errorPath, File finishedPath) { 13 | return new AbstractCleanUpPolicy.None(inputFile, errorPath, finishedPath); 14 | } 15 | 16 | @Test 17 | public void success() throws IOException { 18 | assertTrue(this.inputFile.exists(), "Input file should exist"); 19 | this.cleanupPolicy.success(); 20 | assertTrue(this.inputFile.exists(), "Input file should still exist"); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ProcessingFileExistsPredicateTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.io.Files; 4 | import org.junit.jupiter.api.AfterEach; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertFalse; 12 | import static org.junit.jupiter.api.Assertions.assertTrue; 13 | 14 | public class ProcessingFileExistsPredicateTest { 15 | File inputFile; 16 | InputFileDequeue.ProcessingFileExistsPredicate predicate; 17 | 18 | static final String EXTENSION = "processing"; 19 | 20 | @BeforeEach 21 | public void before() throws IOException { 22 | this.inputFile = File.createTempFile("test", "file"); 23 | this.predicate = new InputFileDequeue.ProcessingFileExistsPredicate(EXTENSION); 24 | } 25 | 26 | @AfterEach 27 | public void after() throws IOException { 28 | if (null != this.inputFile && this.inputFile.exists()) { 29 | this.inputFile.delete(); 30 | } 31 | } 32 | 33 | @Test 34 | public void test() throws IOException { 35 | File processingFlag = InputFileDequeue.processingFile(EXTENSION, 
this.inputFile); 36 | Files.touch(processingFlag); 37 | assertFalse(this.predicate.test(this.inputFile)); 38 | processingFlag.delete(); 39 | assertTrue(this.predicate.test(this.inputFile)); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.io.Files; 4 | import io.confluent.connect.avro.AvroData; 5 | import org.apache.avro.Schema; 6 | import org.apache.avro.file.CodecFactory; 7 | import org.apache.avro.file.DataFileWriter; 8 | import org.apache.avro.generic.GenericContainer; 9 | import org.apache.avro.generic.GenericDatumWriter; 10 | import org.apache.avro.io.DatumWriter; 11 | import org.apache.kafka.connect.source.SourceRecord; 12 | import org.junit.jupiter.api.Disabled; 13 | import org.junit.jupiter.api.DynamicTest; 14 | import org.junit.jupiter.api.Test; 15 | import org.junit.jupiter.api.TestFactory; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import java.io.File; 20 | import java.io.IOException; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.Optional; 24 | import java.util.stream.Stream; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertTrue; 27 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 28 | 29 | public class SpoolDirAvroSourceTaskTest extends AbstractSpoolDirSourceTaskTest<SpoolDirAvroSourceTask> { 30 | private static final Logger log = LoggerFactory.getLogger(SpoolDirAvroSourceTaskTest.class); 31 | 32 | @Override 33 | protected SpoolDirAvroSourceTask createTask() { 34 | return new SpoolDirAvroSourceTask(); 35 | } 36 | 37 | @Override 38 | protected Map<String, String> settings() { 39 | Map<String, String> settings = super.settings(); 40 | return settings; 41 | } 42 | 43 | @Disabled 44 | @Test 45 | public void foo() throws IOException { 46 | 47 | File outputFile = new File("src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/avro/FieldsMatch.data"); 48 | 49 | DatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(); 50 | DataFileWriter<GenericContainer> writer = new DataFileWriter<>(datumWriter); 51 | writer.setCodec(CodecFactory.bzip2Codec()); 52 | 53 | final String packageName = "csv"; 54 | List<TestCase> testCases = loadTestCases(packageName); 55 | Optional<TestCase> testcase = testCases.stream().filter(testCase -> testCase.path.getFileName().endsWith("FieldsMatch.json")).findFirst(); 56 | assertTrue(testcase.isPresent()); 57 | AvroData avroData = new AvroData(1235); 58 | Schema schema = null; 59 | for (SourceRecord expected : testcase.get().expected) { 60 | if (null == schema) { 61 | schema = avroData.fromConnectSchema(expected.valueSchema()); 62 | writer.create(schema, outputFile); 63 | } 64 | GenericContainer value = (GenericContainer) avroData.fromConnectData(expected.valueSchema(), expected.value()); 65 | writer.append(value); 66 | } 67 | 68 | writer.close(); 69 | } 70 | 71 | @TestFactory 72 | public Stream<DynamicTest> poll() throws IOException { 73 | final String packageName = "avro"; 74 | List<TestCase> testCases = loadTestCases(packageName); 75 | 76 | return testCases.stream().map(testCase -> { 77 | String name = Files.getNameWithoutExtension(testCase.path.toString()); 78 | return dynamicTest(name, () -> { 79 | poll(packageName, testCase); 80 | }); 81 | }); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- 
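The disabled foo() method above regenerates the Avro fixture file with AvroData and DataFileWriter. A minimal sketch for reading such a fixture back to verify it decodes, assuming the same FieldsMatch.data path (illustrative only; the real parsing is done by SpoolDirAvroSourceTask):

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericDatumReader;

import java.io.File;
import java.io.IOException;

final class AvroFixtureCheck {
  public static void main(String[] args) throws IOException {
    File fixture = new File("src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/avro/FieldsMatch.data");
    // DataFileReader recovers the writer schema embedded in the Avro container file.
    try (DataFileReader<GenericContainer> reader = new DataFileReader<>(fixture, new GenericDatumReader<>())) {
      System.out.println("schema: " + reader.getSchema());
      long records = 0;
      while (reader.hasNext()) { // iterate every record to confirm the file decodes cleanly
        reader.next();
        records++;
      }
      System.out.println("records: " + records);
    }
  }
}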
/src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.io.Files; 4 | import org.junit.jupiter.api.DynamicTest; 5 | import org.junit.jupiter.api.TestFactory; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.io.IOException; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.stream.Stream; 13 | 14 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 15 | 16 | public class SpoolDirBinaryFileSourceTaskTest extends AbstractSpoolDirSourceTaskTest<SpoolDirBinaryFileSourceTask> { 17 | private static final Logger log = LoggerFactory.getLogger(SpoolDirBinaryFileSourceTaskTest.class); 18 | 19 | @Override 20 | protected SpoolDirBinaryFileSourceTask createTask() { 21 | return new SpoolDirBinaryFileSourceTask(); 22 | } 23 | 24 | @Override 25 | protected Map<String, String> settings() { 26 | Map<String, String> settings = super.settings(); 27 | return settings; 28 | } 29 | 30 | @TestFactory 31 | public Stream<DynamicTest> poll() throws IOException { 32 | final String packageName = "binary"; 33 | List<TestCase> testCases = loadTestCases(packageName); 34 | 35 | return testCases.stream().map(testCase -> { 36 | String name = Files.getNameWithoutExtension(testCase.path.toString()); 37 | return dynamicTest(name, () -> { 38 | poll(packageName, testCase); 39 | }); 40 | }); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorConfigTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | 4 | import com.opencsv.CSVReader; 5 | import com.opencsv.CSVReaderBuilder; 6 | import com.opencsv.ICSVParser; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.IOException; 10 | import java.io.StringReader; 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | import static org.junit.jupiter.api.Assertions.assertArrayEquals; 15 | 16 | public class SpoolDirCsvSourceConnectorConfigTest { 17 | 18 | @Test 19 | public void nullFieldSeparator() throws IOException { 20 | Map<String, String> settings = new HashMap<>(); 21 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_SEPARATOR_CHAR_CONF, "0"); 22 | settings.put(SpoolDirCsvSourceConnectorConfig.TOPIC_CONF, "test"); 23 | settings.put(SpoolDirCsvSourceConnectorConfig.INPUT_PATH_CONFIG, "/tmp"); 24 | settings.put(SpoolDirCsvSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.+$"); 25 | settings.put(SpoolDirCsvSourceConnectorConfig.ERROR_PATH_CONFIG, "/tmp"); 26 | settings.put(SpoolDirCsvSourceConnectorConfig.FINISHED_PATH_CONFIG, "/tmp"); 27 | settings.put(SpoolDirCsvSourceConnectorConfig.KEY_SCHEMA_CONF, "{\n" + 28 | " \"name\" : \"com.example.users.UserKey\",\n" + 29 | " \"type\" : \"STRUCT\",\n" + 30 | " \"isOptional\" : false,\n" + 31 | " \"fieldSchemas\" : {\n" + 32 | " \"id\" : {\n" + 33 | " \"type\" : \"INT64\",\n" + 34 | " \"isOptional\" : false\n" + 35 | " }\n" + 36 | " }\n" + 37 | " }"); 38 | settings.put(SpoolDirCsvSourceConnectorConfig.VALUE_SCHEMA_CONF, "{\n" + 39 | " \"name\" : \"com.example.users.UserKey\",\n" + 40 | " \"type\" : \"STRUCT\",\n" + 41 | " \"isOptional\" : false,\n" + 42 | " \"fieldSchemas\" : {\n" + 43 | " \"id\" : {\n" + 44 | " \"type\" : \"INT64\",\n" + 45 | " \"isOptional\" : false\n" + 46 | " }\n" + 47 | " }\n" 
+ 48 | " }"); 49 | SpoolDirCsvSourceConnectorConfig config = new SpoolDirCsvSourceConnectorConfig( 50 | true, 51 | settings 52 | ); 53 | ICSVParser parser = config.createCSVParserBuilder(); 54 | try (StringReader reader = new StringReader("id\u0000test\n123\u0000foo")) { 55 | CSVReaderBuilder readerBuilder = config.createCSVReaderBuilder(reader, parser); 56 | try (CSVReader csvReader = readerBuilder.build()) { 57 | String[] line = csvReader.readNext(); 58 | assertArrayEquals(new String[]{"id", "test"}, line); 59 | line = csvReader.readNext(); 60 | assertArrayEquals(new String[]{"123", "foo"}, line); 61 | } 62 | } 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.io.ByteStreams; 19 | import org.apache.kafka.connect.errors.DataException; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.io.File; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | import java.io.InputStream; 26 | import java.io.OutputStream; 27 | 28 | import static org.junit.jupiter.api.Assertions.assertThrows; 29 | 30 | 31 | public class SpoolDirCsvSourceConnectorTest extends AbstractSpoolDirSourceConnectorTest { 32 | @Override 33 | protected SpoolDirCsvSourceConnector createConnector() { 34 | return new SpoolDirCsvSourceConnector(); 35 | } 36 | 37 | @Test 38 | public void startWithoutSchema() throws IOException { 39 | this.settings.put(AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.*\\.csv$"); 40 | 41 | String[] inputFiles = new String[]{ 42 | "csv/FieldsMatch.data", 43 | "csv/FieldsMatch.data", 44 | }; 45 | 46 | int index = 0; 47 | for (String inputFile : inputFiles) { 48 | try (InputStream inputStream = this.getClass().getResourceAsStream(inputFile)) { 49 | File outputFile = new File(this.inputPath, "input" + index + ".csv"); 50 | try (OutputStream outputStream = new FileOutputStream(outputFile)) { 51 | ByteStreams.copy(inputStream, outputStream); 52 | } 53 | } 54 | index++; 55 | } 56 | 57 | this.connector.start(settings); 58 | } 59 | 60 | @Test() 61 | public void startWithoutSchemaMismatch() throws IOException { 62 | this.settings.put(AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.*\\.csv$"); 63 | 64 | 65 | String[] inputFiles = new String[]{ 66 | "csv/FieldsMatch.data", 67 | "csv/DataHasMoreFields.data", 68 | }; 69 | 70 | int index = 0; 71 | for (String inputFile : inputFiles) { 72 | try (InputStream inputStream = this.getClass().getResourceAsStream(inputFile)) { 73 | File outputFile = new File(this.inputPath, "input" + index + ".csv"); 74 | try (OutputStream 
outputStream = new FileOutputStream(outputFile)) { 75 | ByteStreams.copy(inputStream, outputStream); 76 | } 77 | } 78 | index++; 79 | } 80 | 81 | assertThrows(DataException.class, () -> { 82 | this.connector.start(settings); 83 | }); 84 | 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsNoRetainTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | import java.util.Map; 22 | public class SpoolDirCsvSourceTaskSubDirsNoRetainTest extends SpoolDirCsvSourceTaskTest { 23 | private static final Logger log = LoggerFactory.getLogger(SpoolDirCsvSourceTaskSubDirsNoRetainTest.class); 24 | 25 | @Override 26 | protected Map<String, String> settings() { 27 | Map<String, String> settings = super.settings(); 28 | 29 | settings.put(AbstractSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true"); 30 | settings.put(AbstractSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "false"); 31 | 32 | return settings; 33 | } 34 | 35 | @Override 36 | protected String defineInputPathSubDir() { 37 | return "test/01/02/03"; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsRetainTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | import java.util.Map; 22 | public class SpoolDirCsvSourceTaskSubDirsRetainTest extends SpoolDirCsvSourceTaskTest { 23 | private static final Logger log = LoggerFactory.getLogger(SpoolDirCsvSourceTaskSubDirsRetainTest.class); 24 | 25 | @Override 26 | protected Map<String, String> settings() { 27 | Map<String, String> settings = super.settings(); 28 | 29 | settings.put(AbstractSourceConnectorConfig.INPUT_PATH_WALK_RECURSIVELY, "true"); 30 | settings.put(AbstractSourceConnectorConfig.CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, "true"); 31 | 32 | return settings; 33 | } 34 | 35 | @Override 36 | protected String defineInputPathSubDir() { 37 | return "test/01/02/03"; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.io.ByteStreams; 19 | import org.apache.kafka.connect.errors.DataException; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.io.File; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | import java.io.InputStream; 26 | import java.io.OutputStream; 27 | 28 | import static org.junit.jupiter.api.Assertions.assertThrows; 29 | 30 | public class SpoolDirJsonSourceConnectorTest extends AbstractSpoolDirSourceConnectorTest<SpoolDirJsonSourceConnector> { 31 | @Override 32 | protected SpoolDirJsonSourceConnector createConnector() { 33 | return new SpoolDirJsonSourceConnector(); 34 | } 35 | 36 | @Test 37 | public void startWithoutSchema() throws IOException { 38 | settings.put(AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.*\\.json$"); 39 | 40 | String[] inputFiles = new String[]{ 41 | "json/FieldsMatch.data", 42 | "json/FieldsMatch.data", 43 | }; 44 | 45 | int index = 0; 46 | for (String inputFile : inputFiles) { 47 | try (InputStream inputStream = this.getClass().getResourceAsStream(inputFile)) { 48 | File outputFile = new File(this.inputPath, "input" + index + ".json"); 49 | try (OutputStream outputStream = new FileOutputStream(outputFile)) { 50 | ByteStreams.copy(inputStream, outputStream); 51 | } 52 | } 53 | index++; 54 | } 55 | 56 | this.connector.start(settings); 57 | } 58 | 59 | @Test 60 | public void startWithoutSchemaMismatch() throws IOException { 61 | this.settings.put(AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, "^.*\\.json$"); 62 | 63 | 64 | String[] inputFiles = new String[]{ 65 | "json/FieldsMatch.data", 66 | "json/DataHasMoreFields.data", 67 | }; 68 | 69 | int index = 0; 70 | for (String inputFile : inputFiles) { 71 | try (InputStream 
inputStream = this.getClass().getResourceAsStream(inputFile)) { 72 | File outputFile = new File(this.inputPath, "input" + index + ".json"); 73 | try (OutputStream outputStream = new FileOutputStream(outputFile)) { 74 | ByteStreams.copy(inputStream, outputStream); 75 | } 76 | } 77 | index++; 78 | } 79 | 80 | assertThrows(DataException.class, () -> { 81 | this.connector.start(settings); 82 | }); 83 | 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.google.common.io.Files; 19 | import org.junit.jupiter.api.DynamicTest; 20 | import org.junit.jupiter.api.TestFactory; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import java.io.IOException; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.stream.Stream; 28 | 29 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 30 | 31 | public class SpoolDirJsonSourceTaskTest extends AbstractSpoolDirSourceTaskTest<SpoolDirJsonSourceTask> { 32 | private static final Logger log = LoggerFactory.getLogger(SpoolDirJsonSourceTaskTest.class); 33 | 34 | @Override 35 | protected SpoolDirJsonSourceTask createTask() { 36 | return new SpoolDirJsonSourceTask(); 37 | } 38 | 39 | @Override 40 | protected Map<String, String> settings() { 41 | Map<String, String> settings = super.settings(); 42 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_FIRST_ROW_AS_HEADER_CONF, "true"); 43 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_NULL_FIELD_INDICATOR_CONF, "BOTH"); 44 | settings.put(SpoolDirCsvSourceConnectorConfig.PARSER_TIMESTAMP_DATE_FORMATS_CONF, "yyyy-MM-dd'T'HH:mm:ss'Z'"); 45 | return settings; 46 | } 47 | 48 | @TestFactory 49 | public Stream<DynamicTest> poll() throws IOException { 50 | final String packageName = "json"; 51 | List<TestCase> testCases = loadTestCases(packageName); 52 | 53 | return testCases.stream().map(testCase -> { 54 | String name = Files.getNameWithoutExtension(testCase.path.toString()); 55 | return dynamicTest(name, () -> { 56 | poll(packageName, testCase); 57 | }); 58 | }); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Map; 8 | 9 | import static org.junit.jupiter.api.Assertions.assertEquals; 10 | 11 | public class SpoolDirLineDelimitedSourceTaskTest
extends AbstractSpoolDirSourceTaskTest<SpoolDirLineDelimitedSourceTask> { 12 | private static final Logger log = LoggerFactory.getLogger(SpoolDirLineDelimitedSourceTaskTest.class); 13 | 14 | @Override 15 | protected SpoolDirLineDelimitedSourceTask createTask() { 16 | return new SpoolDirLineDelimitedSourceTask(); 17 | } 18 | 19 | @Override 20 | protected Map<String, String> settings() { 21 | Map<String, String> settings = super.settings(); 22 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_FIRST_ROW_AS_HEADER_CONF, "true"); 23 | settings.put(SpoolDirCsvSourceConnectorConfig.PARSER_TIMESTAMP_DATE_FORMATS_CONF, "yyyy-MM-dd'T'HH:mm:ss'Z'"); 24 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_NULL_FIELD_INDICATOR_CONF, "BOTH"); 25 | return settings; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.spooldir; 2 | 3 | import com.google.common.io.Files; 4 | import org.junit.jupiter.api.DynamicTest; 5 | import org.junit.jupiter.api.TestFactory; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.io.IOException; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.stream.Stream; 13 | 14 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 15 | 16 | public class SpoolDirSchemaLessJsonSourceTaskTest extends AbstractSpoolDirSourceTaskTest<SpoolDirSchemaLessJsonSourceTask> { 17 | private static final Logger log = LoggerFactory.getLogger(SpoolDirSchemaLessJsonSourceTaskTest.class); 18 | 19 | @Override 20 | protected SpoolDirSchemaLessJsonSourceTask createTask() { 21 | return new SpoolDirSchemaLessJsonSourceTask(); 22 | } 23 | 24 | @Override 25 | protected Map<String, String> settings() { 26 | Map<String, String> settings = super.settings(); 27 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_FIRST_ROW_AS_HEADER_CONF, "true"); 28 | settings.put(SpoolDirCsvSourceConnectorConfig.PARSER_TIMESTAMP_DATE_FORMATS_CONF, "yyyy-MM-dd'T'HH:mm:ss'Z'"); 29 | settings.put(SpoolDirCsvSourceConnectorConfig.CSV_NULL_FIELD_INDICATOR_CONF, "BOTH"); 30 | return settings; 31 | } 32 | 33 | @TestFactory 34 | public Stream<DynamicTest> poll() throws IOException { 35 | final String packageName = "schemalessjson"; 36 | List<TestCase> testCases = loadTestCases(packageName); 37 | 38 | return testCases.stream().map(testCase -> { 39 | String name = Files.getNameWithoutExtension(testCase.path.toString()); 40 | return dynamicTest(name, () -> { 41 | poll(packageName, testCase); 42 | }); 43 | }); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestCase.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.fasterxml.jackson.annotation.JsonIgnore; 19 | import org.apache.kafka.connect.data.Schema; 20 | import org.apache.kafka.connect.source.SourceRecord; 21 | 22 | import java.nio.file.Path; 23 | import java.util.LinkedHashMap; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | public class TestCase implements NamedTest { 28 | @JsonIgnore 29 | public Path path; 30 | public Map<String, String> settings = new LinkedHashMap<>(); 31 | public Map<String, Object> offset = new LinkedHashMap<>(); 32 | public Schema keySchema; 33 | public Schema valueSchema; 34 | public List<SourceRecord> expected; 35 | 36 | @Override 37 | public void path(Path path) { 38 | this.path = path; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestDataUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory; 19 | import com.google.common.base.Preconditions; 20 | import org.junit.jupiter.api.Test; 21 | import org.reflections.Reflections; 22 | import org.reflections.scanners.ResourcesScanner; 23 | import org.reflections.util.FilterBuilder; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import java.io.File; 28 | import java.io.IOException; 29 | import java.io.InputStream; 30 | import java.nio.file.Path; 31 | import java.nio.file.Paths; 32 | import java.util.ArrayList; 33 | import java.util.List; 34 | import java.util.Set; 35 | 36 | public class TestDataUtils { 37 | private static final Logger log = LoggerFactory.getLogger(TestDataUtils.class); 38 | 39 | @Test 40 | public void metadata() { 41 | log.info(Metadata.HEADER_DOCS); 42 | } 43 | 44 | 45 | public static <T extends NamedTest> List<T> loadJsonResourceFiles(String packageName, Class<T> cls) throws IOException { 46 | Preconditions.checkNotNull(packageName, "packageName cannot be null"); 47 | log.info("packageName = {}", packageName); 48 | // Preconditions.checkState(packageName.startsWith("/"), "packageName must start with a /."); 49 | Reflections reflections = new Reflections(packageName, new ResourcesScanner()); 50 | Set<String> resources = reflections.getResources(new FilterBuilder.Include("^.*\\.json$")); 51 | List<T> datas = new ArrayList<T>(resources.size()); 52 | Path packagePath = Paths.get("/" + packageName.replace(".", "/")); 53 | for (String resource : resources) { 54 | log.trace("Loading resource {}", resource); 55 | Path resourcePath = Paths.get("/" + resource); 56 | Path relativePath = packagePath.relativize(resourcePath); 57 | File resourceFile = new File("/" + resource); 58 | T data; 59 | try (InputStream inputStream =
cls.getResourceAsStream(resourceFile.getAbsolutePath())) { 60 | data = ObjectMapperFactory.INSTANCE.readValue(inputStream, cls); 61 | } catch (IOException ex) { 62 | if (log.isErrorEnabled()) { 63 | log.error("Exception thrown while loading {}", resourcePath, ex); 64 | } 65 | throw ex; 66 | } 67 | 68 | data.path(relativePath); 69 | datas.add(data); 70 | } 71 | return datas; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir.elf; 17 | 18 | import com.github.jcustenborder.parsers.elf.ElfParser; 19 | import com.github.jcustenborder.parsers.elf.LogEntry; 20 | import com.google.common.collect.ImmutableMap; 21 | import org.apache.commons.lang3.tuple.Pair; 22 | import org.apache.kafka.connect.data.SchemaAndValue; 23 | import org.apache.kafka.connect.data.Struct; 24 | import org.junit.jupiter.api.DynamicTest; 25 | import org.junit.jupiter.api.Test; 26 | import org.junit.jupiter.api.TestFactory; 27 | 28 | import java.time.LocalDate; 29 | import java.time.LocalTime; 30 | import java.util.LinkedHashMap; 31 | import java.util.Map; 32 | import java.util.stream.Stream; 33 | 34 | import static org.junit.jupiter.api.Assertions.assertEquals; 35 | import static org.junit.jupiter.api.Assertions.assertNotNull; 36 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 37 | import static org.mockito.Mockito.mock; 38 | import static org.mockito.Mockito.when; 39 | 40 | public class SchemaConversionBuilderTest { 41 | 42 | @TestFactory 43 | public Stream<DynamicTest> normalizeFieldName() { 44 | Map<String, String> tests = new LinkedHashMap<>(); 45 | tests.put("date", "date"); 46 | tests.put("time", "time"); 47 | tests.put("x-edge-location", "x_edge_location"); 48 | tests.put("sc-bytes", "sc_bytes"); 49 | tests.put("c-ip", "c_ip"); 50 | tests.put("cs-method", "cs_method"); 51 | tests.put("cs(Host)", "cs_host"); 52 | tests.put("cs-uri-stem", "cs_uri_stem"); 53 | tests.put("sc-status", "sc_status"); 54 | tests.put("cs(Referer)", "cs_referer"); 55 | tests.put("cs(User-Agent)", "cs_user_agent"); 56 | tests.put("cs-uri-query", "cs_uri_query"); 57 | tests.put("cs(Cookie)", "cs_cookie"); 58 | tests.put("x-edge-result-type", "x_edge_result_type"); 59 | tests.put("x-edge-request-id", "x_edge_request_id"); 60 | tests.put("x-host-header", "x_host_header"); 61 | tests.put("cs-protocol", "cs_protocol"); 62 | tests.put("cs-bytes", "cs_bytes"); 63 | tests.put("time-taken", "time_taken"); 64 | 65 | return tests.entrySet().stream().map(e -> dynamicTest(e.getKey(), () -> { 66 | final String actual =
SchemaConversionBuilder.normalizeFieldName(e.getKey()); 67 | assertEquals(e.getValue(), actual, "field name does not match."); 68 | })); 69 | } 70 | 71 | 72 | @Test 73 | public void buildAndConvert() { 74 | ElfParser parser = mock(ElfParser.class); 75 | final Map<String, Class<?>> fieldTypes = ImmutableMap.of( 76 | "date", LocalDate.class, 77 | "time", LocalTime.class, 78 | "sc-bytes", Long.class, 79 | "sc-status", Integer.class 80 | ); 81 | final Map<String, Object> fieldData = ImmutableMap.of( 82 | "date", LocalDate.of(2011, 3, 14), 83 | "time", LocalTime.of(12, 0, 0), 84 | "sc-bytes", 12341L, 85 | "sc-status", 200 86 | ); 87 | when(parser.fieldTypes()).thenReturn(fieldTypes); 88 | 89 | SchemaConversionBuilder schemaGenerator = new SchemaConversionBuilder(parser); 90 | SchemaConversion conversion = schemaGenerator.build(); 91 | assertNotNull(conversion, "conversion should not be null."); 92 | 93 | LogEntry entry = mock(LogEntry.class); 94 | when(entry.fieldTypes()).thenReturn(fieldTypes); 95 | when(entry.fieldData()).thenReturn(fieldData); 96 | 97 | SchemaAndValue actual = conversion.convert(entry); 98 | assertNotNull(actual, "actual should not be null"); 99 | // assertNotNull(actual.getKey(), "actual.getKey() should not be null"); 100 | assertNotNull(actual.schema(), "actual.schema() should not be null"); 101 | assertNotNull(actual.value(), "actual.value() should not be null"); 102 | 103 | // actual.getValue()..validate(); 104 | 105 | //date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken 106 | 107 | 108 | } 109 | 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.jcustenborder.kafka.connect.spooldir.elf; 17 | 18 | import com.github.jcustenborder.kafka.connect.spooldir.AbstractSpoolDirSourceTaskTest; 19 | import com.github.jcustenborder.kafka.connect.spooldir.TestCase; 20 | import com.google.common.io.Files; 21 | import org.junit.jupiter.api.DynamicTest; 22 | import org.junit.jupiter.api.TestFactory; 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | import java.io.IOException; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.stream.Stream; 30 | 31 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 32 | 33 | public class SpoolDirELFSourceTaskTest extends AbstractSpoolDirSourceTaskTest<SpoolDirELFSourceTask> { 34 | private static final Logger log = LoggerFactory.getLogger(SpoolDirELFSourceTaskTest.class); 35 | 36 | @Override 37 | protected SpoolDirELFSourceTask createTask() { 38 | return new SpoolDirELFSourceTask(); 39 | } 40 | 41 | @Override 42 | protected Map<String, String> settings() { 43 | Map<String, String> settings = super.settings(); 44 | // settings.put(SpoolDirELFSourceConnectorConfig.CSV_FIRST_ROW_AS_HEADER_CONF, "true"); 45 | // settings.put(SpoolDirCsvSourceConnectorConfig.CSV_NULL_FIELD_INDICATOR_CONF, "BOTH"); 46 | // settings.put(SpoolDirCsvSourceConnectorConfig.PARSER_TIMESTAMP_DATE_FORMATS_CONF, "yyyy-MM-dd'T'HH:mm:ss'Z'"); 47 | return settings; 48 | } 49 | 50 | @TestFactory 51 | public Stream<DynamicTest> poll() throws IOException { 52 | final String packageName = "elf"; 53 | List<TestCase> testCases = loadTestCases(packageName); 54 | 55 | return testCases.stream().map(testCase -> { 56 | String name = Files.getNameWithoutExtension(testCase.path.toString()); 57 | return dynamicTest(name, () -> { 58 | poll(packageName, testCase); 59 | 60 | }); 61 | }); 62 | } 63 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolBinaryFileSourceConnector/binary.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Binary File", 3 | "description": "This example will read the entire file and write it to Kafka as a binary file.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^users\\d+\\.bin$", 9 | "topic": "users" 10 | } 11 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolBinaryFileSourceConnector/fromXML.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Transform XML Files", 3 | "description": "This example will use the FromXml transformation to read the binary data based on the supplied XSD. This allows files to be converted to strongly typed data based on the XSD.
Once Kafka Connect has converted the data it can be stored as AVRO, JSON, or whatever format the configured converter supports.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^users\\d+\\.bin$", 9 | "topic": "users", 10 | "transforms": "FromXml", 11 | "transforms.FromXml.type":"com.github.jcustenborder.kafka.connect.transform.xml.FromXml$Value", 12 | "transforms.FromXml.schema.path": "file:///books.xsd" 13 | } 14 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector/binary.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Binary File", 3 | "description": "This example will read the entire file and write it to Kafka as a binary file.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^users\\d+\\.bin$", 9 | "topic": "users" 10 | } 11 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector/fromXML.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Transform XML Files", 3 | "description": "This example will use the FromXml transformation to read the binary data based on the supplied XSD. This allows files to be converted to strongly typed data based on the XSD. Once Kafka Connect has converted the data it can be stored as AVRO, JSON, or whatever format the configured converter supports.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^users\\d+\\.bin$", 9 | "topic": "users", 10 | "transforms": "FromXml", 11 | "transforms.FromXml.type":"com.github.jcustenborder.kafka.connect.transform.xml.FromXml$Value", 12 | "transforms.FromXml.schema.path": "file:///books.xsd" 13 | } 14 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CSV with schema", 3 | "description": "This example will read csv files and write them to Kafka parsing them to the\nschema specified in ``key.schema`` and ``value.schema``.", 4 | "note": "The data for the following example is formatted as such.\nid,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313", 5 | "config": { 6 | "finished.path": "/tmp", 7 | "input.path": "/tmp", 8 | "error.path": "/tmp", 9 | "input.file.pattern": "^users\\d+\\.csv", 10 | "topic": "users", 11 | "key.schema": "{\n \"name\" : \"com.example.users.UserKey\",\n \"type\" : \"STRUCT\",\n \"isOptional\" : false,\n \"fieldSchemas\" : {\n \"id\" : {\n \"type\" : \"INT64\",\n \"isOptional\" : false\n }\n }\n}\n", 12 | "value.schema": "{\n \"name\" : \"com.example.users.User\",\n \"type\" : \"STRUCT\",\n \"isOptional\" : false,\n \"fieldSchemas\" : {\n \"id\" : {\n \"type\" : \"INT64\",\n \"isOptional\" : false\n },\n \"first_name\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"last_name\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"email\" :
{\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"gender\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"ip_address\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"last_login\" : {\n \"name\" : \"org.apache.kafka.connect.data.Timestamp\",\n \"type\" : \"INT64\",\n \"version\" : 1,\n \"isOptional\" : false\n },\n \"account_balance\" : {\n \"name\" : \"org.apache.kafka.connect.data.Decimal\",\n \"type\" : \"BYTES\",\n \"version\" : 1,\n \"parameters\" : {\n \"scale\" : \"2\"\n },\n \"isOptional\" : true\n },\n \"country\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"favorite_color\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n }\n }\n}\n" 13 | } 14 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/schemaheaders.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CSV with Headers as fields", 3 | "description": "This example will use a transformation to copy data from the header(s) of the message to field(s) in the message.", 4 | "note": "The data for the following example is formatted as such.\nid,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313", 5 | "config": { 6 | "finished.path": "/tmp", 7 | "input.path": "/tmp", 8 | "error.path": "/tmp", 9 | "input.file.pattern": "^users\\d+\\.csv", 10 | "topic": "users", 11 | "key.schema": "{\n \"name\" : \"com.example.users.UserKey\",\n \"type\" : \"STRUCT\",\n \"isOptional\" : false,\n \"fieldSchemas\" : {\n \"id\" : {\n \"type\" : \"INT64\",\n \"isOptional\" : false\n }\n }\n}\n", 12 | "value.schema": "{\n \"name\" : \"com.example.users.User\",\n \"type\" : \"STRUCT\",\n \"isOptional\" : false,\n \"fieldSchemas\" : {\n \"id\" : {\n \"type\" : \"INT64\",\n \"isOptional\" : false\n },\n \"first_name\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"last_name\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"email\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"gender\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"ip_address\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"last_login\" : {\n \"name\" : \"org.apache.kafka.connect.data.Timestamp\",\n \"type\" : \"INT64\",\n \"version\" : 1,\n \"isOptional\" : false\n },\n \"account_balance\" : {\n \"name\" : \"org.apache.kafka.connect.data.Decimal\",\n \"type\" : \"BYTES\",\n \"version\" : 1,\n \"parameters\" : {\n \"scale\" : \"2\"\n },\n \"isOptional\" : true\n },\n \"country\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n },\n \"favorite_color\" : {\n \"type\" : \"STRING\",\n \"isOptional\" : true\n }\n }\n}\n" 13 | }, 14 | "transformations": { 15 | "headerToField": { 16 | "type": "com.github.jcustenborder.kafka.connect.transform.common.HeaderToField$Value", 17 | "header.mappings" : "file.path:STRING:file_path,file.name:STRING:file_name,file.last.modified:INT64(Timestamp):file_last_modified" 18 | } 19 | } 20 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/tsv.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TSV input file", 3 | "description": "This 
example will read a tab separated file. This method is very similar to reading a standard CSV file.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^users\\d+\\.tsv", 9 | "topic": "users", 10 | "csv.separator.char": 9 11 | } 12 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnector/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"Json", 3 | "description":"This example will read json from the input directory.", 4 | "config":{ 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern":"^users\\d+\\.json$", 9 | "topic":"users" 10 | } 11 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/avro/FieldsMatch.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcustenborder/kafka-connect-spooldir/7506b3da07014e8ef22d6b05c24822464ecdb51b/src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/avro/FieldsMatch.data -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/binary/DataHasMoreFields.data: -------------------------------------------------------------------------------- 1 | asdifoasodfasdfargasdfasdfasdgfrasdfasdfasdfa -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/binary/DataHasMoreFields.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": {}, 3 | "offset": {}, 4 | "expected": [ 5 | { 6 | "sourcePartition": { 7 | "fileName": "DataHasMoreFields.binary" 8 | }, 9 | "sourceOffset": { 10 | "offset": 0 11 | }, 12 | "topic": "testing", 13 | "valueSchema": { 14 | "type": "BYTES", 15 | "isOptional": false 16 | }, 17 | "value": "YXNkaWZvYXNvZGZhc2RmYXJnYXNkZmFzZGZhc2RnZnJhc2RmYXNkZmFzZGZh", 18 | "headers": [ 19 | { 20 | "name": "file.name", 21 | "schema": { 22 | "type": "STRING", 23 | "isOptional": false 24 | }, 25 | "storage": "DataHasMoreFields.binary" 26 | }, 27 | { 28 | "name": "file.path", 29 | "schema": { 30 | "type": "STRING", 31 | "isOptional": false 32 | }, 33 | "storage": "/var/folders/fc/jqgphp3s5l9087p4v7pdxh040000gn/T/1559925238742-0/input/DataHasMoreFields.binary" 34 | }, 35 | { 36 | "name": "file.length", 37 | "schema": { 38 | "type": "INT64", 39 | "isOptional": false 40 | }, 41 | "storage": 5153 42 | }, 43 | { 44 | "name": "file.offset", 45 | "schema": { 46 | "type": "INT64", 47 | "isOptional": false 48 | }, 49 | "storage": 0 50 | }, 51 | { 52 | "name": "file.last.modified", 53 | "schema": { 54 | "name": "org.apache.kafka.connect.data.Timestamp", 55 | "type": "INT64", 56 | "version": 1, 57 | "isOptional": false 58 | }, 59 | "storage": 1559925239000 60 | } 61 | ] 62 | } 63 | ] 64 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/BlankLines.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color 2 |
1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN, 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 14 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 15 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/DataHasMoreFields.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color,column11 2 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313,asdsa 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf,asd 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN,,f 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414,as 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN,,g 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR,,g 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d,h 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU,,j 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN,,f 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9,s 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36,dfg 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909,hg 14 | 
13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN,,s 15 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943,fgs 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119,sdfgs 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH,,sdfg 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID,,sdfg 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN,,sdfg 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL,,dsf 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128,sdfgsdf -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color 2 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN, 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 14 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 15 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FileModeFieldFieldsMatch.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color 2 | 
1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,2015-11-14T10:34:09Z,819.47,CN, 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 14 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 15 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SchemaHasMoreFields.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country 2 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR 14 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN 15 | 
14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SourceOffset.data: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color 2 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 3 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 4 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN, 5 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 6 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 7 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 8 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 9 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 10 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 11 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 12 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 13 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 14 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 15 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 16 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 17 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 18 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 19 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 20 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 21 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithHeaderSkipLines.data: -------------------------------------------------------------------------------- 1 | #skip this line 2 | id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color 3 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 4 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 
5 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN, 6 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 7 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 8 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 9 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 10 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 11 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 12 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 13 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 14 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 15 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 16 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 17 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 18 | 16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 19 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 20 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 21 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 22 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithoutHeader.data: -------------------------------------------------------------------------------- 1 | 1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313 2 | 2,John,Kim,jkim1@miibeian.gov.cn,Male,53.19.132.185,2015-11-14T10:34:09Z,251.24,CZ,#3e56cf 3 | 3,Ashley,Austin,aaustin2@hatena.ne.jp,Female,21.164.37.9,,819.47,CN, 4 | 4,Jonathan,Mcdonald,jmcdonald3@amazon.co.uk,Male,188.172.42.140,2015-12-28T14:37:01Z,868.38,ID,#1b1414 5 | 5,Helen,Lane,hlane4@trellian.com,Female,159.171.138.190,2016-06-30T18:41:18Z,398.97,TN, 6 | 6,Scott,Lopez,slopez5@google.co.jp,Male,86.194.226.35,2015-08-13T02:13:51Z,322.99,BR, 7 | 7,Christine,Franklin,cfranklin6@reuters.com,Female,248.173.207.64,2015-12-22T11:29:57Z,301.26,PH,#1d5e9d 8 | 8,Helen,Andrews,handrews7@histats.com,Female,83.160.63.181,2016-03-06T11:41:10Z,217.96,CU, 9 | 9,Stephanie,Gordon,sgordon8@goodreads.com,Female,193.143.42.212,2015-10-27T22:07:24Z,495.80,CN, 10 | 10,Shirley,Andrews,sandrews9@flickr.com,Female,99.113.183.206,2015-11-07T11:12:52Z,157.75,BR,#fc1da9 11 | 11,Joshua,Reid,jreida@wikia.com,Male,197.96.118.164,2015-08-22T13:16:18Z,431.80,CO,#6e3e36 12 | 12,Frances,Parker,fparkerb@engadget.com,Female,226.237.57.25,2015-10-18T01:50:15Z,188.21,BR,#73e909 13 | 13,Sharon,Lawson,slawsonc@bravesites.com,Female,198.189.134.106,2016-01-14T17:51:09Z,206.73,VN, 14 | 14,Elizabeth,Wells,ewellsd@redcross.org,Female,120.108.59.206,2015-09-02T21:53:07Z,499.48,CZ,#e9c943 15 | 15,Norma,Wilson,nwilsone@google.com.br,Female,18.246.76.220,2015-09-27T02:10:48Z,-65.19,SE,#645119 16 | 
16,Joan,Watkins,jwatkinsf@yolasite.com,Female,240.27.33.114,2016-03-31T00:29:14Z,264.23,PH, 17 | 17,Gerald,Hamilton,ghamiltong@fc2.com,Male,182.75.62.95,2016-02-10T14:29:35Z,309.26,ID, 18 | 18,Paula,Taylor,ptaylorh@wikispaces.com,Female,245.74.203.0,2016-05-11T03:15:10Z,927.45,CN, 19 | 19,Carolyn,Burns,cburnsi@marketwatch.com,Female,180.243.11.10,2016-02-28T18:49:23Z,752.76,NL, 20 | 20,Robin,Bennett,rbennettj@cdc.gov,Female,169.77.92.179,2016-02-15T01:06:44Z,143.30,ID,#506128 -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnector/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Standard", 3 | "description": "This example will read Extended Log Format files and write them to Kafka.", 4 | "config": { 5 | "finished.path": "/tmp", 6 | "input.path": "/tmp", 7 | "error.path": "/tmp", 8 | "input.file.pattern": "^server\\d+\\.log", 9 | "topic": "users" 10 | } 11 | } -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/elf/elf/FieldsMatch.data: -------------------------------------------------------------------------------- 1 | #Software: xyz 1.2.3.4 2 | #Version: 1.0 3 | #Start-Date: 2019-10-23 22:00:00 4 | #Date: 2019-10-23 22:00:00 5 | #Fields: date time time-taken ip1 user gp id1 res cat1 status act method ct scheme host port ext ua ip2 bytes1 bytes2 id2 err1 err2 err3 str cr cat2 6 | #Remark: 1234567890 "xyz" "1.2.3.4" "type" 7 | 2019-10-23 21:00:00 1234 1.2.3.4 xyz abc - OK "cat" 0 zyx unk - pro site 123 - - 1.2.3.4 0 0 - none - - none - "cat" 8 | 2019-10-23 21:00:00 123 1.2.3.4 xyz abc - OK "dog" 0 zyx unk - pro site 123 - - 1.2.3.4 0 0 - none - - none - "dog" -------------------------------------------------------------------------------- /src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/SchemaHasMoreFields.data: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "first_name": "Jack", 4 | "last_name": "Garcia", 5 | "email": "jgarcia0@shop-pro.jp", 6 | "gender": "Male", 7 | "ip_address": "196.56.44.185", 8 | "last_login": "2015-09-30T15:29:03Z", 9 | "account_balance": 347.77, 10 | "country": "IT" 11 | } 12 | { 13 | "id": 2, 14 | "first_name": "John", 15 | "last_name": "Kim", 16 | "email": "jkim1@miibeian.gov.cn", 17 | "gender": "Male", 18 | "ip_address": "53.19.132.185", 19 | "last_login": "2015-11-14T10:34:09Z", 20 | "account_balance": 251.24, 21 | "country": "CZ" 22 | } 23 | { 24 | "id": 3, 25 | "first_name": "Ashley", 26 | "last_name": "Austin", 27 | "email": "aaustin2@hatena.ne.jp", 28 | "gender": "Female", 29 | "ip_address": "21.164.37.9", 30 | "account_balance": 819.47, 31 | "country": "CN" 32 | } 33 | { 34 | "id": 4, 35 | "first_name": "Jonathan", 36 | "last_name": "Mcdonald", 37 | "email": "jmcdonald3@amazon.co.uk", 38 | "gender": "Male", 39 | "ip_address": "188.172.42.140", 40 | "last_login": "2015-12-28T14:37:01Z", 41 | "account_balance": 868.38, 42 | "country": "ID" 43 | } 44 | { 45 | "id": 5, 46 | "first_name": "Helen", 47 | "last_name": "Lane", 48 | "email": "hlane4@trellian.com", 49 | "gender": "Female", 50 | "ip_address": "159.171.138.190", 51 | "last_login": "2016-06-30T18:41:18Z", 52 | "account_balance": 398.97, 53 | "country": "TN" 54 | } 55 | { 56 | "id": 6, 57 | "first_name": "Scott", 58 | "last_name": "Lopez", 59 | 
"email": "slopez5@google.co.jp", 60 | "gender": "Male", 61 | "ip_address": "86.194.226.35", 62 | "last_login": "2015-08-13T02:13:51Z", 63 | "account_balance": 322.99, 64 | "country": "BR" 65 | } 66 | { 67 | "id": 7, 68 | "first_name": "Christine", 69 | "last_name": "Franklin", 70 | "email": "cfranklin6@reuters.com", 71 | "gender": "Female", 72 | "ip_address": "248.173.207.64", 73 | "last_login": "2015-12-22T11:29:57Z", 74 | "account_balance": 301.26, 75 | "country": "PH" 76 | } 77 | { 78 | "id": 8, 79 | "first_name": "Helen", 80 | "last_name": "Andrews", 81 | "email": "handrews7@histats.com", 82 | "gender": "Female", 83 | "ip_address": "83.160.63.181", 84 | "last_login": "2016-03-06T11:41:10Z", 85 | "account_balance": 217.96, 86 | "country": "CU" 87 | } 88 | { 89 | "id": 9, 90 | "first_name": "Stephanie", 91 | "last_name": "Gordon", 92 | "email": "sgordon8@goodreads.com", 93 | "gender": "Female", 94 | "ip_address": "193.143.42.212", 95 | "last_login": "2015-10-27T22:07:24Z", 96 | "account_balance": 495.8, 97 | "country": "CN" 98 | } 99 | { 100 | "id": 10, 101 | "first_name": "Shirley", 102 | "last_name": "Andrews", 103 | "email": "sandrews9@flickr.com", 104 | "gender": "Female", 105 | "ip_address": "99.113.183.206", 106 | "last_login": "2015-11-07T11:12:52Z", 107 | "account_balance": 157.75, 108 | "country": "BR" 109 | } 110 | { 111 | "id": 11, 112 | "first_name": "Joshua", 113 | "last_name": "Reid", 114 | "email": "jreida@wikia.com", 115 | "gender": "Male", 116 | "ip_address": "197.96.118.164", 117 | "last_login": "2015-08-22T13:16:18Z", 118 | "account_balance": 431.8, 119 | "country": "CO" 120 | } 121 | { 122 | "id": 12, 123 | "first_name": "Frances", 124 | "last_name": "Parker", 125 | "email": "fparkerb@engadget.com", 126 | "gender": "Female", 127 | "ip_address": "226.237.57.25", 128 | "last_login": "2015-10-18T01:50:15Z", 129 | "account_balance": 188.21, 130 | "country": "BR" 131 | } 132 | { 133 | "id": 13, 134 | "first_name": "Sharon", 135 | "last_name": "Lawson", 136 | "email": "slawsonc@bravesites.com", 137 | "gender": "Female", 138 | "ip_address": "198.189.134.106", 139 | "last_login": "2016-01-14T17:51:09Z", 140 | "account_balance": 206.73, 141 | "country": "VN" 142 | } 143 | { 144 | "id": 14, 145 | "first_name": "Elizabeth", 146 | "last_name": "Wells", 147 | "email": "ewellsd@redcross.org", 148 | "gender": "Female", 149 | "ip_address": "120.108.59.206", 150 | "last_login": "2015-09-02T21:53:07Z", 151 | "account_balance": 499.48, 152 | "country": "CZ" 153 | } 154 | { 155 | "id": 15, 156 | "first_name": "Norma", 157 | "last_name": "Wilson", 158 | "email": "nwilsone@google.com.br", 159 | "gender": "Female", 160 | "ip_address": "18.246.76.220", 161 | "last_login": "2015-09-27T02:10:48Z", 162 | "account_balance": -65.19, 163 | "country": "SE" 164 | } 165 | { 166 | "id": 16, 167 | "first_name": "Joan", 168 | "last_name": "Watkins", 169 | "email": "jwatkinsf@yolasite.com", 170 | "gender": "Female", 171 | "ip_address": "240.27.33.114", 172 | "last_login": "2016-03-31T00:29:14Z", 173 | "account_balance": 264.23, 174 | "country": "PH" 175 | } 176 | { 177 | "id": 17, 178 | "first_name": "Gerald", 179 | "last_name": "Hamilton", 180 | "email": "ghamiltong@fc2.com", 181 | "gender": "Male", 182 | "ip_address": "182.75.62.95", 183 | "last_login": "2016-02-10T14:29:35Z", 184 | "account_balance": 309.26, 185 | "country": "ID" 186 | } 187 | { 188 | "id": 18, 189 | "first_name": "Paula", 190 | "last_name": "Taylor", 191 | "email": "ptaylorh@wikispaces.com", 192 | "gender": "Female", 193 | 
"ip_address": "245.74.203.0", 194 | "last_login": "2016-05-11T03:15:10Z", 195 | "account_balance": 927.45, 196 | "country": "CN" 197 | } 198 | { 199 | "id": 19, 200 | "first_name": "Carolyn", 201 | "last_name": "Burns", 202 | "email": "cburnsi@marketwatch.com", 203 | "gender": "Female", 204 | "ip_address": "180.243.11.10", 205 | "last_login": "2016-02-28T18:49:23Z", 206 | "account_balance": 752.76, 207 | "country": "NL" 208 | } 209 | { 210 | "id": 20, 211 | "first_name": "Robin", 212 | "last_name": "Bennett", 213 | "email": "rbennettj@cdc.gov", 214 | "gender": "Female", 215 | "ip_address": "169.77.92.179", 216 | "last_login": "2016-02-15T01:06:44Z", 217 | "account_balance": 143.3, 218 | "country": "ID" 219 | } 220 | -------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | --------------------------------------------------------------------------------