├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── workflows │ └── ci.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug-report.md ├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── kafka-connect-neo4j ├── assets │ └── neo4j-logo.png ├── README.md ├── src │ ├── test │ │ ├── resources │ │ │ ├── logback.xml │ │ │ └── exampleConfigs │ │ │ │ ├── sinkExample.json │ │ │ │ └── sourceExample.json │ │ └── kotlin │ │ │ └── streams │ │ │ └── kafka │ │ │ └── connect │ │ │ ├── utils │ │ │ └── TransactionExtensions.kt │ │ │ ├── sink │ │ │ ├── EventBuilderTest.kt │ │ │ ├── MapValueConverterTest.kt │ │ │ └── Neo4jValueConverterNestedStructTest.kt │ │ │ └── source │ │ │ └── Neo4jSourceConnectorConfigTest.kt │ └── main │ │ ├── resources │ │ ├── kafka-connect-version.properties │ │ └── kafka-connect-neo4j.properties │ │ ├── kotlin │ │ └── streams │ │ │ └── kafka │ │ │ └── connect │ │ │ ├── common │ │ │ └── KafkaConnectConfig.kt │ │ │ ├── sink │ │ │ ├── EventBuilder.kt │ │ │ ├── Neo4jStrategyStorage.kt │ │ │ ├── Neo4jSinkTask.kt │ │ │ ├── converters │ │ │ │ ├── Neo4jValueConverter.kt │ │ │ │ └── MapValueConverter.kt │ │ │ ├── Neo4jSinkConnector.kt │ │ │ ├── Neo4jSinkService.kt │ │ │ └── Neo4jSinkConnectorConfig.kt │ │ │ ├── utils │ │ │ ├── PropertiesUtil.kt │ │ │ ├── ConnectExtensionFunctions.kt │ │ │ └── Topics.kt │ │ │ └── source │ │ │ ├── Neo4jSourceTask.kt │ │ │ ├── Neo4jSourceConnector.kt │ │ │ ├── SourceRecordBuilder.kt │ │ │ ├── Neo4jSourceConnectorConfig.kt │ │ │ └── Neo4jSourceService.kt │ │ └── assembly │ │ └── package.xml ├── config │ └── sink-quickstart.properties ├── doc │ ├── contrib.sink.avro.neo4j.json │ ├── contrib.source.string-json.neo4j.json │ ├── contrib.source.string.neo4j.json │ ├── contrib.source.avro.neo4j.json │ ├── contrib.sink.string-json.neo4j.json │ ├── docker-compose.yml │ └── readme.adoc └── docker │ ├── contrib.sink.avro.neo4j.json │ ├── contrib.source.string-json.neo4j.json │ ├── contrib.source.string.neo4j.json │ ├── contrib.sink.string-json.neo4j.json │ ├── contrib.source.avro.neo4j.json │ ├── docker-compose.yml │ └── readme.adoc ├── common ├── src │ ├── main │ │ └── kotlin │ │ │ └── streams │ │ │ ├── events │ │ │ ├── StreamsPluginStatus.kt │ │ │ ├── ProcedureResults.kt │ │ │ └── StreamsEvent.kt │ │ │ ├── utils │ │ │ ├── CoroutineUtils.kt │ │ │ ├── StreamsUtils.kt │ │ │ ├── IngestionUtils.kt │ │ │ ├── ValidationUtils.kt │ │ │ └── SchemaUtils.kt │ │ │ ├── service │ │ │ ├── sink │ │ │ │ └── strategy │ │ │ │ │ ├── CypherTemplateStrategy.kt │ │ │ │ │ ├── IngestionStrategy.kt │ │ │ │ │ ├── NodePatternIngestionStrategy.kt │ │ │ │ │ ├── SourceIdIngestionStrategy.kt │ │ │ │ │ └── RelationshipPatternIngestionStrategy.kt │ │ │ ├── StreamsSinkService.kt │ │ │ └── errors │ │ │ │ ├── KafkaErrorService.kt │ │ │ │ └── ErrorService.kt │ │ │ └── extensions │ │ │ ├── CommonExtensions.kt │ │ │ └── CoroutineExtensions.kt │ └── test │ │ └── kotlin │ │ └── streams │ │ ├── utils │ │ ├── StreamsUtilsTest.kt │ │ ├── ValidationUtilsTest.kt │ │ ├── CoroutineUtilsTest.kt │ │ └── SchemaUtilsTest.kt │ │ └── service │ │ └── sink │ │ ├── strategy │ │ └── Neo4j.kt │ │ └── errors │ │ └── KafkaErrorServiceTest.kt └── pom.xml ├── .gitignore ├── assembly └── assembly.xml ├── test-support ├── src │ └── main │ │ └── kotlin │ │ └── streams │ │ ├── MavenUtils.kt │ │ └── Neo4jContainerExtension.kt └── pom.xml └── README.md /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * 
@neo4j-contrib/team-connectors 2 | 3 | /.github/ @ali-ince @fbiville @venikkin 4 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-contrib/neo4j-streams/HEAD/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /kafka-connect-neo4j/assets/neo4j-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-contrib/neo4j-streams/HEAD/kafka-connect-neo4j/assets/neo4j-logo.png -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/events/StreamsPluginStatus.kt: -------------------------------------------------------------------------------- 1 | package streams.events 2 | 3 | enum class StreamsPluginStatus { RUNNING, STOPPED, UNKNOWN } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/events/ProcedureResults.kt: -------------------------------------------------------------------------------- 1 | package streams.events 2 | 3 | class StreamResult(@JvmField val event: Map) 4 | class KeyValueResult(@JvmField val name: String, @JvmField val value: Any?) -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes # 2 | 3 | One sentence summary of the change. 4 | 5 | ## Proposed Changes (Mandatory) 6 | 7 | A brief list of proposed changes in order to fix the issue: 8 | 9 | - 10 | - 11 | - 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: maven 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | cooldown: 8 | default-days: 3 9 | open-pull-requests-limit: 10 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: daily 14 | cooldown: 15 | default-days: 3 16 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Welcome to your Kafka Connect Neo4j Connector! 
4 | 5 | # Build it locally 6 | 7 | Build the project by running the following command: 8 | 9 | $ mvn clean install 10 | 11 | Inside the directory `/kafka-connect-neo4j/target/component/packages` you'll find a file named `neo4j-kafka-connect-neo4j-.zip` 12 | 13 | # Run with docker 14 | 15 | Please refer to this file [readme.adoc](doc/readme.adoc) 16 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/guides 2 | classes 3 | *.patch 4 | dependency-reduced-pom.xml 5 | derby* 6 | .gradle 7 | gradle.properties 8 | build/ 9 | *~ 10 | \#* 11 | target 12 | out 13 | .project 14 | .classpath 15 | .settings 16 | .externalToolBuilders/ 17 | .scala_dependencies 18 | .factorypath 19 | .cache 20 | .cache-main 21 | .cache-tests 22 | *.iws 23 | *.ipr 24 | *.iml 25 | .idea 26 | .DS_Store 27 | .shell_history 28 | .mailmap 29 | .java-version 30 | Thumbs.db 31 | bin 32 | doc/node 33 | doc/node_modules 34 | */docker/plugins/* 35 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/utils/CoroutineUtils.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import kotlinx.coroutines.delay 4 | 5 | suspend fun retryForException(exceptions: Array>, retries: Int, delayTime: Long, action: () -> T): T { 6 | return try { 7 | action() 8 | } catch (e: Exception) { 9 | val isInstance = exceptions.any { it.isInstance(e) } 10 | if (isInstance && retries > 0) { 11 | delay(delayTime) 12 | retryForException(exceptions = exceptions, retries = retries - 1, delayTime = delayTime, action = action) 13 | } else { 14 | throw e 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/resources/kafka-connect-version.properties: -------------------------------------------------------------------------------- 1 | ## 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ## 15 | 16 | version=${project.version} -------------------------------------------------------------------------------- /kafka-connect-neo4j/config/sink-quickstart.properties: -------------------------------------------------------------------------------- 1 | # A simple configuration properties, is the same as contrib.sink.avro.neo4j.json 2 | name=Neo4jSinkConnector 3 | topics=my-topic 4 | connector.class=streams.kafka.connect.sink.Neo4jSinkConnector 5 | errors.retry.timeout=-1 6 | errors.retry.delay.max.ms=1000 7 | errors.tolerance=all 8 | errors.log.enable=true 9 | errors.log.include.messages=true 10 | neo4j.server.uri=bolt://neo4j:7687 11 | neo4j.authentication.basic.username=neo4j 12 | neo4j.authentication.basic.password=kafkaconnect 13 | neo4j.encryption.enabled=false 14 | neo4j.topic.cypher.my-topic=MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f) -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/contrib.sink.avro.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSinkConnector", 3 | "config": { 4 | "topics": "my-topic", 5 | "connector.class": "streams.kafka.connect.sink.Neo4jSinkConnector", 6 | "errors.retry.timeout": "-1", 7 | "errors.retry.delay.max.ms": "1000", 8 | "errors.tolerance": "all", 9 | "errors.log.enable": true, 10 | "errors.log.include.messages": true, 11 | "neo4j.server.uri": "bolt://neo4j:7687", 12 | "neo4j.authentication.basic.username": "neo4j", 13 | "neo4j.authentication.basic.password": "kafkaconnect", 14 | "neo4j.encryption.enabled": false, 15 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname, from: 'AVRO'}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/contrib.sink.avro.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSinkConnector", 3 | "config": { 4 | "topics": "my-topic", 5 | "connector.class": "streams.kafka.connect.sink.Neo4jSinkConnector", 6 | "errors.retry.timeout": "-1", 7 | "errors.retry.delay.max.ms": "1000", 8 | "errors.tolerance": "all", 9 | "errors.log.enable": true, 10 | "errors.deadletterqueue.topic.name": "test-error-topic", 11 | "errors.log.include.messages": true, 12 | "neo4j.server.uri": "bolt://neo4j:7687", 13 | "neo4j.authentication.basic.username": "neo4j", 14 | "neo4j.authentication.basic.password": "kafkaconnect", 15 | "neo4j.encryption.enabled": false, 16 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 17 | } 18 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/contrib.source.string-json.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnector", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 8 | "neo4j.server.uri": "bolt://neo4j:7687", 9 | "neo4j.authentication.basic.username": "neo4j", 10 | "neo4j.authentication.basic.password": 
"kafkaconnect", 11 | "neo4j.encryption.enabled": false, 12 | "neo4j.streaming.poll.interval.msecs": 5000, 13 | "neo4j.streaming.property": "timestamp", 14 | "neo4j.streaming.from": "LAST_COMMITTED", 15 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 16 | } 17 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/contrib.source.string-json.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnectorJSON", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 8 | "neo4j.server.uri": "bolt://neo4j:7687", 9 | "neo4j.authentication.basic.username": "neo4j", 10 | "neo4j.authentication.basic.password": "kafkaconnect", 11 | "neo4j.encryption.enabled": false, 12 | "neo4j.streaming.poll.interval.msecs": 5000, 13 | "neo4j.streaming.property": "timestamp", 14 | "neo4j.streaming.from": "LAST_COMMITTED", 15 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 16 | } 17 | } -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [ '5.0' ] 6 | pull_request: 7 | branches: [ '5.0' ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | neo4j-image: [ "neo4j:4.4", "neo4j:4.4-enterprise", "neo4j:5", "neo4j:5-enterprise", "neo4j:2025", "neo4j:2025-enterprise" ] 16 | name: Build and test with ${{ matrix.neo4j-image }} 17 | steps: 18 | - uses: actions/checkout@v5 19 | 20 | - name: Set up Java 21 | uses: actions/setup-java@v5 22 | with: 23 | distribution: temurin 24 | java-version: 11 25 | cache: 'maven' 26 | 27 | - name: Build and run tests 28 | env: 29 | NEO4J_IMAGE: ${{ matrix.neo4j-image }} 30 | run: mvn -B clean verify --file pom.xml --no-transfer-progress -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/contrib.source.string.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnectorString", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "org.apache.kafka.connect.storage.StringConverter", 7 | "value.converter": "org.apache.kafka.connect.storage.StringConverter", 8 | "neo4j.server.uri": "bolt://neo4j:7687", 9 | "neo4j.authentication.basic.username": "neo4j", 10 | "neo4j.authentication.basic.password": "kafkaconnect", 11 | "neo4j.encryption.enabled": false, 12 | "neo4j.streaming.poll.interval.msecs": 5000, 13 | "neo4j.streaming.property": "timestamp", 14 | "neo4j.streaming.from": "LAST_COMMITTED", 15 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 16 | } 17 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/contrib.source.string.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"name": "Neo4jSourceConnectorString", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "org.apache.kafka.connect.storage.StringConverter", 7 | "value.converter": "org.apache.kafka.connect.storage.StringConverter", 8 | "neo4j.server.uri": "bolt://neo4j:7687", 9 | "neo4j.authentication.basic.username": "neo4j", 10 | "neo4j.authentication.basic.password": "kafkaconnect", 11 | "neo4j.encryption.enabled": false, 12 | "neo4j.streaming.poll.interval.msecs": 5000, 13 | "neo4j.streaming.property": "timestamp", 14 | "neo4j.streaming.from": "LAST_COMMITTED", 15 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 16 | } 17 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/contrib.source.avro.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnector", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "io.confluent.connect.avro.AvroConverter", 7 | "value.converter": "io.confluent.connect.avro.AvroConverter", 8 | "neo4j.server.uri": "bolt://neo4j:7687", 9 | "neo4j.authentication.basic.username": "neo4j", 10 | "neo4j.authentication.basic.password": "kafkaconnect", 11 | "neo4j.encryption.enabled": false, 12 | "neo4j.streaming.poll.interval.msecs": 5000, 13 | "neo4j.streaming.property": "timestamp", 14 | "neo4j.streaming.from": "LAST_COMMITTED", 15 | "neo4j.enforce.schema": true, 16 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 17 | } 18 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | ## Guidelines 8 | 9 | Please note that GitHub issues are only meant for bug reports/feature requests. If you have questions on how to use Streams, please ask on [StackOverflow](https://stackoverflow.com/questions/tagged/neo4j-streams) instead of creating an issue here. 10 | 11 | ## Feature description (Mandatory) 12 | A clear and concise description of what you want to happen. Add any considered drawbacks. 13 | 14 | ## Considered alternatives 15 | A clear and concise description of any alternative solutions or features you've considered. Maybe there is something in the project that could be reused? 16 | 17 | ## How this feature can improve the project? 18 | If you can, explain how users will be able to use this and possibly write out a version the docs. 19 | Maybe a screenshot or design? 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | ## Guidelines 8 | 9 | Please note that GitHub issues are only meant for bug reports/feature requests. If you have questions on how to use Streams, please ask on [StackOverflow](https://stackoverflow.com/questions/tagged/neo4j-streams) instead of creating an issue here. 
10 | 11 | ## Expected Behavior (Mandatory) 12 | 13 | 14 | ## Actual Behavior (Mandatory) 15 | 16 | 17 | ## How to Reproduce the Problem 18 | 19 | ### Simple Dataset (where it's possibile) 20 | 21 | ``` 22 | //Insert here a set of Cypher statements that helps us to reproduce the problem 23 | 24 | ``` 25 | 26 | 27 | ### Steps (Mandatory) 28 | 29 | 1. 30 | 1. 31 | 1. 32 | 33 | ## Screenshots (where it's possibile) 34 | 35 | ## Specifications (Mandatory) 36 | 37 | Currently used versions 38 | 39 | ### Versions 40 | 41 | - OS: 42 | - Neo4j: 43 | - Neo4j-Streams: 44 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/resources/exampleConfigs/sinkExample.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSinkConnectorJSONString", 3 | "config": { 4 | "topics": "my-topic", 5 | "connector.class": "streams.kafka.connect.sink.Neo4jSinkConnector", 6 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "key.converter.schemas.enable": false, 8 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 9 | "value.converter.schemas.enable": false, 10 | "errors.retry.timeout": "-1", 11 | "errors.retry.delay.max.ms": "1000", 12 | "errors.tolerance": "all", 13 | "errors.log.enable": true, 14 | "errors.log.include.messages": true, 15 | "neo4j.server.uri": "bolt://neo4j:7687", 16 | "neo4j.authentication.basic.username": "neo4j", 17 | "neo4j.authentication.basic.password": "password", 18 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 19 | } 20 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/contrib.sink.string-json.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSinkConnectorJSON", 3 | "config": { 4 | "key.converter": "org.apache.kafka.connect.storage.StringConverter", 5 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 6 | "value.converter.schemas.enable": false, 7 | "topics": "my-topic", 8 | "connector.class": "streams.kafka.connect.sink.Neo4jSinkConnector", 9 | "errors.retry.timeout": "-1", 10 | "errors.retry.delay.max.ms": "1000", 11 | "errors.tolerance": "all", 12 | "errors.log.enable": true, 13 | "errors.log.include.messages": true, 14 | "neo4j.server.uri": "bolt://neo4j:7687", 15 | "neo4j.authentication.basic.username": "neo4j", 16 | "neo4j.authentication.basic.password": "kafkaconnect", 17 | "neo4j.encryption.enabled": false, 18 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname, from: 'JSON'}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/common/KafkaConnectConfig.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.common 2 | 3 | object KafkaConnectConfig { 4 | 5 | val options = setOf( 6 | // common 7 | "name", 8 | "connector.class", 9 | "tasks.max", 10 | "key.converter", 11 | "value.converter", 12 | "header.converter", 13 | "config.action.reload", 14 | "transforms", 15 | "predicates", 16 | "errors.retry.timeout", 17 | "errors.retry.delay.max.ms", 18 | "errors.tolerance", 19 | "errors.log.enable", 20 | 
"errors.log.include.messages", 21 | // sink 22 | "topics", 23 | "topic.regex", 24 | "errors.deadletterqueue.topic.name", 25 | "errors.deadletterqueue.topic.replication.factor", 26 | "errors.deadletterqueue.context.headers.enable", 27 | // source 28 | "topic.creation.groups", 29 | "exactly.once.support", 30 | "transaction.boundary", 31 | "transaction.boundary.interval.ms", 32 | "offsets.storage.topic" 33 | ) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/utils/StreamsUtilsTest.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import org.junit.Test 4 | import java.io.IOException 5 | import java.lang.RuntimeException 6 | import kotlin.test.assertNull 7 | import kotlin.test.assertTrue 8 | 9 | class StreamsUtilsTest { 10 | 11 | private val foo = "foo" 12 | 13 | @Test 14 | fun shouldReturnValue() { 15 | val data = StreamsUtils.ignoreExceptions({ 16 | foo 17 | }, RuntimeException::class.java) 18 | assertTrue { data != null && data == foo } 19 | } 20 | 21 | @Test 22 | fun shouldIgnoreTheException() { 23 | val data = StreamsUtils.ignoreExceptions({ 24 | throw RuntimeException() 25 | }, RuntimeException::class.java) 26 | assertNull(data) 27 | } 28 | 29 | @Test(expected = IOException::class) 30 | fun shouldNotIgnoreTheException() { 31 | StreamsUtils.ignoreExceptions({ 32 | throw IOException() 33 | }, RuntimeException::class.java) 34 | } 35 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/contrib.sink.string-json.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSinkConnector", 3 | "config": { 4 | "topics": "my-topic", 5 | "connector.class": "streams.kafka.connect.sink.Neo4jSinkConnector", 6 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "key.converter.schemas.enable": false, 8 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 9 | "value.converter.schemas.enable": false, 10 | "errors.retry.timeout": "-1", 11 | "errors.retry.delay.max.ms": "1000", 12 | "errors.tolerance": "all", 13 | "errors.log.enable": true, 14 | "errors.log.include.messages": true, 15 | "neo4j.server.uri": "bolt://neo4j:7687", 16 | "neo4j.authentication.basic.username": "neo4j", 17 | "neo4j.authentication.basic.password": "kafkaconnect", 18 | "neo4j.encryption.enabled": false, 19 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 20 | } 21 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/resources/exampleConfigs/sourceExample.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnectorAVRO", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "io.confluent.connect.avro.AvroConverter", 7 | "key.converter.schema.registry.url": "http://schema-registry:8081", 8 | "value.converter": "io.confluent.connect.avro.AvroConverter", 9 | "value.converter.schema.registry.url": "http://schema-registry:8081", 10 | "neo4j.server.uri": "bolt://neo4j:7687", 11 | "neo4j.authentication.basic.username": "neo4j", 12 | "neo4j.authentication.basic.password": "password", 13 | "neo4j.streaming.poll.interval.msecs": 5000, 
14 | "neo4j.streaming.property": "timestamp", 15 | "neo4j.streaming.from": "LAST_COMMITTED", 16 | "neo4j.enforce.schema": true, 17 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.surname AS surname, ts.timestamp AS timestamp" 18 | } 19 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/contrib.source.avro.neo4j.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Neo4jSourceConnectorAVRO", 3 | "config": { 4 | "topic": "my-topic", 5 | "connector.class": "streams.kafka.connect.source.Neo4jSourceConnector", 6 | "key.converter": "io.confluent.connect.avro.AvroConverter", 7 | "value.converter": "io.confluent.connect.avro.AvroConverter", 8 | "key.converter.schema.registry.url": "http://schema_registry:8081", 9 | "value.converter.schema.registry.url": "http://schema_registry:8081", 10 | "neo4j.server.uri": "bolt://neo4j:7687", 11 | "neo4j.authentication.basic.username": "neo4j", 12 | "neo4j.authentication.basic.password": "kafkaconnect", 13 | "neo4j.encryption.enabled": false, 14 | "neo4j.streaming.poll.interval.msecs": 5000, 15 | "neo4j.streaming.property": "timestamp", 16 | "neo4j.streaming.from": "LAST_COMMITTED", 17 | "neo4j.enforce.schema": true, 18 | "neo4j.source.query": "MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp" 19 | } 20 | } -------------------------------------------------------------------------------- /assembly/assembly.xml: -------------------------------------------------------------------------------- 1 | 5 | streams-assembly-all 6 | 7 | jar 8 | 9 | false 10 | 11 | 12 | true 13 | false 14 | 15 | 16 | 17 | 18 | metaInf-services 19 | 20 | 21 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.4/apache-maven-3.9.4-bin.zip 18 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar 19 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/sink/strategy/CypherTemplateStrategy.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import org.neo4j.caniuse.CanIUse.canIUse 4 | import org.neo4j.caniuse.Cypher 5 | import org.neo4j.caniuse.Neo4j 6 | import streams.service.StreamsSinkEntity 7 | import streams.utils.StreamsUtils 8 | 9 | class CypherTemplateStrategy(neo4j: Neo4j, query: String) : IngestionStrategy { 10 | private val cypherPrefix = if (canIUse(Cypher.explicitCypher5Selection()).withNeo4j(neo4j)) "CYPHER 5 " else "" 11 | private val fullQuery = "${cypherPrefix}${StreamsUtils.UNWIND} $query" 12 | 13 | override fun mergeNodeEvents(events: Collection): List { 14 | return listOf(QueryEvents(fullQuery, events.mapNotNull { it.value as? Map })) 15 | } 16 | 17 | override fun deleteNodeEvents(events: Collection): List = emptyList() 18 | 19 | override fun mergeRelationshipEvents(events: Collection): List = emptyList() 20 | 21 | override fun deleteRelationshipEvents(events: Collection): List = emptyList() 22 | 23 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/utils/StreamsUtils.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | object StreamsUtils { 4 | 5 | @JvmStatic 6 | val UNWIND: String = "UNWIND \$events AS event" 7 | 8 | @JvmStatic 9 | val WITH_EVENT_FROM: String = "WITH event, from" 10 | 11 | @JvmStatic 12 | val STREAMS_CONFIG_PREFIX = "streams." 13 | 14 | @JvmStatic 15 | val STREAMS_SINK_TOPIC_PREFIX = "sink.topic.cypher." 16 | 17 | @JvmStatic 18 | val LEADER = "LEADER" 19 | 20 | @JvmStatic 21 | val SYSTEM_DATABASE_NAME = "system" 22 | 23 | fun ignoreExceptions(action: () -> T, vararg toIgnore: Class): T? { 24 | return try { 25 | action() 26 | } catch (e: Throwable) { 27 | if (toIgnore.isEmpty()) { 28 | return null 29 | } 30 | return if (toIgnore.any { it.isInstance(e) }) { 31 | null 32 | } else { 33 | throw e 34 | } 35 | } 36 | } 37 | 38 | fun closeSafely(closeable: AutoCloseable, onError: (Throwable) -> Unit = {}) = try { 39 | closeable.close() 40 | } catch (e: Throwable) { 41 | onError(e) 42 | } 43 | 44 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/EventBuilder.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import org.apache.kafka.connect.sink.SinkRecord 4 | import org.slf4j.Logger 5 | import org.slf4j.LoggerFactory 6 | import streams.kafka.connect.utils.toStreamsSinkEntity 7 | import streams.service.StreamsSinkEntity 8 | 9 | class EventBuilder { 10 | private var batchSize: Int? = null 11 | private lateinit var sinkRecords: Collection 12 | 13 | fun withBatchSize(batchSize: Int): EventBuilder { 14 | this.batchSize = batchSize 15 | return this 16 | } 17 | 18 | fun withSinkRecords(sinkRecords: Collection): EventBuilder { 19 | this.sinkRecords = sinkRecords 20 | return this 21 | } 22 | 23 | fun build(): Map>> { // > 24 | val batchSize = this.batchSize!! 
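        // Group the incoming records by topic, convert each SinkRecord into a StreamsSinkEntity,
        // and split every topic's records into chunks of at most `batchSize`; a group smaller than
        // one batch is kept as a single chunk.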
25 | return this.sinkRecords 26 | .groupBy { it.topic() } 27 | .mapValues { entry -> 28 | val value = entry.value.map { it.toStreamsSinkEntity() } 29 | if (batchSize > value.size) listOf(value) else value.chunked(batchSize) 30 | } 31 | } 32 | 33 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/kotlin/streams/kafka/connect/utils/TransactionExtensions.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.utils 2 | 3 | import org.neo4j.driver.Result 4 | import org.neo4j.driver.Transaction 5 | import org.neo4j.driver.exceptions.NoSuchRecordException 6 | import org.neo4j.driver.types.Node 7 | 8 | fun Transaction.findNodes(label: String): Result = this.run(""" 9 | MATCH (n:`${label}`) 10 | RETURN n 11 | """.trimIndent()) 12 | 13 | fun Transaction.findNode(label: String, key: String, value: Any): Node? = try { 14 | this.run(""" 15 | MATCH (n:`${label}`{`$key`: ${'$'}value}) 16 | RETURN n 17 | """.trimIndent(), mapOf("value" to value)) 18 | .single()[0] 19 | .asNode() 20 | } catch (e: NoSuchRecordException) { 21 | null 22 | } 23 | 24 | fun Transaction.allRelationships(): Result = this.run(""" 25 | MATCH ()-[r]->() 26 | RETURN r 27 | """.trimIndent()) 28 | 29 | fun Transaction.allNodes(): Result = this.run(""" 30 | MATCH (n) 31 | RETURN n 32 | """.trimIndent()) 33 | 34 | fun Transaction.allLabels(): List = this.run(""" 35 | CALL db.labels() YIELD label 36 | RETURN label 37 | """.trimIndent()) 38 | .list() 39 | .map { it["label"].asString() } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/utils/PropertiesUtil.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.utils 2 | 3 | import org.slf4j.LoggerFactory 4 | import java.util.* 5 | 6 | class PropertiesUtil { 7 | 8 | companion object { 9 | private val LOGGER = LoggerFactory.getLogger(PropertiesUtil::class.java) 10 | private const val DEFAULT_VERSION = "unknown" 11 | private var properties: Properties? = null 12 | private var VERSION: String? = null 13 | init { 14 | properties = Properties() 15 | properties!!.load(PropertiesUtil::class.java.getResourceAsStream("/kafka-connect-version.properties")) 16 | properties!!.load(PropertiesUtil::class.java.getResourceAsStream("/kafka-connect-neo4j.properties")) 17 | VERSION = try { 18 | properties!!.getProperty("version", DEFAULT_VERSION).trim() 19 | } catch (e: Exception) { 20 | LOGGER.warn("error while loading version:", e) 21 | DEFAULT_VERSION 22 | } 23 | } 24 | 25 | fun getVersion(): String { 26 | return VERSION!! 27 | } 28 | 29 | fun getProperty(key: String): String { 30 | return properties!!.getProperty(key) 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /test-support/src/main/kotlin/streams/MavenUtils.kt: -------------------------------------------------------------------------------- 1 | package streams 2 | 3 | import org.slf4j.Logger 4 | import java.io.BufferedReader 5 | import java.io.File 6 | import java.io.InputStreamReader 7 | 8 | 9 | object MavenUtils { 10 | fun mvnw(path: String = ".", logger: Logger? 
= null, vararg args: String) { 11 | 12 | val rt = Runtime.getRuntime() 13 | val mvnw = if (System.getProperty("os.name").startsWith("Windows")) "./mvnw.cmd" else "./mvnw" 14 | val commands = arrayOf(mvnw, "-pl", "!doc,!kafka-connect-neo4j", "-DbuildSubDirectory=containerPlugins") + 15 | args.let { if (it.isNullOrEmpty()) arrayOf("package", "-Dmaven.test.skip") else it } 16 | val proc = rt.exec(commands, null, File(path)) 17 | 18 | val stdInput = BufferedReader(InputStreamReader(proc.inputStream)) 19 | 20 | val stdError = BufferedReader(InputStreamReader(proc.errorStream)) 21 | 22 | // Read the output from the command 23 | var s: String? = null 24 | while (stdInput.readLine().also { s = it } != null) { 25 | logger?.info(s) 26 | } 27 | 28 | // Read any errors from the attempted command 29 | while (stdError.readLine().also { s = it } != null) { 30 | logger?.error(s) 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/utils/IngestionUtils.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import streams.extensions.quote 4 | 5 | object IngestionUtils { 6 | const val labelSeparator = ":" 7 | const val keySeparator = ", " 8 | 9 | fun getLabelsAsString(labels: Collection): String = labels 10 | .map { it.quote() } 11 | .joinToString(labelSeparator) 12 | .let { if (it.isNotBlank()) "$labelSeparator$it" else it } 13 | 14 | fun getNodeKeysAsString(prefix: String = "properties", keys: Set): String = keys 15 | .map { toQuotedProperty(prefix, it) } 16 | .joinToString(keySeparator) 17 | 18 | private fun toQuotedProperty(prefix: String = "properties", property: String): String { 19 | val quoted = property.quote() 20 | return "$quoted: event.$prefix.$quoted" 21 | } 22 | 23 | fun getNodeMergeKeys(prefix: String, keys: Set): String = keys 24 | .map { 25 | val quoted = it.quote() 26 | "$quoted: event.$prefix.$quoted" 27 | } 28 | .joinToString(keySeparator) 29 | 30 | fun containsProp(key: String, properties: List): Boolean = if (key.contains(".")) { 31 | properties.contains(key) || properties.any { key.startsWith("$it.") } 32 | } else { 33 | properties.contains(key) 34 | } 35 | } -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/utils/ValidationUtilsTest.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import org.junit.Ignore 4 | import org.junit.Test 5 | import org.testcontainers.containers.GenericContainer 6 | import kotlin.test.assertEquals 7 | import kotlin.test.assertTrue 8 | 9 | class FakeWebServer: GenericContainer("alpine") { 10 | override fun start() { 11 | this.withCommand("/bin/sh", "-c", "while true; do { echo -e 'HTTP/1.1 200 OK'; echo ; } | nc -l -p 8000; done") 12 | .withExposedPorts(8000) 13 | super.start() 14 | } 15 | 16 | fun getUrl() = "http://localhost:${getMappedPort(8000)}" 17 | } 18 | 19 | @Ignore("fails on CI") 20 | class ValidationUtilsTest { 21 | 22 | @Test 23 | fun `should reach the server`() { 24 | val httpServer = FakeWebServer() 25 | httpServer.start() 26 | assertTrue { ValidationUtils.checkServersUnreachable(httpServer.getUrl()).isEmpty() } 27 | httpServer.stop() 28 | } 29 | 30 | @Test 31 | fun `should not reach the server`() { 32 | val urls = "http://my.fake.host:1234,PLAINTEXT://my.fake.host1:1234,my.fake.host2:1234" 33 | val checkServersUnreachable = ValidationUtils 34 | 
.checkServersUnreachable(urls) 35 | assertTrue { checkServersUnreachable.isNotEmpty() } 36 | assertEquals(urls.split(",").toList(), checkServersUnreachable) 37 | } 38 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceTask.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil 4 | import org.apache.kafka.connect.source.SourceRecord 5 | import org.apache.kafka.connect.source.SourceTask 6 | import org.slf4j.Logger 7 | import org.slf4j.LoggerFactory 8 | import streams.kafka.connect.sink.Neo4jSinkTask 9 | import streams.utils.StreamsUtils 10 | 11 | class Neo4jSourceTask: SourceTask() { 12 | private lateinit var settings: Map 13 | private lateinit var config: Neo4jSourceConnectorConfig 14 | private lateinit var neo4jSourceService: Neo4jSourceService 15 | 16 | private val log: Logger = LoggerFactory.getLogger(Neo4jSinkTask::class.java) 17 | 18 | override fun version(): String = VersionUtil.version(this.javaClass as Class<*>) 19 | 20 | override fun start(props: MutableMap?) { 21 | settings = props!! 22 | config = Neo4jSourceConnectorConfig(settings) 23 | neo4jSourceService = Neo4jSourceService(config, context.offsetStorageReader()) 24 | } 25 | 26 | override fun stop() { 27 | log.info("Stop() - Closing Neo4j Source Service.") 28 | StreamsUtils.ignoreExceptions({ neo4jSourceService.close() }, UninitializedPropertyAccessException::class.java) 29 | } 30 | 31 | override fun poll(): List? = neo4jSourceService.poll() 32 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/extensions/CommonExtensions.kt: -------------------------------------------------------------------------------- 1 | package streams.extensions 2 | 3 | import org.neo4j.driver.types.Node 4 | import org.neo4j.driver.types.Relationship 5 | import java.util.* 6 | import javax.lang.model.SourceVersion 7 | 8 | fun Map<*, *>.asProperties() = this.let { 9 | val properties = Properties() 10 | properties.putAll(it) 11 | properties 12 | } 13 | 14 | fun Node.asStreamsMap(): Map { 15 | val nodeMap = this.asMap().toMutableMap() 16 | nodeMap[""] = this.id() 17 | nodeMap[""] = this.labels() 18 | return nodeMap 19 | } 20 | 21 | fun Relationship.asStreamsMap(): Map { 22 | val relMap = this.asMap().toMutableMap() 23 | relMap[""] = this.id() 24 | relMap[""] = this.type() 25 | relMap[""] = this.startNodeId() 26 | relMap[""] = this.endNodeId() 27 | return relMap 28 | } 29 | 30 | fun String.quote(): String = if (SourceVersion.isIdentifier(this)) this else "`$this`" 31 | 32 | fun Map.flatten(map: Map = this, prefix: String = ""): Map { 33 | return map.flatMap { 34 | val key = it.key 35 | val value = it.value 36 | val newKey = if (prefix != "") "$prefix.$key" else key 37 | if (value is Map<*, *>) { 38 | flatten(value as Map, newKey).toList() 39 | } else { 40 | listOf(newKey to value) 41 | } 42 | }.toMap() 43 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnector.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.config.Description 4 | import com.github.jcustenborder.kafka.connect.utils.config.Title 5 
| import org.apache.kafka.common.config.ConfigDef 6 | import org.apache.kafka.connect.connector.Task 7 | import org.apache.kafka.connect.source.SourceConnector 8 | import streams.kafka.connect.utils.PropertiesUtil 9 | 10 | @Title("Neo4j Source Connector") 11 | @Description("The Neo4j Source connector reads data from Neo4j and and writes the data to a Kafka Topic") 12 | class Neo4jSourceConnector: SourceConnector() { 13 | private lateinit var settings: Map 14 | private lateinit var config: Neo4jSourceConnectorConfig 15 | 16 | // TODO Add monitor thread when we want to have schema on LABELS and RELATIONSHIP query type 17 | 18 | // TODO: for now we support just one task we need to implement 19 | // a SKIP/LIMIT mechanism in case we want parallelize 20 | override fun taskConfigs(maxTasks: Int): List> = listOf(settings) 21 | 22 | override fun start(props: MutableMap?) { 23 | settings = props!! 24 | config = Neo4jSourceConnectorConfig(settings) 25 | } 26 | 27 | override fun stop() {} 28 | 29 | override fun version(): String = PropertiesUtil.getVersion() 30 | 31 | override fun taskClass(): Class = Neo4jSourceTask::class.java 32 | 33 | override fun config(): ConfigDef = Neo4jSourceConnectorConfig.config() 34 | 35 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/assembly/package.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | package 7 | 8 | dir 9 | 10 | false 11 | 12 | 13 | ${project.basedir} 14 | share/doc/${project.name}/ 15 | 16 | README* 17 | LICENSE* 18 | NOTICE* 19 | licenses/ 20 | 21 | 22 | 23 | ${project.basedir}/config 24 | etc/${project.name} 25 | 26 | * 27 | 28 | 29 | 30 | 31 | 32 | share/kotlin/${project.name} 33 | true 34 | true 35 | 36 | org.apache.kafka:connect-api 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/extensions/CoroutineExtensions.kt: -------------------------------------------------------------------------------- 1 | package streams.extensions 2 | 3 | import kotlinx.coroutines.Deferred 4 | import kotlinx.coroutines.ExperimentalCoroutinesApi 5 | import kotlinx.coroutines.ObsoleteCoroutinesApi 6 | import kotlinx.coroutines.channels.ticker 7 | import kotlinx.coroutines.selects.whileSelect 8 | import java.util.concurrent.CopyOnWriteArraySet 9 | import java.util.concurrent.TimeoutException 10 | 11 | 12 | // taken from https://stackoverflow.com/questions/52192752/kotlin-how-to-run-n-coroutines-and-wait-for-first-m-results-or-timeout 13 | @ObsoleteCoroutinesApi 14 | @ExperimentalCoroutinesApi 15 | suspend fun List>.awaitAll(timeoutMs: Long): List { 16 | val jobs = CopyOnWriteArraySet>(this) 17 | val result = ArrayList(size) 18 | val timeout = ticker(timeoutMs) 19 | 20 | whileSelect { 21 | jobs.forEach { deferred -> 22 | deferred.onAwait { 23 | jobs.remove(deferred) 24 | result.add(it) 25 | result.size != size 26 | } 27 | } 28 | 29 | timeout.onReceive { 30 | jobs.forEach { it.cancel() } 31 | throw TimeoutException("Tasks $size cancelled after timeout of $timeoutMs ms.") 32 | } 33 | } 34 | 35 | return result 36 | } 37 | 38 | @ExperimentalCoroutinesApi 39 | fun Deferred.errors() = when { 40 | isCompleted -> getCompletionExceptionOrNull() 41 | isCancelled -> getCompletionExceptionOrNull() // was getCancellationException() 42 | isActive -> RuntimeException("Job $this still active") 43 | else -> null 44 | } 
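The `awaitAll(timeoutMs)` extension above either returns every result or cancels the remaining jobs and fails with a `TimeoutException` once the ticker fires. A minimal usage sketch follows; it is hypothetical (not part of the repository sources, and `runBatchWithTimeout` is an illustrative name) and mirrors the opt-in annotations the extension itself declares:

```kotlin
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.ObsoleteCoroutinesApi
import kotlinx.coroutines.async
import kotlinx.coroutines.coroutineScope
import streams.extensions.awaitAll

@ObsoleteCoroutinesApi
@ExperimentalCoroutinesApi
suspend fun runBatchWithTimeout(): List<String> = coroutineScope {
    // Each async block stands in for real concurrent work, e.g. a driver call.
    val jobs = (1..5).map { i -> async { "result-$i" } }
    // Returns all five results, or cancels the still-running jobs and throws
    // TimeoutException if they do not all complete within one second.
    jobs.awaitAll(1_000L)
}
```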
-------------------------------------------------------------------------------- /common/src/main/kotlin/streams/utils/ValidationUtils.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import java.io.IOException 4 | import java.net.Socket 5 | import java.net.URI 6 | 7 | object ValidationUtils { 8 | 9 | fun isServerReachable(url: String, port: Int): Boolean = try { 10 | Socket(url, port).use { true } 11 | } catch (e: IOException) { 12 | false 13 | } 14 | 15 | fun checkServersUnreachable(urls: String, separator: String = ","): List = urls 16 | .split(separator) 17 | .map { 18 | val uri = URI.create(it) 19 | when (uri.host.isNullOrBlank()) { 20 | true -> { 21 | val splitted = it.split(":") 22 | URI("fake-scheme", "", splitted.first(), splitted.last().toInt(), 23 | "", "", "") 24 | } 25 | else -> uri 26 | } 27 | } 28 | .filter { uri -> !isServerReachable(uri.host, uri.port) } 29 | .map { if (it.scheme == "fake-scheme") "${it.host}:${it.port}" else it.toString() } 30 | 31 | fun validateConnection(url: String, kafkaPropertyKey: String, checkReachable: Boolean = true) { 32 | if (url.isBlank()) { 33 | throw RuntimeException("The `kafka.$kafkaPropertyKey` property is empty") 34 | } else if (checkReachable) { 35 | val unreachableServers = checkServersUnreachable(url) 36 | if (unreachableServers.isNotEmpty()) { 37 | throw RuntimeException("The servers defined into the property `kafka.$kafkaPropertyKey` are not reachable: $unreachableServers") 38 | } 39 | } 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/sink/strategy/IngestionStrategy.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import streams.events.Constraint 4 | import streams.events.RelationshipPayload 5 | import streams.service.StreamsSinkEntity 6 | 7 | 8 | data class QueryEvents(val query: String, val events: List>) 9 | 10 | interface IngestionStrategy { 11 | fun mergeNodeEvents(events: Collection): List 12 | fun deleteNodeEvents(events: Collection): List 13 | fun mergeRelationshipEvents(events: Collection): List 14 | fun deleteRelationshipEvents(events: Collection): List 15 | } 16 | 17 | data class RelationshipSchemaMetadata(val label: String, 18 | val startLabels: List, 19 | val endLabels: List, 20 | val startKeys: Set, 21 | val endKeys: Set) { 22 | constructor(payload: RelationshipPayload) : this(label = payload.label, 23 | startLabels = payload.start.labels.orEmpty(), 24 | endLabels = payload.end.labels.orEmpty(), 25 | startKeys = payload.start.ids.keys, 26 | endKeys = payload.end.ids.keys) 27 | } 28 | 29 | data class NodeSchemaMetadata(val constraints: List, 30 | val labelsToAdd: List, 31 | val labelsToDelete: List, 32 | val keys: Set) 33 | 34 | 35 | 36 | data class NodeMergeMetadata(val labelsToAdd: Set, 37 | val labelsToDelete: Set) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neo4j Connector for Kafka 5.0 2 | 3 | ![(:Neo4j)-[:LOVES]->(:Kafka:Confluent)](https://github.com/neo4j-contrib/neo4j-streams/raw/gh-pages/3.4/images/neo4j-loves-confluent.png "(:Neo4j)-[:LOVES]->(:Kafka:Confluent)") 4 | 5 | This project integrates Neo4j with *Apache Kafka and the Confluent Platform*. 
6 | 7 | > [!IMPORTANT] 8 | > Newer versions of this connector are now maintained at https://github.com/neo4j/neo4j-kafka-connector. 9 | > This repository is only kept alive for critical bug and security fixes of 5.0.x versions of the connector. 10 | 11 | ## Neo4j Kafka Connect Neo4j Connector 12 | 13 | You can download the [Kafka Connect plugin](https://www.confluent.io/hub/neo4j/kafka-connect-neo4j) that allows to ingest events from Kafka to Neo4j and generate change events from Neo4j into Kafka. 14 | 15 | ## Documentation 16 | 17 | Refer to [documentation](https://neo4j.com/docs/kafka/) for more information about installation and configuration of the connector. 18 | 19 | ## Feedback & Suggestions 20 | 21 | As highlighted above, 5.0.x version of the connector is kept as a maintenance version for only critical bug and security fixes. 22 | Please raise any feature requests on the [new repository](https://github.com/neo4j/neo4j-kafka-connector). 23 | 24 | ### Development 25 | 26 | ## Build locally 27 | 28 | In order to build the packages, execute the following command. 29 | 30 | ```shell 31 | mvn clean package 32 | ``` 33 | 34 | You'll find the build artifact in `/kafka-connect-neo4j/target/neo4j-kafka-connect-neo4j-.jar` 35 | 36 | ### Docs 37 | 38 | The documentation source for this version lives at [this repository](https://github.com/neo4j/docs-kafka-connector). 39 | Please raise any documentation updates by creating a PR against it. 40 | 41 | ## License 42 | 43 | Neo4j Streams is licensed under the terms of the Apache License, version 2.0. See `LICENSE` for more details. 44 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/StreamsSinkService.kt: -------------------------------------------------------------------------------- 1 | package streams.service 2 | 3 | import streams.service.sink.strategy.IngestionStrategy 4 | 5 | 6 | const val STREAMS_TOPIC_KEY: String = "streams.sink.topic" 7 | const val STREAMS_TOPIC_CDC_KEY: String = "streams.sink.topic.cdc" 8 | 9 | enum class TopicTypeGroup { CYPHER, CDC, PATTERN, CUD } 10 | enum class TopicType(val group: TopicTypeGroup, val key: String) { 11 | CDC_SOURCE_ID(group = TopicTypeGroup.CDC, key = "$STREAMS_TOPIC_CDC_KEY.sourceId"), 12 | CYPHER(group = TopicTypeGroup.CYPHER, key = "$STREAMS_TOPIC_KEY.cypher"), 13 | PATTERN_NODE(group = TopicTypeGroup.PATTERN, key = "$STREAMS_TOPIC_KEY.pattern.node"), 14 | PATTERN_RELATIONSHIP(group = TopicTypeGroup.PATTERN, key = "$STREAMS_TOPIC_KEY.pattern.relationship"), 15 | CDC_SCHEMA(group = TopicTypeGroup.CDC, key = "$STREAMS_TOPIC_CDC_KEY.schema"), 16 | CUD(group = TopicTypeGroup.CUD, key = "$STREAMS_TOPIC_KEY.cud") 17 | } 18 | 19 | data class StreamsSinkEntity(val key: Any?, val value: Any?) 20 | 21 | abstract class StreamsStrategyStorage { 22 | abstract fun getTopicType(topic: String): TopicType? 
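    // getTopicType resolves which configuration group a topic belongs to (Cypher template,
    // node/relationship pattern, CDC sourceId/schema, or CUD), and getStrategy below returns
    // the IngestionStrategy used to translate that topic's records into Cypher writes.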
23 | 24 | abstract fun getStrategy(topic: String): IngestionStrategy 25 | } 26 | 27 | abstract class StreamsSinkService(private val streamsStrategyStorage: StreamsStrategyStorage) { 28 | 29 | abstract fun write(query: String, events: Collection) 30 | 31 | private fun writeWithStrategy(data: Collection, strategy: IngestionStrategy) { 32 | strategy.mergeNodeEvents(data).forEach { write(it.query, it.events) } 33 | strategy.deleteNodeEvents(data).forEach { write(it.query, it.events) } 34 | 35 | strategy.mergeRelationshipEvents(data).forEach { write(it.query, it.events) } 36 | strategy.deleteRelationshipEvents(data).forEach { write(it.query, it.events) } 37 | } 38 | 39 | fun writeForTopic(topic: String, params: Collection) { 40 | writeWithStrategy(params, streamsStrategyStorage.getStrategy(topic)) 41 | } 42 | } -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/utils/CoroutineUtilsTest.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import kotlinx.coroutines.runBlocking 4 | import org.junit.Test 5 | import java.io.IOException 6 | import java.lang.ClassCastException 7 | import kotlin.test.assertEquals 8 | import kotlin.test.assertTrue 9 | 10 | class CoroutineUtilsTest { 11 | 12 | @Test 13 | fun `should success after retry for known exception`() = runBlocking { 14 | var count = 0 15 | var excuted = false 16 | retryForException(exceptions = arrayOf(RuntimeException::class.java), 17 | retries = 4, delayTime = 100) { 18 | if (count < 2) { 19 | ++count 20 | throw RuntimeException() 21 | } 22 | excuted = true 23 | } 24 | 25 | assertEquals(2, count) 26 | assertTrue { excuted } 27 | } 28 | 29 | @Test(expected = RuntimeException::class) 30 | fun `should fail after retry for known exception`() { 31 | var retries = 3 32 | runBlocking { 33 | retryForException(exceptions = arrayOf(RuntimeException::class.java), 34 | retries = 3, delayTime = 100) { 35 | if (retries >= 0) { 36 | --retries 37 | throw RuntimeException() 38 | } 39 | } 40 | } 41 | } 42 | 43 | @Test 44 | fun `should fail fast unknown exception`() { 45 | var iteration = 0 46 | var isIOException = false 47 | try { 48 | runBlocking { 49 | retryForException(exceptions = arrayOf(RuntimeException::class.java), 50 | retries = 3, delayTime = 100) { 51 | if (iteration >= 0) { 52 | ++iteration 53 | throw IOException() 54 | } 55 | } 56 | } 57 | } catch (e: Exception) { 58 | isIOException = e is IOException 59 | } 60 | assertTrue { isIOException } 61 | assertEquals(1, iteration) 62 | } 63 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/utils/SchemaUtils.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import streams.events.Constraint 4 | import streams.events.RelKeyStrategy 5 | import streams.events.StreamsConstraintType 6 | import streams.events.StreamsTransactionEvent 7 | import streams.service.StreamsSinkEntity 8 | 9 | object SchemaUtils { 10 | fun getNodeKeys(labels: List, propertyKeys: Set, constraints: List, keyStrategy: RelKeyStrategy = RelKeyStrategy.DEFAULT): Set = 11 | constraints 12 | .filter { constraint -> 13 | constraint.type == StreamsConstraintType.UNIQUE 14 | && propertyKeys.containsAll(constraint.properties) 15 | && labels.contains(constraint.label) 16 | } 17 | .let { 18 | when(keyStrategy) { 19 | RelKeyStrategy.DEFAULT -> { 20 | // we order first by properties.size, then by 
label name and finally by properties name alphabetically 21 | // with properties.sorted() we ensure that ("foo", "bar") and ("bar", "foo") are no different 22 | // with toString() we force it.properties to have the natural sort order, that is alphabetically 23 | it.minWithOrNull((compareBy({ it.properties.size }, { it.label }, { it.properties.sorted().toString() }))) 24 | ?.properties 25 | .orEmpty() 26 | } 27 | // with 'ALL' strategy we get a set with all properties 28 | RelKeyStrategy.ALL -> it.flatMap { it.properties }.toSet() 29 | } 30 | } 31 | 32 | 33 | fun toStreamsTransactionEvent(streamsSinkEntity: StreamsSinkEntity, 34 | evaluation: (StreamsTransactionEvent) -> Boolean) 35 | : StreamsTransactionEvent? = if (streamsSinkEntity.value != null) { 36 | val data = JSONUtils.asStreamsTransactionEvent(streamsSinkEntity.value) 37 | if (evaluation(data)) data else null 38 | } else { 39 | null 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/Neo4jStrategyStorage.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import streams.service.StreamsStrategyStorage 4 | import streams.service.TopicType 5 | import streams.service.sink.strategy.CUDIngestionStrategy 6 | import streams.service.sink.strategy.CypherTemplateStrategy 7 | import streams.service.sink.strategy.IngestionStrategy 8 | import streams.service.sink.strategy.NodePatternIngestionStrategy 9 | import streams.service.sink.strategy.RelationshipPatternIngestionStrategy 10 | import streams.service.sink.strategy.SchemaIngestionStrategy 11 | import streams.service.sink.strategy.SourceIdIngestionStrategy 12 | 13 | class Neo4jStrategyStorage(val config: Neo4jSinkConnectorConfig) : StreamsStrategyStorage() { 14 | private val topicConfigMap = config.topics.asMap() 15 | 16 | override fun getTopicType(topic: String): TopicType? 
= TopicType.values().firstOrNull { topicType -> 17 | when (val topicConfig = topicConfigMap.getOrDefault(topicType, emptyList())) { 18 | is Collection<*> -> topicConfig.contains(topic) 19 | is Map<*, *> -> topicConfig.containsKey(topic) 20 | is Pair<*, *> -> (topicConfig.first as Set).contains(topic) 21 | else -> false 22 | } 23 | } 24 | 25 | override fun getStrategy(topic: String): IngestionStrategy = when (val topicType = getTopicType(topic)) { 26 | TopicType.CDC_SOURCE_ID -> config.strategyMap[topicType] as SourceIdIngestionStrategy 27 | TopicType.CDC_SCHEMA -> SchemaIngestionStrategy(config.neo4j) 28 | TopicType.CUD -> CUDIngestionStrategy(config.neo4j) 29 | TopicType.PATTERN_NODE -> NodePatternIngestionStrategy( 30 | config.neo4j, 31 | config.topics.nodePatternTopics.getValue(topic) 32 | ) 33 | 34 | TopicType.PATTERN_RELATIONSHIP -> RelationshipPatternIngestionStrategy( 35 | config.neo4j, 36 | config.topics.relPatternTopics.getValue(topic) 37 | ) 38 | 39 | TopicType.CYPHER -> CypherTemplateStrategy( 40 | config.neo4j, config.topics.cypherTopics.getValue(topic) 41 | ) 42 | 43 | null -> throw RuntimeException("Topic Type not Found") 44 | } 45 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/Neo4jSinkTask.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil 4 | import org.apache.kafka.connect.sink.SinkRecord 5 | import org.apache.kafka.connect.sink.SinkTask 6 | import org.slf4j.Logger 7 | import org.slf4j.LoggerFactory 8 | import streams.extensions.asProperties 9 | import streams.service.errors.ErrorData 10 | import streams.service.errors.ErrorService 11 | import streams.service.errors.KafkaErrorService 12 | import streams.utils.StreamsUtils 13 | 14 | 15 | class Neo4jSinkTask : SinkTask() { 16 | private val log: Logger = LoggerFactory.getLogger(Neo4jSinkTask::class.java) 17 | private lateinit var config: Neo4jSinkConnectorConfig 18 | private lateinit var neo4jSinkService: Neo4jSinkService 19 | private lateinit var errorService: ErrorService 20 | 21 | override fun version(): String { 22 | return VersionUtil.version(this.javaClass as Class<*>) 23 | } 24 | 25 | override fun start(map: Map) { 26 | this.config = Neo4jSinkConnectorConfig(map) 27 | this.neo4jSinkService = Neo4jSinkService(this.config) 28 | this.errorService = KafkaErrorService(this.config.kafkaBrokerProperties.asProperties(), 29 | ErrorService.ErrorConfig.from(map.asProperties()), 30 | log::error) 31 | } 32 | 33 | override fun put(collection: Collection) { 34 | if (collection.isEmpty()) { 35 | return 36 | } 37 | try { 38 | val data = EventBuilder() 39 | .withBatchSize(config.batchSize) 40 | .withSinkRecords(collection) 41 | .build() 42 | 43 | neo4jSinkService.writeData(data) 44 | } catch(e:Exception) { 45 | errorService.report(collection.map { 46 | ErrorData(it.topic(), it.timestamp(), it.key(), it.value(), it.kafkaPartition(), it.kafkaOffset(), this::class.java, this.config.database, e) 47 | }) 48 | } 49 | } 50 | 51 | override fun stop() { 52 | log.info("Stop() - Neo4j Sink Service") 53 | StreamsUtils.ignoreExceptions({ neo4jSinkService.close() }, UninitializedPropertyAccessException::class.java) 54 | } 55 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/source/SourceRecordBuilder.kt: 
-------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import org.apache.kafka.connect.data.Schema 4 | import org.apache.kafka.connect.source.SourceRecord 5 | import org.neo4j.driver.Record 6 | import streams.kafka.connect.utils.asJsonString 7 | import streams.kafka.connect.utils.asStruct 8 | import kotlin.properties.Delegates 9 | 10 | class SourceRecordBuilder { 11 | 12 | private lateinit var topic: String 13 | 14 | private lateinit var streamingProperty: String 15 | 16 | private var timestamp by Delegates.notNull() 17 | 18 | private lateinit var sourcePartition: Map 19 | 20 | private lateinit var record: Record 21 | 22 | private var enforceSchema: Boolean = false 23 | 24 | fun withTopic(topic: String): SourceRecordBuilder { 25 | this.topic = topic 26 | return this 27 | } 28 | 29 | fun withStreamingProperty(streamingProperty: String): SourceRecordBuilder { 30 | this.streamingProperty = streamingProperty 31 | return this 32 | } 33 | 34 | fun withTimestamp(timestamp: Long): SourceRecordBuilder { 35 | this.timestamp = timestamp 36 | return this 37 | } 38 | 39 | fun withSourcePartition(sourcePartition: Map): SourceRecordBuilder { 40 | this.sourcePartition = sourcePartition 41 | return this 42 | } 43 | 44 | fun withRecord(record: Record): SourceRecordBuilder { 45 | this.record = record 46 | return this 47 | } 48 | 49 | fun withEnforceSchema(enforceSchema: Boolean): SourceRecordBuilder { 50 | this.enforceSchema = enforceSchema 51 | return this 52 | } 53 | 54 | fun build(): SourceRecord { 55 | val sourceOffset = mapOf("property" to streamingProperty.ifBlank { "undefined" }, 56 | "value" to timestamp) 57 | val (struct, schema) = when (enforceSchema) { 58 | true -> { 59 | val st = record.asStruct() 60 | val sc = st.schema() 61 | st to sc 62 | } 63 | else -> record.asJsonString() to Schema.STRING_SCHEMA 64 | } 65 | return SourceRecord(sourcePartition, sourceOffset, topic, schema, struct, schema, struct) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/converters/Neo4jValueConverter.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink.converters 2 | 3 | import org.apache.kafka.connect.data.Struct 4 | import org.neo4j.driver.Value 5 | import org.neo4j.driver.Values 6 | import java.math.BigDecimal 7 | import java.time.LocalTime 8 | import java.time.ZoneId 9 | import java.util.Date 10 | import java.util.concurrent.TimeUnit 11 | 12 | 13 | class Neo4jValueConverter: MapValueConverter() { 14 | 15 | companion object { 16 | @JvmStatic private val UTC = ZoneId.of("UTC") 17 | } 18 | 19 | override fun setValue(result: MutableMap?, fieldName: String, value: Any?) 
{ 20 | if (result != null) { 21 | result[fieldName] = value 22 | } 23 | } 24 | 25 | override fun newValue(): MutableMap { 26 | return mutableMapOf() 27 | } 28 | 29 | override fun setDecimalField(result: MutableMap?, fieldName: String, value: BigDecimal) { 30 | val doubleValue = value.toDouble() 31 | val fitsScale = doubleValue != Double.POSITIVE_INFINITY 32 | && doubleValue != Double.NEGATIVE_INFINITY 33 | && value.compareTo(doubleValue.let { BigDecimal.valueOf(it) }) == 0 34 | if (fitsScale) { 35 | setValue(result, fieldName, doubleValue) 36 | } else { 37 | setValue(result, fieldName, value.toPlainString()) 38 | } 39 | } 40 | 41 | override fun setTimestampField(result: MutableMap?, fieldName: String, value: Date) { 42 | val localDate = value.toInstant().atZone(UTC).toLocalDateTime() 43 | setValue(result, fieldName, localDate) 44 | } 45 | 46 | override fun setTimeField(result: MutableMap?, fieldName: String, value: Date) { 47 | val time = LocalTime.ofNanoOfDay(TimeUnit.MILLISECONDS.toNanos(value.time)) 48 | setValue(result, fieldName, time) 49 | } 50 | 51 | override fun setDateField(result: MutableMap?, fieldName: String, value: Date) { 52 | val localDate = value.toInstant().atZone(UTC).toLocalDate() 53 | setValue(result, fieldName, localDate) 54 | } 55 | 56 | override fun setStructField(result: MutableMap?, fieldName: String, value: Struct) { 57 | val converted = convert(value).toMutableMap() as MutableMap 58 | setValue(result, fieldName, converted) 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /test-support/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.neo4j 7 | neo4j-streams-parent 8 | 5.0.12 9 | 10 | neo4j-streams-test-support 11 | 5.0.12 12 | jar 13 | Neo4j Streams - Test Support 14 | Neo4j Streams - A set of utility functions for Streams Tests 15 | 16 | 17 | 18 | junit 19 | junit 20 | 21 | 22 | org.apache.kafka 23 | kafka-clients 24 | 25 | 26 | org.hamcrest 27 | hamcrest-all 28 | 29 | 30 | org.jetbrains.kotlin 31 | kotlin-stdlib-jdk8 32 | 33 | 34 | org.neo4j 35 | neo4j-streams-common 36 | ${project.version} 37 | 38 | 39 | org.neo4j.driver 40 | neo4j-java-driver-slim 41 | 42 | 43 | org.testcontainers 44 | kafka 45 | 46 | 47 | org.testcontainers 48 | neo4j 49 | 50 | 51 | org.testcontainers 52 | testcontainers 53 | 54 | 55 | 56 | 57 | 58 | org.jetbrains.kotlin 59 | kotlin-maven-plugin 60 | 61 | 62 | org.apache.maven.plugins 63 | maven-surefire-plugin 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/Neo4jSinkConnector.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.github.jcustenborder.kafka.connect.utils.config.Description 5 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationNote 6 | import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip 7 | import com.github.jcustenborder.kafka.connect.utils.config.TaskConfigs 8 | import com.github.jcustenborder.kafka.connect.utils.config.Title 9 | import org.apache.kafka.common.config.ConfigDef 10 | import org.apache.kafka.connect.connector.Task 11 | import org.apache.kafka.connect.sink.SinkConnector 12 | import org.slf4j.Logger 13 | import org.slf4j.LoggerFactory 14 | import 
streams.kafka.connect.common.ConfigurationMigrator 15 | import streams.kafka.connect.utils.PropertiesUtil 16 | 17 | @Title("Neo4j Sink Connector") 18 | @Description("The Neo4j Sink connector reads data from Kafka and writes the data to Neo4j using a Cypher Template") 19 | @DocumentationTip("If you need to control the size of the transaction that is submitted to Neo4j, you can try adjusting the ``consumer.max.poll.records`` setting in the worker.properties for Kafka Connect.") 20 | @DocumentationNote("For each topic you can provide a Cypher Template by using the following syntax ``neo4j.topic.cypher.<TOPIC_NAME>=<CYPHER_QUERY>``") 21 | class Neo4jSinkConnector: SinkConnector() { 22 | private val log: Logger = LoggerFactory.getLogger(Neo4jSinkConnector::class.java) 23 | private lateinit var settings: Map 24 | private lateinit var config: Neo4jSinkConnectorConfig 25 | 26 | override fun taskConfigs(maxTasks: Int): MutableList> { 27 | return TaskConfigs.multiple(settings, maxTasks) 28 | } 29 | 30 | override fun start(props: MutableMap?) { 31 | settings = props!! 32 | config = Neo4jSinkConnectorConfig(settings) 33 | } 34 | 35 | override fun stop() { 36 | val migratedConfig = ConfigurationMigrator(settings).migrateToV51() 37 | val mapper = ObjectMapper() 38 | val jsonConfig = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(migratedConfig) 39 | log.info( 40 | "The migrated settings for 5.1 version of Neo4j Sink Connector '{}' is: `{}`", 41 | settings["name"], 42 | jsonConfig 43 | ) 44 | } 45 | 46 | override fun version(): String { 47 | return PropertiesUtil.getVersion() 48 | } 49 | 50 | override fun taskClass(): Class { 51 | return Neo4jSinkTask::class.java 52 | } 53 | 54 | override fun config(): ConfigDef { 55 | return Neo4jSinkConnectorConfig.config() 56 | } 57 | 58 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/kotlin/streams/kafka/connect/sink/EventBuilderTest.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import org.apache.kafka.connect.data.Schema 4 | import org.apache.kafka.connect.data.SchemaBuilder 5 | import org.apache.kafka.connect.data.Struct 6 | import org.apache.kafka.connect.data.Timestamp 7 | import org.apache.kafka.connect.sink.SinkRecord 8 | import org.junit.Test 9 | import java.util.* 10 | import kotlin.test.assertEquals 11 | 12 | class EventBuilderTest { 13 | private val PERSON_SCHEMA = SchemaBuilder.struct().name("com.example.Person") 14 | .field("firstName", Schema.STRING_SCHEMA) 15 | .field("lastName", Schema.STRING_SCHEMA) 16 | .field("age", Schema.OPTIONAL_INT32_SCHEMA) 17 | .field("bool", Schema.OPTIONAL_BOOLEAN_SCHEMA) 18 | .field("short", Schema.OPTIONAL_INT16_SCHEMA) 19 | .field("byte", Schema.OPTIONAL_INT8_SCHEMA) 20 | .field("long", Schema.OPTIONAL_INT64_SCHEMA) 21 | .field("float", Schema.OPTIONAL_FLOAT32_SCHEMA) 22 | .field("double", Schema.OPTIONAL_FLOAT64_SCHEMA) 23 | .field("modified", Timestamp.SCHEMA) 24 | .build() 25 | 26 | @Test 27 | fun `should create event map properly`() { 28 | // Given 29 | val firstTopic = "neotopic" 30 | val secondTopic = "foo" 31 | val batchSize = 2 32 | val struct= Struct(PERSON_SCHEMA) 33 | .put("firstName", "Alex") 34 | .put("lastName", "Smith") 35 | .put("bool", true) 36 | .put("short", 1234.toShort()) 37 | .put("byte", (-32).toByte()) 38 | .put("long", 12425436L) 39 | .put("float", 2356.3.toFloat()) 40 | .put("double", -2436546.56457) 41 | .put("age", 21) 42 | .put("modified", Date(1474661402123L))
43 | val input = listOf(SinkRecord(firstTopic, 1, null, null, PERSON_SCHEMA, struct, 42), 44 | SinkRecord(firstTopic, 1, null, null, PERSON_SCHEMA, struct, 42), 45 | SinkRecord(firstTopic, 1, null, null, PERSON_SCHEMA, struct, 43), 46 | SinkRecord(firstTopic, 1, null, null, PERSON_SCHEMA, struct, 44), 47 | SinkRecord(firstTopic, 1, null, null, PERSON_SCHEMA, struct, 45), 48 | SinkRecord(secondTopic, 1, null, null, PERSON_SCHEMA, struct, 43)) // 5 records for topic "neotopic", 1 for topic "foo" 49 | val topics = listOf(firstTopic, secondTopic) 50 | 51 | // When 52 | val data = EventBuilder() 53 | .withBatchSize(batchSize) 54 | .withSinkRecords(input) 55 | .build() 56 | 57 | // Then 58 | assertEquals(topics, data.keys.toList()) 59 | assertEquals(3, data[firstTopic]!!.size) // n° of chunks for "neotopic" 60 | assertEquals(1, data[secondTopic]!!.size) // n° of chunks for "foo" 61 | } 62 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/events/StreamsEvent.kt: -------------------------------------------------------------------------------- 1 | package streams.events 2 | 3 | 4 | enum class OperationType { created, updated, deleted } 5 | 6 | data class Meta(val timestamp: Long, 7 | val username: String, 8 | val txId: Long, 9 | val txEventId: Int, 10 | val txEventsCount: Int, 11 | val operation: OperationType, 12 | val source: Map = emptyMap()) 13 | 14 | 15 | enum class EntityType { node, relationship } 16 | 17 | data class RelationshipNodeChange(val id: String, 18 | val labels: List?, 19 | val ids: Map) 20 | 21 | abstract class RecordChange{ abstract val properties: Map? } 22 | data class NodeChange(override val properties: Map?, 23 | val labels: List?): RecordChange() 24 | 25 | data class RelationshipChange(override val properties: Map?): RecordChange() 26 | 27 | abstract class Payload { 28 | abstract val id: String 29 | abstract val type: EntityType 30 | abstract val before: RecordChange? 31 | abstract val after: RecordChange? 
32 | } 33 | data class NodePayload(override val id: String, 34 | override val before: NodeChange?, 35 | override val after: NodeChange?, 36 | override val type: EntityType = EntityType.node): Payload() 37 | 38 | data class RelationshipPayload(override val id: String, 39 | val start: RelationshipNodeChange, 40 | val end: RelationshipNodeChange, 41 | override val before: RelationshipChange?, 42 | override val after: RelationshipChange?, 43 | val label: String, 44 | override val type: EntityType = EntityType.relationship): Payload() 45 | 46 | enum class StreamsConstraintType { UNIQUE, NODE_PROPERTY_EXISTS, RELATIONSHIP_PROPERTY_EXISTS } 47 | 48 | enum class RelKeyStrategy { DEFAULT, ALL } 49 | 50 | data class Constraint(val label: String?, 51 | val properties: Set, 52 | val type: StreamsConstraintType) 53 | 54 | data class Schema(val properties: Map = emptyMap(), 55 | val constraints: List = emptyList()) 56 | 57 | open class StreamsEvent(open val payload: Any) 58 | data class StreamsTransactionEvent(val meta: Meta, override val payload: Payload, val schema: Schema): StreamsEvent(payload) 59 | 60 | data class StreamsTransactionNodeEvent(val meta: Meta, 61 | val payload: NodePayload, 62 | val schema: Schema) { 63 | fun toStreamsTransactionEvent() = StreamsTransactionEvent(this.meta, this.payload, this.schema) 64 | } 65 | data class StreamsTransactionRelationshipEvent(val meta: Meta, 66 | val payload: RelationshipPayload, 67 | val schema: Schema) { 68 | fun toStreamsTransactionEvent() = StreamsTransactionEvent(this.meta, this.payload, this.schema) 69 | } 70 | 71 | -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/service/sink/strategy/Neo4j.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import org.junit.jupiter.api.extension.ExtensionContext 4 | import org.junit.jupiter.params.provider.Arguments 5 | import org.junit.jupiter.params.provider.ArgumentsProvider 6 | import org.neo4j.caniuse.Neo4j 7 | import org.neo4j.caniuse.Neo4jDeploymentType 8 | import org.neo4j.caniuse.Neo4jEdition 9 | import org.neo4j.caniuse.Neo4jVersion 10 | import java.util.stream.Stream 11 | 12 | internal val Neo4jV4Aura = Neo4j(Neo4jVersion(4, 4, 0), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.AURA) 13 | internal val Neo4jV4OnPrem = Neo4j(Neo4jVersion(4, 4, 41), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.SELF_MANAGED) 14 | internal val Neo4jV4Community = Neo4j(Neo4jVersion(4, 4, 41), Neo4jEdition.COMMUNITY, Neo4jDeploymentType.SELF_MANAGED) 15 | internal val Neo4jV519Aura = Neo4j(Neo4jVersion(5, 19, 0), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.AURA) 16 | internal val Neo4jV519OnPrem = Neo4j(Neo4jVersion(5, 19, 0), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.SELF_MANAGED) 17 | internal val Neo4jV519Community = 18 | Neo4j(Neo4jVersion(5, 19, 0), Neo4jEdition.COMMUNITY, Neo4jDeploymentType.SELF_MANAGED) 19 | internal val Neo4jV5LTSAura = Neo4j(Neo4jVersion(5, 26, 1), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.AURA) 20 | internal val Neo4jV5LTSOnPrem = Neo4j(Neo4jVersion(5, 26, 1), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.SELF_MANAGED) 21 | internal val Neo4jV5LTSCommunity = 22 | Neo4j(Neo4jVersion(5, 26, 1), Neo4jEdition.COMMUNITY, Neo4jDeploymentType.SELF_MANAGED) 23 | internal val Neo4jV2025Aura = Neo4j(Neo4jVersion(2025, 1, 0), Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.AURA) 24 | internal val Neo4jV2025OnPrem = 25 | Neo4j(Neo4jVersion(2025, 1, 0), 
Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.SELF_MANAGED) 26 | internal val Neo4jV2025Community = 27 | Neo4j(Neo4jVersion(2025, 1, 0), Neo4jEdition.COMMUNITY, Neo4jDeploymentType.SELF_MANAGED) 28 | internal val Neo4jVLatestAura = Neo4j(Neo4jVersion.LATEST, Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.AURA) 29 | internal val Neo4jVLatestOnPrem = 30 | Neo4j(Neo4jVersion.LATEST, Neo4jEdition.ENTERPRISE, Neo4jDeploymentType.SELF_MANAGED) 31 | internal val Neo4jVLatestCommunity = 32 | Neo4j(Neo4jVersion.LATEST, Neo4jEdition.COMMUNITY, Neo4jDeploymentType.SELF_MANAGED) 33 | 34 | class SupportedVersionsProvider : ArgumentsProvider { 35 | override fun provideArguments(context: ExtensionContext?): Stream? { 36 | return Stream.of( 37 | Arguments.of(Neo4jV4Aura, ""), 38 | Arguments.of(Neo4jV4OnPrem, ""), 39 | Arguments.of(Neo4jV4Community, ""), 40 | Arguments.of(Neo4jV519Aura, ""), 41 | Arguments.of(Neo4jV519OnPrem, ""), 42 | Arguments.of(Neo4jV519Community, ""), 43 | Arguments.of(Neo4jV5LTSAura, "CYPHER 5 "), 44 | Arguments.of(Neo4jV5LTSOnPrem, "CYPHER 5 "), 45 | Arguments.of(Neo4jV5LTSCommunity, "CYPHER 5 "), 46 | Arguments.of(Neo4jV2025Aura, "CYPHER 5 "), 47 | Arguments.of(Neo4jV2025OnPrem, "CYPHER 5 "), 48 | Arguments.of(Neo4jV2025Community, "CYPHER 5 "), 49 | Arguments.of(Neo4jVLatestAura, "CYPHER 5 "), 50 | Arguments.of(Neo4jVLatestOnPrem, "CYPHER 5 "), 51 | Arguments.of(Neo4jVLatestCommunity, "CYPHER 5 ") 52 | ) 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.neo4j 7 | neo4j-streams-parent 8 | 5.0.12 9 | 10 | neo4j-streams-common 11 | 5.0.12 12 | jar 13 | Neo4j Streams - Common 14 | Neo4j Streams - Commons Package 15 | 16 | 17 | com.fasterxml.jackson.module 18 | jackson-module-kotlin 19 | 20 | 21 | org.apache.commons 22 | commons-lang3 23 | 24 | 25 | org.jetbrains.kotlin 26 | kotlin-stdlib-jdk8 27 | 28 | 29 | org.jetbrains.kotlinx 30 | kotlinx-coroutines-core 31 | 32 | 33 | org.neo4j 34 | caniuse-core 35 | 36 | 37 | org.apache.kafka 38 | kafka-clients 39 | provided 40 | 41 | 42 | org.neo4j.driver 43 | neo4j-java-driver-slim 44 | provided 45 | 46 | 47 | junit 48 | junit 49 | test 50 | 51 | 52 | org.hamcrest 53 | hamcrest-all 54 | test 55 | 56 | 57 | org.jetbrains.kotlin 58 | kotlin-test-junit 59 | test 60 | 61 | 62 | org.junit.jupiter 63 | junit-jupiter 64 | test 65 | 66 | 67 | org.mockito 68 | mockito-core 69 | test 70 | 71 | 72 | org.testcontainers 73 | testcontainers 74 | test 75 | 76 | 77 | 78 | 79 | 80 | org.jetbrains.kotlin 81 | kotlin-maven-plugin 82 | 83 | 84 | org.apache.maven.plugins 85 | maven-surefire-plugin 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/service/sink/errors/KafkaErrorServiceTest.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.errors 2 | 3 | import org.apache.commons.lang3.exception.ExceptionUtils 4 | import org.apache.kafka.clients.producer.MockProducer 5 | import org.apache.kafka.clients.producer.ProducerRecord 6 | import org.apache.kafka.clients.producer.internals.FutureRecordMetadata 7 | import org.apache.kafka.common.record.RecordBatch 8 | import org.apache.kafka.common.utils.Time 9 | import org.junit.Test 10 | import org.mockito.ArgumentMatchers 11 | import org.mockito.Mockito 12 | import streams.service.errors.ErrorData 
13 | import streams.service.errors.ErrorService 14 | import streams.service.errors.KafkaErrorService 15 | import java.util.* 16 | import java.util.concurrent.atomic.AtomicInteger 17 | import kotlin.test.assertEquals 18 | import kotlin.test.assertTrue 19 | 20 | class KafkaErrorServiceTest { 21 | @Test 22 | fun `should send the data to the DLQ`() { 23 | val producer: MockProducer = Mockito.mock(MockProducer::class.java) as MockProducer 24 | val counter = AtomicInteger(0) 25 | Mockito.`when`(producer.send(ArgumentMatchers.any>())).then { 26 | counter.incrementAndGet() 27 | FutureRecordMetadata(null, 0, RecordBatch.NO_TIMESTAMP, 0, 0, Time.SYSTEM) 28 | } 29 | val dlqService = KafkaErrorService(producer, ErrorService.ErrorConfig(fail=false,dlqTopic = "dlqTopic"), { s, e -> }) 30 | dlqService.report(listOf(dlqData())) 31 | assertEquals(1, counter.get()) 32 | dlqService.close() 33 | } 34 | 35 | 36 | @Test 37 | fun `should create the header map`() { 38 | val producer: MockProducer = Mockito.mock(MockProducer::class.java) as MockProducer 39 | val dlqService = KafkaErrorService(producer, ErrorService.ErrorConfig(fail=false, dlqTopic = "dlqTopic",dlqHeaders = true), { s, e -> }) 40 | val dlqData = dlqData() 41 | val map = dlqService.populateContextHeaders(dlqData) 42 | assertEquals(String(map["topic"]!!), dlqData.originalTopic) 43 | assertEquals(String(map["partition"]!!), dlqData.partition) 44 | assertEquals(String(map["offset"]!!), dlqData.offset) 45 | assertEquals(String(map["class.name"]!!), KafkaErrorServiceTest::class.java.name) 46 | val exception = dlqData.exception!! 47 | assertEquals(String(map["exception.class.name"]!!), exception::class.java.name) 48 | assertEquals(String(map["exception.message"]!!), exception.message) 49 | assertEquals(String(map["exception.stacktrace"]!!), ExceptionUtils.getStackTrace(exception)) 50 | 51 | } 52 | 53 | private fun dlqData(): ErrorData { 54 | val offset = "0" 55 | val originalTopic = "topicName" 56 | val partition = "1" 57 | val timestamp = System.currentTimeMillis() 58 | val exception = RuntimeException("Test") 59 | val key = "KEY" 60 | val value = "VALUE" 61 | val databaseName = "myDb" 62 | return ErrorData( 63 | offset = offset, 64 | originalTopic = originalTopic, 65 | partition = partition, 66 | timestamp = timestamp, 67 | exception = exception, 68 | executingClass = KafkaErrorServiceTest::class.java, 69 | key = key.toByteArray(), 70 | value = value.toByteArray(), 71 | databaseName = databaseName 72 | ) 73 | } 74 | 75 | @Test 76 | fun `should log DLQ data`() { 77 | val log = { s:String,e:Exception?
-> assertTrue(s.contains("partition=1, offset=0, exception=java.lang.RuntimeException: Test, key=KEY, value=VALUE, executingClass=class streams.service.sink.errors.KafkaErrorServiceTest)"),"Wrong DLQ log message")} 78 | val logService = KafkaErrorService(Properties(),ErrorService.ErrorConfig(fail = false, logMessages = true,log=true), log) 79 | logService.report(listOf(dlqData())) 80 | } 81 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/converters/MapValueConverter.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink.converters 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.data.AbstractConverter 4 | import org.apache.kafka.connect.data.Schema 5 | import org.apache.kafka.connect.data.Struct 6 | import java.math.BigDecimal 7 | import java.util.* 8 | 9 | open class MapValueConverter: AbstractConverter>() { 10 | 11 | open fun setValue(result: MutableMap?, fieldName: String, value: Any?) { 12 | if (result != null) { 13 | result[fieldName] = value as T 14 | } 15 | } 16 | 17 | override fun newValue(): MutableMap { 18 | return mutableMapOf() 19 | } 20 | 21 | override fun setBytesField(result: MutableMap?, fieldName: String, value: ByteArray?) { 22 | setValue(result, fieldName, value) 23 | } 24 | 25 | override fun setStringField(result: MutableMap?, fieldName: String, value: String?) { 26 | setValue(result, fieldName, value) 27 | } 28 | 29 | override fun setFloat32Field(result: MutableMap?, fieldName: String, value: Float?) { 30 | setValue(result, fieldName, value) 31 | } 32 | 33 | override fun setInt32Field(result: MutableMap?, fieldName: String, value: Int?) { 34 | setValue(result, fieldName, value) 35 | } 36 | 37 | override fun setArray(result: MutableMap?, fieldName: String, schema: Schema?, array: MutableList?) { 38 | val convertedArray = array?.map { convertInner(it) } 39 | setValue(result, fieldName, convertedArray) 40 | } 41 | 42 | override fun setTimestampField(result: MutableMap?, fieldName: String, value: Date) { 43 | setValue(result, fieldName, value) 44 | 45 | } 46 | 47 | override fun setTimeField(result: MutableMap?, fieldName: String, value: Date) { 48 | setValue(result, fieldName, value) 49 | } 50 | 51 | override fun setInt8Field(result: MutableMap?, fieldName: String, value: Byte) { 52 | setValue(result, fieldName, value) 53 | } 54 | 55 | override fun setStructField(result: MutableMap?, fieldName: String, value: Struct) { 56 | val converted = convert(value) as MutableMap 57 | setMap(result, fieldName, null, converted) 58 | } 59 | 60 | override fun setMap(result: MutableMap?, fieldName: String, schema: Schema?, value: MutableMap?) 
{ 61 | if (value != null) { 62 | val converted = convert(value) as MutableMap 63 | setValue(result, fieldName, converted) 64 | } else { 65 | setNullField(result, fieldName) 66 | } 67 | } 68 | 69 | override fun setNullField(result: MutableMap?, fieldName: String) { 70 | setValue(result, fieldName, null) 71 | } 72 | 73 | override fun setFloat64Field(result: MutableMap?, fieldName: String, value: Double) { 74 | setValue(result, fieldName, value) 75 | } 76 | 77 | override fun setInt16Field(result: MutableMap?, fieldName: String, value: Short) { 78 | setValue(result, fieldName, value) 79 | } 80 | 81 | override fun setInt64Field(result: MutableMap?, fieldName: String, value: Long) { 82 | setValue(result, fieldName, value) 83 | } 84 | 85 | override fun setBooleanField(result: MutableMap?, fieldName: String, value: Boolean) { 86 | setValue(result, fieldName, value) 87 | } 88 | 89 | override fun setDecimalField(result: MutableMap?, fieldName: String, value: BigDecimal) { 90 | setValue(result, fieldName, value) 91 | } 92 | 93 | override fun setDateField(result: MutableMap?, fieldName: String, value: Date) { 94 | setValue(result, fieldName, value) 95 | } 96 | 97 | open fun convertInner(value: Any?): Any? { 98 | return when (value) { 99 | is Struct, is Map<*, *> -> convert(value) 100 | is Collection<*> -> value.map(::convertInner) 101 | is Array<*> -> if (value.javaClass.componentType.isPrimitive) value else value.map(::convertInner) 102 | else -> value 103 | } 104 | } 105 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/Neo4jSinkService.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import kotlinx.coroutines.Dispatchers 4 | import kotlinx.coroutines.ExperimentalCoroutinesApi 5 | import kotlinx.coroutines.ObsoleteCoroutinesApi 6 | import kotlinx.coroutines.async 7 | import kotlinx.coroutines.awaitAll 8 | import kotlinx.coroutines.runBlocking 9 | import org.apache.kafka.connect.errors.ConnectException 10 | import org.neo4j.driver.Bookmark 11 | import org.neo4j.driver.Driver 12 | import org.neo4j.driver.TransactionConfig 13 | import org.neo4j.driver.exceptions.ClientException 14 | import org.neo4j.driver.exceptions.TransientException 15 | import org.slf4j.Logger 16 | import org.slf4j.LoggerFactory 17 | import streams.extensions.errors 18 | import streams.service.StreamsSinkEntity 19 | import streams.service.StreamsSinkService 20 | import streams.utils.StreamsUtils 21 | import streams.utils.retryForException 22 | import kotlin.streams.toList 23 | 24 | 25 | class Neo4jSinkService(private val config: Neo4jSinkConnectorConfig) : 26 | StreamsSinkService(Neo4jStrategyStorage(config)) { 27 | 28 | private val log: Logger = LoggerFactory.getLogger(Neo4jSinkService::class.java) 29 | 30 | private val transactionConfig: TransactionConfig = config.createTransactionConfig() 31 | 32 | private val bookmarks = mutableListOf() 33 | 34 | fun close() { 35 | config.close() 36 | } 37 | 38 | override fun write(query: String, events: Collection) { 39 | val data = mapOf("events" to events) 40 | config.driver.session(config.createSessionConfig(bookmarks)).use { session -> 41 | try { 42 | runBlocking { 43 | retryForException( 44 | exceptions = arrayOf(ClientException::class.java, TransientException::class.java), 45 | retries = config.retryMaxAttempts, delayTime = 0 46 | ) { // we use the delayTime = 0, because we delegate the retryBackoff to the 
Neo4j Java Driver 47 | 48 | session.writeTransaction({ 49 | val result = it.run(query, data) 50 | if (log.isDebugEnabled) { 51 | val summary = result.consume() 52 | log.debug("Successfully executed query: `$query`. Summary: $summary") 53 | } 54 | }, transactionConfig) 55 | } 56 | } 57 | } catch (e: Exception) { 58 | bookmarks += session.lastBookmark() 59 | if (log.isDebugEnabled) { 60 | val subList = events.stream() 61 | .limit(5.coerceAtMost(events.size).toLong()) 62 | .toList() 63 | log.debug("Exception `${e.message}` while executing query: `$query`, with data: `$subList` total-records ${events.size}") 64 | } 65 | throw e 66 | } 67 | } 68 | } 69 | 70 | fun writeData(data: Map>>) { 71 | val errors = if (config.parallelBatches) writeDataAsync(data) else writeDataSync(data); 72 | if (errors.isNotEmpty()) { 73 | throw ConnectException( 74 | errors.map { it.message }.toSet() 75 | .joinToString("\n", "Errors executing ${data.values.map { it.size }.sum()} jobs:\n") 76 | ) 77 | } 78 | } 79 | 80 | @ExperimentalCoroutinesApi 81 | @ObsoleteCoroutinesApi 82 | private fun writeDataAsync(data: Map>>) = runBlocking { 83 | val jobs = data 84 | .flatMap { (topic, records) -> 85 | records.map { async(Dispatchers.IO) { writeForTopic(topic, it) } } 86 | } 87 | 88 | // timeout starts in writeTransaction() 89 | jobs.awaitAll() 90 | jobs.mapNotNull { it.errors() } 91 | } 92 | 93 | private fun writeDataSync(data: Map>>) = 94 | data.flatMap { (topic, records) -> 95 | records.mapNotNull { 96 | try { 97 | writeForTopic(topic, it) 98 | null 99 | } catch (e: Exception) { 100 | e 101 | } 102 | } 103 | } 104 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/kotlin/streams/kafka/connect/sink/MapValueConverterTest.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import org.apache.kafka.connect.data.Schema 4 | import org.apache.kafka.connect.data.SchemaBuilder 5 | import org.apache.kafka.connect.data.Struct 6 | import org.junit.Test 7 | import streams.kafka.connect.sink.converters.MapValueConverter 8 | import kotlin.test.assertEquals 9 | 10 | class MapValueConverterTest { 11 | 12 | @Test 13 | fun `should convert tree struct into map of String,Any?`() { 14 | // given 15 | // this test generates a simple tree structure like this 16 | // body 17 | // / \ 18 | // p ul 19 | // | 20 | // li 21 | val body = getTreeStruct() 22 | 23 | // when 24 | val result = MapValueConverter().convert(body) as Map<*, *> 25 | 26 | // then 27 | val expected = getTreeMap() 28 | assertEquals(expected, result) 29 | } 30 | 31 | @Test 32 | fun `should convert tree simple map into map of String,Any?`() { 33 | // given 34 | // this test generates a simple tree structure like this 35 | // body 36 | // / \ 37 | // p ul 38 | // | 39 | // li 40 | val body = getTreeMap() 41 | 42 | // when 43 | val result = MapValueConverter().convert(body) as Map<*, *> 44 | 45 | // then 46 | val expected = getTreeMap() 47 | assertEquals(expected, result) 48 | } 49 | 50 | companion object { 51 | private val LI_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.html.LI") 52 | .field("value", Schema.OPTIONAL_STRING_SCHEMA) 53 | .field("class", SchemaBuilder.array(Schema.STRING_SCHEMA).optional()) 54 | .build() 55 | 56 | private val UL_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.html.UL") 57 | .field("value", SchemaBuilder.array(LI_SCHEMA)) 58 | .build() 59 | 60 | private val P_SCHEMA = 
SchemaBuilder.struct().name("org.neo4j.example.html.P") 61 | .field("value", Schema.OPTIONAL_STRING_SCHEMA) 62 | .build() 63 | 64 | private val BODY_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.html.BODY") 65 | .field("ul", SchemaBuilder.array(UL_SCHEMA).optional()) 66 | .field("p", SchemaBuilder.array(P_SCHEMA).optional()) 67 | .build() 68 | 69 | fun getTreeStruct(): Struct? { 70 | val firstUL = Struct(UL_SCHEMA).put("value", listOf( 71 | Struct(LI_SCHEMA).put("value", "First UL - First Element"), 72 | Struct(LI_SCHEMA).put("value", "First UL - Second Element") 73 | .put("class", listOf("ClassA", "ClassB")) 74 | )) 75 | val secondUL = Struct(UL_SCHEMA).put("value", listOf( 76 | Struct(LI_SCHEMA).put("value", "Second UL - First Element"), 77 | Struct(LI_SCHEMA).put("value", "Second UL - Second Element") 78 | )) 79 | val ulList = listOf(firstUL, secondUL) 80 | val pList = listOf( 81 | Struct(P_SCHEMA).put("value", "First Paragraph"), 82 | Struct(P_SCHEMA).put("value", "Second Paragraph") 83 | ) 84 | return Struct(BODY_SCHEMA) 85 | .put("ul", ulList) 86 | .put("p", pList) 87 | } 88 | 89 | fun getTreeMap(): Map { 90 | val firstULMap = mapOf("value" to listOf( 91 | mapOf("value" to "First UL - First Element", "class" to null), 92 | mapOf("value" to "First UL - Second Element", "class" to listOf("ClassA", "ClassB")))) 93 | val secondULMap = mapOf("value" to listOf( 94 | mapOf("value" to "Second UL - First Element", "class" to null), 95 | mapOf("value" to "Second UL - Second Element", "class" to null))) 96 | val ulListMap = listOf(firstULMap, secondULMap) 97 | val pListMap = listOf(mapOf("value" to "First Paragraph"), 98 | mapOf("value" to "Second Paragraph")) 99 | return mapOf("ul" to ulListMap, "p" to pListMap) 100 | } 101 | } 102 | 103 | } 104 | 105 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/sink/strategy/NodePatternIngestionStrategy.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import org.neo4j.caniuse.CanIUse.canIUse 4 | import org.neo4j.caniuse.Cypher 5 | import org.neo4j.caniuse.Neo4j 6 | import streams.extensions.flatten 7 | import streams.utils.JSONUtils 8 | import streams.service.StreamsSinkEntity 9 | import streams.utils.IngestionUtils.containsProp 10 | import streams.utils.IngestionUtils.getLabelsAsString 11 | import streams.utils.IngestionUtils.getNodeMergeKeys 12 | import streams.utils.StreamsUtils 13 | 14 | class NodePatternIngestionStrategy(neo4j: Neo4j, private val nodePatternConfiguration: NodePatternConfiguration): IngestionStrategy { 15 | private val cypherPrefix = if (canIUse(Cypher.explicitCypher5Selection()).withNeo4j(neo4j)) "CYPHER 5 " else "" 16 | 17 | private val mergeNodeTemplate: String = """ 18 | |${cypherPrefix}${StreamsUtils.UNWIND} 19 | |MERGE (n${getLabelsAsString(nodePatternConfiguration.labels)}{${ 20 | getNodeMergeKeys("keys", nodePatternConfiguration.keys) 21 | }}) 22 | |SET n ${if (nodePatternConfiguration.mergeProperties) "+" else ""}= event.properties 23 | |SET n += event.keys 24 | """.trimMargin() 25 | 26 | private val deleteNodeTemplate: String = """ 27 | |${cypherPrefix}${StreamsUtils.UNWIND} 28 | |MATCH (n${getLabelsAsString(nodePatternConfiguration.labels)}{${ 29 | getNodeMergeKeys("keys", nodePatternConfiguration.keys) 30 | }}) 31 | |DETACH DELETE n 32 | """.trimMargin() 33 | 34 | override fun mergeNodeEvents(events: Collection): List { 35 | val data = events 36 | 
.mapNotNull { if (it.value != null) JSONUtils.asMap(it.value) else null } 37 | .mapNotNull { toData(nodePatternConfiguration, it) } 38 | return if (data.isEmpty()) { 39 | emptyList() 40 | } else { 41 | listOf(QueryEvents(mergeNodeTemplate, data)) 42 | } 43 | } 44 | 45 | override fun deleteNodeEvents(events: Collection): List { 46 | val data = events 47 | .filter { it.value == null && it.key != null } 48 | .mapNotNull { if (it.key != null) JSONUtils.asMap(it.key) else null } 49 | .mapNotNull { toData(nodePatternConfiguration, it, false) } 50 | return if (data.isEmpty()) { 51 | emptyList() 52 | } else { 53 | listOf(QueryEvents(deleteNodeTemplate, data)) 54 | } 55 | } 56 | 57 | override fun mergeRelationshipEvents(events: Collection): List { 58 | return emptyList() 59 | } 60 | 61 | override fun deleteRelationshipEvents(events: Collection): List { 62 | return emptyList() 63 | } 64 | 65 | companion object { 66 | fun toData(nodePatternConfiguration: NodePatternConfiguration, props: Map, withProperties: Boolean = true): Map>? { 67 | val properties = props.flatten() 68 | val containsKeys = nodePatternConfiguration.keys.all { properties.containsKey(it) } 69 | return if (containsKeys) { 70 | val filteredProperties = when (nodePatternConfiguration.type) { 71 | PatternConfigurationType.ALL -> properties.filterKeys { !nodePatternConfiguration.keys.contains(it) } 72 | PatternConfigurationType.EXCLUDE -> properties.filterKeys { key -> 73 | val containsProp = containsProp(key, nodePatternConfiguration.properties) 74 | !nodePatternConfiguration.keys.contains(key) && !containsProp 75 | } 76 | PatternConfigurationType.INCLUDE -> properties.filterKeys { key -> 77 | val containsProp = containsProp(key, nodePatternConfiguration.properties) 78 | !nodePatternConfiguration.keys.contains(key) && containsProp 79 | } 80 | } 81 | if (withProperties) { 82 | mapOf("keys" to properties.filterKeys { nodePatternConfiguration.keys.contains(it) }, 83 | "properties" to filteredProperties) 84 | } else { 85 | mapOf("keys" to properties.filterKeys { nodePatternConfiguration.keys.contains(it) }) 86 | } 87 | } else { 88 | null 89 | } 90 | } 91 | 92 | 93 | } 94 | 95 | } -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/errors/KafkaErrorService.kt: -------------------------------------------------------------------------------- 1 | package streams.service.errors 2 | 3 | import org.apache.commons.lang3.exception.ExceptionUtils 4 | import org.apache.kafka.clients.CommonClientConfigs 5 | import org.apache.kafka.clients.producer.KafkaProducer 6 | import org.apache.kafka.clients.producer.Producer 7 | import org.apache.kafka.clients.producer.ProducerConfig 8 | import org.apache.kafka.clients.producer.ProducerRecord 9 | import org.apache.kafka.common.record.RecordBatch 10 | import org.apache.kafka.common.serialization.ByteArraySerializer 11 | import streams.utils.ValidationUtils.validateConnection 12 | import java.util.Properties 13 | 14 | class KafkaErrorService(private val producer: Producer?, private val errorConfig: ErrorConfig, private val log: (String, Exception?)->Unit): ErrorService() { 15 | 16 | constructor(config: Properties, errorConfig: ErrorConfig, 17 | log: (String, Exception?) -> Unit) : this(producer(errorConfig, config, log), errorConfig, log) 18 | 19 | companion object { 20 | private fun producer(errorConfig: ErrorConfig, config: Properties, log: (String, Exception?) 
-> Unit) = 21 | errorConfig.dlqTopic?.let { 22 | try { 23 | val bootstrapServers = config.getOrDefault(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, "").toString() 24 | validateConnection(bootstrapServers, CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, false) 25 | config[ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java.name 26 | config[ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java.name 27 | KafkaProducer(config) 28 | } catch (e: Exception) { 29 | log("Cannot initialize the custom DLQ because of the following exception: ", e) 30 | null 31 | } 32 | } 33 | } 34 | 35 | override fun report(errorDatas: List) { 36 | if (errorConfig.fail) throw ProcessingError(errorDatas) 37 | if (errorConfig.log) { 38 | if (errorConfig.logMessages) { 39 | errorDatas.forEach{log(it.toLogString(),it.exception)} 40 | } else { 41 | errorDatas.map { it.exception }.distinct().forEach{log("Error processing ${errorDatas.size} messages",it)} 42 | } 43 | } 44 | 45 | errorDatas.forEach { dlqData -> 46 | producer?.let { 47 | try { 48 | val producerRecord = if (dlqData.timestamp == RecordBatch.NO_TIMESTAMP) { 49 | ProducerRecord(errorConfig.dlqTopic, null, dlqData.key, dlqData.value) 50 | } else { 51 | ProducerRecord(errorConfig.dlqTopic, null, dlqData.timestamp, dlqData.key, dlqData.value) 52 | } 53 | if (errorConfig.dlqHeaders) { 54 | val producerHeader = producerRecord.headers() 55 | populateContextHeaders(dlqData).forEach { (key, value) -> producerHeader.add(key, value) } 56 | } 57 | it.send(producerRecord) 58 | } catch (e: Exception) { 59 | log("Error writing to DLQ $e: ${dlqData.toLogString()}", e) // todo only the first or all 60 | } 61 | } 62 | } 63 | } 64 | 65 | // VisibleForTesting 66 | fun populateContextHeaders(errorData: ErrorData): Map { 67 | fun prefix(suffix: String) = errorConfig.dlqHeaderPrefix + suffix 68 | 69 | val headers = mutableMapOf( 70 | prefix("topic") to errorData.originalTopic.toByteArray(), 71 | prefix("partition") to errorData.partition.toByteArray(), 72 | prefix("offset") to errorData.offset.toByteArray()) 73 | 74 | if (!errorData.databaseName.isNullOrBlank()) { 75 | headers[prefix("databaseName")] = errorData.databaseName.toByteArray() 76 | } 77 | 78 | if (errorData.executingClass != null) { 79 | headers[prefix("class.name")] = errorData.executingClass.name.toByteArray() 80 | } 81 | if (errorData.exception != null) { 82 | headers[prefix("exception.class.name")] = errorData.exception.javaClass.name.toByteArray() 83 | if (errorData.exception.message != null) { 84 | headers[prefix("exception.message")] = errorData.exception.message.toString().toByteArray() 85 | } 86 | headers[prefix("exception.stacktrace")] = ExceptionUtils.getStackTrace(errorData.exception).toByteArray() 87 | } 88 | return headers 89 | } 90 | 91 | 92 | override fun close() { 93 | this.producer?.close() 94 | } 95 | 96 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | services: 3 | neo4j: 4 | image: neo4j:5-enterprise 5 | hostname: neo4j 6 | container_name: neo4j 7 | ports: 8 | - "7474:7474" 9 | - "7687:7687" 10 | environment: 11 | NEO4J_AUTH: neo4j/kafkaconnect 12 | NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" 13 | NEO4J_server_memory_heap_max__size: "4G" 14 | 15 | zookeeper: 16 | image: confluentinc/cp-zookeeper:7.8.1 17 | hostname: zookeeper 18 | container_name: zookeeper 19 | ports: 20 | - "2181:2181" 
21 | environment: 22 | ZOOKEEPER_CLIENT_PORT: 2181 23 | ZOOKEEPER_TICK_TIME: 2000 24 | 25 | broker: 26 | image: confluentinc/cp-server:7.8.1 27 | hostname: broker 28 | container_name: broker 29 | depends_on: 30 | - zookeeper 31 | ports: 32 | - "9092:9092" 33 | - "9101:9101" 34 | environment: 35 | KAFKA_BROKER_ID: 1 36 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' 37 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 38 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 39 | KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter 40 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 41 | KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 42 | KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: 1 43 | KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 1 44 | KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 45 | KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 46 | KAFKA_JMX_PORT: 9101 47 | KAFKA_JMX_HOSTNAME: localhost 48 | KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081 49 | CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: broker:29092 50 | CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 51 | CONFLUENT_METRICS_ENABLE: 'true' 52 | CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' 53 | 54 | schema-registry: 55 | image: confluentinc/cp-schema-registry:7.8.1 56 | hostname: schema-registry 57 | container_name: schema-registry 58 | depends_on: 59 | - broker 60 | ports: 61 | - "8081:8081" 62 | environment: 63 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 64 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' 65 | SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 66 | 67 | connect: 68 | image: confluentinc/cp-server-connect:7.8.1 69 | hostname: connect 70 | container_name: connect 71 | depends_on: 72 | - broker 73 | - schema-registry 74 | ports: 75 | - "8083:8083" 76 | volumes: 77 | - ./plugins:/tmp/connect-plugins 78 | environment: 79 | CONNECT_BOOTSTRAP_SERVERS: 'broker:29092' 80 | CONNECT_REST_ADVERTISED_HOST_NAME: connect 81 | CONNECT_GROUP_ID: compose-connect-group 82 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs 83 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 84 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 85 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets 86 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 87 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status 88 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 89 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter 90 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 91 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 92 | # CLASSPATH required due to CC-2422 93 | CLASSPATH: /usr/share/java/monitoring-interceptors/monitoring-interceptors-7.8.1.jar 94 | CONNECT_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" 95 | CONNECT_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" 96 | CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components,/tmp/connect-plugins" 97 | CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR 98 | command: 99 | # - bash 100 | # - -c 101 | # - | 102 | /etc/confluent/docker/run 103 | 104 | control-center: 105 | image: confluentinc/cp-enterprise-control-center:7.8.1 106 | hostname: control-center 107 | container_name: control-center 108 | depends_on: 109 | - broker 110 | - schema-registry 111 | - connect 
112 | ports: 113 | - "9021:9021" 114 | environment: 115 | CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092' 116 | CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'connect:8083' 117 | CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" 118 | CONTROL_CENTER_REPLICATION_FACTOR: 1 119 | CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1 120 | CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1 121 | CONFLUENT_METRICS_TOPIC_REPLICATION: 1 122 | PORT: 9021 -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | services: 3 | neo4j: 4 | image: neo4j:5-enterprise 5 | hostname: neo4j 6 | container_name: neo4j 7 | ports: 8 | - "7474:7474" 9 | - "7687:7687" 10 | environment: 11 | NEO4J_AUTH: neo4j/kafkaconnect 12 | NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" 13 | NEO4J_server_memory_heap_max__size: "4G" 14 | 15 | zookeeper: 16 | image: confluentinc/cp-zookeeper:7.8.1 17 | hostname: zookeeper 18 | container_name: zookeeper 19 | ports: 20 | - "2181:2181" 21 | environment: 22 | ZOOKEEPER_CLIENT_PORT: 2181 23 | ZOOKEEPER_TICK_TIME: 2000 24 | 25 | broker: 26 | image: confluentinc/cp-server:7.8.1 27 | hostname: broker 28 | container_name: broker 29 | depends_on: 30 | - zookeeper 31 | ports: 32 | - "9092:9092" 33 | - "9101:9101" 34 | environment: 35 | KAFKA_BROKER_ID: 1 36 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' 37 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 38 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 39 | KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter 40 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 41 | KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 42 | KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: 1 43 | KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 1 44 | KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 45 | KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 46 | KAFKA_JMX_PORT: 9101 47 | KAFKA_JMX_HOSTNAME: localhost 48 | KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081 49 | CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: broker:29092 50 | CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 51 | CONFLUENT_METRICS_ENABLE: 'true' 52 | CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' 53 | 54 | schema-registry: 55 | image: confluentinc/cp-schema-registry:7.8.1 56 | hostname: schema-registry 57 | container_name: schema-registry 58 | depends_on: 59 | - broker 60 | ports: 61 | - "8081:8081" 62 | environment: 63 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 64 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' 65 | SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 66 | 67 | connect: 68 | image: confluentinc/cp-server-connect:7.8.1 69 | hostname: connect 70 | container_name: connect 71 | depends_on: 72 | - broker 73 | - schema-registry 74 | ports: 75 | - "8083:8083" 76 | volumes: 77 | - ./plugins:/tmp/connect-plugins 78 | environment: 79 | CONNECT_BOOTSTRAP_SERVERS: 'broker:29092' 80 | CONNECT_REST_ADVERTISED_HOST_NAME: connect 81 | CONNECT_GROUP_ID: compose-connect-group 82 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs 83 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 84 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 85 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets 86 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 87 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status 88 | 
CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 89 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter 90 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 91 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 92 | # CLASSPATH required due to CC-2422 93 | CLASSPATH: /usr/share/java/monitoring-interceptors/monitoring-interceptors-7.8.1.jar 94 | CONNECT_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" 95 | CONNECT_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" 96 | CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components,/tmp/connect-plugins" 97 | CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR 98 | command: 99 | # - bash 100 | # - -c 101 | # - | 102 | /etc/confluent/docker/run 103 | 104 | control-center: 105 | image: confluentinc/cp-enterprise-control-center:7.8.1 106 | hostname: control-center 107 | container_name: control-center 108 | depends_on: 109 | - broker 110 | - schema-registry 111 | - connect 112 | ports: 113 | - "9021:9021" 114 | environment: 115 | CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092' 116 | CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'connect:8083' 117 | CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" 118 | CONTROL_CENTER_REPLICATION_FACTOR: 1 119 | CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1 120 | CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1 121 | CONFLUENT_METRICS_TOPIC_REPLICATION: 1 122 | PORT: 9021 123 | -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/errors/ErrorService.kt: -------------------------------------------------------------------------------- 1 | package streams.service.errors 2 | 3 | import java.util.Properties 4 | 5 | data class ErrorData(val originalTopic: String, 6 | val timestamp: Long, 7 | val key: ByteArray?, 8 | val value: ByteArray?, 9 | val partition: String, 10 | val offset: String, 11 | val executingClass: Class<*>?, 12 | val databaseName: String?, 13 | val exception: Exception?) { 14 | 15 | constructor(originalTopic: String, timestamp: Long?, key: Any?, value: Any?, 16 | partition: Int, offset: Long, executingClass: Class<*>?, databaseName: String?, exception: Exception?) : 17 | this(originalTopic, timestamp ?: NO_TIMESTAMP, toByteArray(key), toByteArray(value), partition.toString(),offset.toString(), executingClass, databaseName, exception) 18 | 19 | companion object { 20 | 21 | private const val NO_TIMESTAMP: Long = -1L 22 | 23 | fun toByteArray(v:Any?) = try { 24 | when (v) { 25 | null -> null 26 | is ByteArray -> v 27 | else -> v.toString().toByteArray(Charsets.UTF_8) 28 | } 29 | } catch (e:Exception) { 30 | null 31 | } 32 | } 33 | fun toLogString() = 34 | """ 35 | ErrorData(originalTopic=$originalTopic, timestamp=$timestamp, partition=$partition, offset=$offset, exception=$exception, key=${key?.toString(Charsets.UTF_8)}, value=${value?.sliceArray(0..Math.min(value.size,200)-1)?.toString(Charsets.UTF_8)}, executingClass=$executingClass) 36 | """.trimIndent() 37 | 38 | } 39 | 40 | abstract class ErrorService(private val config: Map = emptyMap()) { 41 | 42 | data class ErrorConfig(val fail:Boolean=false, val log:Boolean=false, val logMessages:Boolean=false, 43 | val dlqTopic:String? = null, val dlqHeaderPrefix:String = "", val dlqHeaders:Boolean = false, val dlqReplication: Int? 
= 3) { 44 | 45 | /* 46 | https://www.confluent.io/blog/kafka-connect-deep-dive-error-handling-dead-letter-queues 47 | "errors.retry.timeout": "-1", 48 | "errors.retry.delay.max.ms": "1000", 49 | 50 | "errors.tolerance": "all", "none" == fail-fast, abort sink task 51 | 52 | fail-fast for configuration errors (e.g. validate cypher statements on start) 53 | errors.tolerance = all -> silently ignore all bad messages 54 | 55 | org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java 56 | 57 | 58 | "errors.log.enable": true, 59 | "errors.deadletterqueue.context.headers.enable"=true/false 60 | "errors.deadletterqueue.topic.name": "test-error-topic", 61 | "errors.deadletterqueue.topic.replication.factor": 1, 62 | "errors.log.include.messages": true, 63 | */ 64 | 65 | companion object { 66 | const val TOLERANCE = "errors.tolerance" 67 | const val LOG = "errors.log.enable" 68 | const val LOG_MESSAGES = "errors.log.include.messages" 69 | const val DLQ_TOPIC = "errors.deadletterqueue.topic.name" 70 | const val DLQ_HEADERS = "errors.deadletterqueue.context.headers.enable" 71 | const val DLQ_HEADER_PREFIX = "errors.deadletterqueue.context.headers.prefix" 72 | const val DLQ_REPLICATION = "errors.deadletterqueue.topic.replication.factor" 73 | 74 | fun from(props: Properties) = from(props.toMap() as Map) 75 | 76 | fun boolean(v:Any?) = when (v) { 77 | null -> false 78 | "true" -> true 79 | "false" -> false 80 | is Boolean -> v 81 | else -> false 82 | } 83 | fun int(v:Any?) = when (v) { 84 | null -> 0 85 | is Int -> v 86 | is String -> v.toInt() 87 | else -> 0 88 | } 89 | 90 | fun from(config: Map) = 91 | ErrorConfig( 92 | fail = config.getOrDefault(TOLERANCE, "none") == "none", 93 | log = boolean(config.get(LOG)), 94 | logMessages = boolean(config.get(LOG_MESSAGES)), 95 | dlqTopic = config.get(DLQ_TOPIC) as String?, 96 | dlqHeaders = boolean(config.get(DLQ_HEADERS)), 97 | dlqHeaderPrefix = config.getOrDefault(DLQ_HEADER_PREFIX,"") as String, 98 | dlqReplication = int(config.getOrDefault(DLQ_REPLICATION, 3))) 99 | } 100 | } 101 | 102 | abstract fun report(errorDatas: List) 103 | 104 | open fun close() {} 105 | } 106 | 107 | class ProcessingError(val errorDatas: List) : 108 | RuntimeException("Error processing ${errorDatas.size} messages\n"+errorDatas.map { it.toLogString() }.joinToString("\n")) 109 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/docker/readme.adoc: -------------------------------------------------------------------------------- 1 | 2 | ==== Configuration parameters 3 | :environment: neo4j 4 | :id: neo4j 5 | 6 | You can set the following configuration values via Confluent Connect UI, or via REST endpoint 7 | 8 | [cols="3*",subs="attributes",options="header"] 9 | |=== 10 | |Field|Type|Description 11 | 12 | |{environment}.server.uri|String|The Bolt URI (default bolt://localhost:7687) 13 | |{environment}.authentication.type|enum[NONE, BASIC, KERBEROS]| The authentication type (default BASIC) 14 | |{environment}.batch.size|Int|The max number of events processed by the Cypher query (default 1000) 15 | |{environment}.batch.timeout.msecs|Long|The execution timeout for the Cypher query (default 30000) 16 | |{environment}.authentication.basic.username|String| The authentication username 17 | |{environment}.authentication.basic.password|String| The authentication password 18 | |{environment}.authentication.basic.realm|String| The authentication realm 19 | 
|{environment}.authentication.kerberos.ticket|String| The Kerberos ticket 20 | |{environment}.encryption.enabled|Boolean| If the encryption is enabled (default false) 21 | |{environment}.encryption.trust.strategy|enum[TRUST_ALL_CERTIFICATES, TRUST_CUSTOM_CA_SIGNED_CERTIFICATES, TRUST_SYSTEM_CA_SIGNED_CERTIFICATES]| The Neo4j trust strategy (default TRUST_ALL_CERTIFICATES) 22 | |{environment}.encryption.ca.certificate.path|String| The path of the certificate 23 | |{environment}.connection.max.lifetime.msecs|Long| The max Neo4j connection lifetime (default 1 hour) 24 | |{environment}.connection.acquisition.timeout.msecs|Long| The max Neo4j acquisition timeout (default 1 hour) 25 | |{environment}.connection.liveness.check.timeout.msecs|Long| The max Neo4j liveness check timeout (default 1 hour) 26 | |{environment}.connection.max.pool.size|Int| The max pool size (default 100) 27 | |{environment}.load.balance.strategy|enum[ROUND_ROBIN, LEAST_CONNECTED]| The Neo4j load balance strategy (default LEAST_CONNECTED) 28 | |{environment}.batch.parallelize|Boolean|(default true) While concurrent batch processing improves throughput, it might cause out-of-order handling of events. Set to `false` if you need application of messages with strict ordering, e.g. for change-data-capture (CDC) events. 29 | |=== 30 | 31 | ==== Configuring the stack 32 | 33 | Start the compose file 34 | 35 | [source,bash] 36 | ---- 37 | docker-compose up -d 38 | ---- 39 | 40 | You can access your Neo4j instance under: http://localhost:7474, log in with `neo4j` as username and `kafkaconnect` as password (see the docker-compose file to change it). 41 | 42 | ===== Plugin installation 43 | 44 | You can choose your preferred way in order to install the plugin: 45 | 46 | * *Build it locally* 47 | + 48 | -- 49 | Build the project by running the following command: 50 | 51 | [source,bash] 52 | ---- 53 | mvn clean install 54 | ---- 55 | 56 | Create a directory `plugins` at the same level of the compose file and unzip the file `neo4j-kafka-connect-neo4j-.zip` inside it. 57 | -- 58 | 59 | * *Download the zip from the Confluent Hub* 60 | 61 | + 62 | -- 63 | Please go to the Confluent Hub page of the plugin: 64 | 65 | https://www.confluent.io/connector/kafka-connect-neo4j-sink/ 66 | 67 | And click to the **Download Connector** button. 68 | 69 | Create a directory `plugins` at the same level of the compose file and unzip the file `neo4j-kafka-connect-neo4j-.zip` inside it. 70 | -- 71 | 72 | * *Download and install the plugin via Confluent Hub client* 73 | + 74 | -- 75 | If you are using the provided compose file you can easily install the plugin by using the Confluent Hub. 76 | 77 | Once the compose file is up and running you can install the plugin by executing the following command: 78 | 79 | [source,bash] 80 | ---- 81 | docker exec -it connect confluent-hub install neo4j/kafka-connect-neo4j: 82 | ---- 83 | 84 | When the installation will ask: 85 | 86 | [source,bash] 87 | ---- 88 | The component can be installed in any of the following Confluent Platform installations: 89 | ---- 90 | 91 | Please prefer the solution `(where this tool is installed)` and then go ahead with the default options. 92 | 93 | At the end of the process the plugin is automatically installed. 
94 | -- 95 | 96 | ==== Multi Database Support 97 | 98 | Neo4j 4.0 Enterprise has https://neo4j.com/docs/operations-manual/4.0/manage-databases/[multi-tenancy support], 99 | in order to support this feature you can define into the json (or via the Confluent UI) 100 | a param named `neo4j.database` which is the targeted database name. 101 | 102 | *N.b.* If no value is specified the connector will use the Neo4j's default db. 103 | 104 | ==== Create the Sink Instance 105 | 106 | To create the Sink instance and configure your preferred ingestion strategy, you can follow instructions described 107 | into <> and <> 108 | sections. 109 | 110 | ==== Create the Source Instance 111 | 112 | To create the Source instance and configure your preferred ingestion strategy, you can follow instructions described 113 | into <> section. 114 | 115 | ===== Use the Kafka Connect Datagen 116 | 117 | In order to generate a sample dataset you can use Kafka Connect Datagen as explained in <> section. 118 | 119 | [NOTE] 120 | Before start using the data generator please create indexes in Neo4j (in order to speed-up the import process) 121 | 122 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorConfigTest.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import org.apache.kafka.common.config.ConfigException 4 | import org.junit.Test 5 | import streams.kafka.connect.common.Neo4jConnectorConfig 6 | import java.net.URI 7 | import kotlin.test.assertEquals 8 | 9 | class Neo4jSourceConnectorConfigTest { 10 | 11 | @Test(expected = ConfigException::class) 12 | fun `should throw a ConfigException because of unsupported streaming type`() { 13 | try { 14 | val originals = mapOf(Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.LABELS.toString(), 15 | Neo4jSourceConnectorConfig.TOPIC to "topic", 16 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString(), 17 | Neo4jSourceConnectorConfig.STREAMING_PROPERTY to "timestamp") 18 | Neo4jSourceConnectorConfig(originals) 19 | } catch (e: ConfigException) { 20 | assertEquals("Supported source query types are: ${SourceType.QUERY}", e.message) 21 | throw e 22 | } 23 | } 24 | 25 | @Test(expected = ConfigException::class) 26 | fun `should throw a ConfigException because of empty query`() { 27 | try { 28 | val originals = mapOf(Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.QUERY.toString(), 29 | Neo4jSourceConnectorConfig.TOPIC to "topic", 30 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString(), 31 | Neo4jSourceConnectorConfig.STREAMING_PROPERTY to "timestamp") 32 | Neo4jSourceConnectorConfig(originals) 33 | } catch (e: ConfigException) { 34 | assertEquals("You need to define: ${Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY}", e.message) 35 | throw e 36 | } 37 | } 38 | 39 | @Test 40 | fun `should return config`() { 41 | val originals = mapOf(Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.QUERY.toString(), 42 | Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY to "MATCH (n) RETURN n", 43 | Neo4jSourceConnectorConfig.TOPIC to "topic", 44 | Neo4jSourceConnectorConfig.STREAMING_POLL_INTERVAL to "10", 45 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString(), 46 | Neo4jSourceConnectorConfig.STREAMING_PROPERTY to "timestamp") 47 | val config = Neo4jSourceConnectorConfig(originals) 48 | 
assertEquals(originals[Neo4jSourceConnectorConfig.TOPIC], config.topic) 49 | assertEquals(originals[Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY], config.query) 50 | assertEquals(originals[Neo4jSourceConnectorConfig.STREAMING_PROPERTY], config.streamingProperty) 51 | assertEquals(originals[Neo4jSourceConnectorConfig.STREAMING_FROM], config.streamingFrom.toString()) 52 | assertEquals(originals[Neo4jSourceConnectorConfig.STREAMING_POLL_INTERVAL]?.toInt(), config.pollInterval) 53 | } 54 | 55 | @Test 56 | fun `should return config null streaming property`() { 57 | val originals = mapOf(Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.QUERY.toString(), 58 | Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY to "MATCH (n) RETURN n", 59 | Neo4jSourceConnectorConfig.TOPIC to "topic", 60 | Neo4jSourceConnectorConfig.STREAMING_POLL_INTERVAL to "10", 61 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString()) 62 | val config = Neo4jSourceConnectorConfig(originals) 63 | assertEquals("", config.streamingProperty) 64 | } 65 | 66 | @Test 67 | fun `should return URIs with default port if port does not exist`() { 68 | val a = "bolt://neo4j.com" 69 | val b = "bolt://neo4j2.com" 70 | 71 | val originals = mapOf(Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.QUERY.toString(), 72 | Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY to "MATCH (n) RETURN n", 73 | Neo4jSourceConnectorConfig.TOPIC to "topic", 74 | Neo4jSourceConnectorConfig.STREAMING_POLL_INTERVAL to "10", 75 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString(), 76 | Neo4jConnectorConfig.SERVER_URI to "$a,$b") 77 | val config = Neo4jSourceConnectorConfig(originals) 78 | 79 | assertEquals("$a:7687", config.serverUri[0].toString()) 80 | assertEquals("$b:7687", config.serverUri[1].toString()) 81 | } 82 | 83 | @Test 84 | fun `should parse multiple URIs`() { 85 | val originals = mapOf( 86 | Neo4jSourceConnectorConfig.SOURCE_TYPE to SourceType.QUERY.toString(), 87 | Neo4jSourceConnectorConfig.SOURCE_TYPE_QUERY to "MATCH (n) RETURN n", 88 | Neo4jSourceConnectorConfig.TOPIC to "topic", 89 | Neo4jSourceConnectorConfig.STREAMING_POLL_INTERVAL to "10", 90 | Neo4jSourceConnectorConfig.STREAMING_FROM to StreamingFrom.NOW.toString(), 91 | Neo4jConnectorConfig.SERVER_URI to "neo4j://192.168.0.1:7687,neo4j://192.168.0.3:7687,neo4j://192.168.0.2" 92 | ) 93 | val config = Neo4jSourceConnectorConfig(originals) 94 | 95 | assertEquals( 96 | config.serverUri, listOf( 97 | URI("neo4j://192.168.0.1:7687"), 98 | URI("neo4j://192.168.0.3:7687"), 99 | URI("neo4j://192.168.0.2:7687"), 100 | ) 101 | ) 102 | } 103 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/resources/kafka-connect-neo4j.properties: -------------------------------------------------------------------------------- 1 | ## 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ## 15 | 16 | ## Common Properties 17 | neo4j.database=Type: String;\nDescription: The neo4j database instance name (default neo4j) 18 | neo4j.server.uri=Type: String;\nDescription: The Bolt URI (default bolt://localhost:7687) 19 | neo4j.authentication.type=Type: enum[NONE, BASIC, KERBEROS];\nDescription: The authentication type (default BASIC) 20 | neo4j.batch.size=Type: Int;\nDescription: The max number of events processed by the Cypher query for the Sink. \ 21 | The max number of messages pushed for each poll cycle in case of the Source. (default 1000) 22 | neo4j.batch.timeout.msecs=Type: Long;\nDescription: The execution timeout for the Cypher query (default: 0, that is without timeout) 23 | neo4j.authentication.basic.username=Type: String;\nDescription: The authentication username 24 | neo4j.authentication.basic.password=Type: String;\nDescription: The authentication password 25 | neo4j.authentication.basic.realm=Type: String;\nDescription: The authentication realm 26 | neo4j.authentication.kerberos.ticket=Type: String;\nDescription: The Kerberos ticket 27 | neo4j.encryption.enabled=Type: Boolean;\nDescription: If the encryption is enabled (default false) 28 | neo4j.encryption.trust.strategy=Type: enum[TRUST_ALL_CERTIFICATES, TRUST_CUSTOM_CA_SIGNED_CERTIFICATES, TRUST_SYSTEM_CA_SIGNED_CERTIFICATES];\n\ 29 | Description: The Neo4j trust strategy (default TRUST_ALL_CERTIFICATES) 30 | neo4j.encryption.ca.certificate.path=Type: String;\nDescription: The path of the certificate 31 | neo4j.connection.max.lifetime.msecs=Type: Long;\nDescription: The max Neo4j connection lifetime (default 1 hour) 32 | neo4j.connection.acquisition.timeout.msecs=Type: Long;\nDescription: The max Neo4j acquisition timeout (default 1 hour) 33 | neo4j.connection.liveness.check.timeout.msecs=Type: Long;\nDescription: The max Neo4j liveness check timeout (default 1 hour) 34 | neo4j.connection.max.pool.size=Type: Int;\nDescription: The max pool size (default 100) 35 | neo4j.retry.backoff.msecs=Type: Long;\nDescription: The time in milliseconds to wait following a transient error \ 36 | before a retry attempt is made (default 30000). 37 | neo4j.retry.max.attemps=Type: Int;\nDescription: The maximum number of times to retry on transient errors \ 38 | (except for TimeoutException) before failing the task (default 5). 39 | 40 | ## Sink Properties 41 | neo4j.topic.cdc.sourceId=Type: String;\nDescription: The topic list (separated by semicolon) that manages CDC events with the `SourceId` strategy 42 | neo4j.topic.cdc.sourceId.labelName=Type: String;\nDescription: The label name attached to the events with the `SourceId` strategy (default SourceEvent) 43 | neo4j.topic.cdc.sourceId.idName=Type: String;\nDescription: The id property name attached to the events with the `SourceId` strategy (default sourceId) 44 | neo4j.topic.cdc.schema=Type: String;\nDescription: The topic list (separated by semicolon) that manages CDC events with the `Schema` strategy 45 | neo4j.batch.parallelize=Type: Boolean;\nDescription: If enabled messages are processed concurrently in the sink. \ 46 | Non concurrent execution supports in-order processing, e.g. for CDC (default true) 47 | neo4j.topic.cud=Type: String;\nDescription: The topic list (separated by semicolon) that manages CUD events 48 | neo4j.topic.pattern.merge.node.properties.enabled=Type: Boolean;\nDescription: If enabled nodes properties will be merged when \ 49 | using Sink `Node pattern` strategy (default false). 
When using the Sink `Relationship pattern` strategy, edge node properties will be merged when \ 50 | creating relationships (default false) 51 | neo4j.topic.pattern.merge.relationship.properties.enabled=Type: Boolean;\nDescription: If enabled, relationship properties will be merged when creating relationships \ 52 | using Sink `Relationship pattern` strategy (default false) 53 | 54 | ## Source Properties 55 | topic=Type: String;\nDescription: The topic where the Source will publish the data 56 | partitions=Type: Int;\nDescription: The number of partitions for the Source (default 1) 57 | neo4j.streaming.from=Type: enum[ALL, NOW, LAST_COMMITTED];\nDescription: Where to start streaming from. ALL means from the beginning. \ 58 | LAST_COMMITTED will try to retrieve an already committed offset; \ 59 | if none is found, LAST_COMMITTED uses NOW as fallback (default NOW) 60 | neo4j.source.type=Type: enum[QUERY];\nDescription: The type of the Source strategy; with QUERY you must set `neo4j.source.query` 61 | neo4j.source.query=Type: String;\nDescription: The Cypher query used to extract the data from Neo4j; you must \ 62 | define it if you use `neo4j.source.type=QUERY` 63 | neo4j.streaming.property=Type: String;\nDescription: The name of the property used to determine \ 64 | the last queried record. If not defined, an internal value taken from the last performed check is used 65 | neo4j.streaming.poll.interval.msecs=Type: Int;\nDescription: The polling interval in ms (Default: 10000) 66 | neo4j.enforce.schema=Type: Boolean;\nDescription: Apply a schema to each record (Default: false) 67 | 68 | -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceConnectorConfig.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder 4 | import com.github.jcustenborder.kafka.connect.utils.config.ValidEnum 5 | import org.apache.kafka.common.config.ConfigDef 6 | import org.apache.kafka.common.config.ConfigException 7 | import streams.kafka.connect.common.ConnectorType 8 | import streams.kafka.connect.common.Neo4jConnectorConfig 9 | import streams.kafka.connect.utils.PropertiesUtil 10 | 11 | enum class SourceType { 12 | QUERY, LABELS, RELATIONSHIP 13 | } 14 | 15 | enum class StreamingFrom { 16 | ALL, NOW, LAST_COMMITTED; 17 | 18 | fun value() = when (this) { 19 | ALL -> -1 20 | else -> System.currentTimeMillis() 21 | } 22 | } 23 | 24 | class Neo4jSourceConnectorConfig(originals: Map<*, *>): Neo4jConnectorConfig(config(), originals, ConnectorType.SOURCE) { 25 | 26 | val topic: String = getString(TOPIC) 27 | 28 | val labels: Array<String> 29 | val relationship: String 30 | val query: String 31 | 32 | val partitions: List<Int> = (1 ..
getInt(PARTITIONS)).toList() 33 | 34 | val streamingFrom: StreamingFrom = StreamingFrom.valueOf(getString(STREAMING_FROM)) 35 | val streamingProperty: String = getString(STREAMING_PROPERTY) 36 | 37 | val sourceType: SourceType = SourceType.valueOf(getString(SOURCE_TYPE)) 38 | 39 | val pollInterval: Int = getInt(STREAMING_POLL_INTERVAL) 40 | 41 | val enforceSchema: Boolean = getBoolean(ENFORCE_SCHEMA) 42 | 43 | init { 44 | when (sourceType) { 45 | SourceType.QUERY -> { 46 | query = getString(SOURCE_TYPE_QUERY) 47 | if (query.isNullOrBlank()) { 48 | throw ConfigException("You need to define: $SOURCE_TYPE_QUERY") 49 | } 50 | labels = emptyArray() 51 | relationship = "" 52 | } 53 | else -> { 54 | throw ConfigException("Supported source query types are: ${SourceType.QUERY}") 55 | } 56 | } 57 | } 58 | 59 | fun sourcePartition() = when (sourceType) { 60 | SourceType.QUERY -> mapOf("database" to this.database, 61 | "type" to "query", "query" to query, "partition" to 1) 62 | else -> throw UnsupportedOperationException("Supported source query types are: ${SourceType.QUERY}") 63 | } 64 | 65 | companion object { 66 | const val PARTITIONS = "partitions" 67 | const val TOPIC = "topic" 68 | const val STREAMING_FROM = "neo4j.streaming.from" 69 | const val ENFORCE_SCHEMA = "neo4j.enforce.schema" 70 | const val STREAMING_PROPERTY = "neo4j.streaming.property" 71 | const val STREAMING_POLL_INTERVAL = "neo4j.streaming.poll.interval.msecs" 72 | const val SOURCE_TYPE = "neo4j.source.type" 73 | const val SOURCE_TYPE_QUERY = "neo4j.source.query" 74 | const val SOURCE_TYPE_LABELS = "neo4j.source.labels" 75 | const val SOURCE_TYPE_RELATIONSHIP = "neo4j.source.relationship" 76 | 77 | fun config(): ConfigDef = Neo4jConnectorConfig.config() 78 | .define(ConfigKeyBuilder.of(ENFORCE_SCHEMA, ConfigDef.Type.BOOLEAN) 79 | .documentation(PropertiesUtil.getProperty(ENFORCE_SCHEMA)).importance(ConfigDef.Importance.HIGH) 80 | .defaultValue(false) 81 | .validator(ConfigDef.NonNullValidator()) 82 | .build()) 83 | .define(ConfigKeyBuilder.of(STREAMING_POLL_INTERVAL, ConfigDef.Type.INT) 84 | .documentation(PropertiesUtil.getProperty(STREAMING_POLL_INTERVAL)).importance(ConfigDef.Importance.HIGH) 85 | .defaultValue(10000) 86 | .validator(ConfigDef.Range.atLeast(1)) 87 | .build()) 88 | .define(ConfigKeyBuilder.of(STREAMING_PROPERTY, ConfigDef.Type.STRING) 89 | .documentation(PropertiesUtil.getProperty(STREAMING_PROPERTY)).importance(ConfigDef.Importance.HIGH) 90 | .defaultValue("") 91 | // .validator(ConfigDef.NonEmptyString()) 92 | .build()) 93 | .define(ConfigKeyBuilder.of(TOPIC, ConfigDef.Type.STRING) 94 | .documentation(PropertiesUtil.getProperty(TOPIC)).importance(ConfigDef.Importance.HIGH) 95 | .validator(ConfigDef.NonEmptyString()) 96 | .build()) 97 | .define(ConfigKeyBuilder.of(PARTITIONS, ConfigDef.Type.INT) 98 | .documentation(PropertiesUtil.getProperty(PARTITIONS)).importance(ConfigDef.Importance.HIGH) 99 | .defaultValue(1) 100 | .validator(ConfigDef.Range.atLeast(1)) 101 | .build()) 102 | .define(ConfigKeyBuilder.of(STREAMING_FROM, ConfigDef.Type.STRING) 103 | .documentation(PropertiesUtil.getProperty(STREAMING_FROM)).importance(ConfigDef.Importance.HIGH) 104 | .defaultValue(StreamingFrom.NOW.toString()) 105 | .validator(ValidEnum.of(StreamingFrom::class.java)) 106 | .build()) 107 | .define(ConfigKeyBuilder.of(SOURCE_TYPE, ConfigDef.Type.STRING) 108 | .documentation(PropertiesUtil.getProperty(SOURCE_TYPE)).importance(ConfigDef.Importance.HIGH) 109 | .defaultValue(SourceType.QUERY.toString()) 110 | 
.validator(ValidEnum.of(SourceType::class.java)) 111 | .build()) 112 | .define(ConfigKeyBuilder.of(SOURCE_TYPE_QUERY, ConfigDef.Type.STRING) 113 | .documentation(PropertiesUtil.getProperty(SOURCE_TYPE_QUERY)).importance(ConfigDef.Importance.HIGH) 114 | .defaultValue("") 115 | .build()) 116 | } 117 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/doc/readme.adoc: -------------------------------------------------------------------------------- 1 | = Build it locally 2 | 3 | Build the project by running the following command: 4 | 5 | mvn clean install 6 | 7 | Inside the directory `/kafka-connect-neo4j/target/component/packages` you'll find a file named `neo4j-kafka-connect-neo4j-.zip` 8 | 9 | == Sink 10 | 11 | === Configuring the stack 12 | 13 | Create a directory `plugins` at the same level of the compose file and unzip the file `neo4j-kafka-connect-neo4j-.zip` inside it, then start the compose file 14 | 15 | docker-compose up -d 16 | 17 | Create the Sink instance: 18 | 19 | We'll define the Sink configuration as follows: 20 | 21 | [source,json] 22 | ---- 23 | include::contrib.sink.avro.neo4j.json[] 24 | ---- 25 | 26 | In particular this line: 27 | 28 | ---- 29 | "neo4j.topic.cypher.my-topic": "MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)" 30 | ---- 31 | 32 | defines that all the data that comes from the topic `neo4j` will be unpacked by the Sink into Neo4j with the following Cypher query: 33 | 34 | [source,cypher] 35 | ---- 36 | MERGE (p:Person{name: event.name, surname: event.surname}) 37 | MERGE (f:Family{name: event.surname}) 38 | MERGE (p)-[:BELONGS_TO]->(f) 39 | ---- 40 | 41 | 42 | Under the hood the Sink inject the event object in this way 43 | 44 | [source,cypher] 45 | ---- 46 | UNWIND {batch} AS event 47 | MERGE (p:Person{name: event.name, surname: event.surname}) 48 | MERGE (f:Family{name: event.surname}) 49 | MERGE (p)-[:BELONGS_TO]->(f) 50 | ---- 51 | 52 | Where `{batch}` is a list of event objects. 53 | 54 | You can change the query or remove the property and add your own, but you must follow the following convention: 55 | 56 | [source,javascript] 57 | ---- 58 | "neo4j.topic.cypher.": "" 59 | ---- 60 | 61 | Let's load the configuration into the Confluent Platform with this REST call: 62 | 63 | [source,shell] 64 | ---- 65 | curl -X POST http://localhost:8083/connectors \ 66 | -H 'Content-Type:application/json' \ 67 | -H 'Accept:application/json' \ 68 | -d @contrib.sink.avro.neo4j.json 69 | ---- 70 | 71 | The file `contrib.sink.string-json.neo4j.json` contains a configuration that manage a simple JSON producer example 72 | 73 | Please check that everything is fine by going into: 74 | 75 | http://localhost:9021/management/connect 76 | 77 | and click to the **Sink** tab. You must find a table just like this: 78 | 79 | [cols="4*",options="header"] 80 | |=== 81 | |Status 82 | |Active Tasks 83 | |Name 84 | |Topics 85 | 86 | |Running 87 | |1 88 | |Neo4jSinkConnector 89 | |my-topic 90 | |=== 91 | 92 | === Use the data generator 93 | 94 | You can download and use the https://github.com/conker84/neo4j-streams-sink-tester/releases/download/1/neo4j-streams-sink-tester-1.0.jar[neo4j-streams-sink-tester-1.0.jar] in order to generate a sample dataset. 
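As a quick recap of the per-topic Cypher template convention described above (`neo4j.topic.cypher.my-topic` in the example), the same Sink configuration can be sketched as a plain Kotlin map. This is illustrative only: the connector class name comes from the sources in this repository, while the URI, credentials and topic are placeholders, not the contents of the bundled `contrib.sink.*.json` files.

[source,kotlin]
----
// Minimal, hypothetical Sink configuration assembled in Kotlin.
// Keys are the properties documented in this guide; values are placeholders.
val sinkConfig = mapOf(
    "connector.class" to "streams.kafka.connect.sink.Neo4jSinkConnector",
    "topics" to "my-topic",
    "neo4j.server.uri" to "bolt://neo4j:7687",
    "neo4j.authentication.basic.username" to "neo4j",
    "neo4j.authentication.basic.password" to "kafkaconnect",
    // one Cypher template per topic, keyed by the topic name
    "neo4j.topic.cypher.my-topic" to
        "MERGE (p:Person{name: event.name, surname: event.surname}) " +
        "MERGE (f:Family{name: event.surname}) " +
        "MERGE (p)-[:BELONGS_TO]->(f)"
)
----

Each consumed record is bound to `event` and executed in batches through the `UNWIND {batch} AS event` prefix shown earlier.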
95 | 96 | The sink tester sends records to the Neo4j Kafka Sink using the following two data formats: 97 | 98 | JSON example: 99 | 100 | [source,json] 101 | ---- 102 | {"name": "Name", "surname": "Surname"} 103 | ---- 104 | 105 | AVRO, with the schema: 106 | 107 | [source,json] 108 | ---- 109 | { 110 | "type":"record", 111 | "name":"User", 112 | "fields":[{"name":"name","type":"string"}, {"name":"surname","type":"string"}] 113 | } 114 | ---- 115 | 116 | Please type: 117 | 118 | ---- 119 | java -jar neo4j-streams-sink-tester-1.0.jar -h 120 | ---- 121 | 122 | to print the option list with default values. 123 | 124 | To choose the data format, use the `-f` flag: `-f AVRO` or `-f JSON` (the default value). 125 | For example: 126 | 127 | ---- 128 | java -jar neo4j-streams-sink-tester-1.0.jar -f AVRO 129 | ---- 130 | 131 | will send data in AVRO format. 132 | 133 | For a complete overview of the **Neo4j Streams Sink Tester** please refer to https://github.com/conker84/neo4j-streams-sink-tester[this repo] 134 | 135 | == Source 136 | 137 | === Configuring the stack 138 | 139 | Create a directory `plugins` at the same level of the compose file and unzip the file `neo4j-kafka-connect-neo4j-.zip` inside it, then start the compose file 140 | 141 | docker-compose up -d 142 | 143 | === Create the Source instance: 144 | 145 | In this chapter we'll discuss how the Source instance works 146 | 147 | You can create a new Source instance with this REST call: 148 | 149 | [source,shell] 150 | ---- 151 | curl -X POST http://localhost:8083/connectors \ 152 | -H 'Content-Type:application/json' \ 153 | -H 'Accept:application/json' \ 154 | -d @contrib.source.avro.neo4j.json 155 | ---- 156 | 157 | Let's look at the `contrib.source.avro.neo4j.json` file: 158 | 159 | [source,json] 160 | ---- 161 | include::contrib.source.avro.neo4j.json[] 162 | ---- 163 | 164 | This will create a Kafka Connect Source instance that will send `AVRO` messages over the topic named `my-topic`. Every message in the 165 | topic will have the following structure: 166 | 167 | [source,json] 168 | ---- 169 | {"name": , "timestamp": } 170 | ---- 171 | 172 | **Nb.** Please check the <> for a detailed guide about the supported configuration 173 | parameters 174 | 175 | === How the Source module pushes the data to the defined Kafka topic 176 | 177 | The Source module runs the query provided in the `neo4j.source.query` field, polling the database at the interval (in milliseconds) set in the 178 | `neo4j.streaming.poll.interval.msecs` field.
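To make the polling model concrete, here is a deliberately simplified Kotlin sketch of the idea. It is not the actual `Neo4jSourceService` implementation: the URI, credentials and the `publish` callback are stand-ins, and only the `$lastCheck` parameter mirrors the example query shown below.

[source,kotlin]
----
import org.neo4j.driver.AuthTokens
import org.neo4j.driver.GraphDatabase

// Simplified sketch: run the configured query with a "last check" watermark,
// publish each returned row, then wait for the configured poll interval.
// The real connector also tracks offsets and optionally attaches a schema.
fun pollLoop(query: String, pollIntervalMs: Long, publish: (Map<String, Any?>) -> Unit) {
    val driver = GraphDatabase.driver("bolt://localhost:7687", AuthTokens.basic("neo4j", "password"))
    driver.session().use { session ->
        var lastCheck = System.currentTimeMillis()
        while (true) {
            val checkPoint = System.currentTimeMillis()
            session.run(query, mapOf("lastCheck" to lastCheck))
                    .list()                                // one Record per row returned by the query
                    .forEach { record -> publish(record.asMap()) }
            lastCheck = checkPoint
            Thread.sleep(pollIntervalMs)
        }
    }
}
----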
179 | 180 | So given the JSON configuration we have that we'll perform: 181 | 182 | [source,cypher] 183 | ---- 184 | MATCH (ts:TestSource) WHERE ts.timestamp > $lastCheck RETURN ts.name AS name, ts.timestamp AS timestamp 185 | ---- 186 | 187 | every 5000 milliseconds by publishing events like: 188 | 189 | [source,json] 190 | ---- 191 | {"name":{"string":"John Doe"},"timestamp":{"long":1624551349362}} 192 | ---- 193 | 194 | In this case we use `neo4j.enforce.schema=true` and this means that we will attach a schema for each record, in case 195 | you want to stream pure simple JSON strings just use the relative serializer with `neo4j.enforce.schema=false` with the 196 | following output: 197 | 198 | [source,json] 199 | ---- 200 | {"name": "John Doe", "timestamp": 1624549598834} 201 | ---- -------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/sink/strategy/SourceIdIngestionStrategy.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import org.neo4j.caniuse.CanIUse.canIUse 4 | import org.neo4j.caniuse.Cypher 5 | import org.neo4j.caniuse.Neo4j 6 | import streams.events.* 7 | import streams.extensions.quote 8 | import streams.service.StreamsSinkEntity 9 | import streams.utils.IngestionUtils.getLabelsAsString 10 | import streams.utils.SchemaUtils 11 | import streams.utils.StreamsUtils 12 | 13 | data class SourceIdIngestionStrategyConfig(val labelName: String = "SourceEvent", val idName: String = "sourceId") { 14 | companion object { 15 | val DEFAULT = SourceIdIngestionStrategyConfig() 16 | } 17 | } 18 | 19 | class SourceIdIngestionStrategy( 20 | neo4j: Neo4j, 21 | config: SourceIdIngestionStrategyConfig = SourceIdIngestionStrategyConfig() 22 | ) : IngestionStrategy { 23 | private val cypherPrefix = if (canIUse(Cypher.explicitCypher5Selection()).withNeo4j(neo4j)) "CYPHER 5 " else "" 24 | 25 | private val quotedLabelName = config.labelName.quote() 26 | private val quotedIdName = config.idName.quote() 27 | 28 | override fun mergeRelationshipEvents(events: Collection): List { 29 | return events 30 | .mapNotNull { SchemaUtils.toStreamsTransactionEvent(it) { it.payload.type == EntityType.relationship && it.meta.operation != OperationType.deleted } } 31 | .map { data -> 32 | val payload = data.payload as RelationshipPayload 33 | val changeEvt = when (data.meta.operation) { 34 | OperationType.deleted -> { 35 | data.payload.before as RelationshipChange 36 | } 37 | 38 | else -> data.payload.after as RelationshipChange 39 | } 40 | payload.label to mapOf( 41 | "id" to payload.id, 42 | "start" to payload.start.id, "end" to payload.end.id, "properties" to changeEvt.properties 43 | ) 44 | } 45 | .groupBy({ it.first }, { it.second }) 46 | .map { 47 | val query = """ 48 | |${cypherPrefix}${StreamsUtils.UNWIND} 49 | |MERGE (start:$quotedLabelName{$quotedIdName: event.start}) 50 | |MERGE (end:$quotedLabelName{$quotedIdName: event.end}) 51 | |MERGE (start)-[r:${it.key.quote()}{$quotedIdName: event.id}]->(end) 52 | |SET r = event.properties 53 | |SET r.$quotedIdName = event.id 54 | """.trimMargin() 55 | QueryEvents(query, it.value) 56 | } 57 | } 58 | 59 | override fun deleteRelationshipEvents(events: Collection): List { 60 | return events 61 | .mapNotNull { SchemaUtils.toStreamsTransactionEvent(it) { it.payload.type == EntityType.relationship && it.meta.operation == OperationType.deleted } } 62 | .map { data -> 63 | val payload = data.payload as RelationshipPayload 64 | 
payload.label to mapOf("id" to data.payload.id) 65 | } 66 | .groupBy({ it.first }, { it.second }) 67 | .map { 68 | val query = "${cypherPrefix}${StreamsUtils.UNWIND} MATCH ()-[r:${it.key.quote()}{$quotedIdName: event.id}]-() DELETE r" 69 | QueryEvents(query, it.value) 70 | } 71 | } 72 | 73 | override fun deleteNodeEvents(events: Collection): List { 74 | val data = events 75 | .mapNotNull { SchemaUtils.toStreamsTransactionEvent(it) { it.payload.type == EntityType.node && it.meta.operation == OperationType.deleted } } 76 | .map { mapOf("id" to it.payload.id) } 77 | if (data.isNullOrEmpty()) { 78 | return emptyList() 79 | } 80 | val query = "${cypherPrefix}${StreamsUtils.UNWIND} MATCH (n:$quotedLabelName{$quotedIdName: event.id}) DETACH DELETE n" 81 | return listOf(QueryEvents(query, data)) 82 | } 83 | 84 | override fun mergeNodeEvents(events: Collection): List { 85 | return events 86 | .mapNotNull { SchemaUtils.toStreamsTransactionEvent(it) { it.payload.type == EntityType.node && it.meta.operation != OperationType.deleted } } 87 | .map { data -> 88 | val changeEvtAfter = data.payload.after as NodeChange 89 | val labelsAfter = changeEvtAfter.labels ?: emptyList() 90 | val labelsBefore = if (data.payload.before != null) { 91 | val changeEvtBefore = data.payload.before as NodeChange 92 | changeEvtBefore.labels ?: emptyList() 93 | } else { 94 | emptyList() 95 | } 96 | val labelsToAdd = (labelsAfter - labelsBefore) 97 | .toSet() 98 | val labelsToDelete = (labelsBefore - labelsAfter) 99 | .toSet() 100 | NodeMergeMetadata( 101 | labelsToAdd = labelsToAdd, 102 | labelsToDelete = labelsToDelete 103 | ) to mapOf("id" to data.payload.id, "properties" to changeEvtAfter.properties) 104 | } 105 | .groupBy({ it.first }, { it.second }) 106 | .map { 107 | var query = """ 108 | |${cypherPrefix}${StreamsUtils.UNWIND} 109 | |MERGE (n:$quotedLabelName{$quotedIdName: event.id}) 110 | |SET n = event.properties 111 | |SET n.$quotedIdName = event.id 112 | """.trimMargin() 113 | if (it.key.labelsToDelete.isNotEmpty()) { 114 | query += "\nREMOVE n${getLabelsAsString(it.key.labelsToDelete)}" 115 | } 116 | if (it.key.labelsToAdd.isNotEmpty()) { 117 | query += "\nSET n${getLabelsAsString(it.key.labelsToAdd)}" 118 | } 119 | QueryEvents(query, it.value) 120 | } 121 | } 122 | 123 | } -------------------------------------------------------------------------------- /common/src/test/kotlin/streams/utils/SchemaUtilsTest.kt: -------------------------------------------------------------------------------- 1 | package streams.utils 2 | 3 | import org.junit.Test 4 | import streams.events.Constraint 5 | import streams.events.RelKeyStrategy 6 | import streams.events.StreamsConstraintType 7 | import streams.utils.SchemaUtils.getNodeKeys 8 | import kotlin.test.assertEquals 9 | 10 | class SchemaUtilsTest { 11 | 12 | @Test 13 | fun `getNodeKeys should select the constraint with lowest properties`() { 14 | val props = mapOf("LabelA" to setOf("foo", "bar"), 15 | "LabelB" to setOf("foo", "bar", "fooBar"), 16 | "LabelC" to setOf("foo")) 17 | val constraints = props.map { 18 | Constraint(label = it.key, properties = it.value, type = StreamsConstraintType.UNIQUE) 19 | } 20 | val keys = getNodeKeys(props.keys.toList(), setOf("prop", "foo", "bar"), constraints) 21 | assertEquals(setOf("foo"), keys) 22 | } 23 | 24 | @Test 25 | fun `getNodeKeys should return the key sorted properly`() { 26 | // the method getNodeKeys should select (with multiple labels) the constraint with lowest properties 27 | // with the same size, we take the first label in 
alphabetical order 28 | // finally, with same label name, we take the first sorted properties list alphabetically 29 | 30 | val pair1 = "LabelX" to setOf("foo", "aaa") 31 | val pair2 = "LabelB" to setOf("bar", "foo") 32 | val pair3 = "LabelC" to setOf("baz", "bar") 33 | val pair4 = "LabelB" to setOf("bar", "bez") 34 | val pair5 = "LabelA" to setOf("bar", "baa", "xcv") 35 | val pair6 = "LabelC" to setOf("aaa", "baa", "xcz") 36 | val pair7 = "LabelA" to setOf("foo", "aac") 37 | val pair8 = "LabelA" to setOf("foo", "aab") 38 | val props = listOf(pair1, pair2, pair3, pair4, pair5, pair6, pair7, pair8) 39 | 40 | // we shuffle the constraints to ensure that the result doesn't depend from the ordering 41 | val constraints = props.map { 42 | Constraint(label = it.first, properties = it.second, type = StreamsConstraintType.UNIQUE) 43 | }.shuffled() 44 | 45 | val propertyKeys = setOf("prop", "prop2", "foo", "bar", "baz", "bez", "aaa", "aab", "baa", "aac", "xcz", "xcv") 46 | val actualKeys = getNodeKeys(props.map { it.first }, propertyKeys, constraints) 47 | val expectedKeys = setOf("aab", "foo") 48 | 49 | assertEquals(expectedKeys, actualKeys) 50 | } 51 | 52 | @Test 53 | fun `getNodeKeys should return all keys when RelKeyStrategy is ALL`() { 54 | 55 | val pair1 = "LabelX" to setOf("foo", "aaa") 56 | val pair2 = "LabelB" to setOf("bar", "foo") 57 | val pair3 = "LabelC" to setOf("baz", "bar") 58 | val pair4 = "LabelB" to setOf("bar", "bez") 59 | val pair5 = "LabelA" to setOf("bar", "baa", "xcv") 60 | val pair6 = "LabelC" to setOf("aaa", "baa", "xcz") 61 | val pair7 = "LabelA" to setOf("foo", "aac") 62 | val pair8 = "LabelA" to setOf("foo", "aab") 63 | val props = listOf(pair1, pair2, pair3, pair4, pair5, pair6, pair7, pair8) 64 | 65 | // we shuffle the constraints to ensure that the result doesn't depend from the ordering 66 | val constraints = props.map { 67 | Constraint(label = it.first, properties = it.second, type = StreamsConstraintType.UNIQUE) 68 | }.shuffled() 69 | 70 | val propertyKeys = setOf("prop", "prop2", "foo", "bar", "baz", "bez", "aaa", "aab", "baa", "aac", "xcz", "xcv") 71 | val actualKeys = getNodeKeys(props.map { it.first }, propertyKeys, constraints, RelKeyStrategy.ALL) 72 | val expectedKeys = setOf("aaa", "aab", "aac", "baa", "bar", "baz", "bez", "foo", "xcv", "xcz") 73 | 74 | assertEquals(expectedKeys, actualKeys) 75 | } 76 | 77 | @Test 78 | fun `getNodeKeys should return the key sorted properly (with one label)`() { 79 | // the method getNodeKeys should select the constraint with lowest properties 80 | // with the same size, we take the first sorted properties list alphabetically 81 | 82 | val pair1 = "LabelA" to setOf("foo", "bar") 83 | val pair2 = "LabelA" to setOf("bar", "foo") 84 | val pair3 = "LabelA" to setOf("baz", "bar") 85 | val pair4 = "LabelA" to setOf("bar", "bez") 86 | val props = listOf(pair1, pair2, pair3, pair4) 87 | 88 | // we shuffle the constraints to ensure that the result doesn't depend from the ordering 89 | val constraints = props.map { 90 | Constraint(label = it.first, properties = it.second, type = StreamsConstraintType.UNIQUE) 91 | }.shuffled() 92 | 93 | val propertyKeys = setOf("prop", "foo", "bar", "baz", "bez") 94 | val actualKeys = getNodeKeys(listOf("LabelA"), propertyKeys, constraints) 95 | val expectedKeys = setOf("bar", "baz") 96 | 97 | assertEquals(expectedKeys, actualKeys) 98 | } 99 | @Test 100 | fun `getNodeKeys should return all keys when RelKeyStrategy is ALL (with one label)`() { 101 | 102 | val pair1 = "LabelA" to setOf("foo", "bar") 103 
| val pair2 = "LabelA" to setOf("bar", "foo") 104 | val pair3 = "LabelA" to setOf("baz", "bar") 105 | val pair4 = "LabelA" to setOf("bar", "bez") 106 | val props = listOf(pair1, pair2, pair3, pair4) 107 | 108 | // we shuffle the constraints to ensure that the result doesn't depend from the ordering 109 | val constraints = props.map { 110 | Constraint(label = it.first, properties = it.second, type = StreamsConstraintType.UNIQUE) 111 | }.shuffled() 112 | 113 | val propertyKeys = setOf("prop", "foo", "bar", "baz", "bez") 114 | val actualKeys = getNodeKeys(listOf("LabelA"), propertyKeys, constraints, RelKeyStrategy.ALL) 115 | val expectedKeys = setOf("bar", "baz", "bez", "foo") 116 | 117 | assertEquals(expectedKeys, actualKeys) 118 | } 119 | 120 | @Test 121 | fun `getNodeKeys should return empty in case it didn't match anything`() { 122 | val props = mapOf("LabelA" to setOf("foo", "bar"), 123 | "LabelB" to setOf("foo", "bar", "fooBar"), 124 | "LabelC" to setOf("foo")) 125 | val constraints = props.map { 126 | Constraint(label = it.key, properties = it.value, type = StreamsConstraintType.UNIQUE) 127 | } 128 | val keys = getNodeKeys(props.keys.toList(), setOf("prop", "key"), constraints) 129 | assertEquals(emptySet(), keys) 130 | } 131 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/test/kotlin/streams/kafka/connect/sink/Neo4jValueConverterNestedStructTest.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import org.apache.kafka.connect.data.SchemaBuilder 4 | import org.apache.kafka.connect.data.Struct 5 | import org.junit.Test 6 | import org.neo4j.driver.Value 7 | import org.neo4j.driver.Values 8 | import streams.kafka.connect.sink.converters.Neo4jValueConverter 9 | import streams.utils.JSONUtils 10 | import java.time.Instant 11 | import java.time.ZonedDateTime 12 | import java.util.* 13 | import kotlin.test.assertEquals 14 | 15 | class Neo4jValueConverterNestedStructTest { 16 | 17 | @Test 18 | fun `should convert nested map into map of neo4j values`() { 19 | // given 20 | val body = JSONUtils.readValue>(data).mapValues(::convertDate) 21 | 22 | // when 23 | val result = Neo4jValueConverter().convert(body) as Map<*, *> 24 | 25 | // then 26 | val expected = getExpectedMap() 27 | assertEquals(expected, result) 28 | } 29 | 30 | @Test 31 | fun `should convert nested struct into map of neo4j values`() { 32 | 33 | val body = getTreeStruct() 34 | 35 | // when 36 | val result = Neo4jValueConverter().convert(body) as Map<*, *> 37 | 38 | // then 39 | val expected = getExpectedMap() 40 | assertEquals(expected, result) 41 | } 42 | 43 | companion object { 44 | 45 | private val PREF_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.email.Preference") 46 | .field("preferenceType", SchemaBuilder.string()) 47 | .field("endEffectiveDate", org.apache.kafka.connect.data.Timestamp.SCHEMA) 48 | .build() 49 | 50 | private val EMAIL_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.email.Email") 51 | .field("email", SchemaBuilder.string()) 52 | .field("preferences", SchemaBuilder.array(PREF_SCHEMA)) 53 | .build() 54 | 55 | private val TN_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.email.Transaction") 56 | .field("tn", SchemaBuilder.string()) 57 | .field("preferences", SchemaBuilder.array(PREF_SCHEMA)) 58 | .build() 59 | 60 | private val EVENT_SCHEMA = SchemaBuilder.struct().name("org.neo4j.example.email.Event") 61 | .field("eventId", 
SchemaBuilder.string()) 62 | .field("eventTimestamp", org.apache.kafka.connect.data.Timestamp.SCHEMA) 63 | .field("emails", SchemaBuilder.array(EMAIL_SCHEMA).optional()) 64 | .field("tns", SchemaBuilder.array(TN_SCHEMA).optional()) 65 | .build() 66 | 67 | fun getTreeStruct(): Struct? { 68 | val source = JSONUtils.readValue>(data).mapValues(::convertDate) 69 | 70 | val emails = source["emails"] as List> 71 | val email = Struct(EMAIL_SCHEMA) 72 | .put("email",emails[0]["email"]) 73 | .put("preferences", 74 | (emails[0]["preferences"] as List>).map { Struct(PREF_SCHEMA).put("preferenceType", it["preferenceType"]).put("endEffectiveDate",it["endEffectiveDate"]) }) 75 | 76 | val emailList = listOf(email) 77 | val tnsList = 78 | (source["tns"] as List>).map { 79 | Struct(TN_SCHEMA).put("tn",it["tn"]) 80 | .put("preferences", (it["preferences"] as List>).map{ Struct(PREF_SCHEMA).put("preferenceType", it["preferenceType"]).put("endEffectiveDate",it["endEffectiveDate"]) }) } 81 | 82 | return Struct(EVENT_SCHEMA) 83 | .put("eventId", source["eventId"]) 84 | .put("eventTimestamp", source["eventTimestamp"]) 85 | .put("emails", emailList) 86 | .put("tns", tnsList) 87 | } 88 | 89 | fun getExpectedMap(): Map { 90 | return JSONUtils.readValue>(data).mapValues(::convertDateNew) 91 | } 92 | 93 | fun convertDate(it: Map.Entry) : Any? = 94 | when { 95 | it.value is Map<*,*> -> (it.value as Map).mapValues(::convertDate) 96 | it.value is Collection<*> -> (it.value as Collection).map{ x-> convertDate(AbstractMap.SimpleEntry(it.key, x)) } 97 | it.key.endsWith("Date") -> Date.from(Instant.parse(it.value.toString())) 98 | it.key.endsWith("Timestamp") -> Date.from(Instant.parse(it.value.toString())) 99 | else -> it.value 100 | } 101 | fun convertDateNew(it: Map.Entry) : Any? = 102 | when { 103 | it.value is Map<*,*> -> (it.value as Map).mapValues(::convertDateNew) 104 | it.value is Collection<*> -> (it.value as Collection).map{ x-> convertDateNew(AbstractMap.SimpleEntry(it.key, x)) } 105 | it.key.endsWith("Date") -> ZonedDateTime.parse(it.value.toString()).toLocalDateTime() 106 | it.key.endsWith("Timestamp") -> ZonedDateTime.parse(it.value.toString()).toLocalDateTime() 107 | else -> it.value 108 | } 109 | 110 | val data : String = """ 111 | { 112 | "eventId": "d70f306a-71d2-48d9-aea3-87b3808b764b", 113 | "eventTimestamp": "2019-08-21T22:29:22.151Z", 114 | "emails": [ 115 | { 116 | "email": "century@gmail.com", 117 | "preferences": [ 118 | { 119 | "preferenceType": "repair_subscription", 120 | "endEffectiveDate": "2019-05-08T14:51:26.116Z" 121 | }, 122 | { 123 | "preferenceType": "ordering_subscription", 124 | "endEffectiveDate": "2019-05-08T14:51:26.116Z" 125 | }, 126 | { 127 | "preferenceType": "marketing_subscription", 128 | "endEffectiveDate": "2019-05-08T14:51:26.116Z" 129 | } 130 | ] 131 | } 132 | ], 133 | "tns": [ 134 | { 135 | "tn": "1122334455", 136 | "preferences": [ 137 | { 138 | "preferenceType": "billing_subscription", 139 | "endEffectiveDate": "2019-10-22T14:51:26.116Z" 140 | }, 141 | { 142 | "preferenceType": "repair_subscription", 143 | "endEffectiveDate": "2019-10-22T14:51:26.116Z" 144 | }, 145 | { 146 | "preferenceType": "sms", 147 | "endEffectiveDate": "2019-10-22T14:51:26.116Z" 148 | } 149 | ] 150 | }, 151 | { 152 | "tn": "5544332211", 153 | "preferences": [ 154 | { 155 | "preferenceType": "acct_lookup", 156 | "endEffectiveDate": "2019-10-22T14:51:26.116Z" 157 | } 158 | ] 159 | } 160 | ] 161 | } 162 | """.trimIndent() 163 | 164 | } 165 | 166 | } 167 | 168 | 
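The nested-struct test above covers the tricky cases (arrays of structs, temporal fields); for orientation, a minimal flat usage of the same converter is sketched below. The field names and values are invented for illustration and are not part of the test suite.

[source,kotlin]
----
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.data.Struct
import streams.kafka.connect.sink.converters.Neo4jValueConverter

// Convert a flat Kafka Connect Struct into a map of Neo4j driver values.
fun flatConversionExample(): Map<*, *> {
    val schema = SchemaBuilder.struct().name("org.neo4j.example.Person")
            .field("name", SchemaBuilder.string())
            .field("age", SchemaBuilder.int64())
            .build()
    val struct = Struct(schema)
            .put("name", "Jane Doe")
            .put("age", 42L)
    return Neo4jValueConverter().convert(struct) as Map<*, *>
}
----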
-------------------------------------------------------------------------------- /common/src/main/kotlin/streams/service/sink/strategy/RelationshipPatternIngestionStrategy.kt: -------------------------------------------------------------------------------- 1 | package streams.service.sink.strategy 2 | 3 | import org.neo4j.caniuse.CanIUse.canIUse 4 | import org.neo4j.caniuse.Cypher 5 | import org.neo4j.caniuse.Neo4j 6 | import streams.extensions.flatten 7 | import streams.utils.JSONUtils 8 | import streams.service.StreamsSinkEntity 9 | import streams.utils.IngestionUtils.containsProp 10 | import streams.utils.IngestionUtils.getLabelsAsString 11 | import streams.utils.IngestionUtils.getNodeMergeKeys 12 | import streams.utils.StreamsUtils 13 | 14 | class RelationshipPatternIngestionStrategy(neo4j: Neo4j, private val relationshipPatternConfiguration: RelationshipPatternConfiguration): IngestionStrategy { 15 | private val cypherPrefix = if (canIUse(Cypher.explicitCypher5Selection()).withNeo4j(neo4j)) "CYPHER 5 " else "" 16 | 17 | private val mergeRelationshipTemplate: String = """ 18 | |${cypherPrefix}${StreamsUtils.UNWIND} 19 | |MERGE (start${getLabelsAsString(relationshipPatternConfiguration.start.labels)}{${ 20 | getNodeMergeKeys("start.keys", relationshipPatternConfiguration.start.keys) 21 | }}) 22 | |SET start ${if (relationshipPatternConfiguration.mergeProperties) "+" else ""}= event.start.properties 23 | |SET start += event.start.keys 24 | |MERGE (end${getLabelsAsString(relationshipPatternConfiguration.end.labels)}{${ 25 | getNodeMergeKeys("end.keys", relationshipPatternConfiguration.end.keys) 26 | }}) 27 | |SET end ${if (relationshipPatternConfiguration.mergeProperties) "+" else ""}= event.end.properties 28 | |SET end += event.end.keys 29 | |MERGE (start)-[r:${relationshipPatternConfiguration.relType}]->(end) 30 | |SET r ${if (relationshipPatternConfiguration.mergeProperties) "+" else ""}= event.properties 31 | """.trimMargin() 32 | 33 | private val deleteRelationshipTemplate: String = """ 34 | |${cypherPrefix}${StreamsUtils.UNWIND} 35 | |MATCH (start${getLabelsAsString(relationshipPatternConfiguration.start.labels)}{${ 36 | getNodeMergeKeys("start.keys", relationshipPatternConfiguration.start.keys) 37 | }}) 38 | |MATCH (end${getLabelsAsString(relationshipPatternConfiguration.end.labels)}{${ 39 | getNodeMergeKeys("end.keys", relationshipPatternConfiguration.end.keys) 40 | }}) 41 | |MATCH (start)-[r:${relationshipPatternConfiguration.relType}]->(end) 42 | |DELETE r 43 | """.trimMargin() 44 | 45 | override fun mergeNodeEvents(events: Collection): List { 46 | return emptyList() 47 | } 48 | 49 | override fun deleteNodeEvents(events: Collection): List { 50 | return emptyList() 51 | } 52 | 53 | override fun mergeRelationshipEvents(events: Collection): List { 54 | val data = events 55 | .mapNotNull { if (it.value != null) JSONUtils.asMap(it.value) else null } 56 | .mapNotNull { props -> 57 | val properties = props.flatten() 58 | val containsKeys = relationshipPatternConfiguration.start.keys.all { properties.containsKey(it) } 59 | && relationshipPatternConfiguration.end.keys.all { properties.containsKey(it) } 60 | if (containsKeys) { 61 | val filteredProperties = when (relationshipPatternConfiguration.type) { 62 | PatternConfigurationType.ALL -> properties.filterKeys { isRelationshipProperty(it) } 63 | PatternConfigurationType.EXCLUDE -> properties.filterKeys { 64 | val containsProp = containsProp(it, relationshipPatternConfiguration.properties) 65 | isRelationshipProperty(it) && !containsProp 
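// EXCLUDE mode: keep a property only when it is a genuine relationship property
// (i.e. not a start/end node key or node property) and it is not listed in the
// configured exclusion set.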
66 | } 67 | PatternConfigurationType.INCLUDE -> properties.filterKeys { 68 | val containsProp = containsProp(it, relationshipPatternConfiguration.properties) 69 | isRelationshipProperty(it) && containsProp 70 | } 71 | } 72 | val startConf = relationshipPatternConfiguration.start 73 | val endConf = relationshipPatternConfiguration.end 74 | 75 | val start = NodePatternIngestionStrategy.toData(startConf, props) 76 | val end = NodePatternIngestionStrategy.toData(endConf, props) 77 | 78 | mapOf("start" to start, "end" to end, "properties" to filteredProperties) 79 | } else { 80 | null 81 | } 82 | } 83 | return if (data.isEmpty()) { 84 | emptyList() 85 | } else { 86 | listOf(QueryEvents(mergeRelationshipTemplate, data)) 87 | } 88 | } 89 | 90 | private fun isRelationshipProperty(propertyName: String): Boolean { 91 | return (!relationshipPatternConfiguration.start.keys.contains(propertyName) 92 | && !relationshipPatternConfiguration.start.properties.contains(propertyName) 93 | && !relationshipPatternConfiguration.end.keys.contains(propertyName) 94 | && !relationshipPatternConfiguration.end.properties.contains(propertyName)) 95 | } 96 | 97 | override fun deleteRelationshipEvents(events: Collection): List { 98 | val data = events 99 | .filter { it.value == null && it.key != null } 100 | .mapNotNull { if (it.key != null) JSONUtils.asMap(it.key) else null } 101 | .mapNotNull { props -> 102 | val properties = props.flatten() 103 | val containsKeys = relationshipPatternConfiguration.start.keys.all { properties.containsKey(it) } 104 | && relationshipPatternConfiguration.end.keys.all { properties.containsKey(it) } 105 | if (containsKeys) { 106 | val startConf = relationshipPatternConfiguration.start 107 | val endConf = relationshipPatternConfiguration.end 108 | 109 | val start = NodePatternIngestionStrategy.toData(startConf, props) 110 | val end = NodePatternIngestionStrategy.toData(endConf, props) 111 | 112 | mapOf("start" to start, "end" to end) 113 | } else { 114 | null 115 | } 116 | } 117 | return if (data.isEmpty()) { 118 | emptyList() 119 | } else { 120 | listOf(QueryEvents(deleteRelationshipTemplate, data)) 121 | } 122 | } 123 | 124 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/utils/ConnectExtensionFunctions.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.utils 2 | 3 | import org.apache.kafka.connect.data.Schema 4 | import org.apache.kafka.connect.data.SchemaBuilder 5 | import org.apache.kafka.connect.data.Struct 6 | import org.apache.kafka.connect.sink.SinkRecord 7 | import org.neo4j.driver.Record 8 | import org.neo4j.driver.types.Node 9 | import org.neo4j.driver.types.Point 10 | import org.neo4j.driver.types.Relationship 11 | import streams.extensions.asStreamsMap 12 | import streams.kafka.connect.sink.converters.Neo4jValueConverter 13 | import streams.utils.JSONUtils 14 | import streams.service.StreamsSinkEntity 15 | import java.time.temporal.TemporalAccessor 16 | 17 | fun SinkRecord.toStreamsSinkEntity(): StreamsSinkEntity = StreamsSinkEntity( 18 | convertData(this.key(), true), 19 | convertData(this.value()) 20 | ) 21 | 22 | private val converter = Neo4jValueConverter() 23 | 24 | private fun convertData(data: Any?, stringWhenFailure: Boolean = false) = when (data) { 25 | is Struct -> converter.convert(data) 26 | null -> null 27 | else -> JSONUtils.readValue(data, stringWhenFailure) 28 | } 29 | 30 | fun Record.asJsonString(): 
String = JSONUtils.writeValueAsString(this.asMap()) 31 | 32 | fun Record.schema(asMap: Map = this.asMap()): Schema { 33 | val structBuilder = SchemaBuilder.struct() 34 | asMap.forEach { structBuilder.field(it.key, neo4jValueSchema(it.value)) } 35 | return structBuilder.build() 36 | } 37 | 38 | fun Record.asStruct(): Struct { 39 | val asMap = this.asMap() 40 | val schema = schema(asMap) 41 | val struct = Struct(schema) 42 | schema.fields().forEach { 43 | struct.put(it, neo4jToKafka(it.schema(), asMap[it.name()])) 44 | } 45 | return struct 46 | } 47 | 48 | 49 | private fun neo4jToKafka(schema: Schema, value: Any?): Any? = if (value == null) { 50 | null 51 | } else { 52 | when (schema.type()) { 53 | Schema.Type.ARRAY -> when (value) { 54 | is Collection<*> -> value.map { neo4jToKafka(schema.valueSchema(), it) } 55 | is Array<*> -> value.map { neo4jToKafka(schema.valueSchema(), it) }.toTypedArray() 56 | else -> throw IllegalArgumentException("For Schema.Type.ARRAY we support only Collection and Array") 57 | } 58 | 59 | Schema.Type.MAP -> when (value) { 60 | is Map<*, *> -> value.mapValues { neo4jToKafka(schema.valueSchema(), it.value) } 61 | else -> throw IllegalArgumentException("For Schema.Type.MAP we support only Map") 62 | } 63 | 64 | Schema.Type.STRUCT -> when (value) { 65 | is Map<*, *> -> { 66 | val struct = Struct(schema) 67 | schema.fields().forEach { 68 | val field = it 69 | neo4jToKafka(field.schema(), value[field.name()])?.let { 70 | struct.put(field, it) 71 | } 72 | } 73 | struct 74 | } 75 | 76 | is Point -> { 77 | val map = JSONUtils.readValue>(value) 78 | neo4jToKafka(schema, map) 79 | } 80 | 81 | is Node -> { 82 | val map = value.asStreamsMap() 83 | neo4jToKafka(schema, map) 84 | } 85 | 86 | is Relationship -> { 87 | val map = value.asStreamsMap() 88 | neo4jToKafka(schema, map) 89 | } 90 | 91 | else -> throw IllegalArgumentException("For Schema.Type.STRUCT we support only Map and Point") 92 | } 93 | 94 | else -> when (value) { 95 | is TemporalAccessor -> { 96 | val temporalValue = JSONUtils.readValue(value) 97 | neo4jToKafka(schema, temporalValue) 98 | } 99 | 100 | else -> when { 101 | Schema.Type.STRING == schema.type() && value !is String -> value.toString() 102 | else -> value 103 | } 104 | } 105 | } 106 | } 107 | 108 | private val NULL_SCHEMA = SchemaBuilder.struct().optional().build() 109 | 110 | private fun Any?.notNullOrEmpty(): Boolean = 111 | when (val value = this) { 112 | null -> false 113 | is Collection<*> -> value.isNotEmpty() && value.any { it.notNullOrEmpty() } 114 | is Array<*> -> value.isNotEmpty() && value.any { it.notNullOrEmpty() } 115 | is Map<*, *> -> value.isNotEmpty() && value.values.any { it.notNullOrEmpty() } 116 | else -> true 117 | } 118 | 119 | private fun neo4jValueSchema(value: Any?): Schema? 
= when (value) { 120 | null -> NULL_SCHEMA 121 | is Long -> Schema.OPTIONAL_INT64_SCHEMA 122 | is Double -> Schema.OPTIONAL_FLOAT64_SCHEMA 123 | is Boolean -> Schema.OPTIONAL_BOOLEAN_SCHEMA 124 | is Collection<*> -> { 125 | // locate the first element that is a good (not null, not empty and has not null or not empty contents) 126 | // candidate to derive the schema 127 | val first = value.firstOrNull { it.notNullOrEmpty() } 128 | val schema = neo4jValueSchema(first) 129 | SchemaBuilder.array(schema).optional().build() 130 | } 131 | 132 | is Array<*> -> { 133 | // locate the first element that is a good (not null, not empty and has not null or not empty contents) 134 | // candidate to derive the schema 135 | val first = value.firstOrNull { it.notNullOrEmpty() } 136 | val schema = neo4jValueSchema(first) 137 | SchemaBuilder.array(schema).optional().build() 138 | } 139 | 140 | is Map<*, *> -> { 141 | if (value.isEmpty()) { 142 | SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA) 143 | .optional() 144 | .build() 145 | } else { 146 | val valueTypes = value.values 147 | .filter { it.notNullOrEmpty() } 148 | .mapNotNull { it!!.javaClass.name } 149 | .toSet() 150 | if (valueTypes.size == 1) { 151 | neo4jValueSchema(value.values.first()) 152 | ?.let { 153 | SchemaBuilder.map(Schema.STRING_SCHEMA, it) 154 | .optional() 155 | .build() 156 | } 157 | } else { 158 | val structMap = SchemaBuilder 159 | .struct() 160 | .optional() 161 | value.forEach { entry -> 162 | neo4jValueSchema(entry.value)?.let { 163 | structMap.field(entry.key.toString(), it) 164 | } 165 | } 166 | if (structMap.fields().isEmpty()) NULL_SCHEMA 167 | else structMap.build() 168 | } 169 | } 170 | } 171 | 172 | is Point -> neo4jValueSchema(JSONUtils.readValue>(value)) 173 | is Node -> neo4jValueSchema(value.asStreamsMap()) 174 | is Relationship -> neo4jValueSchema(value.asStreamsMap()) 175 | else -> Schema.OPTIONAL_STRING_SCHEMA 176 | } 177 | -------------------------------------------------------------------------------- /test-support/src/main/kotlin/streams/Neo4jContainerExtension.kt: -------------------------------------------------------------------------------- 1 | package streams 2 | 3 | import org.neo4j.driver.AuthToken 4 | import org.neo4j.driver.AuthTokens 5 | import org.neo4j.driver.Driver 6 | import org.neo4j.driver.GraphDatabase 7 | import org.neo4j.driver.Session 8 | import org.neo4j.driver.SessionConfig 9 | import org.rnorth.ducttape.unreliables.Unreliables 10 | import org.slf4j.LoggerFactory 11 | import org.testcontainers.containers.KafkaContainer 12 | import org.testcontainers.containers.Neo4jContainer 13 | import org.testcontainers.containers.Network 14 | import org.testcontainers.containers.output.Slf4jLogConsumer 15 | import org.testcontainers.containers.wait.strategy.AbstractWaitStrategy 16 | import org.testcontainers.containers.wait.strategy.WaitAllStrategy 17 | import org.testcontainers.containers.wait.strategy.WaitStrategy 18 | import org.testcontainers.utility.MountableFile 19 | import streams.utils.StreamsUtils 20 | import java.io.File 21 | import java.time.Duration 22 | import java.util.concurrent.TimeUnit 23 | 24 | private class DatabasesWaitStrategy(private val auth: AuthToken) : AbstractWaitStrategy() { 25 | private var databases = arrayOf() 26 | 27 | fun forDatabases(vararg databases: String): DatabasesWaitStrategy { 28 | this.databases += databases 29 | return this 30 | } 31 | 32 | override fun waitUntilReady() { 33 | val boltUrl = 
"bolt://${waitStrategyTarget.containerIpAddress}:${waitStrategyTarget.getMappedPort(7687)}" 34 | val driver = GraphDatabase.driver(boltUrl, auth) 35 | val systemSession = driver.session(SessionConfig.forDatabase(StreamsUtils.SYSTEM_DATABASE_NAME)) 36 | systemSession.beginTransaction().use { tx -> 37 | databases.forEach { tx.run("CREATE DATABASE $it IF NOT EXISTS") } 38 | tx.commit() 39 | } 40 | Unreliables.retryUntilSuccess(startupTimeout.seconds.toInt(), TimeUnit.SECONDS) { 41 | rateLimiter.doWhenReady { 42 | if (databases.isNotEmpty()) { 43 | val databasesStatus = systemSession.beginTransaction() 44 | .use { tx -> 45 | tx.run("SHOW DATABASES").list() 46 | .map { it.get("name").asString() to it.get("currentStatus").asString() }.toMap() 47 | } 48 | val notOnline = databasesStatus.filterValues { it != "online" } 49 | if (databasesStatus.size < databases.size || notOnline.isNotEmpty()) { 50 | throw RuntimeException("Cannot started because of the following databases: ${notOnline.keys}") 51 | } 52 | } 53 | } 54 | true 55 | } 56 | systemSession.close() 57 | driver.close() 58 | } 59 | 60 | } 61 | 62 | class Neo4jContainerExtension(dockerImage: String) : Neo4jContainer(dockerImage) { 63 | constructor() : this(System.getenv("NEO4J_IMAGE") ?: "neo4j:5-enterprise") 64 | 65 | private val logger = LoggerFactory.getLogger(Neo4jContainerExtension::class.java) 66 | var driver: Driver? = null 67 | var session: Session? = null 68 | 69 | private var cypher: String? = null 70 | 71 | private var withDriver = true 72 | private var withLogger = false 73 | private var withStreamsPlugin = false 74 | private var forcePluginRebuild = true 75 | 76 | private var databases = arrayOf() 77 | 78 | private val waitStrategies = mutableListOf() 79 | 80 | fun withWaitStrategy(waitStrategy: WaitStrategy): Neo4jContainerExtension { 81 | this.waitStrategies += waitStrategy 82 | return this 83 | } 84 | 85 | 86 | fun withFixture(cypher: String): Neo4jContainerExtension { 87 | this.cypher = cypher 88 | return this 89 | } 90 | 91 | fun withoutDriver(): Neo4jContainerExtension { 92 | this.withDriver = false 93 | return this 94 | } 95 | 96 | fun withStreamsPlugin(): Neo4jContainerExtension { 97 | this.withStreamsPlugin = true 98 | return this 99 | } 100 | 101 | fun withoutForcePluginRebuild(): Neo4jContainerExtension { 102 | this.forcePluginRebuild = false 103 | return this 104 | } 105 | 106 | fun withKafka(kafka: KafkaContainer): Neo4jContainerExtension { 107 | return withKafka(kafka.network!!, kafka.networkAliases.map { "$it:9092" }.joinToString(",")) 108 | } 109 | 110 | fun withKafka(network: Network, bootstrapServers: String): Neo4jContainerExtension { 111 | withNetwork(network) 112 | withNeo4jConfig("kafka.bootstrap.servers", bootstrapServers) 113 | return this 114 | } 115 | 116 | fun withDatabases(vararg databases: String): Neo4jContainerExtension { 117 | this.databases += databases 118 | return this 119 | } 120 | 121 | private fun createAuth(): AuthToken { 122 | return if (!adminPassword.isNullOrBlank()) AuthTokens.basic("neo4j", adminPassword) else AuthTokens.none(); 123 | } 124 | 125 | override fun start() { 126 | withNeo4jConfig("dbms.security.auth_enabled", "false") 127 | if (databases.isNotEmpty()) { 128 | withWaitStrategy( 129 | DatabasesWaitStrategy(createAuth()) 130 | .forDatabases(*databases) 131 | .withStartupTimeout(Duration.ofMinutes(2)) 132 | ) 133 | } 134 | if (waitStrategies.isNotEmpty()) { 135 | val waitAllStrategy = waitStrategy as WaitAllStrategy 136 | waitStrategies.reversed() 137 | .forEach { waitStrategy 
-> waitAllStrategy.withStrategy(waitStrategy) } 138 | } 139 | if (withLogger) { 140 | withLogConsumer(Slf4jLogConsumer(logger)) 141 | } 142 | addEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes") 143 | if (withStreamsPlugin) { 144 | mountStreamsPlugin() 145 | } 146 | super.start() 147 | if (withDriver) { 148 | createDriver() 149 | } 150 | } 151 | 152 | private fun createDriver() { 153 | driver = GraphDatabase.driver(boltUrl, createAuth()) 154 | session = driver!!.session() 155 | cypher?.split(";") 156 | ?.forEach { query -> session!!.beginTransaction().use { it.run(query) } } 157 | } 158 | 159 | private fun mountStreamsPlugin() { 160 | var distrFile = findDistrFile() 161 | if (forcePluginRebuild || distrFile == null) { 162 | MavenUtils.mvnw("../", if (withLogger) logger else null) 163 | } 164 | distrFile = findDistrFile()!! 165 | this.withPlugins(MountableFile.forHostPath(distrFile.path)) 166 | } 167 | 168 | private fun findDistrFile(): File? { 169 | try { 170 | return File("../target/containerPlugins").listFiles() 171 | .filter { it.extension == "jar" } 172 | .firstOrNull() 173 | } catch (e: Exception) { 174 | return null 175 | } 176 | } 177 | 178 | override fun stop() { 179 | session?.close() 180 | driver?.close() 181 | super.stop() 182 | if (withStreamsPlugin && forcePluginRebuild) { 183 | findDistrFile()!!.delete() 184 | } 185 | } 186 | 187 | fun withLogging(): Neo4jContainerExtension { 188 | this.withLogger = true 189 | return this 190 | } 191 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/sink/Neo4jSinkConnectorConfig.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.sink 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder 4 | import org.apache.kafka.common.config.ConfigDef 5 | import org.apache.kafka.common.config.ConfigException 6 | import org.apache.kafka.connect.sink.SinkTask 7 | import org.neo4j.caniuse.Neo4j 8 | import org.neo4j.caniuse.Neo4jDetector 9 | import streams.kafka.connect.common.ConfigGroup 10 | import streams.kafka.connect.common.ConnectorType 11 | import streams.kafka.connect.common.Neo4jConnectorConfig 12 | import streams.kafka.connect.utils.PropertiesUtil 13 | import streams.service.TopicType 14 | import streams.service.TopicUtils 15 | import streams.service.Topics 16 | import streams.service.sink.strategy.SourceIdIngestionStrategyConfig 17 | 18 | enum class AuthenticationType { 19 | NONE, BASIC, KERBEROS 20 | } 21 | 22 | class Neo4jSinkConnectorConfig(originals: Map<*, *>) : Neo4jConnectorConfig(config(), originals, ConnectorType.SINK) { 23 | 24 | val parallelBatches: Boolean 25 | 26 | val topics: Topics by lazy { 27 | Topics.from(originals as Map, "streams.sink." to "neo4j.") 28 | } 29 | 30 | val strategyMap: Map by lazy { 31 | TopicUtils.toStrategyMap(topics, neo4j) 32 | } 33 | 34 | val kafkaBrokerProperties: Map 35 | 36 | val neo4j: Neo4j by lazy { Neo4jDetector.detect(this.driver) } 37 | 38 | init { 39 | parallelBatches = getBoolean(BATCH_PARALLELIZE) 40 | val kafkaPrefix = "kafka." 
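        // [Editor's note - illustrative comment, not part of the original file] Every connector property
        // that starts with the "kafka." prefix is collected below, the prefix is stripped, and the rest of
        // the key is kept as-is so it can be handed to broker-facing helpers (for example a Kafka-based
        // error service). A minimal sketch of the same transformation, assuming a hypothetical `rawProps` map:
        //   val rawProps = mapOf("kafka.acks" to "all", "neo4j.server.uri" to "bolt://localhost:7687")
        //   val forwarded = rawProps
        //       .filterKeys { it.startsWith("kafka.") }
        //       .mapKeys { it.key.removePrefix("kafka.") }   // => {acks=all}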
41 | kafkaBrokerProperties = (originals as Map) 42 | .filterKeys { it.startsWith(kafkaPrefix) } 43 | .mapKeys { it.key.substring(kafkaPrefix.length) } 44 | validateAllTopics(originals) 45 | } 46 | 47 | private fun validateAllTopics(originals: Map<*, *>) { 48 | TopicUtils.validate(this.topics) 49 | val topics = if (originals.containsKey(SinkTask.TOPICS_CONFIG)) { 50 | originals[SinkTask.TOPICS_CONFIG].toString() 51 | .split(",") 52 | .map { it.trim() } 53 | .sorted() 54 | } else { // TODO manage regexp 55 | emptyList() 56 | } 57 | val allTopics = this.topics 58 | .allTopics() 59 | .sorted() 60 | if (topics != allTopics) { 61 | throw ConfigException("There is a mismatch between topics defined into the property `${SinkTask.TOPICS_CONFIG}` ($topics) and configured topics ($allTopics)") 62 | } 63 | } 64 | 65 | companion object { 66 | 67 | const val BATCH_PARALLELIZE = "neo4j.batch.parallelize" 68 | 69 | const val TOPIC_CYPHER_PREFIX = "neo4j.topic.cypher." 70 | const val TOPIC_CDC_SOURCE_ID = "neo4j.topic.cdc.sourceId" 71 | const val TOPIC_CDC_SOURCE_ID_LABEL_NAME = "neo4j.topic.cdc.sourceId.labelName" 72 | const val TOPIC_CDC_SOURCE_ID_ID_NAME = "neo4j.topic.cdc.sourceId.idName" 73 | const val TOPIC_PATTERN_NODE_PREFIX = "neo4j.topic.pattern.node." 74 | const val TOPIC_PATTERN_RELATIONSHIP_PREFIX = "neo4j.topic.pattern.relationship." 75 | const val TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED = "neo4j.topic.pattern.merge.node.properties.enabled" 76 | const val TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED = 77 | "neo4j.topic.pattern.merge.relationship.properties.enabled" 78 | const val TOPIC_CDC_SCHEMA = "neo4j.topic.cdc.schema" 79 | const val TOPIC_CUD = "neo4j.topic.cud" 80 | 81 | 82 | const val DEFAULT_BATCH_PARALLELIZE = true 83 | const val DEFAULT_TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED = false 84 | const val DEFAULT_TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED = false 85 | 86 | 87 | private val sourceIdIngestionStrategyConfig = SourceIdIngestionStrategyConfig() 88 | 89 | fun config(): ConfigDef = Neo4jConnectorConfig.config() 90 | .define( 91 | ConfigKeyBuilder.of(TOPIC_CDC_SOURCE_ID, ConfigDef.Type.STRING) 92 | .documentation(PropertiesUtil.getProperty(TOPIC_CDC_SOURCE_ID)) 93 | .importance(ConfigDef.Importance.HIGH) 94 | .defaultValue("").group(ConfigGroup.TOPIC_CYPHER_MAPPING) 95 | .build() 96 | ) 97 | .define( 98 | ConfigKeyBuilder.of(TOPIC_CDC_SOURCE_ID_LABEL_NAME, ConfigDef.Type.STRING) 99 | .documentation(PropertiesUtil.getProperty(TOPIC_CDC_SOURCE_ID_LABEL_NAME)) 100 | .importance(ConfigDef.Importance.HIGH) 101 | .defaultValue(sourceIdIngestionStrategyConfig.labelName).group(ConfigGroup.TOPIC_CYPHER_MAPPING) 102 | .build() 103 | ) 104 | .define( 105 | ConfigKeyBuilder.of(TOPIC_CDC_SOURCE_ID_ID_NAME, ConfigDef.Type.STRING) 106 | .documentation(PropertiesUtil.getProperty(TOPIC_CDC_SOURCE_ID_ID_NAME)) 107 | .importance(ConfigDef.Importance.HIGH) 108 | .defaultValue(sourceIdIngestionStrategyConfig.idName).group(ConfigGroup.TOPIC_CYPHER_MAPPING) 109 | .build() 110 | ) 111 | .define( 112 | ConfigKeyBuilder.of(TOPIC_CDC_SCHEMA, ConfigDef.Type.STRING) 113 | .documentation(PropertiesUtil.getProperty(TOPIC_CDC_SCHEMA)).importance(ConfigDef.Importance.HIGH) 114 | .defaultValue("").group(ConfigGroup.TOPIC_CYPHER_MAPPING) 115 | .build() 116 | ) 117 | .define( 118 | ConfigKeyBuilder.of(BATCH_PARALLELIZE, ConfigDef.Type.BOOLEAN) 119 | .documentation(PropertiesUtil.getProperty(BATCH_PARALLELIZE)) 120 | .importance(ConfigDef.Importance.MEDIUM) 121 | 
.defaultValue(DEFAULT_BATCH_PARALLELIZE).group(ConfigGroup.BATCH) 122 | .build() 123 | ) 124 | .define( 125 | ConfigKeyBuilder.of(TOPIC_CUD, ConfigDef.Type.STRING) 126 | .documentation(PropertiesUtil.getProperty(TOPIC_CUD)).importance(ConfigDef.Importance.HIGH) 127 | .defaultValue("").group(ConfigGroup.TOPIC_CYPHER_MAPPING) 128 | .build() 129 | ) 130 | .define( 131 | ConfigKeyBuilder.of(TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED, ConfigDef.Type.BOOLEAN) 132 | .documentation(PropertiesUtil.getProperty(TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED)) 133 | .importance(ConfigDef.Importance.MEDIUM) 134 | .defaultValue(DEFAULT_TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED) 135 | .group(ConfigGroup.TOPIC_CYPHER_MAPPING) 136 | .build() 137 | ) 138 | .define( 139 | ConfigKeyBuilder.of(TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED, ConfigDef.Type.BOOLEAN) 140 | .documentation(PropertiesUtil.getProperty(TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED)) 141 | .importance(ConfigDef.Importance.MEDIUM) 142 | .defaultValue(DEFAULT_TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED) 143 | .group(ConfigGroup.TOPIC_CYPHER_MAPPING) 144 | .build() 145 | ) 146 | } 147 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/utils/Topics.kt: -------------------------------------------------------------------------------- 1 | package streams.service 2 | 3 | import org.neo4j.caniuse.Neo4j 4 | import streams.kafka.connect.sink.Neo4jSinkConnectorConfig 5 | import streams.service.sink.strategy.* 6 | import java.util.Locale 7 | import kotlin.reflect.jvm.javaType 8 | 9 | private fun TopicType.replaceKeyBy(replacePrefix: Pair) = if (replacePrefix.first.isNullOrBlank()) 10 | this.key 11 | else 12 | this.key.replace(replacePrefix.first, replacePrefix.second) 13 | 14 | data class Topics( 15 | val cypherTopics: Map = emptyMap(), 16 | val cdcSourceIdTopics: Pair, SourceIdIngestionStrategyConfig> = (emptySet() to SourceIdIngestionStrategyConfig()), 17 | val cdcSchemaTopics: Set = emptySet(), 18 | val cudTopics: Set = emptySet(), 19 | val nodePatternTopics: Map = emptyMap(), 20 | val relPatternTopics: Map = emptyMap(), 21 | val invalid: List = emptyList() 22 | ) { 23 | 24 | fun allTopics(): List = this.asMap() 25 | .map { 26 | when (it.key.group) { 27 | TopicTypeGroup.CDC, TopicTypeGroup.CUD -> if (it.key != TopicType.CDC_SOURCE_ID) { 28 | (it.value as Set).toList() 29 | } else { 30 | (it.value as Pair, SourceIdIngestionStrategyConfig>).first 31 | } 32 | 33 | else -> (it.value as Map).keys.toList() 34 | } 35 | } 36 | .flatten() 37 | 38 | fun asMap(): Map = mapOf( 39 | TopicType.CYPHER to cypherTopics, TopicType.CUD to cudTopics, 40 | TopicType.CDC_SCHEMA to cdcSchemaTopics, TopicType.CDC_SOURCE_ID to cdcSourceIdTopics, 41 | TopicType.PATTERN_NODE to nodePatternTopics, TopicType.PATTERN_RELATIONSHIP to relPatternTopics 42 | ) 43 | 44 | companion object { 45 | fun from( 46 | map: Map, 47 | replacePrefix: Pair = ("" to ""), 48 | dbName: String = "", 49 | invalidTopics: List = emptyList() 50 | ): Topics { 51 | val config = map 52 | .filterKeys { 53 | if (dbName.isNotBlank()) it.lowercase(Locale.ROOT).endsWith(".to.$dbName") else !it.contains(".to.") 54 | } 55 | .mapKeys { if (dbName.isNotBlank()) it.key.replace(".to.$dbName", "", true) else it.key } 56 | val cypherTopicPrefix = TopicType.CYPHER.replaceKeyBy(replacePrefix) 57 | val sourceIdKey = TopicType.CDC_SOURCE_ID.replaceKeyBy(replacePrefix) 58 | val schemaKey = 
TopicType.CDC_SCHEMA.replaceKeyBy(replacePrefix) 59 | val cudKey = TopicType.CUD.replaceKeyBy(replacePrefix) 60 | val nodePatterKey = TopicType.PATTERN_NODE.replaceKeyBy(replacePrefix) 61 | val relPatterKey = TopicType.PATTERN_RELATIONSHIP.replaceKeyBy(replacePrefix) 62 | val cypherTopics = TopicUtils.filterByPrefix(config, cypherTopicPrefix) 63 | val mergeNodeProperties = map[Neo4jSinkConnectorConfig.TOPIC_PATTERN_MERGE_NODE_PROPERTIES_ENABLED] 64 | .toString() 65 | .toBoolean() 66 | val mergeRelProperties = map[Neo4jSinkConnectorConfig.TOPIC_PATTERN_MERGE_RELATIONSHIP_PROPERTIES_ENABLED] 67 | .toString() 68 | .toBoolean() 69 | val nodePatternTopics = TopicUtils 70 | .filterByPrefix(config, nodePatterKey, invalidTopics) 71 | .mapValues { NodePatternConfiguration.parse(it.value, mergeNodeProperties) } 72 | val relPatternTopics = TopicUtils 73 | .filterByPrefix(config, relPatterKey, invalidTopics) 74 | .mapValues { RelationshipPatternConfiguration.parse(it.value, mergeNodeProperties, mergeRelProperties) } 75 | val cdcSourceIdTopics = TopicUtils.splitTopics(config[sourceIdKey] as? String, invalidTopics) 76 | val cdcSchemaTopics = TopicUtils.splitTopics(config[schemaKey] as? String, invalidTopics) 77 | val cudTopics = TopicUtils.splitTopics(config[cudKey] as? String, invalidTopics) 78 | val sourceIdStrategyConfig = SourceIdIngestionStrategyConfig( 79 | map.getOrDefault( 80 | Neo4jSinkConnectorConfig.TOPIC_CDC_SOURCE_ID_LABEL_NAME, 81 | SourceIdIngestionStrategyConfig.DEFAULT.labelName 82 | ).toString(), 83 | map.getOrDefault( 84 | Neo4jSinkConnectorConfig.TOPIC_CDC_SOURCE_ID_ID_NAME, 85 | SourceIdIngestionStrategyConfig.DEFAULT.idName 86 | ).toString() 87 | ) 88 | return Topics( 89 | cypherTopics, 90 | (cdcSourceIdTopics to sourceIdStrategyConfig), 91 | cdcSchemaTopics, 92 | cudTopics, 93 | nodePatternTopics, 94 | relPatternTopics 95 | ) 96 | } 97 | } 98 | } 99 | 100 | object TopicUtils { 101 | 102 | @JvmStatic 103 | val TOPIC_SEPARATOR = ";" 104 | 105 | fun filterByPrefix( 106 | config: Map<*, *>, 107 | prefix: String, 108 | invalidTopics: List = emptyList() 109 | ): Map { 110 | val fullPrefix = "$prefix." 111 | return config 112 | .filterKeys { it.toString().startsWith(fullPrefix) } 113 | .mapKeys { it.key.toString().replace(fullPrefix, "") } 114 | .filterKeys { !invalidTopics.contains(it) } 115 | .mapValues { it.value.toString() } 116 | } 117 | 118 | fun splitTopics(cdcMergeTopicsString: String?, invalidTopics: List = emptyList()): Set { 119 | return if (cdcMergeTopicsString.isNullOrBlank()) { 120 | emptySet() 121 | } else { 122 | cdcMergeTopicsString.split(TOPIC_SEPARATOR) 123 | .filter { !invalidTopics.contains(it) } 124 | .toSet() 125 | } 126 | } 127 | 128 | inline fun validate(topics: Topics) { 129 | val exceptionStringConstructor = T::class.constructors 130 | .first { it.parameters.size == 1 && it.parameters[0].type.javaType == String::class.java }!! 
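        // [Editor's note - illustrative comment, not part of the original file] Each topic may be bound to
        // at most one ingestion strategy. The check below counts how often every topic name occurs across
        // all strategy maps and raises the reflectively-built exception when a name appears more than once.
        // Hypothetical example: allTopics() == ["orders", "users", "orders"]
        //   groupBy({ it }, { 1 })        => {orders=[1, 1], users=[1]}
        //   filterValues { it.sum() > 1 } => {orders=[1, 1]}  -> "orders" is reported as cross defined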
131 | val crossDefinedTopics = topics.allTopics() 132 | .groupBy({ it }, { 1 }) 133 | .filterValues { it.sum() > 1 } 134 | .keys 135 | if (crossDefinedTopics.isNotEmpty()) { 136 | throw exceptionStringConstructor 137 | .call("The following topics are cross defined: $crossDefinedTopics") 138 | } 139 | } 140 | 141 | fun toStrategyMap(topics: Topics, neo4j: Neo4j): Map { 142 | return topics.asMap() 143 | .filterKeys { it != TopicType.CYPHER } 144 | .mapValues { (type, config) -> 145 | when (type) { 146 | TopicType.CDC_SOURCE_ID -> { 147 | val (topics, sourceIdStrategyConfig) = (config as Pair, SourceIdIngestionStrategyConfig>) 148 | SourceIdIngestionStrategy(neo4j, sourceIdStrategyConfig) 149 | } 150 | 151 | TopicType.CDC_SCHEMA -> SchemaIngestionStrategy(neo4j) 152 | TopicType.CUD -> CUDIngestionStrategy(neo4j) 153 | TopicType.PATTERN_NODE -> { 154 | val map = config as Map 155 | map.mapValues { NodePatternIngestionStrategy(neo4j, it.value) } 156 | } 157 | 158 | TopicType.PATTERN_RELATIONSHIP -> { 159 | val map = config as Map 160 | map.mapValues { RelationshipPatternIngestionStrategy(neo4j, it.value) } 161 | } 162 | 163 | else -> throw RuntimeException("Unsupported topic type $type") 164 | } 165 | } 166 | } 167 | } -------------------------------------------------------------------------------- /kafka-connect-neo4j/src/main/kotlin/streams/kafka/connect/source/Neo4jSourceService.kt: -------------------------------------------------------------------------------- 1 | package streams.kafka.connect.source 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import kotlinx.coroutines.CancellationException 5 | import kotlinx.coroutines.Dispatchers 6 | import kotlinx.coroutines.GlobalScope 7 | import kotlinx.coroutines.Job 8 | import kotlinx.coroutines.cancelAndJoin 9 | import kotlinx.coroutines.delay 10 | import kotlinx.coroutines.isActive 11 | import kotlinx.coroutines.launch 12 | import kotlinx.coroutines.runBlocking 13 | import org.apache.kafka.connect.errors.ConnectException 14 | import org.apache.kafka.connect.source.SourceRecord 15 | import org.apache.kafka.connect.storage.OffsetStorageReader 16 | import org.neo4j.driver.Record 17 | import org.neo4j.driver.Values 18 | import org.slf4j.Logger 19 | import org.slf4j.LoggerFactory 20 | import streams.kafka.connect.common.ConfigurationMigrator 21 | import streams.utils.StreamsUtils 22 | import java.util.concurrent.BlockingQueue 23 | import java.util.concurrent.LinkedBlockingQueue 24 | import java.util.concurrent.TimeUnit 25 | import java.util.concurrent.atomic.AtomicBoolean 26 | import java.util.concurrent.atomic.AtomicLong 27 | import java.util.concurrent.atomic.AtomicReference 28 | 29 | 30 | class Neo4jSourceService(private val config: Neo4jSourceConnectorConfig, offsetStorageReader: OffsetStorageReader): AutoCloseable { 31 | 32 | private val log: Logger = LoggerFactory.getLogger(Neo4jSourceService::class.java) 33 | 34 | private val queue: BlockingQueue = LinkedBlockingQueue() 35 | private val error: AtomicReference = AtomicReference(null) 36 | 37 | private val sourcePartition = config.sourcePartition() 38 | 39 | private val isClose = AtomicBoolean() 40 | 41 | private val lastCheck: AtomicLong by lazy { 42 | val offset = offsetStorageReader.offset(sourcePartition) ?: emptyMap() 43 | // if the user wants to recover from LAST_COMMITTED 44 | val startValue = if (config.streamingFrom == StreamingFrom.LAST_COMMITTED 45 | && offset["value"] != null && offset["property"] == config.streamingProperty) { 46 | log.info("Resuming offset $offset, 
the ${Neo4jSourceConnectorConfig.STREAMING_FROM} value is ignored") 47 | offset["value"] as Long 48 | } else { 49 | if (config.streamingFrom == StreamingFrom.LAST_COMMITTED) { 50 | log.info("You provided ${Neo4jSourceConnectorConfig.STREAMING_FROM}: ${config.streamingFrom} but no offset has been found, we'll start to consume from NOW") 51 | } else { 52 | log.info("No offset to resume, we'll the provided value of ${Neo4jSourceConnectorConfig.STREAMING_FROM}: ${config.streamingFrom}") 53 | } 54 | config.streamingFrom.value() 55 | } 56 | AtomicLong(startValue) 57 | } 58 | 59 | private val sessionConfig = config.createSessionConfig() 60 | private val transactionConfig = config.createTransactionConfig() 61 | 62 | private val pollInterval = config.pollInterval.toLong() 63 | private val isStreamingPropertyDefined = config.streamingProperty.isNotBlank() 64 | private val streamingProperty = config.streamingProperty.ifBlank { "undefined" } 65 | 66 | private val job: Job = GlobalScope.launch(Dispatchers.IO) { 67 | var lastCheckHadResult = false 68 | while (isActive) { 69 | try { 70 | // if the user doesn't set the streaming property we fallback to an 71 | // internal mechanism 72 | if (!isStreamingPropertyDefined) { 73 | // we update the lastCheck property only if the last loop round 74 | // returned results otherwise we stick to the old value 75 | if (lastCheckHadResult) { 76 | lastCheck.set(System.currentTimeMillis() - pollInterval) 77 | } 78 | } 79 | config.driver.session(sessionConfig).readTransaction({ tx -> 80 | val result = tx.run(config.query, mapOf("lastCheck" to lastCheck.get())) 81 | lastCheckHadResult = result.hasNext() 82 | result.forEach { record -> 83 | try { 84 | val sourceRecord = toSourceRecord(record) 85 | queue.put(sourceRecord) 86 | } catch (e: Exception) { 87 | setError(e) 88 | } 89 | } 90 | }, transactionConfig) 91 | delay(pollInterval) 92 | } catch (e: Exception) { 93 | setError(e) 94 | } 95 | } 96 | } 97 | 98 | private fun toSourceRecord(record: Record): SourceRecord { 99 | val thisValue = computeLastTimestamp(record) 100 | return SourceRecordBuilder() 101 | .withRecord(record) 102 | .withTopic(config.topic) 103 | .withSourcePartition(sourcePartition) 104 | .withStreamingProperty(streamingProperty) 105 | .withEnforceSchema(config.enforceSchema) 106 | .withTimestamp(thisValue) 107 | .build() 108 | } 109 | 110 | private fun computeLastTimestamp(record: Record) = try { 111 | if (isStreamingPropertyDefined) { 112 | val value = record.get(config.streamingProperty, Values.value(-1L)).asLong() 113 | lastCheck.getAndUpdate { oldValue -> 114 | if (oldValue >= value) { 115 | oldValue 116 | } else { 117 | value 118 | } 119 | } 120 | value 121 | } else { 122 | lastCheck.get() 123 | } 124 | } catch (e: Throwable) { 125 | lastCheck.get() 126 | } 127 | 128 | private fun checkError() { 129 | val fatalError = error.getAndSet(null) 130 | if (fatalError != null) { 131 | throw ConnectException(fatalError) 132 | } 133 | } 134 | 135 | fun poll(): List? { 136 | if (isClose.get()) { 137 | return null 138 | } 139 | checkError() 140 | // Block until at least one item is available or until the 141 | // courtesy timeout expires, giving the framework a chance 142 | // to pause the connector. 143 | val firstEvent = queue.poll(1, TimeUnit.SECONDS) 144 | if (firstEvent == null) { 145 | log.debug("Poll returns 0 results") 146 | return null // Looks weird, but caller expects it. 
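            // [Editor's note - explanatory comment, not part of the original file] Kafka Connect's
            // SourceTask.poll() contract allows a task to return null when no data is currently available;
            // the worker simply invokes poll() again. The bounded queue.poll(1, TimeUnit.SECONDS) above
            // keeps this loop from busy-spinning while still returning control to the framework often
            // enough for it to pause or stop the connector.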
147 | } 148 | 149 | val events = mutableListOf<SourceRecord>() 150 | return try { 151 | events.add(firstEvent) 152 | queue.drainTo(events, config.batchSize - 1) 153 | log.info("Poll returns {} result(s)", events.size) 154 | events 155 | } catch (e: Exception) { 156 | setError(e) 157 | null 158 | } 159 | } 160 | 161 | private fun setError(e: Exception) { 162 | if (e !is CancellationException) { 163 | if (error.compareAndSet(null, e)) { 164 | log.error("Error:", e) 165 | } 166 | } 167 | } 168 | 169 | override fun close() { 170 | isClose.set(true) 171 | runBlocking { job.cancelAndJoin() } 172 | config.close() 173 | 174 | val originalConfig = config.originals() as Map<String, Any> 175 | val migratedConfig = ConfigurationMigrator(originalConfig).migrateToV51().toMutableMap() 176 | 177 | log.debug("Defaulting v5.1 migrated configuration offset to last checked timestamp: {}", lastCheck) 178 | migratedConfig["neo4j.start-from"] = "USER_PROVIDED" 179 | migratedConfig["neo4j.start-from.value"] = lastCheck 180 | 181 | val mapper = ObjectMapper() 182 | val jsonConfig = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(migratedConfig) 183 | log.info( 184 | "The migrated settings for the 5.1 version of the Neo4j Source Connector '{}' are: `{}`", 185 | originalConfig["name"], 186 | jsonConfig 187 | ) 188 | 189 | log.info("Neo4j Source Service closed successfully") 190 | } 191 | } --------------------------------------------------------------------------------
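For orientation, the following is a minimal, illustrative sketch (not part of the repository) of how the Neo4jContainerExtension and a Testcontainers KafkaContainer shown above could be wired together in an integration test. The Kafka image tag, the extra database name, and the fixture Cypher are assumptions made for the example, and Docker plus the Testcontainers dependencies must be available at runtime.

// Illustrative sketch only; image tag, database name and fixture are placeholders.
import org.testcontainers.containers.KafkaContainer
import org.testcontainers.containers.Network
import org.testcontainers.utility.DockerImageName
import streams.Neo4jContainerExtension

fun main() {
    val network = Network.newNetwork()
    val kafka = KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:7.6.0"))
        .withNetwork(network)
    kafka.start()
    val neo4j = Neo4jContainerExtension()              // NEO4J_IMAGE env var or neo4j:5-enterprise
        .withKafka(kafka)                              // wires kafka.bootstrap.servers into neo4j.conf
        .withDatabases("products")                     // start() waits until "products" is online
        .withLogging()
        .withFixture("CREATE (:Product {sku: 'abc-123'})")
    neo4j.start()
    try {
        val count = neo4j.session!!
            .run("MATCH (p:Product) RETURN count(p) AS c")
            .single().get("c").asLong()
        println("fixture nodes: $count")
    } finally {
        neo4j.stop()
        kafka.stop()
        network.close()
    }
}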