├── Dockerfile ├── .vscode └── settings.json ├── assets └── HTML5_Badge_256.png ├── .envrc ├── create_connector_ccloud.sh ├── src ├── test │ ├── java │ │ └── com │ │ │ └── github │ │ │ └── cjmatta │ │ │ └── kafka │ │ │ └── connect │ │ │ ├── .DS_Store │ │ │ ├── ServerSentEventsSourceTaskTest.java │ │ │ ├── ServerSentEventsSourceConnectorTest.java │ │ │ ├── ServerSentEventsSourceTaskIT.java │ │ │ └── sse │ │ │ └── ServerSentEventClientTest.java │ └── resources │ │ └── logback.xml └── main │ ├── assembly │ └── package.xml │ └── java │ └── com │ └── github │ └── cjmatta │ └── kafka │ └── connect │ └── sse │ ├── ServerSentEvent.java │ ├── ServerSentEventsSourceConnector.java │ ├── ServerSentEventsSourceTask.java │ ├── ServerSentEventsSourceConnectorConfig.java │ └── ServerSentEventClient.java ├── config ├── MySourceConnector.properties ├── wikipedia-connector.json ├── kafka-connect-sse.properties ├── connect-avro-docker.properties └── wikimedia.recentchange.schema.json ├── bin ├── debug.sh ├── submit_config.sh └── submit_config_extract_field.sh ├── Dockerfile.connect ├── upload-to-confluent-cloud.sh ├── wikipedia-ccloud-config.json ├── recentchange.json ├── .gitignore ├── .github └── copilot-instructions.md ├── test-local.sh ├── IMPROVEMENT_PLAN.md ├── wikipedia-edit-connect-schema.json ├── docker-compose.yml ├── README.md ├── pom.xml ├── LICENSE └── manage-connector.sh /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM confluentinc/cp-kafka-connect:latest 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "java.configuration.updateBuildConfiguration": "automatic" 3 | } -------------------------------------------------------------------------------- /assets/HTML5_Badge_256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cjmatta/kafka-connect-sse/HEAD/assets/HTML5_Badge_256.png -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | export KAFKA_API_KEY="op://Employee/CCloud SSE API Key/username" 2 | export KAFKA_API_SECRET="op://Employee/CCloud SSE API Key/credential" -------------------------------------------------------------------------------- /create_connector_ccloud.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | confluent connect cluster create --config-file wikipedia-ccloud-config.json --cluster lkc-zm1p10 4 | -------------------------------------------------------------------------------- /src/test/java/com/github/cjmatta/kafka/connect/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cjmatta/kafka-connect-sse/HEAD/src/test/java/com/github/cjmatta/kafka/connect/.DS_Store -------------------------------------------------------------------------------- /config/MySourceConnector.properties: -------------------------------------------------------------------------------- 1 | name=MySourceConnector 2 | tasks.max=1 3 | connector.class=com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector 4 | topic=wikipedia.sse 5 | sse.uri=https://stream.wikimedia.org/v2/stream/recentchange -------------------------------------------------------------------------------- /bin/debug.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | 4 | : ${SUSPEND:='n'} 5 | 6 | set -e 7 | 8 | mvn clean package 9 | export KAFKA_JMX_OPTS="-Xdebug -agentlib:jdwp=transport=dt_socket,server=y,suspend=${SUSPEND},address=5005" 10 | 11 | connect-standalone config/connect-avro-docker.properties config/MySourceConnector.properties 12 | -------------------------------------------------------------------------------- /Dockerfile.connect: -------------------------------------------------------------------------------- 1 | FROM confluentinc/cp-kafka-connect:7.3.0 2 | 3 | # Copy the connector ZIP file 4 | COPY target/components/packages/cjmatta-kafka-connect-sse-1.4.zip /tmp/ 5 | 6 | # Install the connector using confluent-hub (installs to /usr/share/confluent-hub-components) 7 | RUN confluent-hub install --no-prompt /tmp/cjmatta-kafka-connect-sse-1.4.zip 8 | 9 | -------------------------------------------------------------------------------- /src/test/java/com/github/cjmatta/kafka/connect/ServerSentEventsSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.github.cjmatta.kafka.connect; 2 | 3 | import org.junit.jupiter.api.AfterAll; 4 | import org.junit.jupiter.api.BeforeAll; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class ServerSentEventsSourceTaskTest { 9 | @Test 10 | public void test() { 11 | // Congrats on a passing test! 12 | } 13 | } -------------------------------------------------------------------------------- /src/test/java/com/github/cjmatta/kafka/connect/ServerSentEventsSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | package com.github.cjmatta.kafka.connect; 2 | 3 | import org.junit.jupiter.api.AfterAll; 4 | import org.junit.jupiter.api.BeforeAll; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class ServerSentEventsSourceConnectorTest { 9 | @Test 10 | public void test() { 11 | // Congrats on a passing test! 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /upload-to-confluent-cloud.sh: -------------------------------------------------------------------------------- 1 | confluent connect custom-plugin create "kafka-connect-sse" \ 2 | --plugin-file ~/Documents/Projects/kafka-connect-sse/target/components/packages/cjmatta-kafka-connect-sse-1.3.zip \ 3 | --connector-class com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector \ 4 | --description "A Kafka Connect source connector for Server Sent Events" \ 5 | --documentation-link https://github.com/cjmatta/kafka-connect-sse \ 6 | --connector-type Source \ 7 | --sensitive-properties http.basic.auth.password \ 8 | --cloud aws -------------------------------------------------------------------------------- /config/wikipedia-connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wikipedia-sse-connector", 3 | "config": { 4 | "connector.class": "com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector", 5 | "tasks.max": "1", 6 | "sse.uri": "https://stream.wikimedia.org/v2/stream/recentchange", 7 | "topic": "wikipedia-changes", 8 | "http.header.User-Agent": "KafkaConnectSSE-Testing/1.4 (https://github.com/cjmatta/kafka-connect-sse)", 9 | "compression.enabled": "true", 10 | "rate.limit.requests.per.second": "10", 11 | "retry.backoff.initial.ms": "2000", 12 | "retry.backoff.max.ms": "30000", 13 | "retry.max.attempts": "10" 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/test/java/com/github/cjmatta/kafka/connect/ServerSentEventsSourceTaskIT.java: -------------------------------------------------------------------------------- 1 | package com.github.cjmatta.kafka.connect; 2 | 3 | import org.junit.jupiter.api.AfterAll; 4 | import org.junit.jupiter.api.BeforeAll; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | /** 9 | * This test can be used for integration testing with the system you are integrating with. For example 10 | * take a look at https://github.com/jcustenborder/docker-compose-junit-extension to launch docker 11 | * containers for your testing. 12 | */ 13 | public class ServerSentEventsSourceTaskIT { 14 | @Test 15 | public void test() { 16 | // Congrats on a passing test! 17 | } 18 | } -------------------------------------------------------------------------------- /bin/submit_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_HOST=localhost 4 | 5 | if [[ $1 ]];then 6 | CONNECT_HOST=$1 7 | fi 8 | 9 | HEADER="Content-Type: application/json" 10 | DATA=$( cat << EOF 11 | { 12 | "name": "wikipedia-sse-1", 13 | "config": { 14 | "connector.class": "com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector", 15 | "topic": "wikipedia.sse", 16 | "sse.uri": "https://stream.wikimedia.org/v2/stream/recentchange", 17 | "value.converter": "io.confluent.connect.avro.AvroConverter", 18 | "value.converter.schema.registry.url": "http://localhost:8081", 19 | "tasks.max": "1" 20 | } 21 | } 22 | EOF 23 | ) 24 | 25 | echo "curl -X POST -H \"${HEADER}\" --data \"${DATA}\" http://${CONNECT_HOST}:8083/connectors" 26 | curl -X POST -H "${HEADER}" --data "${DATA}" http://${CONNECT_HOST}:8083/connectors 27 | echo 28 | -------------------------------------------------------------------------------- /wikipedia-ccloud-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Wikipedia SSE", 3 | "config": { 4 | "connector.class": "com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector", 5 | "kafka.auth.mode": "KAFKA_API_KEY", 6 | "kafka.api.key": "WTVSJDQTRFOVUSPJ", 7 | "kafka.api.secret": "93OFB0HWbUqUtCQqveWKx3kwvMCTImIi0JefS7/V9f3ih8BhF4sxOWDPX8D9rOW+", 8 | "tasks.max": "1", 9 | "confluent.custom.plugin.id": "ccp-q2kxd2", 10 | "confluent.connector.type": "CUSTOM", 11 | "confluent.custom.connection.endpoints": "stream.wikimedia.org:443", 12 | "confluent.custom.schema.registry.auto": "true", 13 | "key.converter": "io.confluent.connect.json.JsonSchemaConverter", 14 | "sse.uri": "https://stream.wikimedia.org/v2/stream/recentchange", 15 | "topic": "wikimedia-raw", 16 | "value.converter": "io.confluent.connect.json.JsonSchemaConverter" 17 | } 18 | } -------------------------------------------------------------------------------- /config/kafka-connect-sse.properties: -------------------------------------------------------------------------------- 1 | name=sse-source-connector 2 | tasks.max=1 3 | connector.class=com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector 4 | topic=wikipedia-sse-test123 5 | sse.uri=https://stream.wikimedia.org/v2/stream/recentchange 6 | errors.tollerance=all 7 | errors.deadletterqueue.topic.name=wikipedia.dlq 8 | #transforms=ExtractField 9 | transforms=ExtractField,parseJSON 10 | transforms.ExtractField.type=org.apache.kafka.connect.transforms.ExtractField$Value 11 | transforms.ExtractField.field=data 12 | transforms.parseJSON.type=com.github.jcustenborder.kafka.connect.json.FromJson$Value 13 | transforms.parseJSON.json.exclude.locations=#/properties/log_params,#/properties/$schema,#/$schema 14 | transforms.parseJSON.json.schema.location=Url 15 | transforms.parseJSON.json.schema.url=file:/Users/chris/Documents/Projects/kafka-connect-sse/config/wikimedia.recentchange.schema.json 16 | transforms.parseJSON.json.schema.validation.enabled=false -------------------------------------------------------------------------------- /bin/submit_config_extract_field.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_HOST=localhost 4 | 5 | if [[ $1 ]];then 6 | CONNECT_HOST=$1 7 | fi 8 | 9 | HEADER="Content-Type: application/json" 10 | DATA=$( cat << EOF 11 | { 12 | "name": "wikipedia-sse-1", 13 | "config": { 14 | "connector.class": "com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector", 15 | "topic": "wikipedia-raw", 16 | "sse.uri": "https://stream.wikimedia.org/v2/stream/recentchange", 17 | "transforms": "extractjson", 18 | "transforms.extractjson.type": "org.apache.kafka.connect.transforms.ExtractField$Value", 19 | "transforms.extractjson.field": "data", 20 | "value.converter": "io.confluent.connect.avro.AvroConverter", 21 | "value.converter.schema.registry.url": "http://localhost:8081", 22 | "tasks.max": "1" 23 | } 24 | } 25 | EOF 26 | ) 27 | 28 | echo "curl -X POST -H \"${HEADER}\" --data \"${DATA}\" http://${CONNECT_HOST}:8083/connectors" 29 | curl -X POST -H "${HEADER}" --data "${DATA}" http://${CONNECT_HOST}:8083/connectors 30 | echo 31 | -------------------------------------------------------------------------------- /recentchange.json: -------------------------------------------------------------------------------- 1 | { 2 | "bot": false, 3 | "comment": "/* wbsetdescription-add:1|en */ Qing dynasty person CBDB = 56896, #quickstatements; [[:toollabs:quickstatements/#/batch/8857|batch #8857]] by [[User:Tagishsimon|]]", 4 | "id": 910619087, 5 | "length": { 6 | "new": 2697, 7 | "old": 2606 8 | }, 9 | "meta": { 10 | "domain": "www.wikidata.org", 11 | "dt": "2019-03-04T13:30:32+00:00", 12 | "id": "afa46939-3e81-11e9-bfb6-1866da99521a", 13 | "request_id": "a30366d2-3ff6-4402-84c6-17aae8690056", 14 | "schema_uri": "mediawiki/recentchange/2", 15 | "topic": "eqiad.mediawiki.recentchange", 16 | "uri": "https://www.wikidata.org/wiki/Q45553520", 17 | "partition": 0, 18 | "offset": 1429042371 19 | }, 20 | "minor": false, 21 | "namespace": 0, 22 | "parsedcomment": "‎Added [en] description: Qing dynasty person CBDB = 56896, #quickstatements; batch #8857 by User:Tagishsimon", 23 | "patrolled": true, 24 | "revision": { 25 | "new": 874176646, 26 | "old": 850556419 27 | }, 28 | "server_name": "www.wikidata.org", 29 | "server_script_path": "/w", 30 | "server_url": "https://www.wikidata.org", 31 | "timestamp": 1551706232, 32 | "title": "Q45553520", 33 | "type": "edit", 34 | "user": "Tagishsimon", 35 | "wiki": "wikidatawiki" 36 | } -------------------------------------------------------------------------------- /src/main/assembly/package.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | package 7 | 8 | dir 9 | 10 | false 11 | 12 | 13 | ${project.basedir} 14 | share/doc/${project.name}/ 15 | 16 | README* 17 | LICENSE* 18 | NOTICE* 19 | licenses/ 20 | 21 | 22 | 23 | ${project.basedir}/config 24 | etc/${project.name} 25 | 26 | * 27 | 28 | 29 | 30 | 31 | 32 | share/java/${project.name} 33 | true 34 | true 35 | 36 | org.apache.kafka:connect-api 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/com/github/cjmatta/kafka/connect/sse/ServerSentEvent.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2019 Christopher Matta (chris.matta@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | package com.github.cjmatta.kafka.connect.sse; 19 | 20 | import org.apache.kafka.connect.data.Schema; 21 | import org.apache.kafka.connect.data.SchemaBuilder; 22 | import org.apache.kafka.connect.data.Struct; 23 | 24 | public class ServerSentEvent extends Struct { 25 | public static final String EVENT = "event"; 26 | public static final String ID = "id"; 27 | public static final String DATA = "data"; 28 | 29 | final public static Schema SCHEMA = SchemaBuilder.struct() 30 | .name("com.github.cjmatta.kafka.connect.sse.ServerSentEvent") 31 | .doc("Server Sent Event Message") 32 | .field(EVENT, SchemaBuilder.string().doc("The event class of this event").required().build()) 33 | .field(ID, SchemaBuilder.string().doc("The event ID").optional().build()) 34 | .field(DATA, SchemaBuilder.string().doc("The event data payload").required().build()); 35 | 36 | public ServerSentEvent(String event, String id, String data) { 37 | super(SCHEMA); 38 | this.put(EVENT, event) 39 | .put(ID, id) 40 | .put(DATA, data); 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return String.format("[event]=%s [id]=%s [data]=%s", 46 | this.get(EVENT), 47 | this.get(ID), 48 | this.get(DATA) 49 | ); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /config/connect-avro-docker.properties: -------------------------------------------------------------------------------- 1 | # Sample configuration for a standalone Kafka Connect worker that uses Avro serialization and 2 | # integrates the the SchemaConfig Registry. This sample configuration assumes a local installation of 3 | # Confluent Platform with all services running on their default ports. 4 | # Bootstrap Kafka servers. If multiple servers are specified, they should be comma-separated. 5 | bootstrap.servers=kafka:9092 6 | # The converters specify the format of data in Kafka and how to translate it into Connect data. 7 | # Every Connect user will need to configure these based on the format they want their data in 8 | # when loaded from or stored into Kafka 9 | key.converter=io.confluent.connect.avro.AvroConverter 10 | key.converter.schema.registry.url=http://schema-registry:8081 11 | value.converter=io.confluent.connect.avro.AvroConverter 12 | value.converter.schema.registry.url=http://schema-registry:8081 13 | # The internal converter used for offsets and config data is configurable and must be specified, 14 | # but most users will always want to use the built-in default. Offset and config data is never 15 | # visible outside of Connect in this format. 16 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 17 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 18 | internal.key.converter.schemas.enable=false 19 | internal.value.converter.schemas.enable=false 20 | # Local storage file for offset data 21 | offset.storage.file.filename=/tmp/connect.offsets 22 | 23 | # Confuent Control Center Integration -- uncomment these lines to enable Kafka client interceptors 24 | # that will report audit data that can be displayed and analyzed in Confluent Control Center 25 | # producer.interceptor.classes=io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor 26 | # consumer.interceptor.classes=io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor 27 | 28 | # Load our plugin from the output path. 29 | plugin.path=target/kafka-connect-target,/Users/chris/Downloads/confluent/share/confluent-hub-components -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # User-specific stuff 5 | .idea/**/workspace.xml 6 | .idea/**/tasks.xml 7 | .idea/**/usage.statistics.xml 8 | .idea/**/dictionaries 9 | .idea/**/shelf 10 | 11 | # AWS User-specific 12 | .idea/**/aws.xml 13 | 14 | # Generated files 15 | .idea/**/contentModel.xml 16 | 17 | # Sensitive or high-churn files 18 | .idea/**/dataSources/ 19 | .idea/**/dataSources.ids 20 | .idea/**/dataSources.local.xml 21 | .idea/**/sqlDataSources.xml 22 | .idea/**/dynamic.xml 23 | .idea/**/uiDesigner.xml 24 | .idea/**/dbnavigator.xml 25 | 26 | # Gradle 27 | .idea/**/gradle.xml 28 | .idea/**/libraries 29 | 30 | # Gradle and Maven with auto-import 31 | # When using Gradle or Maven with auto-import, you should exclude module files, 32 | # since they will be recreated, and may cause churn. Uncomment if using 33 | # auto-import. 34 | # .idea/artifacts 35 | # .idea/compiler.xml 36 | # .idea/jarRepositories.xml 37 | # .idea/modules.xml 38 | # .idea/*.iml 39 | # .idea/modules 40 | # *.iml 41 | # *.ipr 42 | 43 | # CMake 44 | cmake-build-*/ 45 | 46 | # Mongo Explorer plugin 47 | .idea/**/mongoSettings.xml 48 | 49 | # File-based project format 50 | *.iws 51 | 52 | # IntelliJ 53 | out/ 54 | 55 | # mpeltonen/sbt-idea plugin 56 | .idea_modules/ 57 | 58 | # JIRA plugin 59 | atlassian-ide-plugin.xml 60 | 61 | # Cursive Clojure plugin 62 | .idea/replstate.xml 63 | 64 | # SonarLint plugin 65 | .idea/sonarlint/ 66 | 67 | # Crashlytics plugin (for Android Studio and IntelliJ) 68 | com_crashlytics_export_strings.xml 69 | crashlytics.properties 70 | crashlytics-build.properties 71 | fabric.properties 72 | 73 | # Editor-based Rest Client 74 | .idea/httpRequests 75 | 76 | # Android studio 3.1+ serialized cache file 77 | .idea/caches/build_file_checksums.ser 78 | 79 | # Compiled class file 80 | *.class 81 | 82 | # Log file 83 | *.log 84 | 85 | # BlueJ files 86 | *.ctxt 87 | 88 | # Mobile Tools for Java (J2ME) 89 | .mtj.tmp/ 90 | 91 | # Package Files # 92 | *.jar 93 | *.war 94 | *.nar 95 | *.ear 96 | *.zip 97 | *.tar.gz 98 | *.rar 99 | 100 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 101 | hs_err_pid* 102 | replay_pid* 103 | 104 | target 105 | .idea* 106 | .settings* 107 | .project* 108 | *.code-workspace 109 | 110 | .classpath 111 | 112 | # macOS 113 | .DS_Store 114 | -------------------------------------------------------------------------------- /src/main/java/com/github/cjmatta/kafka/connect/sse/ServerSentEventsSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2019 Christopher Matta (chris.matta@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | **/ 16 | 17 | package com.github.cjmatta.kafka.connect.sse; 18 | 19 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 20 | import com.github.jcustenborder.kafka.connect.utils.config.Description; 21 | import com.github.jcustenborder.kafka.connect.utils.config.TaskConfigs; 22 | import com.github.jcustenborder.kafka.connect.utils.config.Title; 23 | import org.apache.kafka.common.config.ConfigDef; 24 | import org.apache.kafka.connect.connector.Task; 25 | import org.apache.kafka.connect.source.SourceConnector; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.util.List; 30 | import java.util.Map; 31 | 32 | @Description("Kafka Connect source connector for Server Sent Events") 33 | @Title("Kafka Connect Server Sent Events") //This is the display name that will show up in the documentation. 34 | public class ServerSentEventsSourceConnector extends SourceConnector { 35 | /* 36 | Your connector should never use System.out for logging. All of your classes should use slf4j 37 | for logging 38 | */ 39 | private static Logger log = LoggerFactory.getLogger(ServerSentEventsSourceConnector.class); 40 | private ServerSentEventsSourceConnectorConfig config; 41 | Map settings; 42 | 43 | @Override 44 | public String version() { 45 | return VersionUtil.version(this.getClass()); 46 | } 47 | 48 | @Override 49 | public void start(Map map) { 50 | log.info("Starting Server Sent Events Source Connector"); 51 | config = new ServerSentEventsSourceConnectorConfig(map); 52 | this.settings = map; 53 | } 54 | 55 | @Override 56 | public Class taskClass() { 57 | return ServerSentEventsSourceTask.class; 58 | } 59 | 60 | @Override 61 | public List> taskConfigs(int i) { 62 | return TaskConfigs.single(this.settings); 63 | } 64 | 65 | @Override 66 | public void stop() { 67 | log.info("Stopping Server Sent Events Source Connector"); 68 | } 69 | 70 | @Override 71 | public ConfigDef config() { 72 | return ServerSentEventsSourceConnectorConfig.config(); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | ## Project Context 2 | This is a Kafka Connect Source Connector meant to capture events from the HTML 5 standard Server Sent Events. 3 | 4 | ## PRIME DIRECTIVE 5 | Avoid working on more than one file at a time. 6 | Multiple simultaneous edits to a file will cause corruption. 7 | Be chatting and teach about what you are doing while coding. 8 | 9 | ## LARGE FILE & COMPLEX CHANGE PROTOCOL 10 | 11 | ### MANDATORY PLANNING PHASE 12 | When working with large files (>300 lines) or complex changes: 13 | 1. ALWAYS start by creating a detailed plan BEFORE making any edits 14 | 2. Your plan MUST include: 15 | - All functions/sections that need modification 16 | - The order in which changes should be applied 17 | - Dependencies between changes 18 | - Estimated number of separate edits required 19 | 20 | 3. Format your plan as: 21 | ## PROPOSED EDIT PLAN 22 | Working with: [filename] 23 | Total planned edits: [number] 24 | 25 | ### MAKING EDITS 26 | - Focus on one conceptual change at a time 27 | - Show clear "before" and "after" snippets when proposing changes 28 | - Include concise explanations of what changed and why 29 | - Always check if the edit maintains the project's coding style 30 | 31 | ### Edit sequence: 32 | 1. [First specific change] - Purpose: [why] 33 | 2. [Second specific change] - Purpose: [why] 34 | 3. Do you approve this plan? I'll proceed with Edit [number] after your confirmation. 35 | 4. WAIT for explicit user confirmation before making ANY edits when user ok edit [number] 36 | 37 | ### EXECUTION PHASE 38 | - After each individual edit, clearly indicate progress: 39 | "✅ Completed edit [#] of [total]. Ready for next edit?" 40 | - If you discover additional needed changes during editing: 41 | - STOP and update the plan 42 | - Get approval before continuing 43 | 44 | ### REFACTORING GUIDANCE 45 | When refactoring large files: 46 | - Break work into logical, independently functional chunks 47 | - Ensure each intermediate state maintains functionality 48 | - Consider temporary duplication as a valid interim step 49 | - Always indicate the refactoring pattern being applied 50 | 51 | ### RATE LIMIT AVOIDANCE 52 | - For very large files, suggest splitting changes across multiple sessions 53 | - Prioritize changes that are logically complete units 54 | - Always provide clear stopping points 55 | 56 | ## General Requirements 57 | Use modern technologies as described below for all code suggestions. Prioritize clean, maintainable code with appropriate comments. 58 | 59 | ## Java Guidelines 60 | - When making changes to Java files use the IDE's compiler hinting to ensure that you haven't introduced new compilation errors 61 | - Ensure that you remove unused libraries from the imports 62 | - Add helpful comments for future developers -------------------------------------------------------------------------------- /test-local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "=== Kafka Connect SSE Local Testing ===" 5 | echo 6 | 7 | # Colors for output 8 | GREEN='\033[0;32m' 9 | BLUE='\033[0;34m' 10 | RED='\033[0;31m' 11 | NC='\033[0m' # No Color 12 | 13 | # Function to check if a service is ready 14 | wait_for_service() { 15 | local url=$1 16 | local service_name=$2 17 | local max_attempts=60 18 | local attempt=0 19 | 20 | echo -n "Waiting for ${service_name} to be ready..." 21 | while ! curl -s -o /dev/null -w "%{http_code}" "${url}" | grep -q "200\|404"; do 22 | attempt=$((attempt + 1)) 23 | if [ $attempt -ge $max_attempts ]; then 24 | echo -e " ${RED}FAILED${NC}" 25 | echo "Timeout waiting for ${service_name}" 26 | return 1 27 | fi 28 | echo -n "." 29 | sleep 2 30 | done 31 | echo -e " ${GREEN}READY${NC}" 32 | } 33 | 34 | # Step 1: Build the connector 35 | echo -e "${BLUE}Step 1: Building connector...${NC}" 36 | mvn clean package -DskipTests 37 | echo -e "${GREEN}✓ Connector built${NC}" 38 | echo 39 | 40 | # Step 2: Build Docker image with connector 41 | echo -e "${BLUE}Step 2: Building Connect Docker image with connector...${NC}" 42 | docker-compose build connect 43 | echo -e "${GREEN}✓ Docker image built${NC}" 44 | echo 45 | 46 | # Step 3: Start Docker Compose 47 | echo -e "${BLUE}Step 3: Starting Docker Compose services...${NC}" 48 | docker-compose up -d 49 | echo -e "${GREEN}✓ Services started${NC}" 50 | echo 51 | 52 | # Step 4: Wait for services 53 | echo -e "${BLUE}Step 4: Waiting for services to be ready...${NC}" 54 | wait_for_service "http://localhost:9092" "Kafka Broker" 55 | wait_for_service "http://localhost:8081" "Schema Registry" 56 | wait_for_service "http://localhost:8083" "Kafka Connect" 57 | echo 58 | 59 | # Step 5: Check installed connectors 60 | echo -e "${BLUE}Step 5: Checking available connector plugins...${NC}" 61 | curl -s http://localhost:8083/connector-plugins | jq -r '.[] | select(.class | contains("ServerSentEvents")) | .class' 62 | echo 63 | 64 | # Step 6: Deploy the connector 65 | echo -e "${BLUE}Step 6: Deploying Wikipedia SSE connector...${NC}" 66 | curl -X POST http://localhost:8083/connectors \ 67 | -H "Content-Type: application/json" \ 68 | -d @config/wikipedia-connector.json | jq '.' 69 | echo 70 | echo -e "${GREEN}✓ Connector deployed${NC}" 71 | echo 72 | 73 | # Step 7: Check connector status 74 | echo -e "${BLUE}Step 7: Checking connector status...${NC}" 75 | sleep 5 76 | curl -s http://localhost:8083/connectors/wikipedia-sse-connector/status | jq '.' 77 | echo 78 | 79 | # Step 7: Instructions for viewing data 80 | echo -e "${BLUE}=== Next Steps ===${NC}" 81 | echo 82 | echo "View connector logs:" 83 | echo " docker logs -f connect" 84 | echo 85 | echo "Check connector status:" 86 | echo " curl http://localhost:8083/connectors/wikipedia-sse-connector/status | jq '.'" 87 | echo 88 | echo "Consume messages from the topic:" 89 | echo " docker exec -it broker kafka-console-consumer --bootstrap-server localhost:9092 --topic wikipedia-changes --from-beginning" 90 | echo 91 | echo "View in Confluent Control Center:" 92 | echo " http://localhost:9021" 93 | echo 94 | echo "Stop everything:" 95 | echo " docker-compose down" 96 | echo 97 | 98 | -------------------------------------------------------------------------------- /IMPROVEMENT_PLAN.md: -------------------------------------------------------------------------------- 1 | # Kafka Connect SSE Source Connector - Improvement Plan 2 | 3 | This document outlines a systematic approach to improving the Kafka Connect SSE Source Connector project. Each step represents a small, atomic change that addresses a specific concern, with subsequent steps building upon previous improvements. 4 | 5 | ## Core Issues to Address 6 | 7 | - **Connector Stability**: The connector sometimes hangs and doesn't send data 8 | - **Logging Quality**: Insufficient logging makes it difficult to diagnose issues 9 | - **Code Documentation**: Limited comments affect maintainability 10 | - **Error Handling**: Improved error detection and recovery is needed 11 | - **Configuration Options**: Missing useful configuration options for timeouts, reconnection, etc. 12 | 13 | ## Improvement Roadmap 14 | 15 | ### Phase 1: Diagnostics & Observability 16 | 17 | #### Step 1: Enhanced Logging in ServerSentEventClient 18 | - Add connection state tracking with detailed logging 19 | - Implement log levels appropriately (trace, debug, info, warn, error) 20 | - Include timestamps and better context in log messages 21 | 22 | #### Step 2: Connection Health Monitoring 23 | - Add health check mechanism to detect "zombie" connections 24 | - Implement an idle timeout to detect when no events are flowing 25 | - Add periodic connection state logging for long-running connections 26 | 27 | #### Step 3: Error Handling Improvements 28 | - Enhance error detection capabilities 29 | - Implement more robust error recovery logic 30 | - Add detailed error logging with contextual information 31 | 32 | #### Step 4: Metrics Collection 33 | - Add basic metrics tracking for events, connection status, etc. 34 | - Implement JMX metrics for Kafka Connect monitoring integration 35 | - Create a health/status reporting mechanism 36 | 37 | ### Phase 2: Functional Improvements 38 | 39 | #### Step 5: Offset Management 40 | - Most SSE servers don't support resuming from an ID, so we're not going to support offset management 41 | - Keep the existing empty maps 42 | 43 | #### Step 6: Connection Configuration 44 | - Add timeout configuration options 45 | - Implement configurable reconnection strategy 46 | - Add support for HTTP headers and parameters 47 | 48 | #### Step 7: Event Filtering 49 | - Add support for filtering events by type/name 50 | - Implement configurable event transformation 51 | - Support for pattern-based filtering 52 | 53 | #### Step 8: Performance Enhancements 54 | - Optimize queue management 55 | - Improve threading model 56 | - Add batch size configuration 57 | 58 | ### Phase 3: Advanced Features 59 | 60 | #### Step 9: Security Enhancements 61 | - Add OAuth support 62 | - Implement advanced TLS/SSL configuration 63 | - Add proxy support 64 | 65 | #### Step 10: Content Processing 66 | - Support for different content types (JSON, XML, etc.) 67 | - Add schema evolution support 68 | - Implement content transformation options 69 | 70 | #### Step 11: Circuit Breaker Implementation 71 | - Add failure threshold detection 72 | - Implement backoff strategies 73 | - Add alerting capability 74 | 75 | ## Development Approach 76 | 77 | For each step in this plan: 78 | 79 | 1. Start by focusing on one component at a time 80 | 2. Write tests first to validate the current behavior and expected changes 81 | 3. Make incremental changes with clear before/after states 82 | 4. Update documentation to reflect new capabilities 83 | 5. Validate functionality after each change 84 | 85 | ## Execution Guidelines 86 | 87 | - Work on one file at a time to avoid corruption 88 | - Run tests after each change to ensure functionality is maintained 89 | - Update the README.md with new configuration options as they're added 90 | - Follow the existing code style for consistency -------------------------------------------------------------------------------- /wikipedia-edit-connect-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "com.github.cjmatta.wikipedia.RecentChange", 3 | "type": "STRUCT", 4 | "isOptional": false, 5 | "fieldSchemas": { 6 | "id": { 7 | "type": "INT64", 8 | "isOptional": false 9 | }, 10 | "bot": { 11 | "type": "BOOLEAN", 12 | "isOptional": false 13 | }, 14 | "comment": { 15 | "type": "STRING", 16 | "iOptional": false 17 | }, 18 | "length": { 19 | "type": "STRUCT", 20 | "isOptional": false, 21 | "fieldSchemas": { 22 | "new": { 23 | "type": "INT64", 24 | "isOptional": false 25 | }, 26 | "old": { 27 | "type": "INT64", 28 | "isOptional": false 29 | } 30 | } 31 | }, 32 | "meta": { 33 | "type": "STRUCT", 34 | "isOptional": false, 35 | "fieldSchemas": { 36 | "domain": { 37 | "type": "STRING", 38 | "isOptional": false 39 | }, 40 | "dt": { 41 | "name": "org.apache.kafka.connect.data.Timestamp", 42 | "type": "INT64", 43 | "version": 1, 44 | "isOptional": false 45 | }, 46 | "request_id": { 47 | "type": "STRING", 48 | "isOptional": false 49 | }, 50 | "schema_uri": { 51 | "type": "STRING", 52 | "isOptional": false 53 | }, 54 | "topic": { 55 | "type": "STRING", 56 | "isOptional": false 57 | }, 58 | "uri": { 59 | "type": "STRING", 60 | "isOptional": false 61 | }, 62 | "partition": { 63 | "type": "INT", 64 | "isOptional": false 65 | }, 66 | "offset": { 67 | "type": "INT64", 68 | "isOptional": false 69 | } 70 | } 71 | }, 72 | "minor": { 73 | "type": "BOOLEAN", 74 | "isOptional": false 75 | }, 76 | "namespace": { 77 | "type": "INT", 78 | "isOptional": false 79 | }, 80 | "parsedComment": { 81 | "type": "STRING", 82 | "isOptional": false 83 | }, 84 | "patrolled": { 85 | "type": "BOOLEAN", 86 | "isOptional": false 87 | }, 88 | "revision": { 89 | "new": { 90 | "type": "INT64", 91 | "isOptional": false 92 | }, 93 | "old": { 94 | "type": "INT64", 95 | "isOptional": false 96 | } 97 | }, 98 | "server_name": { 99 | "type": "STRING", 100 | "isOptional": false 101 | }, 102 | "server_script_path": { 103 | "type": "STRING", 104 | "isOptional": false 105 | }, 106 | "server_url": { 107 | "type": "STRING", 108 | "isOptional": false 109 | }, 110 | "timestamp": { 111 | "type": "INT64", 112 | "isOptional": false 113 | }, 114 | "title": { 115 | "type": "STRING", 116 | "isOptional": false 117 | }, 118 | "type": { 119 | "type": "STRING", 120 | "isOptional": false 121 | }, 122 | "user": { 123 | "type": "STRING", 124 | "isOptional": false 125 | }, 126 | "wiki": { 127 | "type": "STRING", 128 | "isOptional": false 129 | } 130 | } 131 | } -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '2' 3 | services: 4 | zookeeper: 5 | image: confluentinc/cp-zookeeper:7.3.0 6 | hostname: zookeeper 7 | container_name: zookeeper 8 | ports: 9 | - "2181:2181" 10 | environment: 11 | ZOOKEEPER_CLIENT_PORT: 2181 12 | ZOOKEEPER_TICK_TIME: 2000 13 | 14 | broker: 15 | image: confluentinc/cp-server:7.3.0 16 | hostname: broker 17 | container_name: broker 18 | depends_on: 19 | - zookeeper 20 | ports: 21 | - "9092:9092" 22 | - "9101:9101" 23 | environment: 24 | KAFKA_BROKER_ID: 1 25 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' 26 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 28 | KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter 29 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 30 | KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 31 | KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: 1 32 | KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 1 33 | KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 34 | KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 35 | KAFKA_JMX_PORT: 9101 36 | KAFKA_JMX_HOSTNAME: localhost 37 | KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081 38 | CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: broker:29092 39 | CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 40 | CONFLUENT_METRICS_ENABLE: 'true' 41 | CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' 42 | 43 | schema-registry: 44 | image: confluentinc/cp-schema-registry:7.3.0 45 | hostname: schema-registry 46 | container_name: schema-registry 47 | depends_on: 48 | - broker 49 | ports: 50 | - "8081:8081" 51 | environment: 52 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 53 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' 54 | SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 55 | 56 | connect: 57 | build: 58 | context: . 59 | dockerfile: Dockerfile.connect 60 | image: kafka-connect-sse:local 61 | hostname: connect 62 | container_name: connect 63 | depends_on: 64 | - broker 65 | - schema-registry 66 | ports: 67 | - "8083:8083" 68 | environment: 69 | CONNECT_BOOTSTRAP_SERVERS: 'broker:29092' 70 | CONNECT_REST_ADVERTISED_HOST_NAME: connect 71 | CONNECT_REST_PORT: 8083 72 | CONNECT_GROUP_ID: compose-connect-group 73 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs 74 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 75 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 76 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets 77 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 78 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status 79 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 80 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter 81 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 82 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 83 | CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components" 84 | CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR 85 | 86 | ksqldb-server: 87 | image: confluentinc/cp-ksqldb-server:7.3.0 88 | hostname: ksqldb-server 89 | container_name: ksqldb-server 90 | depends_on: 91 | - broker 92 | - connect 93 | ports: 94 | - "8088:8088" 95 | environment: 96 | KSQL_CONFIG_DIR: "/etc/ksql" 97 | KSQL_BOOTSTRAP_SERVERS: "broker:29092" 98 | KSQL_HOST_NAME: ksqldb-server 99 | KSQL_LISTENERS: "http://0.0.0.0:8088" 100 | KSQL_CACHE_MAX_BYTES_BUFFERING: 0 101 | KSQL_KSQL_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" 102 | KSQL_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" 103 | KSQL_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" 104 | KSQL_KSQL_CONNECT_URL: "http://connect:8083" 105 | KSQL_KSQL_LOGGING_PROCESSING_TOPIC_REPLICATION_FACTOR: 1 106 | KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: 'true' 107 | KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: 'true' 108 | 109 | control-center: 110 | image: confluentinc/cp-enterprise-control-center:7.3.0 111 | hostname: control-center 112 | container_name: control-center 113 | depends_on: 114 | - broker 115 | - schema-registry 116 | - connect 117 | - ksqldb-server 118 | ports: 119 | - "9021:9021" 120 | environment: 121 | CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092' 122 | CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'http://connect:8083' 123 | CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" 124 | CONTROL_CENTER_REPLICATION_FACTOR: 1 125 | CONTROL_CENTER_KSQL_KSQLDB1_URL: "http://ksqldb-server:8088" 126 | CONTROL_CENTER_KSQL_KSQLDB1_ADVERTISED_URL: "http://localhost:8088" 127 | CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1 128 | CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1 129 | CONFLUENT_METRICS_TOPIC_REPLICATION: 1 130 | PORT: 9021 -------------------------------------------------------------------------------- /src/main/java/com/github/cjmatta/kafka/connect/sse/ServerSentEventsSourceTask.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2019 Christopher Matta (chris.matta@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | **/ 16 | 17 | package com.github.cjmatta.kafka.connect.sse; 18 | 19 | import org.apache.kafka.connect.source.SourceRecord; 20 | import org.apache.kafka.connect.source.SourceTask; 21 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | import javax.ws.rs.sse.InboundSseEvent; 27 | import java.io.IOException; 28 | import java.util.Collections; 29 | import java.util.LinkedList; 30 | import java.util.List; 31 | import java.util.Map; 32 | 33 | public class ServerSentEventsSourceTask extends SourceTask { 34 | 35 | static final Logger log = LoggerFactory.getLogger(ServerSentEventsSourceTask.class); 36 | ServerSentEventsSourceConnectorConfig config; 37 | ServerSentEventClient client; 38 | 39 | // Metrics logging configuration 40 | private static final long DEFAULT_METRICS_LOG_INTERVAL_MS = 60000; // 1 minute 41 | private long metricsLogIntervalMs = DEFAULT_METRICS_LOG_INTERVAL_MS; 42 | private long lastMetricsLogTime = 0; 43 | 44 | 45 | @Override 46 | public String version() { 47 | return VersionUtil.version(this.getClass()); 48 | } 49 | 50 | @Override 51 | public void start(Map map) { 52 | log.info("Starting Server Sent Events Source Task"); 53 | config = new ServerSentEventsSourceConnectorConfig(map); 54 | 55 | // Extract configuration values 56 | String sseUri = config.getString(ServerSentEventsSourceConnectorConfig.SSE_URI); 57 | String username = config.httpBasicAuth ? config.getString(ServerSentEventsSourceConnectorConfig.HTTP_BASIC_AUTH_USERNAME) : null; 58 | String password = config.httpBasicAuth ? config.getString(ServerSentEventsSourceConnectorConfig.HTTP_BASIC_AUTH_PASSWORD) : null; 59 | Map headers = config.getHttpHeaders(); 60 | boolean compressionEnabled = config.compressionEnabled; 61 | Double rateLimitRequestsPerSecond = config.rateLimitRequestsPerSecond; 62 | Integer rateLimitMaxConcurrent = config.rateLimitMaxConcurrent; 63 | long retryBackoffInitialMs = config.retryBackoffInitialMs; 64 | long retryBackoffMaxMs = config.retryBackoffMaxMs; 65 | int retryMaxAttempts = config.retryMaxAttempts; 66 | 67 | // Create client with full configuration 68 | client = new ServerSentEventClient(sseUri, username, password, headers, 69 | compressionEnabled, rateLimitRequestsPerSecond, rateLimitMaxConcurrent, 70 | retryBackoffInitialMs, retryBackoffMaxMs, retryMaxAttempts); 71 | 72 | // Initialize metrics logging timer 73 | lastMetricsLogTime = System.currentTimeMillis(); 74 | log.info("Metrics will be logged every {} ms", metricsLogIntervalMs); 75 | 76 | try { 77 | client.start(); 78 | log.info("SSE client started successfully - {}", client.getStatusSummary()); 79 | } catch (IOException e) { 80 | log.error("Failed to start SSE client", e); 81 | } 82 | } 83 | 84 | @Override 85 | public List poll() throws InterruptedException { 86 | // Check if it's time to log metrics 87 | long currentTime = System.currentTimeMillis(); 88 | if (currentTime - lastMetricsLogTime > metricsLogIntervalMs) { 89 | // Log metrics with warning level if connection isn't healthy 90 | boolean useWarnLevel = !client.isConnectionHealthy(); 91 | client.logMetrics(useWarnLevel); 92 | lastMetricsLogTime = currentTime; 93 | } 94 | 95 | List sseEvents = client.getRecords(); 96 | List records = new LinkedList<>(); 97 | 98 | for (InboundSseEvent event : sseEvents) { 99 | records.add(createSourceRecordFromSseEvent(event)); 100 | } 101 | 102 | return records; 103 | } 104 | 105 | private SourceRecord createSourceRecordFromSseEvent(InboundSseEvent event) { 106 | Map srcOffset = Collections.emptyMap(); 107 | Map srcPartition = Collections.emptyMap(); 108 | 109 | log.debug("Event " + event.toString()); 110 | 111 | // Safely handle event fields that might be null 112 | String eventName = event.getName() != null ? event.getName() : "unknown"; 113 | String eventId = event.getId(); // Can be null as per ServerSentEvent schema 114 | String eventData = event.readData() != null ? event.readData() : ""; 115 | 116 | ServerSentEvent serverSentEvent = new ServerSentEvent( 117 | eventName, 118 | eventId, 119 | eventData 120 | ); 121 | 122 | return new SourceRecord( 123 | srcPartition, 124 | srcOffset, 125 | this.config.getString(ServerSentEventsSourceConnectorConfig.TOPIC), 126 | null, 127 | null, 128 | ServerSentEvent.SCHEMA, 129 | serverSentEvent 130 | ); 131 | 132 | } 133 | 134 | @Override 135 | public void stop() { 136 | if(this.client != null) { 137 | this.client.stop(); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /config/wikimedia.recentchange.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "mediawiki/recentchange", 3 | "description": "Represents a MW RecentChange event. https://www.mediawiki.org/wiki/Manual:RCFeed\n", 4 | "$id": "/mediawiki/recentchange/1.0.0", 5 | "$schema": "https://json-schema.org/draft-07/schema#", 6 | "type": "object", 7 | "additionalProperties": true, 8 | "required": [ 9 | "$schema", 10 | "meta" 11 | ], 12 | "properties": { 13 | "$schema": { 14 | "type": "string", 15 | "description": "A URI identifying the JSONSchema for this event. This should match an schema's $id in a schema repository. E.g. /schema_name/1.0.0\n" 16 | }, 17 | "meta": { 18 | "type": "object", 19 | "required": [ 20 | "id", 21 | "dt", 22 | "stream" 23 | ], 24 | "properties": { 25 | "uri": { 26 | "type": "string", 27 | "format": "uri-reference", 28 | "maxLength": 8192, 29 | "description": "Unique URI identifying the event or entity" 30 | }, 31 | "request_id": { 32 | "type": "string", 33 | "description": "Unique ID of the request that caused the event" 34 | }, 35 | "id": { 36 | "type": "string", 37 | "pattern": "^[a-fA-F0-9]{8}(-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}$", 38 | "maxLength": 36, 39 | "description": "Unique ID of this event" 40 | }, 41 | "dt": { 42 | "type": "string", 43 | "format": "date-time", 44 | "maxLength": 128, 45 | "description": "Event datetime, in ISO-8601 format" 46 | }, 47 | "domain": { 48 | "type": "string", 49 | "description": "Domain the event or entity pertains to", 50 | "minLength": 1 51 | }, 52 | "stream": { 53 | "type": "string", 54 | "description": "Name of the stream/queue/dataset that this event belongs in", 55 | "minLength": 1 56 | } 57 | } 58 | }, 59 | "id": { 60 | "description": "ID of the recentchange event (rcid).", 61 | "type": [ 62 | "integer", 63 | "null" 64 | ] 65 | }, 66 | "type": { 67 | "description": "Type of recentchange event (rc_type). One of \"edit\", \"new\", \"log\", \"categorize\", or \"external\". (See Manual:Recentchanges table#rc_type)\n", 68 | "type": "string" 69 | }, 70 | "title": { 71 | "description": "Full page name, from Title::getPrefixedText.", 72 | "type": "string" 73 | }, 74 | "namespace": { 75 | "description": "ID of relevant namespace of affected page (rc_namespace, page_namespace). This is -1 (\"Special\") for log events.\n", 76 | "type": "integer" 77 | }, 78 | "comment": { 79 | "description": "(rc_comment)", 80 | "type": "string" 81 | }, 82 | "parsedcomment": { 83 | "description": "The rc_comment parsed into simple HTML. Optional", 84 | "type": "string" 85 | }, 86 | "timestamp": { 87 | "description": "Unix timestamp (derived from rc_timestamp).", 88 | "type": "integer" 89 | }, 90 | "user": { 91 | "description": "(rc_user_text)", 92 | "type": "string" 93 | }, 94 | "bot": { 95 | "description": "(rc_bot)", 96 | "type": "boolean" 97 | }, 98 | "server_url": { 99 | "description": "$wgCanonicalServer", 100 | "type": "string" 101 | }, 102 | "server_name": { 103 | "description": "$wgServerName", 104 | "type": "string" 105 | }, 106 | "server_script_path": { 107 | "description": "$wgScriptPath", 108 | "type": "string" 109 | }, 110 | "wiki": { 111 | "description": "wfWikiID ($wgDBprefix, $wgDBname)", 112 | "type": "string" 113 | }, 114 | "minor": { 115 | "description": "(rc_minor).", 116 | "type": "boolean" 117 | }, 118 | "patrolled": { 119 | "description": "(rc_patrolled). This property only exists if patrolling is supported for this event (based on $wgUseRCPatrol, $wgUseNPPatrol).\n", 120 | "type": "boolean" 121 | }, 122 | "length": { 123 | "description": "Length of old and new change", 124 | "type": "object", 125 | "properties": { 126 | "old": { 127 | "description": "(rc_old_len)", 128 | "type": [ 129 | "integer", 130 | "null" 131 | ] 132 | }, 133 | "new": { 134 | "description": "(rc_new_len)", 135 | "type": [ 136 | "integer", 137 | "null" 138 | ] 139 | } 140 | } 141 | }, 142 | "revision": { 143 | "description": "Old and new revision IDs", 144 | "type": "object", 145 | "properties": { 146 | "new": { 147 | "description": "(rc_last_oldid)", 148 | "type": [ 149 | "integer", 150 | "null" 151 | ] 152 | }, 153 | "old": { 154 | "description": "(rc_this_oldid)", 155 | "type": [ 156 | "integer", 157 | "null" 158 | ] 159 | } 160 | } 161 | }, 162 | "log_id": { 163 | "description": "(rc_log_id)", 164 | "type": [ 165 | "integer", 166 | "null" 167 | ] 168 | }, 169 | "log_type": { 170 | "description": "(rc_log_type)", 171 | "type": [ 172 | "string", 173 | "null" 174 | ] 175 | }, 176 | "log_action": { 177 | "description": "(rc_log_action)", 178 | "type": "string" 179 | }, 180 | "log_params": { 181 | "description": "Property only exists if event has rc_params.", 182 | "type": [ 183 | "array", 184 | "object", 185 | "string" 186 | ], 187 | "additionalProperties": true 188 | }, 189 | "log_action_comment": { 190 | "type": [ 191 | "string", 192 | "null" 193 | ] 194 | } 195 | } 196 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Server Sent Events Source Connector for Apache Kafka 2 | 3 | A Kafka Connect source connector supporting the [Server Sent Events Standard](https://en.wikipedia.org/wiki/Server-sent_events) to stream real-time updates from SSE-compatible endpoints into Apache Kafka topics. 4 | 5 | ## Features 6 | 7 | - Streams events from any Server-Sent Events (SSE) compatible endpoint 8 | - Supports secured endpoints with HTTP Basic Authentication 9 | - Compatible with Kafka Connect in standalone and distributed modes 10 | - Works with Confluent Platform and Confluent Cloud 11 | - Configurable topic routing 12 | - JSON data formatting 13 | - Easy deployment and management with included scripts 14 | 15 | ## Configuration 16 | 17 | | Configuration Parameter | Description | Required | 18 | |--------------------------|--------------------------------------|----------| 19 | | sse.uri | URI for the SSE stream | yes | 20 | | topic | Topic to send events to | yes | 21 | | http.basic.auth | Weather or not use use basic auth | no | 22 | | http.basic.auth.username | username | no | 23 | | http.basic.auth.password | password | no | 24 | | http.header. | Set a HTTP request header Name=Value | no | 25 | 26 | ### Custom HTTP Headers 27 | 28 | You can set custom HTTP headers using the `http.header.` configuration pattern. For example: 29 | 30 | ```properties 31 | http.header.User-Agent=MyApp/1.0 (https://example.com; contact@example.com) 32 | http.header.X-Custom-Header=CustomValue 33 | ``` 34 | 35 | **Default User-Agent:** If no `User-Agent` header is explicitly configured, the connector automatically sends: 36 | ``` 37 | KafkaConnectSSE/1.4 (https://github.com/cjmatta/kafka-connect-sse) 38 | ``` 39 | 40 | This default User-Agent helps comply with robot policies for public SSE endpoints like Wikimedia. You can override it by setting `http.header.User-Agent` to your preferred value. 41 | 42 | ### HTTP Client Configuration 43 | 44 | | Configuration Parameter | Description | Required | Default | 45 | |----------------------------------|------------------------------------------------|----------|---------| 46 | | compression.enabled | Enable gzip compression | no | true | 47 | 48 | ### Rate Limiting Configuration 49 | 50 | | Configuration Parameter | Description | Required | Default | 51 | |--------------------------------------|-----------------------------------------------|----------|---------| 52 | | rate.limit.requests.per.second | Maximum requests per second (optional) | no | - | 53 | | rate.limit.max.concurrent | Maximum concurrent connections (optional) | no | - | 54 | 55 | ### Retry Configuration 56 | 57 | | Configuration Parameter | Description | Required | Default | 58 | |----------------------------|----------------------------------------------|----------|---------| 59 | | retry.backoff.initial.ms | Initial backoff time for retries (ms) | no | 2000 | 60 | | retry.backoff.max.ms | Maximum backoff time for retries (ms) | no | 30000 | 61 | | retry.max.attempts | Maximum retry attempts (-1 for unlimited) | no | -1 | 62 | 63 | ## Building the Connector 64 | 65 | To build the connector, you need Maven and Java 8 or higher installed on your system. 66 | 67 | ```bash 68 | # Clone the repository 69 | git clone https://github.com/cjmatta/kafka-connect-sse.git 70 | cd kafka-connect-sse 71 | 72 | # Build the connector 73 | mvn clean package 74 | ``` 75 | 76 | This will create a ZIP file at `target/components/packages/cjmatta-kafka-connect-sse-1.4.zip` that can be used to deploy the connector. 77 | 78 | ## Deployment Options 79 | 80 | ### Standalone Mode 81 | 82 | 1. Extract the ZIP file to your Kafka Connect plugins directory 83 | 2. Configure the connector in a properties file (see `config/kafka-connect-sse.properties` for an example) 84 | 3. Start Kafka Connect with the standalone config: 85 | 86 | ```bash 87 | $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_HOME/config/connect-standalone.properties path/to/your-connector-config.properties 88 | ``` 89 | 90 | ### Distributed Mode 91 | 92 | 1. Extract the ZIP file to your Kafka Connect plugins directory on all worker nodes 93 | 2. Restart the Kafka Connect workers to pick up the new plugin 94 | 3. Use the Kafka Connect REST API to create a connector instance: 95 | 96 | ```bash 97 | curl -X POST -H "Content-Type: application/json" \ 98 | --data @config/your-connector-config.json \ 99 | http://localhost:8083/connectors 100 | ``` 101 | 102 | ### Confluent Platform 103 | 104 | 1. Use Confluent Hub to install the connector: 105 | 106 | ```bash 107 | confluent-hub install cjmatta/kafka-connect-sse:1.4 108 | ``` 109 | 110 | 2. Restart Kafka Connect 111 | 3. Create a connector instance using Confluent Control Center UI or the REST API 112 | 113 | ### Confluent Cloud 114 | 115 | The repository includes a convenient script to manage the connector in Confluent Cloud: 116 | 117 | ```bash 118 | ./manage-connector.sh upload # Upload the connector plugin 119 | ./manage-connector.sh create # Create a connector instance 120 | ``` 121 | 122 | ## Managing with manage-connector.sh 123 | 124 | The included `manage-connector.sh` script provides a simplified workflow for managing the connector in Confluent Cloud: 125 | 126 | ### Prerequisites 127 | 128 | - Confluent CLI installed and configured 129 | - Environment variables for authentication (or 1Password CLI) 130 | 131 | ### Commands 132 | 133 | ```bash 134 | # Upload the connector plugin to Confluent Cloud 135 | ./manage-connector.sh upload 136 | 137 | # Create a connector instance 138 | ./manage-connector.sh create 139 | 140 | # Check status of connectors and plugins 141 | ./manage-connector.sh status 142 | 143 | # Delete a connector instance 144 | ./manage-connector.sh delete-connector --connector-id 145 | 146 | # Delete a plugin 147 | ./manage-connector.sh delete-plugin --plugin-id 148 | 149 | # Display help 150 | ./manage-connector.sh help 151 | ``` 152 | 153 | ### Using with 1Password 154 | 155 | If you use 1Password to store your Confluent Cloud credentials: 156 | 157 | ```bash 158 | # Create a .env file with your credential references 159 | op run --env-file=.env -- ./manage-connector.sh create 160 | ``` 161 | 162 | ## Example: Wikipedia Recent Changes 163 | 164 | To stream Wikipedia's recent changes: 165 | 166 | 1. Build the connector 167 | 2. Upload to Confluent Cloud: `./manage-connector.sh upload` 168 | 3. Create the connector: `./manage-connector.sh create` 169 | 4. Verify data is flowing: 170 | ```bash 171 | kafka-console-consumer --bootstrap-server \ 172 | --topic wikimedia-raw --from-beginning 173 | ``` 174 | 175 | ## ⚠️ Offset and Resume Disclaimer 176 | 177 | Note: This connector does not provide reliable resume support across restarts. 178 | 179 | The Server-Sent Events (SSE) protocol does not natively support seeking or replaying historical events. While some SSE servers emit an id field and support the Last-Event-ID header to resume from a recent point in the stream, this behavior is not standardized and not guaranteed across different SSE providers. 180 | 181 | As a result: 182 | • This connector does not persist partition or offset information for use in resuming ingestion. 183 | • Upon restart, the connector will resume consuming from the current point in the stream as provided by the SSE server. 184 | • No deduplication or de-duplication caching is performed unless explicitly implemented by the user at the application or downstream level. 185 | -------------------------------------------------------------------------------- /src/main/java/com/github/cjmatta/kafka/connect/sse/ServerSentEventsSourceConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2019 Christopher Matta (chris.matta@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | **/ 16 | 17 | package com.github.cjmatta.kafka.connect.sse; 18 | 19 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder; 20 | import org.apache.kafka.common.config.AbstractConfig; 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.common.config.ConfigDef.Importance; 23 | import org.apache.kafka.common.config.ConfigDef.Type; 24 | import org.apache.kafka.common.config.types.Password; 25 | 26 | import java.util.Map; 27 | 28 | public class ServerSentEventsSourceConnectorConfig extends AbstractConfig { 29 | // TODO: Add support for URL parameters 30 | // TODO: Add support for event type filtering 31 | public static final String SSE_URI = "sse.uri"; 32 | private static final String SSE_URI_DOC = "URI for the SSE stream"; 33 | public static final String TOPIC = "topic"; 34 | private static final String TOPIC_DOC = "Topic to send events to"; 35 | public static final String HTTP_BASIC_AUTH = "http.basic.auth"; 36 | // HTTP Basic Auth Support 37 | private static final String HTTP_BASIC_AUTH_DOC = "Enable HTTP basic authentication"; 38 | public static final String HTTP_BASIC_AUTH_USERNAME = "http.basic.auth.username"; 39 | private static final String HTTP_BASIC_AUTH_USERNAME_DOC = "Username for HTTP basic authentication"; 40 | public static final String HTTP_BASIC_AUTH_PASSWORD = "http.basic.auth.password"; 41 | private static final String HTTP_BASIC_AUTH_PASSWORD_DOC = "Password for HTTP basic authentication"; 42 | public static final String HTTP_HEADER_PREFIX = "http.header."; 43 | 44 | // Compression configuration 45 | public static final String COMPRESSION_ENABLED = "compression.enabled"; 46 | private static final String COMPRESSION_ENABLED_DOC = "Enable gzip compression for HTTP requests"; 47 | 48 | // Rate limiting configuration 49 | public static final String RATE_LIMIT_REQUESTS_PER_SECOND = "rate.limit.requests.per.second"; 50 | private static final String RATE_LIMIT_REQUESTS_PER_SECOND_DOC = "Maximum number of requests per second (optional rate limiting)"; 51 | public static final String RATE_LIMIT_MAX_CONCURRENT = "rate.limit.max.concurrent"; 52 | private static final String RATE_LIMIT_MAX_CONCURRENT_DOC = "Maximum number of concurrent connections (optional rate limiting)"; 53 | 54 | // Retry configuration 55 | public static final String RETRY_BACKOFF_INITIAL_MS = "retry.backoff.initial.ms"; 56 | private static final String RETRY_BACKOFF_INITIAL_MS_DOC = "Initial backoff time in milliseconds for connection retries"; 57 | public static final String RETRY_BACKOFF_MAX_MS = "retry.backoff.max.ms"; 58 | private static final String RETRY_BACKOFF_MAX_MS_DOC = "Maximum backoff time in milliseconds for connection retries"; 59 | public static final String RETRY_MAX_ATTEMPTS = "retry.max.attempts"; 60 | private static final String RETRY_MAX_ATTEMPTS_DOC = "Maximum number of retry attempts (-1 for unlimited)"; 61 | 62 | public final String sseUri; 63 | public final String topic; 64 | public final Boolean httpBasicAuth; 65 | public final String httpBasicAuthUsername; 66 | public final Password httpBasicAuthPassword; 67 | 68 | // New configuration fields 69 | public final Boolean compressionEnabled; 70 | public final Double rateLimitRequestsPerSecond; 71 | public final Integer rateLimitMaxConcurrent; 72 | public final Long retryBackoffInitialMs; 73 | public final Long retryBackoffMaxMs; 74 | public final Integer retryMaxAttempts; 75 | 76 | public ServerSentEventsSourceConnectorConfig(Map originals) { 77 | super(config(), originals); 78 | this.sseUri = this.getString(SSE_URI); 79 | this.topic = this.getString(TOPIC); 80 | this.httpBasicAuth = this.getBoolean(HTTP_BASIC_AUTH); 81 | this.httpBasicAuthUsername = this.getString(HTTP_BASIC_AUTH_USERNAME); 82 | this.httpBasicAuthPassword = this.getPassword(HTTP_BASIC_AUTH_PASSWORD); 83 | 84 | // Initialize new configuration fields 85 | this.compressionEnabled = this.getBoolean(COMPRESSION_ENABLED); 86 | this.rateLimitRequestsPerSecond = this.getDouble(RATE_LIMIT_REQUESTS_PER_SECOND); 87 | this.rateLimitMaxConcurrent = this.getInt(RATE_LIMIT_MAX_CONCURRENT); 88 | this.retryBackoffInitialMs = this.getLong(RETRY_BACKOFF_INITIAL_MS); 89 | this.retryBackoffMaxMs = this.getLong(RETRY_BACKOFF_MAX_MS); 90 | this.retryMaxAttempts = this.getInt(RETRY_MAX_ATTEMPTS); 91 | } 92 | 93 | 94 | public static ConfigDef config() { 95 | 96 | return new ConfigDef() 97 | .define( 98 | ConfigKeyBuilder.of(SSE_URI, Type.STRING) 99 | .documentation(SSE_URI_DOC) 100 | .importance(Importance.HIGH) 101 | .build() 102 | ) 103 | .define( 104 | ConfigKeyBuilder.of(TOPIC, Type.STRING) 105 | .documentation(TOPIC_DOC) 106 | .importance(Importance.HIGH) 107 | .build() 108 | ) 109 | .define( 110 | ConfigKeyBuilder.of(HTTP_BASIC_AUTH, Type.BOOLEAN) 111 | .documentation(HTTP_BASIC_AUTH_DOC) 112 | .importance(Importance.MEDIUM) 113 | .defaultValue(false) 114 | .build() 115 | ) 116 | .define( 117 | ConfigKeyBuilder.of(HTTP_BASIC_AUTH_USERNAME, Type.STRING) 118 | .documentation(HTTP_BASIC_AUTH_USERNAME_DOC) 119 | .importance(Importance.MEDIUM) 120 | .defaultValue(null) 121 | .build() 122 | ) 123 | .define( 124 | ConfigKeyBuilder.of(HTTP_BASIC_AUTH_PASSWORD, Type.PASSWORD) 125 | .documentation(HTTP_BASIC_AUTH_PASSWORD_DOC) 126 | .importance(Importance.MEDIUM) 127 | .defaultValue(null) 128 | .build() 129 | ) 130 | .define( 131 | ConfigKeyBuilder.of(COMPRESSION_ENABLED, Type.BOOLEAN) 132 | .documentation(COMPRESSION_ENABLED_DOC) 133 | .importance(Importance.LOW) 134 | .defaultValue(true) 135 | .build() 136 | ) 137 | .define( 138 | ConfigKeyBuilder.of(RATE_LIMIT_REQUESTS_PER_SECOND, Type.DOUBLE) 139 | .documentation(RATE_LIMIT_REQUESTS_PER_SECOND_DOC) 140 | .importance(Importance.LOW) 141 | .defaultValue(null) 142 | .build() 143 | ) 144 | .define( 145 | ConfigKeyBuilder.of(RATE_LIMIT_MAX_CONCURRENT, Type.INT) 146 | .documentation(RATE_LIMIT_MAX_CONCURRENT_DOC) 147 | .importance(Importance.LOW) 148 | .defaultValue(null) 149 | .build() 150 | ) 151 | .define( 152 | ConfigKeyBuilder.of(RETRY_BACKOFF_INITIAL_MS, Type.LONG) 153 | .documentation(RETRY_BACKOFF_INITIAL_MS_DOC) 154 | .importance(Importance.LOW) 155 | .defaultValue(2000L) 156 | .build() 157 | ) 158 | .define( 159 | ConfigKeyBuilder.of(RETRY_BACKOFF_MAX_MS, Type.LONG) 160 | .documentation(RETRY_BACKOFF_MAX_MS_DOC) 161 | .importance(Importance.LOW) 162 | .defaultValue(30000L) 163 | .build() 164 | ) 165 | .define( 166 | ConfigKeyBuilder.of(RETRY_MAX_ATTEMPTS, Type.INT) 167 | .documentation(RETRY_MAX_ATTEMPTS_DOC) 168 | .importance(Importance.LOW) 169 | .defaultValue(-1) 170 | .build() 171 | ); 172 | 173 | } 174 | 175 | public Map getHttpHeaders() { 176 | return originalsWithPrefix(HTTP_HEADER_PREFIX, true); 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 4.0.0 5 | 6 | com.github.cjmatta.kafka.connect 7 | kafka-connect-sse 8 | 1.4 9 | 10 | 11 | 11 12 | 11 13 | 3.9.0 14 | 15 | 16 | 17 | confluent 18 | Confluent 19 | https://packages.confluent.io/maven/ 20 | 21 | 22 | 23 | 24 | confluent 25 | Confluent 26 | https://packages.confluent.io/maven/ 27 | 28 | 29 | 30 | 31 | 32 | org.apache.kafka 33 | connect-api 34 | provided 35 | ${kafka.version} 36 | 37 | 38 | org.apache.kafka 39 | connect-runtime 40 | ${kafka.version} 41 | provided 42 | 43 | 44 | com.github.jcustenborder.kafka.connect 45 | connect-utils 46 | 0.7.177 47 | 48 | 49 | javax.ws.rs 50 | javax.ws.rs-api 51 | 2.1.1 52 | 53 | 54 | org.glassfish.jersey.core 55 | jersey-common 56 | 2.34 57 | 58 | 59 | org.glassfish.jersey.core 60 | jersey-client 61 | 2.34 62 | 63 | 64 | org.glassfish.jersey.ext 65 | jersey-bean-validation 66 | 2.34 67 | 68 | 69 | org.glassfish.jersey.inject 70 | jersey-hk2 71 | 2.34 72 | 73 | 74 | org.glassfish.jersey.media 75 | jersey-media-sse 76 | 2.34 77 | 78 | 79 | org.mockito 80 | mockito-inline 81 | 5.2.0 82 | test 83 | 84 | 85 | net.bytebuddy 86 | byte-buddy 87 | 1.14.12 88 | test 89 | 90 | 91 | net.bytebuddy 92 | byte-buddy-agent 93 | 1.14.12 94 | test 95 | 96 | 97 | 98 | org.junit.jupiter 99 | junit-jupiter-api 100 | 5.9.2 101 | test 102 | 103 | 104 | org.junit.jupiter 105 | junit-jupiter-engine 106 | 5.9.2 107 | test 108 | 109 | 110 | org.junit.jupiter 111 | junit-jupiter-params 112 | 5.9.2 113 | test 114 | 115 | 116 | jar 117 | 118 | kafka-connect-sse 119 | A Kafka Connect Source Connector for capturing Server Sent Events 120 | 121 | 122 | scm:git:https://github.com/cjmatta/kafka-connect-sse.git 123 | scm:git:git@github.com:cjmatta/kafka-connect-sse.git 124 | https://github.com/cjmatta/kafka-connect-sse 125 | 126 | 127 | github 128 | https://github.com/cjmatta/kafka-connect-sse/issues 129 | 130 | 131 | 132 | 133 | 134 | org.apache.maven.plugins 135 | maven-compiler-plugin 136 | 3.13.0 137 | 138 | 11 139 | 11 140 | 141 | 142 | 143 | org.apache.maven.plugins 144 | maven-surefire-plugin 145 | 3.2.5 146 | 147 | -Dnet.bytebuddy.experimental=true 148 | 149 | 150 | 151 | org.apache.maven.plugins 152 | maven-javadoc-plugin 153 | 3.10.1 154 | 155 | 11 156 | 157 | 158 | 159 | org.apache.maven.plugins 160 | maven-failsafe-plugin 161 | 3.2.5 162 | 163 | 164 | 165 | integration-test 166 | verify 167 | 168 | 169 | 170 | 171 | 172 | io.confluent 173 | kafka-connect-maven-plugin 174 | 0.12.0 175 | 176 | 177 | hub 178 | 179 | kafka-connect 180 | 181 | 182 | Kafka Connect SSE 183 | https://github.com/cjmatta/kafka-connect-sse/blob/master/README.md 184 | A Kafka Connect source connector for Server Sent Events 185 | cjmatta 186 | user 187 | Christopher Matta 188 | https://github.com/cjmatta 189 | https://github.com/cjmatta/kafka-connect-sse/issues 190 | Support provided through community involvement. 191 | assets/HTML5_Badge_256.png 192 | 193 | source 194 | 195 | 196 | SSE 197 | Server Sent Events 198 | 199 | 200 | org.reflections:reflections 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | src/main/resources 210 | true 211 | 212 | 213 | 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /src/test/java/com/github/cjmatta/kafka/connect/sse/ServerSentEventClientTest.java: -------------------------------------------------------------------------------- 1 | package com.github.cjmatta.kafka.connect.sse; 2 | 3 | import org.junit.jupiter.api.AfterEach; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.mockito.ArgumentCaptor; 7 | import org.mockito.MockedStatic; 8 | import org.mockito.Mockito; 9 | 10 | import javax.ws.rs.client.Client; 11 | import javax.ws.rs.client.Invocation; 12 | import javax.ws.rs.client.WebTarget; 13 | import javax.ws.rs.sse.InboundSseEvent; 14 | import javax.ws.rs.sse.SseEventSource; 15 | 16 | import java.io.IOException; 17 | import java.net.URI; 18 | import java.util.Base64; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.function.Consumer; 22 | 23 | import static org.junit.jupiter.api.Assertions.assertEquals; 24 | import static org.junit.jupiter.api.Assertions.assertFalse; 25 | import static org.junit.jupiter.api.Assertions.assertTrue; 26 | import static org.mockito.Mockito.*; 27 | 28 | public class ServerSentEventClientTest { 29 | private WebTarget webTarget; 30 | private SseEventSource sseEventSource; 31 | private SseEventSource.Builder sseEventSourceBuilder; 32 | private ServerSentEventClient sseClient; 33 | private MockedStatic mockedSseEventSource; 34 | private Invocation.Builder invocationBuilder; 35 | 36 | @BeforeEach 37 | public void setUp() { 38 | Client client = mock(Client.class); 39 | webTarget = mock(WebTarget.class); 40 | sseEventSource = mock(SseEventSource.class); 41 | sseEventSourceBuilder = mock(SseEventSource.Builder.class); 42 | invocationBuilder = mock(Invocation.Builder.class); 43 | mockedSseEventSource = Mockito.mockStatic(SseEventSource.class); 44 | 45 | // Mock URI for the WebTarget to prevent NullPointerException in metrics tests 46 | URI mockUri = URI.create("http://test.example.com/events"); 47 | when(webTarget.getUri()).thenReturn(mockUri); 48 | 49 | when(client.target(anyString())).thenReturn(webTarget); 50 | when(webTarget.request()).thenReturn(invocationBuilder); 51 | when(invocationBuilder.header(eq("Authorization"), anyString())).thenReturn(invocationBuilder); 52 | when(invocationBuilder.header(eq("Custom-Header"), anyString())).thenReturn(invocationBuilder); 53 | 54 | when(invocationBuilder.header(anyString(), anyString())).thenReturn(invocationBuilder); 55 | when(invocationBuilder.accept(anyString())).thenReturn(invocationBuilder); 56 | when(SseEventSource.target(webTarget)).thenReturn(sseEventSourceBuilder); // Mocking static method 57 | when(sseEventSourceBuilder.reconnectingEvery(anyLong(), any())).thenReturn(sseEventSourceBuilder); 58 | when(sseEventSourceBuilder.build()).thenReturn(sseEventSource); 59 | 60 | final Map headers = Map.of("Custom-Header", "HeaderValue"); 61 | sseClient = new ServerSentEventClient(client, webTarget, sseEventSource, null, "username", "password", headers); 62 | } 63 | @AfterEach 64 | public void tearDown() { 65 | mockedSseEventSource.close(); // Release the static mock 66 | } 67 | 68 | @Test 69 | public void testStart() throws Exception { 70 | sseClient.start(); 71 | verify(sseEventSource).register(any(Consumer.class), any(Consumer.class)); 72 | verify(sseEventSource).open(); 73 | } 74 | 75 | @Test 76 | public void testStop() { 77 | sseClient.stop(); 78 | verify(sseEventSource).close(); 79 | } 80 | 81 | @Test 82 | public void testBasicAuth() throws Exception { 83 | sseClient.start(); 84 | 85 | String expectedHeader = "Basic " + Base64.getEncoder().encodeToString("username:password".getBytes()); 86 | verify(invocationBuilder).header("Authorization", expectedHeader); 87 | } 88 | 89 | @Test 90 | public void testCustomRequestHeader() throws Exception { 91 | sseClient.start(); 92 | 93 | final String expectedHeader = "HeaderValue"; 94 | verify(invocationBuilder).header("Custom-Header", expectedHeader); 95 | } 96 | 97 | @Test 98 | public void testGetRecords() throws InterruptedException { 99 | InboundSseEvent event1 = mock(InboundSseEvent.class); 100 | InboundSseEvent event2 = mock(InboundSseEvent.class); 101 | when(event1.getName()).thenReturn("event1"); 102 | when(event2.getName()).thenReturn("event2"); 103 | 104 | sseClient.getQueueForTesting().add(event1); 105 | sseClient.getQueueForTesting().add(event2); 106 | 107 | List records = sseClient.getRecords(); 108 | 109 | assertEquals(2, records.size()); 110 | assertTrue(records.contains(event1)); 111 | assertTrue(records.contains(event2)); 112 | } 113 | 114 | @Test 115 | public void testOnMessage() throws IOException { 116 | InboundSseEvent event = mock(InboundSseEvent.class); 117 | when(event.getId()).thenReturn("1"); 118 | when(event.getName()).thenReturn("testEvent"); 119 | when(event.readData()).thenReturn("testData"); 120 | 121 | ArgumentCaptor> messageCaptor = ArgumentCaptor.forClass(Consumer.class); 122 | ArgumentCaptor> errorCaptor = ArgumentCaptor.forClass(Consumer.class); 123 | 124 | sseClient.start(); 125 | 126 | verify(sseEventSource).register(messageCaptor.capture(), errorCaptor.capture()); 127 | messageCaptor.getValue().accept(event); 128 | 129 | assertEquals(1, sseClient.getQueueForTesting().size()); 130 | assertTrue(sseClient.getQueueForTesting().contains(event)); 131 | } 132 | 133 | /** 134 | * Tests that metrics are properly collected when events are processed. 135 | * This verifies our metrics collection implementation correctly tracks: 136 | * - Event counts 137 | * - Data bytes 138 | * - Event type statistics 139 | */ 140 | @Test 141 | public void testMetricsCollection() throws IOException { 142 | // Setup test events with different types and data sizes 143 | InboundSseEvent event1 = mock(InboundSseEvent.class); 144 | when(event1.getId()).thenReturn("1"); 145 | when(event1.getName()).thenReturn("typeA"); 146 | when(event1.readData()).thenReturn("small data"); 147 | 148 | InboundSseEvent event2 = mock(InboundSseEvent.class); 149 | when(event2.getId()).thenReturn("2"); 150 | when(event2.getName()).thenReturn("typeB"); 151 | when(event2.readData()).thenReturn("this is a longer data payload for testing byte counting"); 152 | 153 | InboundSseEvent event3 = mock(InboundSseEvent.class); 154 | when(event3.getId()).thenReturn("3"); 155 | when(event3.getName()).thenReturn("typeA"); 156 | when(event3.readData()).thenReturn("another typeA event"); 157 | 158 | // Get message handler 159 | ArgumentCaptor> messageCaptor = ArgumentCaptor.forClass(Consumer.class); 160 | sseClient.start(); 161 | verify(sseEventSource).register(messageCaptor.capture(), any(Consumer.class)); 162 | 163 | // Process events through the onMessage handler 164 | Consumer onMessageHandler = messageCaptor.getValue(); 165 | onMessageHandler.accept(event1); 166 | onMessageHandler.accept(event2); 167 | onMessageHandler.accept(event3); 168 | 169 | // Get metrics and verify counts 170 | Map metrics = sseClient.getMetrics(); 171 | 172 | // Check basic event metrics 173 | assertEquals(3L, metrics.get("events.total")); 174 | 175 | // The total bytes should be the sum of all event data lengths 176 | int expectedBytes = "small data".length() + 177 | "this is a longer data payload for testing byte counting".length() + 178 | "another typeA event".length(); 179 | assertEquals((long)expectedBytes, metrics.get("events.bytes")); 180 | 181 | // Verify queue size metrics 182 | assertEquals(3, metrics.get("queue.size")); 183 | assertEquals(3L, metrics.get("queue.maxSize")); 184 | 185 | // Verify event type counts are tracked correctly 186 | @SuppressWarnings("unchecked") 187 | Map eventTypes = (Map) metrics.get("events.byType"); 188 | assertEquals(2L, eventTypes.get("typeA")); 189 | assertEquals(1L, eventTypes.get("typeB")); 190 | 191 | // Verify connection metrics were updated 192 | assertEquals("CONNECTED", metrics.get("connection.state")); 193 | assertEquals(1L, metrics.get("connection.attempts")); 194 | assertEquals(1L, metrics.get("connection.successful")); 195 | assertEquals(0L, metrics.get("connection.failed")); 196 | assertEquals(0L, metrics.get("connection.errors")); 197 | } 198 | 199 | /** 200 | * Tests the connection health check functionality. 201 | * This verifies: 202 | * - A newly connected client is considered healthy 203 | * - A client with no events for longer than the idle timeout is considered unhealthy 204 | * - A client in a failed state is considered unhealthy 205 | */ 206 | @Test 207 | public void testConnectionHealthCheck() throws IOException, NoSuchFieldException, IllegalAccessException { 208 | // Start with a healthy connection 209 | sseClient.start(); 210 | 211 | // A freshly started connection should be healthy 212 | assertTrue(sseClient.isConnectionHealthy()); 213 | 214 | // Set a shorter idle timeout for testing 215 | sseClient.setIdleTimeout(1000); // 1 second 216 | 217 | // Use reflection to simulate a stalled connection by setting lastEventTimestamp to a time in the past 218 | java.lang.reflect.Field lastEventTimestampField = ServerSentEventClient.class.getDeclaredField("lastEventTimestamp"); 219 | lastEventTimestampField.setAccessible(true); 220 | 221 | // Set the last event time to 2 seconds ago (exceeding our 1-second timeout) 222 | long stalledTimestamp = System.currentTimeMillis() - 2000; 223 | lastEventTimestampField.set(sseClient, stalledTimestamp); 224 | 225 | // Now the connection should be considered unhealthy due to idle timeout 226 | assertFalse(sseClient.isConnectionHealthy()); 227 | 228 | // Reset the timestamp to be recent 229 | lastEventTimestampField.set(sseClient, System.currentTimeMillis()); 230 | 231 | // Connection should be healthy again 232 | assertTrue(sseClient.isConnectionHealthy()); 233 | 234 | // Test connection state affect on health 235 | // Use reflection to change the connection state to FAILED 236 | java.lang.reflect.Field connectionStateField = ServerSentEventClient.class.getDeclaredField("connectionState"); 237 | connectionStateField.setAccessible(true); 238 | connectionStateField.set(sseClient, ServerSentEventClient.ConnectionState.FAILED); 239 | 240 | // A failed connection should be considered unhealthy 241 | assertFalse(sseClient.isConnectionHealthy()); 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /manage-connector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # manage-connector.sh 4 | # 5 | # A unified script for managing Server Sent Events connector in Confluent Cloud 6 | # Combines upload-to-confluent-cloud.sh and create_connector_ccloud.sh 7 | # Adds capabilities to delete the connector plugin or cluster 8 | # Uses environment variables for credentials (works with op run) 9 | # 10 | 11 | # Default values - customize these as needed 12 | PLUGIN_NAME="kafka-connect-sse" 13 | PLUGIN_FILE="$(pwd)/target/components/packages/cjmatta-kafka-connect-sse-1.4.zip" 14 | CONNECTOR_CLASS="com.github.cjmatta.kafka.connect.sse.ServerSentEventsSourceConnector" 15 | CLUSTER_ID="lkc-zm1p10" 16 | CLOUD_PROVIDER="aws" 17 | CONNECTOR_DESCRIPTION="A Kafka Connect source connector for Server Sent Events" 18 | DOCUMENTATION_LINK="https://github.com/cjmatta/kafka-connect-sse" 19 | CONNECTOR_TYPE="Source" 20 | CONNECTOR_NAME="Wikipedia SSE" 21 | TASKS_MAX="1" 22 | SSE_URI="https://stream.wikimedia.org/v2/stream/recentchange" 23 | TOPIC="wikimedia-raw" 24 | COMPRESSION_ENABLED="true" 25 | 26 | # Dynamic plugin ID - will be set after upload 27 | PLUGIN_ID="" 28 | 29 | # Format text styles 30 | BOLD="\033[1m" 31 | RED="\033[31m" 32 | GREEN="\033[32m" 33 | YELLOW="\033[33m" 34 | BLUE="\033[34m" 35 | RESET="\033[0m" 36 | 37 | # Configuration template embedded in script 38 | # This will be filled in with dynamic values including plugin ID 39 | # and credentials from environment variables 40 | CONFIG_TEMPLATE='{ 41 | "name": "{{CONNECTOR_NAME}}", 42 | "config": { 43 | "connector.class": "{{CONNECTOR_CLASS}}", 44 | "kafka.auth.mode": "KAFKA_API_KEY", 45 | "kafka.api.key": "{{KAFKA_API_KEY}}", 46 | "kafka.api.secret": "{{KAFKA_API_SECRET}}", 47 | "tasks.max": "{{TASKS_MAX}}", 48 | "confluent.custom.plugin.id": "{{PLUGIN_ID}}", 49 | "confluent.connector.type": "CUSTOM", 50 | "confluent.custom.connection.endpoints": "stream.wikimedia.org:443", 51 | "confluent.custom.schema.registry.auto": "true", 52 | "key.converter": "io.confluent.connect.json.JsonSchemaConverter", 53 | "compression.enabled": "{{COMPRESSION_ENABLED}}", 54 | "sse.uri": "{{SSE_URI}}", 55 | "topic": "{{TOPIC}}", 56 | "value.converter": "io.confluent.connect.json.JsonSchemaConverter" 57 | } 58 | }' 59 | 60 | # Function to generate configuration with injected values 61 | function generate_config { 62 | local config_file="$1" 63 | 64 | # Check for environment variables 65 | if [ -z "$KAFKA_API_KEY" ] || [ -z "$KAFKA_API_SECRET" ]; then 66 | echo -e "${YELLOW}Environment variables KAFKA_API_KEY and/or KAFKA_API_SECRET are not set.${RESET}" 67 | echo -e "These can be set using:" 68 | echo -e " export KAFKA_API_KEY=your_api_key" 69 | echo -e " export KAFKA_API_SECRET=your_api_secret" 70 | echo -e "Or by using 1Password CLI:" 71 | echo -e " op run --env-file=.env -- $0 $COMMAND [options]" 72 | echo 73 | 74 | # Ask for credentials interactively if not in environment 75 | read -p "Enter Kafka API Key: " KAFKA_API_KEY 76 | read -s -p "Enter Kafka API Secret: " KAFKA_API_SECRET 77 | echo 78 | else 79 | echo -e "${GREEN}Using API credentials from environment variables.${RESET}" 80 | fi 81 | 82 | # Check if plugin ID is available 83 | if [ -z "$PLUGIN_ID" ]; then 84 | echo -e "${YELLOW}Warning: No plugin ID provided. The connector might not work correctly.${RESET}" 85 | echo -e "You should upload the plugin first to get a plugin ID." 86 | read -p "Do you want to continue anyway? (y/n) " -n 1 -r 87 | echo 88 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 89 | echo -e "${YELLOW}Operation cancelled.${RESET}" 90 | exit 0 91 | fi 92 | 93 | # Ask for plugin ID if continuing 94 | read -p "Please enter the plugin ID manually: " PLUGIN_ID 95 | fi 96 | 97 | # Replace placeholders in template 98 | local config=$(echo "$CONFIG_TEMPLATE" | \ 99 | sed "s|{{CONNECTOR_NAME}}|$CONNECTOR_NAME|g" | \ 100 | sed "s|{{CONNECTOR_CLASS}}|$CONNECTOR_CLASS|g" | \ 101 | sed "s|{{KAFKA_API_KEY}}|$KAFKA_API_KEY|g" | \ 102 | sed "s|{{KAFKA_API_SECRET}}|$KAFKA_API_SECRET|g" | \ 103 | sed "s|{{TASKS_MAX}}|$TASKS_MAX|g" | \ 104 | sed "s|{{PLUGIN_ID}}|$PLUGIN_ID|g" | \ 105 | sed "s|{{SSE_URI}}|$SSE_URI|g" | \ 106 | sed "s|{{USER_AGENT}}|$USER_AGENT|g" | \ 107 | sed "s|{{CONTACT_INFO}}|$CONTACT_INFO|g" | \ 108 | sed "s|{{COMPRESSION_ENABLED}}|$COMPRESSION_ENABLED|g" | \ 109 | sed "s|{{TOPIC}}|$TOPIC|g") 110 | 111 | # Write config to file 112 | echo "$config" > "$config_file" 113 | echo -e "${GREEN}Configuration file generated at: $config_file${RESET}" 114 | 115 | # Clear sensitive variables 116 | KAFKA_API_KEY="" 117 | KAFKA_API_SECRET="" 118 | } 119 | 120 | # Function to display usage information 121 | function show_usage { 122 | echo -e "${BOLD}USAGE:${RESET}" 123 | echo -e " $0 [COMMAND] [OPTIONS]" 124 | echo 125 | echo -e "${BOLD}COMMANDS:${RESET}" 126 | echo -e " ${GREEN}upload${RESET} Upload the connector plugin to Confluent Cloud" 127 | echo -e " ${GREEN}create${RESET} Create a connector instance on a cluster" 128 | echo -e " ${GREEN}delete-plugin${RESET} Delete the connector plugin from Confluent Cloud" 129 | echo -e " ${GREEN}delete-connector${RESET} Delete the connector instance from a cluster" 130 | echo -e " ${GREEN}status${RESET} Check the status of connectors and plugins" 131 | echo -e " ${GREEN}help${RESET} Display this help message" 132 | echo 133 | echo -e "${BOLD}OPTIONS:${RESET}" 134 | echo -e " ${BLUE}--name${RESET} NAME Connector plugin name (default: $PLUGIN_NAME)" 135 | echo -e " ${BLUE}--file${RESET} FILE Path to connector plugin file (default: $PLUGIN_FILE)" 136 | echo -e " ${BLUE}--class${RESET} CLASS Connector class name (default: $CONNECTOR_CLASS)" 137 | echo -e " ${BLUE}--cluster${RESET} ID Cluster ID (default: $CLUSTER_ID)" 138 | echo -e " ${BLUE}--cloud${RESET} PROVIDER Cloud provider (default: $CLOUD_PROVIDER)" 139 | echo -e " ${BLUE}--plugin-id${RESET} ID Plugin ID (for delete-plugin command or manual specification)" 140 | echo -e " ${BLUE}--connector-id${RESET} ID Connector ID (for delete-connector command)" 141 | echo -e " ${BLUE}--topic${RESET} TOPIC Topic name for the connector (default: $TOPIC)" 142 | echo -e " ${BLUE}--sse-uri${RESET} URI Server-Sent Events URI (default: $SSE_URI)" 143 | echo -e " ${BLUE}--tasks${RESET} NUM Maximum number of tasks (default: $TASKS_MAX)" 144 | echo 145 | echo -e "${BOLD}ENVIRONMENT VARIABLES:${RESET}" 146 | echo -e " ${GREEN}KAFKA_API_KEY${RESET} Confluent Cloud API Key" 147 | echo -e " ${GREEN}KAFKA_API_SECRET${RESET} Confluent Cloud API Secret" 148 | echo 149 | echo -e "${BOLD}EXAMPLE WITH 1PASSWORD:${RESET}" 150 | echo -e " op run --env-file=.env -- $0 create" 151 | echo -e " (where .env contains KAFKA_API_KEY and KAFKA_API_SECRET)" 152 | } 153 | 154 | # Function to upload connector plugin to Confluent Cloud 155 | function upload_plugin { 156 | echo -e "${BOLD}Uploading connector plugin to Confluent Cloud...${RESET}" 157 | echo -e "Plugin name: ${GREEN}$PLUGIN_NAME${RESET}" 158 | echo -e "Plugin file: ${GREEN}$PLUGIN_FILE${RESET}" 159 | echo -e "Connector class: ${GREEN}$CONNECTOR_CLASS${RESET}" 160 | echo -e "Cloud provider: ${GREEN}$CLOUD_PROVIDER${RESET}" 161 | 162 | # Check if plugin file exists 163 | if [ ! -f "$PLUGIN_FILE" ]; then 164 | echo -e "${RED}Error: Plugin file not found: $PLUGIN_FILE${RESET}" 165 | echo -e "Make sure you've built the project with 'mvn clean package'" 166 | exit 1 167 | fi 168 | 169 | # Run confluent CLI command to create custom plugin 170 | local result=$(confluent connect custom-plugin create "$PLUGIN_NAME" \ 171 | --plugin-file "$PLUGIN_FILE" \ 172 | --connector-class "$CONNECTOR_CLASS" \ 173 | --description "$CONNECTOR_DESCRIPTION" \ 174 | --documentation-link "$DOCUMENTATION_LINK" \ 175 | --connector-type "$CONNECTOR_TYPE" \ 176 | --sensitive-properties http.basic.auth.password \ 177 | --cloud "$CLOUD_PROVIDER" 2>&1) 178 | 179 | # Check if command succeeded 180 | if [ $? -eq 0 ]; then 181 | echo -e "${GREEN}Plugin uploaded successfully${RESET}" 182 | 183 | # Extract plugin ID from output 184 | # The plugin ID typically appears in format "ccp-xxxxx" 185 | PLUGIN_ID=$(echo "$result" | grep -o 'ccp-[a-z0-9]*' | head -1) 186 | 187 | if [ -n "$PLUGIN_ID" ]; then 188 | echo -e "${GREEN}Extracted plugin ID: $PLUGIN_ID${RESET}" 189 | else 190 | echo -e "${YELLOW}Warning: Could not extract plugin ID automatically.${RESET}" 191 | echo -e "Output from command: $result" 192 | 193 | # Prompt for manual entry if extraction failed 194 | read -p "Please enter the plugin ID manually (format: ccp-xxxxx): " PLUGIN_ID 195 | fi 196 | else 197 | echo -e "${RED}Failed to upload plugin${RESET}" 198 | echo -e "Error: $result" 199 | exit 1 200 | fi 201 | } 202 | 203 | # Function to create connector instance on a cluster 204 | function create_connector { 205 | echo -e "${BOLD}Creating connector instance on cluster...${RESET}" 206 | echo -e "Cluster ID: ${GREEN}$CLUSTER_ID${RESET}" 207 | 208 | # Generate a temporary config file 209 | local config_file="/tmp/connector-config-$$.json" 210 | generate_config "$config_file" 211 | 212 | echo -e "Using configuration file: ${GREEN}$config_file${RESET}" 213 | 214 | # Run confluent CLI command to create connector 215 | confluent connect cluster create --config-file "$config_file" --cluster "$CLUSTER_ID" 216 | 217 | # Check if command succeeded 218 | if [ $? -eq 0 ]; then 219 | echo -e "${GREEN}Connector created successfully${RESET}" 220 | 221 | # Cleanup temporary config file with sensitive data 222 | rm -f "$config_file" 223 | else 224 | echo -e "${RED}Failed to create connector${RESET}" 225 | 226 | # Cleanup temporary config file with sensitive data even on failure 227 | rm -f "$config_file" 228 | exit 1 229 | fi 230 | } 231 | 232 | # Function to delete connector plugin from Confluent Cloud 233 | function delete_plugin { 234 | # Check if plugin ID is provided 235 | if [ -z "$PLUGIN_ID" ]; then 236 | # List available plugins and ask user to select one 237 | echo -e "${YELLOW}No plugin ID provided. Listing available plugins...${RESET}" 238 | confluent connect custom-plugin list 239 | echo 240 | echo -e "Please run the command again with --plugin-id parameter:" 241 | echo -e " $0 delete-plugin --plugin-id " 242 | exit 1 243 | fi 244 | 245 | echo -e "${BOLD}Deleting connector plugin from Confluent Cloud...${RESET}" 246 | echo -e "Plugin ID: ${GREEN}$PLUGIN_ID${RESET}" 247 | 248 | # Ask for confirmation 249 | read -p "Are you sure you want to delete this plugin? (y/n) " -n 1 -r 250 | echo 251 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 252 | echo -e "${YELLOW}Delete operation cancelled${RESET}" 253 | exit 0 254 | fi 255 | 256 | # Run confluent CLI command to delete custom plugin 257 | confluent connect custom-plugin delete "$PLUGIN_ID" 258 | 259 | # Check if command succeeded 260 | if [ $? -eq 0 ]; then 261 | echo -e "${GREEN}Plugin deleted successfully${RESET}" 262 | else 263 | echo -e "${RED}Failed to delete plugin${RESET}" 264 | exit 1 265 | fi 266 | } 267 | 268 | # Function to delete connector instance from a cluster 269 | function delete_connector { 270 | # Check if connector ID is provided 271 | if [ -z "$CONNECTOR_ID" ]; then 272 | # List available connectors and ask user to select one 273 | echo -e "${YELLOW}No connector ID provided. Listing available connectors...${RESET}" 274 | confluent connect cluster list --cluster "$CLUSTER_ID" 275 | echo 276 | echo -e "Please run the command again with --connector-id parameter:" 277 | echo -e " $0 delete-connector --connector-id --cluster " 278 | exit 1 279 | fi 280 | 281 | echo -e "${BOLD}Deleting connector instance from cluster...${RESET}" 282 | echo -e "Connector ID: ${GREEN}$CONNECTOR_ID${RESET}" 283 | echo -e "Cluster ID: ${GREEN}$CLUSTER_ID${RESET}" 284 | 285 | # Ask for confirmation 286 | read -p "Are you sure you want to delete this connector? (y/n) " -n 1 -r 287 | echo 288 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 289 | echo -e "${YELLOW}Delete operation cancelled${RESET}" 290 | exit 0 291 | fi 292 | 293 | # Run confluent CLI command to delete connector 294 | confluent connect cluster delete "$CONNECTOR_ID" --cluster "$CLUSTER_ID" 295 | 296 | # Check if command succeeded 297 | if [ $? -eq 0 ]; then 298 | echo -e "${GREEN}Connector deleted successfully${RESET}" 299 | else 300 | echo -e "${RED}Failed to delete connector${RESET}" 301 | exit 1 302 | fi 303 | } 304 | 305 | # Function to check connector/plugin status 306 | function check_status { 307 | echo -e "${BOLD}Checking status of connectors and plugins...${RESET}" 308 | 309 | echo -e "\n${BOLD}Available clusters:${RESET}" 310 | confluent connect cluster list 311 | 312 | echo -e "\n${BOLD}Available plugins:${RESET}" 313 | confluent connect custom-plugin list 314 | 315 | if [ -n "$CLUSTER_ID" ]; then 316 | echo -e "\n${BOLD}Connectors on cluster $CLUSTER_ID:${RESET}" 317 | confluent connect cluster list --cluster "$CLUSTER_ID" 318 | fi 319 | } 320 | 321 | # Parse command line arguments 322 | COMMAND="" 323 | PLUGIN_ID="" 324 | CONNECTOR_ID="" 325 | 326 | while [[ $# -gt 0 ]]; do 327 | case "$1" in 328 | upload|create|delete-plugin|delete-connector|status|help) 329 | COMMAND="$1" 330 | ;; 331 | --name) 332 | PLUGIN_NAME="$2" 333 | shift 334 | ;; 335 | --file) 336 | PLUGIN_FILE="$2" 337 | shift 338 | ;; 339 | --class) 340 | CONNECTOR_CLASS="$2" 341 | shift 342 | ;; 343 | --cluster) 344 | CLUSTER_ID="$2" 345 | shift 346 | ;; 347 | --cloud) 348 | CLOUD_PROVIDER="$2" 349 | shift 350 | ;; 351 | --plugin-id) 352 | PLUGIN_ID="$2" 353 | shift 354 | ;; 355 | --connector-id) 356 | CONNECTOR_ID="$2" 357 | shift 358 | ;; 359 | --topic) 360 | TOPIC="$2" 361 | shift 362 | ;; 363 | --sse-uri) 364 | SSE_URI="$2" 365 | shift 366 | ;; 367 | --tasks) 368 | TASKS_MAX="$2" 369 | shift 370 | ;; 371 | *) 372 | echo -e "${RED}Unknown option: $1${RESET}" 373 | show_usage 374 | exit 1 375 | ;; 376 | esac 377 | shift 378 | done 379 | 380 | # Execute the appropriate function based on the command 381 | case "$COMMAND" in 382 | upload) 383 | upload_plugin 384 | ;; 385 | create) 386 | create_connector 387 | ;; 388 | delete-plugin) 389 | delete_plugin 390 | ;; 391 | delete-connector) 392 | delete_connector 393 | ;; 394 | status) 395 | check_status 396 | ;; 397 | help|"") 398 | show_usage 399 | ;; 400 | *) 401 | echo -e "${RED}Unknown command: $COMMAND${RESET}" 402 | show_usage 403 | exit 1 404 | ;; 405 | esac 406 | 407 | exit 0 -------------------------------------------------------------------------------- /src/main/java/com/github/cjmatta/kafka/connect/sse/ServerSentEventClient.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2019 Christopher Matta (chris.matta@gmail.com) 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * Unless required by applicable law or agreed to in writing, software 8 | * distributed under the License is distributed on an "AS IS" BASIS, 9 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | * See the License for the specific language governing permissions and 11 | * limitations under the License. 12 | * 13 | */ 14 | package com.github.cjmatta.kafka.connect.sse; 15 | 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | import org.glassfish.jersey.media.sse.SseFeature; 19 | 20 | import javax.ws.rs.client.Client; 21 | import javax.ws.rs.client.ClientBuilder; 22 | import javax.ws.rs.client.Invocation; 23 | import javax.ws.rs.client.WebTarget; 24 | import javax.ws.rs.sse.InboundSseEvent; 25 | import javax.ws.rs.sse.SseEventSource; 26 | 27 | import java.io.Closeable; 28 | import java.io.IOException; 29 | import java.util.Base64; 30 | import java.util.LinkedList; 31 | import java.util.List; 32 | import java.util.Map; 33 | import java.util.concurrent.BlockingQueue; 34 | import java.util.concurrent.ConcurrentHashMap; 35 | import java.util.concurrent.LinkedBlockingDeque; 36 | import java.util.concurrent.TimeUnit; 37 | import java.util.concurrent.atomic.AtomicLong; 38 | 39 | /** 40 | * Client for Server Sent Events that connects to an SSE source and processes events. 41 | * Handles authentication, connection lifecycle, and event queuing. 42 | */ 43 | public class ServerSentEventClient implements Closeable { 44 | private static final Logger log = LoggerFactory.getLogger(ServerSentEventClient.class); 45 | 46 | /** 47 | * Enum to track the connection state of the SSE client. 48 | * This helps in diagnosing connection issues and understanding the lifecycle. 49 | */ 50 | public enum ConnectionState { 51 | INITIALIZED, // Client is created but not yet connected 52 | CONNECTING, // Connection attempt in progress 53 | CONNECTED, // Successfully connected to SSE source 54 | DISCONNECTED, // Gracefully disconnected 55 | FAILED // Connection failed or encountered an error 56 | } 57 | 58 | private final Client client; 59 | private final WebTarget source; 60 | private final BlockingQueue queue; 61 | private SseEventSource sse; 62 | 63 | // Store username and password for authentication if provided 64 | private final String username; 65 | private final String password; 66 | 67 | // Additional headers 68 | private final Map headers; 69 | 70 | // Store configuration for enhanced HTTP behavior 71 | private final boolean compressionEnabled; 72 | private final Double rateLimitRequestsPerSecond; 73 | private final Integer rateLimitMaxConcurrent; 74 | private final long retryBackoffInitialMs; 75 | private final long retryBackoffMaxMs; 76 | private final int retryMaxAttempts; 77 | 78 | // Rate limiting and retry fields 79 | private volatile long lastRequestTime = 0; 80 | private volatile int currentRetryAttempt = 0; 81 | 82 | // Track current connection state 83 | private volatile ConnectionState connectionState; 84 | // Store the last time an event was received 85 | private volatile long lastEventTimestamp; 86 | 87 | // Metrics tracking fields 88 | private final AtomicLong totalEventsReceived = new AtomicLong(0); 89 | private final AtomicLong totalBytesReceived = new AtomicLong(0); 90 | private final AtomicLong totalConnectionAttempts = new AtomicLong(0); 91 | private final AtomicLong totalSuccessfulConnections = new AtomicLong(0); 92 | private final AtomicLong totalFailedConnections = new AtomicLong(0); 93 | private final AtomicLong totalConnectionErrors = new AtomicLong(0); 94 | private final AtomicLong totalReconnections = new AtomicLong(0); 95 | 96 | // Performance metrics 97 | private volatile long connectedSince = 0; 98 | private volatile long lastReconnectTime = 0; 99 | private final AtomicLong maxQueueSize = new AtomicLong(0); 100 | 101 | // Event type counters - useful for monitoring specific event patterns 102 | private final Map eventTypeCounters = new ConcurrentHashMap<>(); 103 | 104 | private volatile Throwable error; 105 | 106 | /** 107 | * Creates a new SSE client for the given URL without authentication. 108 | * 109 | * @param url The URL of the SSE stream 110 | */ 111 | public ServerSentEventClient(String url) { 112 | this(url, null, null); 113 | } 114 | 115 | /** 116 | * Creates a new SSE client for the given URL with basic authentication. 117 | * 118 | * @param url The URL of the SSE stream 119 | * @param username Username for basic authentication 120 | * @param password Password for basic authentication 121 | */ 122 | ServerSentEventClient(String url, String username, String password) { 123 | this(url, username, password, null, true, null, null, 2000L, 30000L, -1); 124 | } 125 | 126 | ServerSentEventClient(String url, String username, String password, Map headers) { 127 | this(url, username, password, headers, true, null, null, 2000L, 30000L, -1); 128 | } 129 | 130 | /** 131 | * Creates a new SSE client with full configuration. 132 | * 133 | * @param url The URL of the SSE stream 134 | * @param username Username for basic authentication (null if not needed) 135 | * @param password Password for basic authentication (null if not needed) 136 | * @param headers Custom HTTP headers 137 | * @param compressionEnabled Whether to enable gzip compression 138 | * @param rateLimitRequestsPerSecond Rate limit for requests per second (null if not needed) 139 | * @param rateLimitMaxConcurrent Maximum concurrent connections (null if not needed) 140 | * @param retryBackoffInitialMs Initial backoff time for retries 141 | * @param retryBackoffMaxMs Maximum backoff time for retries 142 | * @param retryMaxAttempts Maximum retry attempts (-1 for unlimited) 143 | */ 144 | public ServerSentEventClient(String url, String username, String password, Map headers, 145 | boolean compressionEnabled, Double rateLimitRequestsPerSecond, 146 | Integer rateLimitMaxConcurrent, long retryBackoffInitialMs, 147 | long retryBackoffMaxMs, int retryMaxAttempts) { 148 | log.info("Initializing SSE Client for URL: {} with enhanced configuration", url); 149 | this.client = createClient(compressionEnabled); 150 | this.source = client.target(url); 151 | this.username = username; 152 | this.password = password; 153 | this.headers = headers; 154 | this.compressionEnabled = compressionEnabled; 155 | this.rateLimitRequestsPerSecond = rateLimitRequestsPerSecond; 156 | this.rateLimitMaxConcurrent = rateLimitMaxConcurrent; 157 | this.retryBackoffInitialMs = retryBackoffInitialMs; 158 | this.retryBackoffMaxMs = retryBackoffMaxMs; 159 | this.retryMaxAttempts = retryMaxAttempts; 160 | this.queue = new LinkedBlockingDeque<>(); 161 | this.connectionState = ConnectionState.INITIALIZED; 162 | this.lastEventTimestamp = System.currentTimeMillis(); 163 | this.currentRetryAttempt = 0; 164 | log.info("SSE Client initialized with compression: {}, rate limit: {}/sec", 165 | compressionEnabled, rateLimitRequestsPerSecond); 166 | } 167 | 168 | /** 169 | * Constructor for testing purposes. 170 | */ 171 | ServerSentEventClient(Client client, WebTarget source, SseEventSource sse, String url, String username, String password, Map headers) { 172 | log.info("Initializing SSE Client for testing"); 173 | this.client = client; 174 | this.source = source; 175 | this.username = username; 176 | this.password = password; 177 | this.headers = headers; 178 | this.compressionEnabled = true; 179 | this.rateLimitRequestsPerSecond = null; 180 | this.rateLimitMaxConcurrent = null; 181 | this.retryBackoffInitialMs = 2000L; 182 | this.retryBackoffMaxMs = 30000L; 183 | this.retryMaxAttempts = -1; 184 | this.queue = new LinkedBlockingDeque<>(); 185 | this.sse = sse; 186 | this.connectionState = ConnectionState.INITIALIZED; 187 | this.lastEventTimestamp = System.currentTimeMillis(); 188 | this.currentRetryAttempt = 0; 189 | log.info("SSE Client initialized in state: {}", connectionState); 190 | } 191 | 192 | /** 193 | * Creates and configures the HTTP client based on configuration. 194 | * 195 | * @param compressionEnabled Whether to enable gzip compression 196 | * @return Configured Jersey Client 197 | */ 198 | private Client createClient(boolean compressionEnabled) { 199 | ClientBuilder builder = ClientBuilder.newBuilder(); 200 | 201 | // Disable Jersey's auto-discovery to avoid classloader conflicts in Kafka Connect 202 | builder.property("jersey.config.client.disableAutoDiscovery", true); 203 | 204 | // Explicitly register SSE feature instead of relying on auto-discovery 205 | builder.register(SseFeature.class); 206 | 207 | if (compressionEnabled) { 208 | // Jersey will automatically handle gzip compression when this property is set 209 | builder.property("jersey.config.client.useEncoding", "gzip"); 210 | } 211 | 212 | return builder.build(); 213 | } 214 | 215 | /** 216 | * Applies rate limiting by sleeping if necessary to respect the configured requests per second. 217 | */ 218 | private void applyRateLimit() { 219 | if (rateLimitRequestsPerSecond == null || rateLimitRequestsPerSecond <= 0) { 220 | return; 221 | } 222 | 223 | long currentTime = System.currentTimeMillis(); 224 | long timeSinceLastRequest = currentTime - lastRequestTime; 225 | long minIntervalMs = (long) (1000.0 / rateLimitRequestsPerSecond); 226 | 227 | if (timeSinceLastRequest < minIntervalMs) { 228 | long sleepTime = minIntervalMs - timeSinceLastRequest; 229 | log.debug("Rate limiting: sleeping for {} ms to respect {} requests/second", sleepTime, rateLimitRequestsPerSecond); 230 | try { 231 | Thread.sleep(sleepTime); 232 | } catch (InterruptedException e) { 233 | Thread.currentThread().interrupt(); 234 | log.warn("Rate limiting sleep interrupted", e); 235 | } 236 | } 237 | 238 | lastRequestTime = System.currentTimeMillis(); 239 | } 240 | 241 | /** 242 | * Starts the SSE client connection to the source. 243 | * Registers handlers for events and errors. 244 | * 245 | * @throws IOException if an error occurs during connection setup 246 | */ 247 | public void start() throws IOException { 248 | try { 249 | log.info("Starting SSE client connection to {}", source.getUri()); 250 | setConnectionState(ConnectionState.CONNECTING); 251 | 252 | Invocation.Builder builder = this.source.request(); 253 | 254 | // Apply basic authentication if credentials are provided 255 | if (username != null && password != null) { 256 | String auth = username + ":" + password; 257 | String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes()); 258 | String authHeader = "Basic " + encodedAuth; 259 | builder.header("Authorization", authHeader); 260 | log.debug("Added Basic Authentication header"); 261 | } 262 | 263 | // Apply compression headers if enabled 264 | if (compressionEnabled) { 265 | builder.header("Accept-Encoding", "gzip, deflate"); 266 | log.debug("Added compression headers"); 267 | } 268 | 269 | // Apply default User-Agent if not provided in custom headers 270 | boolean hasUserAgent = headers != null && headers.containsKey("User-Agent"); 271 | if (!hasUserAgent) { 272 | String defaultUserAgent = "KafkaConnectSSE/1.4 (https://github.com/cjmatta/kafka-connect-sse)"; 273 | builder.header("User-Agent", defaultUserAgent); 274 | log.debug("Added default User-Agent header: {}", defaultUserAgent); 275 | } 276 | 277 | // Custom request headers (can override default User-Agent if specified) 278 | if (headers != null) { 279 | for (Map.Entry entry : headers.entrySet()) { 280 | builder.header(entry.getKey(), entry.getValue()); 281 | log.debug("Added custom header: {}={}", entry.getKey(), entry.getValue()); 282 | } 283 | } 284 | 285 | // Apply rate limiting if configured 286 | if (rateLimitRequestsPerSecond != null && rateLimitRequestsPerSecond > 0) { 287 | applyRateLimit(); 288 | } 289 | 290 | long reconnectInterval = Math.min(retryBackoffInitialMs, 2000L); 291 | log.debug("Configuring SSE event source with reconnection interval of {} milliseconds", reconnectInterval); 292 | sse = SseEventSource 293 | .target(this.source) 294 | .reconnectingEvery(reconnectInterval, TimeUnit.MILLISECONDS) 295 | .build(); 296 | 297 | sse.register(this::onMessage, this::onError); 298 | log.info("Opening SSE client connection..."); 299 | sse.open(); 300 | setConnectionState(ConnectionState.CONNECTED); 301 | log.info("SSE client successfully connected to {}", source.getUri()); 302 | 303 | // Update connection metrics 304 | totalConnectionAttempts.incrementAndGet(); 305 | totalSuccessfulConnections.incrementAndGet(); 306 | connectedSince = System.currentTimeMillis(); 307 | log.info("Connection metrics updated: TotalAttempts={}, TotalSuccess={}", 308 | totalConnectionAttempts.get(), totalSuccessfulConnections.get()); 309 | } catch (Exception e) { 310 | setConnectionState(ConnectionState.FAILED); 311 | log.error("Failed to start SSE client connection: {}", e.getMessage(), e); 312 | totalConnectionAttempts.incrementAndGet(); 313 | totalFailedConnections.incrementAndGet(); 314 | throw new IOException("Failed to establish SSE connection", e); 315 | } 316 | } 317 | 318 | /** 319 | * Stops the SSE client connection. 320 | */ 321 | public void stop() { 322 | log.info("Stopping SSE client connection"); 323 | if (this.sse != null) { 324 | this.sse.close(); 325 | setConnectionState(ConnectionState.DISCONNECTED); 326 | log.info("SSE client connection closed"); 327 | } else { 328 | log.warn("Attempted to stop SSE client, but no active connection exists"); 329 | } 330 | } 331 | 332 | @Override 333 | public void close() { 334 | log.debug("Closing SSE client resources"); 335 | if (client != null) { 336 | client.close(); 337 | log.debug("SSE client resources closed"); 338 | } 339 | } 340 | 341 | /** 342 | * Updates the connection state and logs the transition. 343 | * 344 | * @param newState The new connection state 345 | */ 346 | private void setConnectionState(ConnectionState newState) { 347 | ConnectionState oldState = this.connectionState; 348 | this.connectionState = newState; 349 | log.info("SSE client connection state changed: {} -> {}", oldState, newState); 350 | } 351 | 352 | /** 353 | * Returns the current connection state. 354 | * 355 | * @return The current connection state 356 | */ 357 | public ConnectionState getConnectionState() { 358 | return connectionState; 359 | } 360 | 361 | /** 362 | * Gets a summary of the client's current status for monitoring purposes. 363 | * This provides a snapshot of the client state including connection status, 364 | * event counts, and timing information. 365 | * 366 | * @return A string containing the status summary 367 | */ 368 | public String getStatusSummary() { 369 | StringBuilder statusBuilder = new StringBuilder(); 370 | statusBuilder.append("SSE Client Status: ") 371 | .append("State=").append(connectionState) 372 | .append(", URL=").append(source.getUri()) 373 | .append(", Events=").append(totalEventsReceived.get()) 374 | .append(", QueueSize=").append(queue.size()) 375 | .append(", LastEventAge=").append(getTimeSinceLastEvent()).append("ms") 376 | .append(", HasError=").append(hasError()); 377 | 378 | if (hasError() && error != null) { 379 | // Safe access to error properties to prevent NPEs 380 | String errorType = error.getClass().getSimpleName(); 381 | String errorMessage = error.getMessage() != null ? error.getMessage() : "No message"; 382 | statusBuilder.append(", ErrorType=").append(errorType) 383 | .append(", ErrorMsg=").append(errorMessage); 384 | } 385 | 386 | return statusBuilder.toString(); 387 | } 388 | 389 | /** 390 | * Logs the current status of the SSE client at the specified log level. 391 | * This is useful for periodically logging the client state for monitoring. 392 | * 393 | * @param useWarnLevel If true, logs at WARN level instead of INFO level 394 | */ 395 | public void logStatus(boolean useWarnLevel) { 396 | String status = getStatusSummary(); 397 | if (useWarnLevel) { 398 | log.warn(status); 399 | } else { 400 | log.info(status); 401 | } 402 | } 403 | 404 | /** 405 | * Returns the time (in milliseconds) since the last event was received. 406 | * 407 | * @return Time in milliseconds since last event 408 | */ 409 | public long getTimeSinceLastEvent() { 410 | return System.currentTimeMillis() - lastEventTimestamp; 411 | } 412 | 413 | // Default values for health check configuration 414 | private static final long DEFAULT_IDLE_TIMEOUT_MS = 60000; // 1 minute 415 | private static final long DEFAULT_CONNECTION_CHECK_INTERVAL_MS = 30000; // 30 seconds 416 | 417 | // Health check configuration fields 418 | private long idleTimeoutMs = DEFAULT_IDLE_TIMEOUT_MS; 419 | private long connectionCheckIntervalMs = DEFAULT_CONNECTION_CHECK_INTERVAL_MS; 420 | private volatile long lastConnectionCheckTimestamp = System.currentTimeMillis(); 421 | 422 | /** 423 | * Checks if the connection appears to be healthy. 424 | * A connection is considered unhealthy if: 425 | * 1. The connection state is not CONNECTED 426 | * 2. There's an error 427 | * 3. No events have been received within the idle timeout period 428 | * 429 | * @return true if the connection is healthy, false otherwise 430 | */ 431 | public boolean isConnectionHealthy() { 432 | // Check if current state is not CONNECTED 433 | if (connectionState != ConnectionState.CONNECTED) { 434 | log.warn("Connection is not in CONNECTED state. Current state: {}", connectionState); 435 | return false; 436 | } 437 | 438 | // Check if there's an error 439 | if (hasError()) { 440 | log.warn("Connection has an error: {}", error.getMessage()); 441 | return false; 442 | } 443 | 444 | // Check if we've exceeded the idle timeout 445 | long timeSinceLastEvent = getTimeSinceLastEvent(); 446 | if (timeSinceLastEvent > idleTimeoutMs) { 447 | log.warn("Connection appears to be stalled. No events received in {} ms", timeSinceLastEvent); 448 | return false; 449 | } 450 | 451 | return true; 452 | } 453 | 454 | /** 455 | * Sets the idle timeout in milliseconds. 456 | * If no events are received within this timeout, the connection is considered stalled. 457 | * 458 | * @param idleTimeoutMs the idle timeout in milliseconds 459 | */ 460 | public void setIdleTimeout(long idleTimeoutMs) { 461 | if (idleTimeoutMs <= 0) { 462 | throw new IllegalArgumentException("Idle timeout must be positive"); 463 | } 464 | this.idleTimeoutMs = idleTimeoutMs; 465 | log.info("Set SSE client idle timeout to {} ms", idleTimeoutMs); 466 | } 467 | 468 | /** 469 | * Sets how often the connection should be checked for health. 470 | * 471 | * @param connectionCheckIntervalMs the connection check interval in milliseconds 472 | */ 473 | public void setConnectionCheckInterval(long connectionCheckIntervalMs) { 474 | if (connectionCheckIntervalMs <= 0) { 475 | throw new IllegalArgumentException("Connection check interval must be positive"); 476 | } 477 | this.connectionCheckIntervalMs = connectionCheckIntervalMs; 478 | log.info("Set SSE client connection check interval to {} ms", connectionCheckIntervalMs); 479 | } 480 | 481 | // Method for testing 482 | BlockingQueue getQueueForTesting() { 483 | return queue; 484 | } 485 | 486 | 487 | public List getRecords() throws InterruptedException { 488 | // Perform connection health check at regular intervals 489 | long now = System.currentTimeMillis(); 490 | if (now - lastConnectionCheckTimestamp > connectionCheckIntervalMs) { 491 | performConnectionHealthCheck(); 492 | lastConnectionCheckTimestamp = now; 493 | } 494 | 495 | // Check connection state and log diagnostic information 496 | if (connectionState != ConnectionState.CONNECTED) { 497 | log.warn("Attempting to get records while connection state is: {}", connectionState); 498 | } 499 | 500 | // Log time since last event if it's been a while 501 | long timeSinceLastEvent = getTimeSinceLastEvent(); 502 | if (timeSinceLastEvent > 30000) { // 30 seconds 503 | log.warn("No events received in the last {} milliseconds, connection may be stalled", timeSinceLastEvent); 504 | } 505 | 506 | if (hasError()) { 507 | log.error("Error detected in SSE client, closing resources before propagating error"); 508 | closeResources(); 509 | throw new IllegalStateException("Error occurred while processing SSE events", error); 510 | } 511 | 512 | List records = new LinkedList<>(); 513 | 514 | log.debug("Polling for events with 1 second timeout. Queue size: {}", queue.size()); 515 | InboundSseEvent event = this.queue.poll(1L, TimeUnit.SECONDS); 516 | if (event == null) { 517 | if (log.isDebugEnabled()) { 518 | log.debug("Queue was empty after polling, returning empty list. Connection state: {}", connectionState); 519 | } 520 | return records; 521 | } 522 | 523 | if (event.getName() != null) { 524 | if (log.isDebugEnabled()) { 525 | log.debug("Adding event to records - ID: {}, Name: {}", event.getId(), event.getName()); 526 | } 527 | records.add(event); 528 | } else { 529 | log.warn("Received event with null name, skipping. Event ID: {}", event.getId()); 530 | } 531 | 532 | int drained = this.queue.drainTo(records); 533 | log.debug("Drained {} additional events from queue. Total records to return: {}", drained, records.size()); 534 | 535 | if (records.size() > 0) { 536 | // Get first and last event IDs safely to prevent NPEs 537 | String firstEventId = records.get(0).getId() != null ? records.get(0).getId() : ""; 538 | String lastEventId = records.get(records.size() - 1).getId() != null ? 539 | records.get(records.size() - 1).getId() : ""; 540 | 541 | log.debug("Returning {} records. First event ID: {}, Last event ID: {}", 542 | records.size(), firstEventId, lastEventId); 543 | } 544 | 545 | return records; 546 | } 547 | 548 | /** 549 | * Performs a health check on the connection and takes appropriate action if unhealthy. 550 | * This method is called periodically during the getRecords() calls. 551 | */ 552 | private void performConnectionHealthCheck() { 553 | log.debug("Performing connection health check"); 554 | 555 | // Skip health check if connection is already in a non-connected state 556 | if (connectionState != ConnectionState.CONNECTED) { 557 | log.debug("Skipping health check because connection state is: {}", connectionState); 558 | return; 559 | } 560 | 561 | // Check if the connection has exceeded the idle timeout 562 | long timeSinceLastEvent = getTimeSinceLastEvent(); 563 | if (timeSinceLastEvent > idleTimeoutMs) { 564 | log.warn("Connection stalled - no events received in {} ms (timeout: {} ms)", 565 | timeSinceLastEvent, idleTimeoutMs); 566 | 567 | // Log detailed diagnostics 568 | log.info("Connection diagnostics at timeout: URL={}, Total events={}, Queue size={}", 569 | source.getUri(), totalEventsReceived.get(), queue.size()); 570 | 571 | // The connection might be in a zombie state, attempt to reconnect 572 | attemptReconnection(); 573 | } else { 574 | // Only log healthy connection status at the INFO level occasionally 575 | if (timeSinceLastEvent > idleTimeoutMs / 2) { 576 | log.info("Connection health check passed but no events for a while: {} ms", timeSinceLastEvent); 577 | } else { 578 | log.debug("Connection health check passed: Last event {} ms ago", timeSinceLastEvent); 579 | } 580 | } 581 | } 582 | 583 | /** 584 | * Attempts to reconnect the SSE client when a connection issue is detected. 585 | * This method closes the existing connection and opens a new one. 586 | */ 587 | private void attemptReconnection() { 588 | // Check if we've exceeded max retry attempts 589 | if (retryMaxAttempts != -1 && currentRetryAttempt >= retryMaxAttempts) { 590 | log.error("Maximum retry attempts ({}) exceeded, giving up reconnection", retryMaxAttempts); 591 | setConnectionState(ConnectionState.FAILED); 592 | return; 593 | } 594 | 595 | currentRetryAttempt++; 596 | 597 | // Calculate exponential backoff delay 598 | long backoffMs = calculateBackoffDelay(currentRetryAttempt); 599 | log.info("Attempting reconnection #{} with {}ms backoff due to connection issue", currentRetryAttempt, backoffMs); 600 | 601 | try { 602 | // Apply exponential backoff delay 603 | if (backoffMs > 0) { 604 | Thread.sleep(backoffMs); 605 | } 606 | 607 | // Close existing connection 608 | stop(); 609 | 610 | // Clear any existing error 611 | error = null; 612 | 613 | // Attempt to establish a new connection 614 | start(); 615 | log.info("Successfully reconnected SSE client on attempt #{}", currentRetryAttempt); 616 | totalReconnections.incrementAndGet(); 617 | lastReconnectTime = System.currentTimeMillis(); 618 | // Reset retry counter on successful connection 619 | currentRetryAttempt = 0; 620 | } catch (InterruptedException e) { 621 | Thread.currentThread().interrupt(); 622 | log.warn("Reconnection attempt interrupted", e); 623 | setConnectionState(ConnectionState.FAILED); 624 | error = e; 625 | } catch (IOException e) { 626 | log.error("Failed to reconnect SSE client on attempt #{}: {}", currentRetryAttempt, e.getMessage(), e); 627 | setConnectionState(ConnectionState.FAILED); 628 | error = e; 629 | totalConnectionErrors.incrementAndGet(); 630 | 631 | // Check if this was a rate limiting error (HTTP 429) 632 | if (isRateLimitError(e)) { 633 | log.warn("Rate limit error detected, extending backoff time"); 634 | // Rate limit errors get longer backoffs 635 | currentRetryAttempt = Math.max(currentRetryAttempt, 3); 636 | } 637 | } 638 | } 639 | 640 | /** 641 | * Calculates the exponential backoff delay for the given retry attempt. 642 | * 643 | * @param attempt The retry attempt number (1-based) 644 | * @return The backoff delay in milliseconds 645 | */ 646 | private long calculateBackoffDelay(int attempt) { 647 | if (attempt <= 1) { 648 | return retryBackoffInitialMs; 649 | } 650 | 651 | // Exponential backoff: initial * 2^(attempt-1), capped at max 652 | long delay = retryBackoffInitialMs * (long) Math.pow(2, attempt - 1); 653 | return Math.min(delay, retryBackoffMaxMs); 654 | } 655 | 656 | /** 657 | * Checks if an exception indicates a rate limiting error. 658 | * 659 | * @param exception The exception to check 660 | * @return true if the exception indicates rate limiting 661 | */ 662 | private boolean isRateLimitError(Throwable exception) { 663 | if (exception == null) { 664 | return false; 665 | } 666 | 667 | String message = exception.getMessage(); 668 | if (message != null) { 669 | String lowerMessage = message.toLowerCase(); 670 | return lowerMessage.contains("429") || 671 | lowerMessage.contains("too many requests") || 672 | lowerMessage.contains("rate limit"); 673 | } 674 | 675 | return false; 676 | } 677 | 678 | /** 679 | * Processes an incoming SSE event. 680 | * Updates statistics and adds the event to the processing queue. 681 | * 682 | * @param event The incoming SSE event 683 | */ 684 | private void onMessage(InboundSseEvent event) { 685 | lastEventTimestamp = System.currentTimeMillis(); 686 | totalEventsReceived.incrementAndGet(); 687 | totalBytesReceived.addAndGet(event.readData() != null ? event.readData().length() : 0); 688 | 689 | // Update event type counter - but check for null event name first 690 | // ConcurrentHashMap doesn't allow null keys, so we need to handle this case 691 | String eventName = event.getName(); 692 | if (eventName != null) { 693 | eventTypeCounters.computeIfAbsent(eventName, k -> new AtomicLong(0)).incrementAndGet(); 694 | } else { 695 | log.debug("Received event with null name, not updating event type counters. Event ID: {}", event.getId()); 696 | } 697 | 698 | if (log.isDebugEnabled()) { 699 | log.debug("Received SSE event - ID: {}, Name: {}, Data length: {}", 700 | event.getId(), 701 | eventName, 702 | event.readData() != null ? event.readData().length() : 0); 703 | } 704 | 705 | if (totalEventsReceived.get() % 100 == 0) { 706 | log.info("Processed {} events total, current queue size: {}", 707 | totalEventsReceived.get(), queue.size()); 708 | } 709 | 710 | this.queue.add(event); 711 | updateMaxQueueSize(); 712 | } 713 | 714 | /** 715 | * Handles errors from the SSE connection. 716 | * Updates connection state and stores the error. 717 | * 718 | * @param error The error that occurred 719 | */ 720 | private void onError(Throwable error) { 721 | setConnectionState(ConnectionState.FAILED); 722 | // Safe extraction of error message to prevent NPE 723 | String errorMessage = error != null ? error.getMessage() : "Unknown error (null)"; 724 | log.error("Error in SSE connection: {}", errorMessage, error); 725 | 726 | // Log additional context information that might help diagnose the issue 727 | log.error("Connection diagnostic info: URL={}, Last event received={} ms ago, Total events received={}, Queue size={}", 728 | source.getUri(), 729 | getTimeSinceLastEvent(), 730 | totalEventsReceived.get(), 731 | queue.size()); 732 | 733 | this.error = error; 734 | totalConnectionErrors.incrementAndGet(); 735 | } 736 | 737 | /** 738 | * Checks if an error has occurred in the SSE client. 739 | * 740 | * @return true if an error has occurred, false otherwise 741 | */ 742 | private boolean hasError() { 743 | return error != null; 744 | } 745 | 746 | /** 747 | * Closes all resources associated with the SSE client due to an error. 748 | * This ensures clean shutdown when recovering from error conditions. 749 | */ 750 | private void closeResources() { 751 | log.info("Closing SSE client resources due to error condition: {}", 752 | error != null ? error.getClass().getSimpleName() : "unknown"); 753 | stop(); 754 | close(); 755 | log.info("SSE client resources successfully closed"); 756 | } 757 | 758 | /** 759 | * Returns a map of all metrics collected by this SSE client. 760 | * This is useful for monitoring the health and performance of the connector. 761 | * 762 | * @return Map of metric names to values 763 | */ 764 | public Map getMetrics() { 765 | Map metrics = new ConcurrentHashMap<>(); 766 | 767 | // Connection metrics 768 | metrics.put("connection.state", connectionState.toString()); 769 | metrics.put("connection.url", source.getUri().toString()); 770 | metrics.put("connection.attempts", totalConnectionAttempts.get()); 771 | metrics.put("connection.successful", totalSuccessfulConnections.get()); 772 | metrics.put("connection.failed", totalFailedConnections.get()); 773 | metrics.put("connection.errors", totalConnectionErrors.get()); 774 | metrics.put("connection.reconnections", totalReconnections.get()); 775 | metrics.put("connection.hasError", hasError()); 776 | 777 | if (hasError() && error != null) { 778 | // Safe access to error properties to prevent NPEs 779 | metrics.put("connection.errorType", error.getClass().getName()); 780 | // Handle potential null error message 781 | metrics.put("connection.errorMessage", error.getMessage() != null ? error.getMessage() : "No message"); 782 | } 783 | 784 | // Time-based metrics 785 | metrics.put("time.sinceLastEvent", getTimeSinceLastEvent()); 786 | metrics.put("time.uptime", connectionState == ConnectionState.CONNECTED ? 787 | (System.currentTimeMillis() - connectedSince) : 0); 788 | metrics.put("time.sinceLastReconnect", lastReconnectTime > 0 ? 789 | (System.currentTimeMillis() - lastReconnectTime) : -1); 790 | 791 | // Event metrics 792 | metrics.put("events.total", totalEventsReceived.get()); 793 | metrics.put("events.bytes", totalBytesReceived.get()); 794 | metrics.put("queue.size", queue.size()); 795 | metrics.put("queue.maxSize", maxQueueSize.get()); 796 | 797 | // Event type metrics 798 | Map eventTypes = new ConcurrentHashMap<>(); 799 | eventTypeCounters.forEach((type, count) -> eventTypes.put(type, count.get())); 800 | metrics.put("events.byType", eventTypes); 801 | 802 | return metrics; 803 | } 804 | 805 | /** 806 | * Returns a specific metric value by name. 807 | * 808 | * @param name The name of the metric to retrieve 809 | * @return The value of the metric, or null if the metric doesn't exist 810 | */ 811 | public Object getMetric(String name) { 812 | return getMetrics().get(name); 813 | } 814 | 815 | /** 816 | * Logs all metrics at the specified log level. 817 | * This is useful for periodic reporting of connector status. 818 | * 819 | * @param useWarnLevel If true, logs at WARN level, otherwise at INFO level 820 | */ 821 | public void logMetrics(boolean useWarnLevel) { 822 | Map metrics = getMetrics(); 823 | 824 | if (useWarnLevel) { 825 | log.warn("SSE Client Metrics: {}", metrics); 826 | } else { 827 | log.info("SSE Client Metrics: {}", metrics); 828 | } 829 | } 830 | 831 | /** 832 | * Updates the maximum queue size metric if the current queue size is larger. 833 | * Called internally when events are added to the queue. 834 | */ 835 | private void updateMaxQueueSize() { 836 | int currentSize = queue.size(); 837 | long currentMax = maxQueueSize.get(); 838 | 839 | if (currentSize > currentMax) { 840 | maxQueueSize.set(currentSize); 841 | } 842 | } 843 | } 844 | --------------------------------------------------------------------------------